| author    | Nowar Gu <nowar100@gmail.com>                    | 2011-07-01 23:28:45 +0800 |
|-----------|--------------------------------------------------|---------------------------|
| committer | Nowar Gu <nowar100@gmail.com>                    | 2011-07-01 23:37:27 +0800 |
| commit    | 53d48080e55bf0c99cb7ca9de5b15a084d7324b5 (patch) |                           |
| tree      | 98f4e257a61eebb14933d37ddc16678da0a7069d /lib    |                           |
| parent    | 039a79eb418211573bada57ec3a1edf5a9d6071e (diff)  |                           |
| parent    | ed5bc470aab7097c30e5f881158112f7830472f3 (diff)  |                           |
Merge upstream to r134237 on Friday, 1 July 2011.
Conflicts:
lib/Target/ARM/ARMCodeEmitter.cpp
Diffstat (limited to 'lib')
322 files changed, 7889 insertions, 7427 deletions
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index e57ba78..71e0a83 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -23,6 +23,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
   initializeAliasSetPrinterPass(Registry);
   initializeNoAAPass(Registry);
   initializeBasicAliasAnalysisPass(Registry);
+  initializeBlockFrequencyPass(Registry);
   initializeBranchProbabilityInfoPass(Registry);
   initializeCFGViewerPass(Registry);
   initializeCFGPrinterPass(Registry);
diff --git a/lib/Analysis/BlockFrequency.cpp b/lib/Analysis/BlockFrequency.cpp
new file mode 100644
index 0000000..4b86d1d
--- /dev/null
+++ b/lib/Analysis/BlockFrequency.cpp
@@ -0,0 +1,59 @@
+//=======-------- BlockFrequency.cpp - Block Frequency Analysis -------=======//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Loops should be simplified before this analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm/Analysis/BlockFrequencyImpl.h"
+#include "llvm/Analysis/BlockFrequency.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+
+using namespace llvm;
+
+INITIALIZE_PASS_BEGIN(BlockFrequency, "block-freq", "Block Frequency Analysis",
+                      true, true)
+INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo)
+INITIALIZE_PASS_END(BlockFrequency, "block-freq", "Block Frequency Analysis",
+                    true, true)
+
+char BlockFrequency::ID = 0;
+
+
+BlockFrequency::BlockFrequency() : FunctionPass(ID) {
+  initializeBlockFrequencyPass(*PassRegistry::getPassRegistry());
+  BFI = new BlockFrequencyImpl<BasicBlock, Function, BranchProbabilityInfo>();
+}
+
+BlockFrequency::~BlockFrequency() {
+  delete BFI;
+}
+
+void BlockFrequency::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<BranchProbabilityInfo>();
+  AU.setPreservesAll();
+}
+
+bool BlockFrequency::runOnFunction(Function &F) {
+  BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();
+  BFI->doFunction(&F, &BPI);
+  return false;
+}
+
+/// getblockFreq - Return block frequency. Never return 0, value must be
+/// positive. Please note that initial frequency is equal to 1024. It means that
+/// we should not rely on the value itself, but only on the comparison to the
+/// other block frequencies. We do this to avoid using of floating points.
+///
+uint32_t BlockFrequency::getBlockFreq(BasicBlock *BB) {
+  return BFI->getBlockFreq(BB);
+}
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index 1a975bf..ab846a2 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -6,6 +6,7 @@ add_llvm_library(LLVMAnalysis
   AliasSetTracker.cpp
   Analysis.cpp
   BasicAliasAnalysis.cpp
+  BlockFrequency.cpp
   BranchProbabilityInfo.cpp
   CFGPrinter.cpp
   CaptureTracking.cpp
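The new pass registers as "block-freq" and exposes a single query, getBlockFreq. The client pass below is not part of the commit; it is a minimal sketch, assuming only the API visible in the file above, of how another FunctionPass would consume the analysis (the pass name FreqPrinter is hypothetical):

// Hypothetical consumer of the new analysis -- not from this commit.
#include "llvm/Pass.h"
#include "llvm/Function.h"
#include "llvm/Analysis/BlockFrequency.h"

namespace {
  struct FreqPrinter : public llvm::FunctionPass {
    static char ID;
    FreqPrinter() : llvm::FunctionPass(ID) {}

    virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const {
      AU.addRequired<llvm::BlockFrequency>();  // schedules block-freq first
      AU.setPreservesAll();
    }

    virtual bool runOnFunction(llvm::Function &F) {
      llvm::BlockFrequency &BF = getAnalysis<llvm::BlockFrequency>();
      // Frequencies are scaled so the entry block is 1024; only the
      // relative order of two blocks is meaningful, never the raw value.
      for (llvm::Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
        (void)BF.getBlockFreq(BB);
      return false;
    }
  };
  char FreqPrinter::ID = 0;
}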
diff --git a/lib/Analysis/DIBuilder.cpp b/lib/Analysis/DIBuilder.cpp
index ef5d03a..6a02535 100644
--- a/lib/Analysis/DIBuilder.cpp
+++ b/lib/Analysis/DIBuilder.cpp
@@ -219,7 +219,7 @@ DIType DIBuilder::createInheritance(DIType Ty, DIType BaseTy,
 }
 
 /// createMemberType - Create debugging information entry for a member.
-DIType DIBuilder::createMemberType(StringRef Name,
+DIType DIBuilder::createMemberType(DIDescriptor Scope, StringRef Name,
                                    DIFile File, unsigned LineNumber,
                                    uint64_t SizeInBits, uint64_t AlignInBits,
                                    uint64_t OffsetInBits, unsigned Flags,
@@ -227,7 +227,7 @@ DIType DIBuilder::createMemberType(StringRef Name,
   // TAG_member is encoded in DIDerivedType format.
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_member),
-    File, // Or TheCU ? Ty ?
+    Scope,
     MDString::get(VMContext, Name),
     File,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index ba4419c..7a9dc0f 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -21,7 +21,6 @@
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/ADT/STLExtras.h"
@@ -39,15 +38,6 @@ INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
 INITIALIZE_PASS_END(IVUsers, "iv-users",
                     "Induction Variable Users", false, true)
 
-// IVUsers behavior currently depends on this temporary indvars mode. The
-// option must be defined upstream from its uses.
-namespace llvm {
-  bool DisableIVRewrite = false;
-}
-cl::opt<bool, true> DisableIVRewriteOpt(
-  "disable-iv-rewrite", cl::Hidden, cl::location(llvm::DisableIVRewrite),
-  cl::desc("Disable canonical induction variable rewriting"));
-
 Pass *llvm::createIVUsersPass() {
   return new IVUsers();
 }
@@ -100,11 +90,6 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
   if (Width > 64 || (TD && !TD->isLegalInteger(Width)))
     return false;
 
-  // We expect Sign/Zero extension to be eliminated from the IR before analyzing
-  // any downstream uses.
-  if (DisableIVRewrite && (isa<SExtInst>(I) || isa<ZExtInst>(I)))
-    return false;
-
   if (!Processed.insert(I))
     return true;    // Instruction already handled.
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 9d78f8b..8709f6b 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -2204,15 +2204,15 @@ Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal,
   if (TrueVal == FalseVal)
     return TrueVal;
 
-  if (isa<UndefValue>(TrueVal))   // select C, undef, X -> X
-    return FalseVal;
-  if (isa<UndefValue>(FalseVal))  // select C, X, undef -> X
-    return TrueVal;
   if (isa<UndefValue>(CondVal)) {  // select undef, X, Y -> X or Y
     if (isa<Constant>(TrueVal))
       return TrueVal;
     return FalseVal;
   }
+  if (isa<UndefValue>(TrueVal))   // select C, undef, X -> X
+    return FalseVal;
+  if (isa<UndefValue>(FalseVal))  // select C, X, undef -> X
+    return TrueVal;
 
   return 0;
 }
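The InstructionSimplify hunk only reorders checks, but the order is observable. The sketch below is not from the patch; it assumes the era's SimplifySelectInst(Cond, TrueVal, FalseVal, TD, DT) signature with the last two parameters optional, and shows the case the reordering changes:

// Not from the patch: why the check order matters (hypothetical helper).
#include "llvm/LLVMContext.h"
#include "llvm/Type.h"
#include "llvm/Constants.h"
#include "llvm/Analysis/InstructionSimplify.h"

llvm::Value *foldSelectUndefCond(llvm::Value *X) {
  llvm::LLVMContext &C = X->getContext();
  llvm::Value *UndefCond = llvm::UndefValue::get(llvm::Type::getInt1Ty(C));
  llvm::Value *UndefArm  = llvm::UndefValue::get(X->getType());
  // For "select undef, undef, %x" the old order hit the
  // "select C, undef, X -> X" rule first and returned %x; checking the
  // undef condition first now folds to the constant arm (undef) instead.
  return llvm::SimplifySelectInst(UndefCond, UndefArm, X, 0, 0);
}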
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 0549935..530a8bf 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -936,7 +936,8 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
   BasicBlock *Header = L->getHeader();
   Builder.SetInsertPoint(Header, Header->begin());
   pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
-  PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE), "lsr.iv");
+  PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE),
+                                  Twine(IVName) + ".iv");
   rememberInstruction(PN);
 
   // Create the step instructions and populate the PHI.
@@ -972,8 +973,8 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
       }
     } else {
       IncV = isNegative ?
-        Builder.CreateSub(PN, StepV, "lsr.iv.next") :
-        Builder.CreateAdd(PN, StepV, "lsr.iv.next");
+        Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
+        Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next");
       rememberInstruction(IncV);
     }
     PN->addIncoming(IncV, Pred);
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index dab5aeb..130e3ce 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -1783,3 +1783,19 @@ llvm::GetUnderlyingObject(Value *V, const TargetData *TD, unsigned MaxLookup) {
   }
   return V;
 }
+
+/// onlyUsedByLifetimeMarkers - Return true if the only users of this pointer
+/// are lifetime markers.
+///
+bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
+  for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
+       UI != UE; ++UI) {
+    const IntrinsicInst *II = dyn_cast<IntrinsicInst>(*UI);
+    if (!II) return false;
+
+    if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+        II->getIntrinsicID() != Intrinsic::lifetime_end)
+      return false;
+  }
+  return true;
+}
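A possible use of the new ValueTracking helper, not in the patch: the declaration is assumed to live in llvm/Analysis/ValueTracking.h (the header side of the change is outside this diff's 'lib' view), and the wrapper name is hypothetical.

// Illustrative only -- not in the patch. A cleanup pass could use the new
// helper to recognize an alloca whose only users are llvm.lifetime.start/end
// markers, meaning the slot is never actually read or written.
#include "llvm/Instructions.h"
#include "llvm/Analysis/ValueTracking.h"

static bool allocaIsOnlyLifetimeMarked(const llvm::AllocaInst *AI) {
  // Both the markers and the alloca itself are then safe to delete.
  return llvm::onlyUsedByLifetimeMarkers(AI);
}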
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index c23351b..6f45216 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -404,7 +404,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
       // Note register reference...
       const TargetRegisterClass *RC = NULL;
       if (i < MI->getDesc().getNumOperands())
-        RC = MI->getDesc().OpInfo[i].getRegClass(TRI);
+        RC = TII->getRegClass(MI->getDesc(), i, TRI);
       AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
       RegRefs.insert(std::make_pair(Reg, RR));
     }
@@ -479,7 +479,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
       // Note register reference...
       const TargetRegisterClass *RC = NULL;
       if (i < MI->getDesc().getNumOperands())
-        RC = MI->getDesc().OpInfo[i].getRegClass(TRI);
+        RC = TII->getRegClass(MI->getDesc(), i, TRI);
       AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
       RegRefs.insert(std::make_pair(Reg, RR));
     }
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index bfee679..2cdf272 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -584,6 +584,8 @@ static bool EmitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
     }
   } else if (MI->getOperand(0).isImm()) {
     OS << MI->getOperand(0).getImm();
+  } else if (MI->getOperand(0).isCImm()) {
+    MI->getOperand(0).getCImm()->getValue().print(OS, false /*isSigned*/);
   } else {
     assert(MI->getOperand(0).isReg() && "Unknown operand type");
     if (MI->getOperand(0).getReg() == 0) {
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index bff1a35..1fe035e 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -491,7 +491,7 @@ bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) {
 }
 
 /// addConstantValue - Add constant value entry in variable DIE.
-bool CompileUnit::addConstantValue(DIE *Die, ConstantInt *CI,
+bool CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI,
                                    bool Unsigned) {
   unsigned CIBitWidth = CI->getBitWidth();
   if (CIBitWidth <= 64) {
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 60a9b28..213c7fc 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -181,7 +181,7 @@ public:
   /// addConstantValue - Add constant value entry in variable DIE.
   bool addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty);
-  bool addConstantValue(DIE *Die, ConstantInt *CI, bool Unsigned);
+  bool addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned);
 
   /// addConstantFPValue - Add constant value entry in variable DIE.
   bool addConstantFPValue(DIE *Die, const MachineOperand &MO);
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 8845bfa..f85a82d 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -618,6 +618,21 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
   return ScopeDIE;
 }
 
+/// isUnsignedDIType - Return true if type encoding is unsigned.
+static bool isUnsignedDIType(DIType Ty) {
+  DIDerivedType DTy(Ty);
+  if (DTy.Verify())
+    return isUnsignedDIType(DTy.getTypeDerivedFrom());
+
+  DIBasicType BTy(Ty);
+  if (BTy.Verify()) {
+    unsigned Encoding = BTy.getEncoding();
+    if (Encoding == dwarf::DW_ATE_unsigned ||
+        Encoding == dwarf::DW_ATE_unsigned_char)
+      return true;
+  }
+  return false;
+}
 
 /// constructVariableDIE - Construct a DIE for the given DbgVariable.
 DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
@@ -718,6 +733,11 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
       else if (DVInsn->getOperand(0).isFPImm())
         updated =
           VariableCU->addConstantFPValue(VariableDie, DVInsn->getOperand(0));
+      else if (DVInsn->getOperand(0).isCImm())
+        updated =
+          VariableCU->addConstantValue(VariableDie,
+                                       DVInsn->getOperand(0).getCImm(),
+                                       isUnsignedDIType(DV->getType()));
     } else {
       VariableCU->addVariableAddress(DV, VariableDie,
                                      Asm->getDebugValueLocation(DVInsn));
@@ -913,22 +933,6 @@ CompileUnit *DwarfDebug::getCompileUnit(const MDNode *N) const {
   return I->second;
 }
 
-/// isUnsignedDIType - Return true if type encoding is unsigned.
-static bool isUnsignedDIType(DIType Ty) {
-  DIDerivedType DTy(Ty);
-  if (DTy.Verify())
-    return isUnsignedDIType(DTy.getTypeDerivedFrom());
-
-  DIBasicType BTy(Ty);
-  if (BTy.Verify()) {
-    unsigned Encoding = BTy.getEncoding();
-    if (Encoding == dwarf::DW_ATE_unsigned ||
-        Encoding == dwarf::DW_ATE_unsigned_char)
-      return true;
-  }
-  return false;
-}
-
 // Return const exprssion if value is a GEP to access merged global
 // constant. e.g.
 // i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0)
@@ -1017,7 +1021,7 @@ void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) {
     } else {
       TheCU->addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
     }
-  } else if (ConstantInt *CI =
+  } else if (const ConstantInt *CI =
              dyn_cast_or_null<ConstantInt>(GV.getConstant()))
     TheCU->addConstantValue(VariableDIE, CI, isUnsignedDIType(GTy));
   else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) {
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index d95f77e..4df7b46 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -421,10 +421,10 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I,
   for (; I != E; ++I) {
     if (I->isDebugValue())
       continue;
-    const TargetInstrDesc &TID = I->getDesc();
-    if (TID.isCall())
+    const MCInstrDesc &MCID = I->getDesc();
+    if (MCID.isCall())
       Time += 10;
-    else if (TID.mayLoad() || TID.mayStore())
+    else if (MCID.mayLoad() || MCID.mayStore())
       Time += 2;
     else
       ++Time;
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index aef4ff2..92319c8 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -59,7 +59,6 @@ add_llvm_library(LLVMCodeGen
   Passes.cpp
   PeepholeOptimizer.cpp
   PostRASchedulerList.cpp
-  PreAllocSplitting.cpp
   ProcessImplicitDefs.cpp
   PrologEpilogInserter.cpp
   PseudoSourceValue.cpp
@@ -79,7 +78,6 @@ add_llvm_library(LLVMCodeGen
   ScoreboardHazardRecognizer.cpp
   ShadowStackGC.cpp
   ShrinkWrapping.cpp
-  SimpleRegisterCoalescing.cpp
   SjLjEHPrepare.cpp
   SlotIndexes.cpp
   Spiller.cpp
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index 5d722ee..e6b3bbc 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -188,6 +188,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
 
 void VirtRegAuxInfo::CalculateRegClass(unsigned reg) {
   MachineRegisterInfo &MRI = MF.getRegInfo();
+  const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
   const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
   const TargetRegisterClass *OldRC = MRI.getRegClass(reg);
   const TargetRegisterClass *NewRC = TRI->getLargestLegalSuperClass(OldRC);
@@ -202,8 +203,11 @@ void VirtRegAuxInfo::CalculateRegClass(unsigned reg) {
     // TRI doesn't have accurate enough information to model this yet.
     if (I.getOperand().getSubReg())
       return;
+    // Inline asm instuctions don't remember their constraints.
+    if (I->isInlineAsm())
+      return;
     const TargetRegisterClass *OpRC =
-      I->getDesc().getRegClass(I.getOperandNo(), TRI);
+      TII->getRegClass(I->getDesc(), I.getOperandNo(), TRI);
     if (OpRC)
       NewRC = getCommonSubClass(NewRC, OpRC);
     if (!NewRC || NewRC == OldRC)
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index 515e6f9..489746c 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -37,13 +37,11 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeOptimizePHIsPass(Registry);
   initializePHIEliminationPass(Registry);
   initializePeepholeOptimizerPass(Registry);
-  initializePreAllocSplittingPass(Registry);
   initializeProcessImplicitDefsPass(Registry);
   initializePEIPass(Registry);
   initializeRALinScanPass(Registry);
-  initializeRegisterCoalescerAnalysisGroup(Registry);
+  initializeRegisterCoalescerPass(Registry);
   initializeRenderMachineFunctionPass(Registry);
-  initializeSimpleRegisterCoalescingPass(Registry);
   initializeSlotIndexesPass(Registry);
   initializeLoopSplitterPass(Registry);
   initializeStackProtectorPass(Registry);
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 4cac453..84c4d59 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -207,7 +207,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
 
     const TargetRegisterClass *NewRC = 0;
     if (i < MI->getDesc().getNumOperands())
-      NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);
+      NewRC = TII->getRegClass(MI->getDesc(), i, TRI);
 
     // For now, only allow the register to be changed if its register
     // class is consistent across all uses.
@@ -295,7 +295,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
 
     const TargetRegisterClass *NewRC = 0;
     if (i < MI->getDesc().getNumOperands())
-      NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);
+      NewRC = TII->getRegClass(MI->getDesc(), i, TRI);
 
     // For now, only allow the register to be changed if its register
     // class is consistent across all uses.
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index fdc1d91..6de6c0c 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -110,9 +110,14 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
           LivePhysRegs.set(Reg);
       }
 
-    // FIXME: Add live-ins from sucessors to LivePhysRegs. Normally, physregs
-    // are not live across blocks, but some targets (x86) can have flags live
-    // out of a block.
+    // Add live-ins from sucessors to LivePhysRegs. Normally, physregs are not
+    // live across blocks, but some targets (x86) can have flags live out of a
+    // block.
+    for (MachineBasicBlock::succ_iterator S = MBB->succ_begin(),
+           E = MBB->succ_end(); S != E; S++)
+      for (MachineBasicBlock::livein_iterator LI = (*S)->livein_begin();
+           LI != (*S)->livein_end(); LI++)
+        LivePhysRegs.set(*LI);
 
     // Now scan the instructions and delete dead ones, tracking physreg
     // liveness as we go.
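Several of the hunks above repeat one mechanical change: the operand's register class is no longer resolved from the descriptor table itself but through TargetInstrInfo. The helper below is hypothetical, but every call in it is taken from the hunks above; it just isolates the new idiom:

// Sketch of the new register-class lookup idiom -- the wrapper function is
// not part of this commit.
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Target/TargetInstrInfo.h"

static const llvm::TargetRegisterClass *
operandRegClass(const llvm::MachineInstr *MI, unsigned OpNum,
                const llvm::TargetInstrInfo *TII,
                const llvm::TargetRegisterInfo *TRI) {
  const llvm::MCInstrDesc &MCID = MI->getDesc();
  if (OpNum >= MCID.getNumOperands())
    return 0;  // implicit/variadic operands carry no static class
  // Old: MCID.OpInfo[OpNum].getRegClass(TRI). Routing the query through
  // TargetInstrInfo keeps the MC-layer operand tables target-independent.
  return TII->getRegClass(MCID, OpNum, TRI);
}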
diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp
index ebc2fc9..a67140e 100644
--- a/lib/CodeGen/ExpandISelPseudos.cpp
+++ b/lib/CodeGen/ExpandISelPseudos.cpp
@@ -62,8 +62,8 @@ bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) {
       MachineInstr *MI = MBBI++;
 
       // If MI is a pseudo, expand it.
-      const TargetInstrDesc &TID = MI->getDesc();
-      if (TID.usesCustomInsertionHook()) {
+      const MCInstrDesc &MCID = MI->getDesc();
+      if (MCID.usesCustomInsertionHook()) {
         Changed = true;
         MachineBasicBlock *NewMBB =
           TLI->EmitInstrWithCustomInserter(MI, MBB);
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 8b2c981..c918bf6 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -18,8 +18,8 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetInstrItineraries.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
@@ -651,12 +651,12 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
     if (I->isDebugValue())
       continue;
 
-    const TargetInstrDesc &TID = I->getDesc();
-    if (TID.isNotDuplicable())
+    const MCInstrDesc &MCID = I->getDesc();
+    if (MCID.isNotDuplicable())
       BBI.CannotBeCopied = true;
 
     bool isPredicated = TII->isPredicated(I);
-    bool isCondBr = BBI.IsBrAnalyzable && TID.isConditionalBranch();
+    bool isCondBr = BBI.IsBrAnalyzable && MCID.isConditionalBranch();
 
     if (!isCondBr) {
       if (!isPredicated) {
@@ -1414,9 +1414,9 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
   for (MachineBasicBlock::iterator I = FromBBI.BB->begin(),
          E = FromBBI.BB->end(); I != E; ++I) {
-    const TargetInstrDesc &TID = I->getDesc();
+    const MCInstrDesc &MCID = I->getDesc();
     // Do not copy the end of the block branches.
-    if (IgnoreBr && TID.isBranch())
+    if (IgnoreBr && MCID.isBranch())
       break;
 
     MachineInstr *MI = MF.CloneMachineInstr(I);
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index 19ae333..0273891 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -180,11 +180,7 @@ Spiller *createInlineSpiller(MachineFunctionPass &pass,
 /// isFullCopyOf - If MI is a COPY to or from Reg, return the other register,
 /// otherwise return 0.
 static unsigned isFullCopyOf(const MachineInstr *MI, unsigned Reg) {
-  if (!MI->isCopy())
-    return 0;
-  if (MI->getOperand(0).getSubReg() != 0)
-    return 0;
-  if (MI->getOperand(1).getSubReg() != 0)
+  if (!MI->isFullCopy())
     return 0;
   if (MI->getOperand(0).getReg() == Reg)
     return MI->getOperand(1).getReg();
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index c0f71d2..8f0fb46 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -22,7 +22,6 @@
 #include "llvm/MC/MCContext.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetInstrDesc.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Assembly/Writer.h"
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index f97ccf6..3a60a37 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -260,12 +260,12 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) {
     return false;
 
   // Ignore stuff that we obviously can't move.
-  const TargetInstrDesc &TID = MI->getDesc();
-  if (TID.mayStore() || TID.isCall() || TID.isTerminator() ||
+  const MCInstrDesc &MCID = MI->getDesc();
+  if (MCID.mayStore() || MCID.isCall() || MCID.isTerminator() ||
       MI->hasUnmodeledSideEffects())
     return false;
 
-  if (TID.mayLoad()) {
+  if (MCID.mayLoad()) {
    // Okay, this instruction does a load. As a refinement, we allow the target
    // to decide whether the loaded value is actually a constant. If so, we can
    // actually use it as a load.
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 50750a5..cd25156 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -152,10 +152,10 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
 /// of `new MachineInstr'.
 ///
 MachineInstr *
-MachineFunction::CreateMachineInstr(const TargetInstrDesc &TID,
+MachineFunction::CreateMachineInstr(const MCInstrDesc &MCID,
                                     DebugLoc DL, bool NoImp) {
   return new (InstructionRecycler.Allocate<MachineInstr>(Allocator))
-    MachineInstr(TID, DL, NoImp);
+    MachineInstr(MCID, DL, NoImp);
 }
 
 /// CloneMachineInstr - Create a new MachineInstr which is a copy of the
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 36b0b83..0995106 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -24,10 +24,10 @@
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetInstrDesc.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/DebugInfo.h"
@@ -267,6 +267,9 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
   case MachineOperand::MO_Immediate:
     OS << getImm();
     break;
+  case MachineOperand::MO_CImmediate:
+    getCImm()->getValue().print(OS, false);
+    break;
   case MachineOperand::MO_FPImmediate:
     if (getFPImm()->getType()->isFloatTy())
       OS << getFPImm()->getValueAPF().convertToFloat();
@@ -454,9 +457,9 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
 //===----------------------------------------------------------------------===//
 
 /// MachineInstr ctor - This constructor creates a dummy MachineInstr with
-/// TID NULL and no operands.
+/// MCID NULL and no operands.
 MachineInstr::MachineInstr()
-  : TID(0), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+  : MCID(0), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
     MemRefs(0), MemRefsEnd(0), Parent(0) {
   // Make sure that we get added to a machine basicblock
@@ -464,23 +467,23 @@ MachineInstr::MachineInstr()
 }
 
 void MachineInstr::addImplicitDefUseOperands() {
-  if (TID->ImplicitDefs)
-    for (const unsigned *ImpDefs = TID->ImplicitDefs; *ImpDefs; ++ImpDefs)
+  if (MCID->ImplicitDefs)
+    for (const unsigned *ImpDefs = MCID->ImplicitDefs; *ImpDefs; ++ImpDefs)
       addOperand(MachineOperand::CreateReg(*ImpDefs, true, true));
-  if (TID->ImplicitUses)
-    for (const unsigned *ImpUses = TID->ImplicitUses; *ImpUses; ++ImpUses)
+  if (MCID->ImplicitUses)
+    for (const unsigned *ImpUses = MCID->ImplicitUses; *ImpUses; ++ImpUses)
       addOperand(MachineOperand::CreateReg(*ImpUses, false, true));
 }
 
 /// MachineInstr ctor - This constructor creates a MachineInstr and adds the
 /// implicit operands. It reserves space for the number of operands specified by
-/// the TargetInstrDesc.
-MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp)
-  : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+/// the MCInstrDesc.
+MachineInstr::MachineInstr(const MCInstrDesc &tid, bool NoImp)
+  : MCID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
     MemRefs(0), MemRefsEnd(0), Parent(0) {
   if (!NoImp)
-    NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses();
-  Operands.reserve(NumImplicitOps + TID->getNumOperands());
+    NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
+  Operands.reserve(NumImplicitOps + MCID->getNumOperands());
   if (!NoImp)
     addImplicitDefUseOperands();
   // Make sure that we get added to a machine basicblock
@@ -488,13 +491,13 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp)
 }
 
 /// MachineInstr ctor - As above, but with a DebugLoc.
-MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl,
+MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl,
                            bool NoImp)
-  : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+  : MCID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
     MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) {
   if (!NoImp)
-    NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses();
-  Operands.reserve(NumImplicitOps + TID->getNumOperands());
+    NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
+  Operands.reserve(NumImplicitOps + MCID->getNumOperands());
   if (!NoImp)
     addImplicitDefUseOperands();
   // Make sure that we get added to a machine basicblock
@@ -504,12 +507,12 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl,
 /// MachineInstr ctor - Work exactly the same as the ctor two above, except
 /// that the MachineInstr is created and added to the end of the specified
 /// basic block.
-MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid)
-  : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+MachineInstr::MachineInstr(MachineBasicBlock *MBB, const MCInstrDesc &tid)
+  : MCID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
     MemRefs(0), MemRefsEnd(0), Parent(0) {
   assert(MBB && "Cannot use inserting ctor with null basic block!");
-  NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses();
-  Operands.reserve(NumImplicitOps + TID->getNumOperands());
+  NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
+  Operands.reserve(NumImplicitOps + MCID->getNumOperands());
   addImplicitDefUseOperands();
   // Make sure that we get added to a machine basicblock
   LeakDetector::addGarbageObject(this);
@@ -519,12 +522,12 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid)
 
 /// MachineInstr ctor - As above, but with a DebugLoc.
 ///
 MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
-                           const TargetInstrDesc &tid)
-  : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+                           const MCInstrDesc &tid)
+  : MCID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
     MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) {
   assert(MBB && "Cannot use inserting ctor with null basic block!");
-  NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses();
-  Operands.reserve(NumImplicitOps + TID->getNumOperands());
+  NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
+  Operands.reserve(NumImplicitOps + MCID->getNumOperands());
   addImplicitDefUseOperands();
   // Make sure that we get added to a machine basicblock
   LeakDetector::addGarbageObject(this);
@@ -534,7 +537,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
 /// MachineInstr ctor - Copies MachineInstr arg exactly
 ///
 MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
-  : TID(&MI.getDesc()), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+  : MCID(&MI.getDesc()), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
     MemRefs(MI.MemRefs), MemRefsEnd(MI.MemRefsEnd),
     Parent(0), debugLoc(MI.getDebugLoc()) {
   Operands.reserve(MI.getNumOperands());
@@ -621,7 +624,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
       Operands.back().AddRegOperandToRegInfo(RegInfo);
     // If the register operand is flagged as early, mark the operand as such
     unsigned OpNo = Operands.size() - 1;
-    if (TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1)
+    if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
       Operands[OpNo].setIsEarlyClobber(true);
   }
   return;
@@ -643,7 +646,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
   if (Operands[OpNo].isReg()) {
     Operands[OpNo].AddRegOperandToRegInfo(0);
     // If the register operand is flagged as early, mark the operand as such
-    if (TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1)
+    if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
       Operands[OpNo].setIsEarlyClobber(true);
   }
 
@@ -668,7 +671,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
   if (Operands[OpNo].isReg()) {
     Operands[OpNo].AddRegOperandToRegInfo(RegInfo);
     // If the register operand is flagged as early, mark the operand as such
-    if (TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1)
+    if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
       Operands[OpNo].setIsEarlyClobber(true);
   }
 
@@ -691,7 +694,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
   // If the register operand is flagged as early, mark the operand as such
   if (Operands[OpNo].isReg()
-      && TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1)
+      && MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
     Operands[OpNo].setIsEarlyClobber(true);
   }
 }
@@ -817,8 +820,8 @@ void MachineInstr::eraseFromParent() {
 /// OperandComplete - Return true if it's illegal to add a new operand
 ///
 bool MachineInstr::OperandsComplete() const {
-  unsigned short NumOperands = TID->getNumOperands();
-  if (!TID->isVariadic() && getNumOperands()-NumImplicitOps >= NumOperands)
+  unsigned short NumOperands = MCID->getNumOperands();
+  if (!MCID->isVariadic() && getNumOperands()-NumImplicitOps >= NumOperands)
     return true;  // Broken: we have all the operands of this instruction!
   return false;
 }
@@ -826,8 +829,8 @@ bool MachineInstr::OperandsComplete() const {
 /// getNumExplicitOperands - Returns the number of non-implicit operands.
 ///
 unsigned MachineInstr::getNumExplicitOperands() const {
-  unsigned NumOperands = TID->getNumOperands();
-  if (!TID->isVariadic())
+  unsigned NumOperands = MCID->getNumOperands();
+  if (!MCID->isVariadic())
     return NumOperands;
 
   for (unsigned i = NumOperands, e = getNumOperands(); i != e; ++i) {
@@ -928,10 +931,10 @@ MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead, bool Overlap,
 /// operand list that is used to represent the predicate. It returns -1 if
 /// none is found.
 int MachineInstr::findFirstPredOperandIdx() const {
-  const TargetInstrDesc &TID = getDesc();
-  if (TID.isPredicable()) {
+  const MCInstrDesc &MCID = getDesc();
+  if (MCID.isPredicable()) {
     for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
-      if (TID.OpInfo[i].isPredicate())
+      if (MCID.OpInfo[i].isPredicate())
         return i;
   }
 
@@ -987,11 +990,11 @@ isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const {
   }
   assert(getOperand(DefOpIdx).isDef() && "DefOpIdx is not a def!");
-  const TargetInstrDesc &TID = getDesc();
-  for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) {
+  const MCInstrDesc &MCID = getDesc();
+  for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
     const MachineOperand &MO = getOperand(i);
     if (MO.isReg() && MO.isUse() &&
-        TID.getOperandConstraint(i, TOI::TIED_TO) == (int)DefOpIdx) {
+        MCID.getOperandConstraint(i, MCOI::TIED_TO) == (int)DefOpIdx) {
       if (UseOpIdx)
         *UseOpIdx = (unsigned)i;
       return true;
@@ -1047,13 +1050,13 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const {
     return false;
   }
 
-  const TargetInstrDesc &TID = getDesc();
-  if (UseOpIdx >= TID.getNumOperands())
+  const MCInstrDesc &MCID = getDesc();
+  if (UseOpIdx >= MCID.getNumOperands())
     return false;
   const MachineOperand &MO = getOperand(UseOpIdx);
   if (!MO.isReg() || !MO.isUse())
     return false;
-  int DefIdx = TID.getOperandConstraint(UseOpIdx, TOI::TIED_TO);
+  int DefIdx = MCID.getOperandConstraint(UseOpIdx, MCOI::TIED_TO);
   if (DefIdx == -1)
     return false;
   if (DefOpIdx)
@@ -1093,11 +1096,11 @@ void MachineInstr::copyKillDeadInfo(const MachineInstr *MI) {
 
 /// copyPredicates - Copies predicate operand(s) from MI.
 void MachineInstr::copyPredicates(const MachineInstr *MI) {
-  const TargetInstrDesc &TID = MI->getDesc();
-  if (!TID.isPredicable())
+  const MCInstrDesc &MCID = MI->getDesc();
+  if (!MCID.isPredicable())
     return;
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-    if (TID.OpInfo[i].isPredicate()) {
+    if (MCID.OpInfo[i].isPredicate()) {
       // Predicated operands must be last operands.
       addOperand(MI->getOperand(i));
     }
@@ -1134,13 +1137,13 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
                                 AliasAnalysis *AA,
                                 bool &SawStore) const {
   // Ignore stuff that we obviously can't move.
-  if (TID->mayStore() || TID->isCall()) {
+  if (MCID->mayStore() || MCID->isCall()) {
     SawStore = true;
     return false;
   }
 
   if (isLabel() || isDebugValue() ||
-      TID->isTerminator() || hasUnmodeledSideEffects())
+      MCID->isTerminator() || hasUnmodeledSideEffects())
     return false;
 
   // See if this instruction does a load. If so, we have to guarantee that the
   // loaded value doesn't change between the load and the its intended
   // destination. The check for isInvariantLoad gives the targe the chance to
   // classify the load as always returning a constant, e.g. a constant pool
   // load.
-  if (TID->mayLoad() && !isInvariantLoad(AA))
+  if (MCID->mayLoad() && !isInvariantLoad(AA))
    // Otherwise, this is a real load. If there is a store between the load and
    // end of block, or if the load is volatile, we can't move it.
    return !SawStore && !hasVolatileMemoryRef();
@@ -1188,9 +1191,9 @@ bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII,
 /// have no volatile memory references.
 bool MachineInstr::hasVolatileMemoryRef() const {
   // An instruction known never to access memory won't have a volatile access.
-  if (!TID->mayStore() &&
-      !TID->mayLoad() &&
-      !TID->isCall() &&
+  if (!MCID->mayStore() &&
+      !MCID->mayLoad() &&
+      !MCID->isCall() &&
       !hasUnmodeledSideEffects())
     return false;
 
@@ -1214,7 +1217,7 @@ bool MachineInstr::hasVolatileMemoryRef() const {
 /// *all* loads the instruction does are invariant (if it does multiple loads).
 bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const {
   // If the instruction doesn't load at all, it isn't an invariant load.
-  if (!TID->mayLoad())
+  if (!MCID->mayLoad())
     return false;
 
   // If the instruction has lost its memoperands, conservatively assume that
@@ -1364,6 +1367,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
   // Print the rest of the operands.
   bool OmittedAnyCallClobbers = false;
   bool FirstOp = true;
+  unsigned AsmDescOp = ~0u;
+  unsigned AsmOpCount = 0;
 
   if (isInlineAsm()) {
     // Print asm string.
@@ -1377,7 +1382,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
     if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
       OS << " [alignstack]";
 
-    StartOp = InlineAsm::MIOp_FirstOperand;
+    StartOp = AsmDescOp = InlineAsm::MIOp_FirstOperand;
     FirstOp = false;
   }
 
@@ -1416,10 +1421,10 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
     if (FirstOp) FirstOp = false; else OS << ",";
     OS << " ";
     if (i < getDesc().NumOperands) {
-      const TargetOperandInfo &TOI = getDesc().OpInfo[i];
-      if (TOI.isPredicate())
+      const MCOperandInfo &MCOI = getDesc().OpInfo[i];
+      if (MCOI.isPredicate())
         OS << "pred:";
-      if (TOI.isOptionalDef())
+      if (MCOI.isOptionalDef())
         OS << "opt:";
     }
     if (isDebugValue() && MO.isMetadata()) {
@@ -1431,6 +1436,26 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
       MO.print(OS, TM);
     } else if (TM && (isInsertSubreg() || isRegSequence()) && MO.isImm()) {
       OS << TM->getRegisterInfo()->getSubRegIndexName(MO.getImm());
+    } else if (i == AsmDescOp && MO.isImm()) {
+      // Pretty print the inline asm operand descriptor.
+      OS << '$' << AsmOpCount++;
+      unsigned Flag = MO.getImm();
+      switch (InlineAsm::getKind(Flag)) {
+      case InlineAsm::Kind_RegUse:             OS << ":[reguse]"; break;
+      case InlineAsm::Kind_RegDef:             OS << ":[regdef]"; break;
+      case InlineAsm::Kind_RegDefEarlyClobber: OS << ":[regdef-ec]"; break;
+      case InlineAsm::Kind_Clobber:            OS << ":[clobber]"; break;
+      case InlineAsm::Kind_Imm:                OS << ":[imm]"; break;
+      case InlineAsm::Kind_Mem:                OS << ":[mem]"; break;
+      default: OS << ":[??" << InlineAsm::getKind(Flag) << ']'; break;
+      }
+
+      unsigned TiedTo = 0;
+      if (InlineAsm::isUseOperandTiedToDef(Flag, TiedTo))
+        OS << " [tiedto:$" << TiedTo << ']';
+
+      // Compute the index of the next operand descriptor.
+      AsmDescOp += 1 + InlineAsm::getNumOperandRegisters(Flag);
     } else
       MO.print(OS, TM);
   }
+ AsmDescOp += 1 + InlineAsm::getNumOperandRegisters(Flag); } else MO.print(OS, TM); } diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index b315702..722ceb2 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -28,10 +28,10 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetInstrItineraries.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/ADT/DenseMap.h" @@ -1018,9 +1018,9 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { /*UnfoldStore=*/false, &LoadRegIndex); if (NewOpc == 0) return 0; - const TargetInstrDesc &TID = TII->get(NewOpc); - if (TID.getNumDefs() != 1) return 0; - const TargetRegisterClass *RC = TID.OpInfo[LoadRegIndex].getRegClass(TRI); + const MCInstrDesc &MID = TII->get(NewOpc); + if (MID.getNumDefs() != 1) return 0; + const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI); // Ok, we're unfolding. Create a temporary register and do the unfold. unsigned Reg = MRI->createVirtualRegister(RC); diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index 08ff5bb..4b3e64c 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -20,7 +20,6 @@ using namespace llvm; MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) { VRegInfo.reserve(256); RegAllocHints.reserve(256); - RegClass2VRegMap = new std::vector<unsigned>[TRI.getNumRegClasses()]; UsedPhysRegs.resize(TRI.getNumRegs()); // Create the physreg use/def lists. @@ -38,25 +37,13 @@ MachineRegisterInfo::~MachineRegisterInfo() { "PhysRegUseDefLists has entries after all instructions are deleted"); #endif delete [] PhysRegUseDefLists; - delete [] RegClass2VRegMap; } /// setRegClass - Set the register class of the specified virtual register. /// void MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) { - const TargetRegisterClass *OldRC = VRegInfo[Reg].first; VRegInfo[Reg].first = RC; - - // Remove from old register class's vregs list. This may be slow but - // fortunately this operation is rarely needed. - std::vector<unsigned> &VRegs = RegClass2VRegMap[OldRC->getID()]; - std::vector<unsigned>::iterator I = - std::find(VRegs.begin(), VRegs.end(), Reg); - VRegs.erase(I); - - // Add to new register class's vregs list. - RegClass2VRegMap[RC->getID()].push_back(Reg); } const TargetRegisterClass * @@ -95,7 +82,6 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){ if (ArrayBase && &VRegInfo[FirstVirtReg] != ArrayBase) // The vector reallocated, handle this now. 
HandleVRegListReallocation(); - RegClass2VRegMap[RegClass->getID()].push_back(Reg); return Reg; } diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 471463b..7a55852 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -62,6 +62,7 @@ namespace { raw_ostream *OS; const MachineFunction *MF; const TargetMachine *TM; + const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; const MachineRegisterInfo *MRI; @@ -255,6 +256,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { this->MF = &MF; TM = &MF.getTarget(); + TII = TM->getInstrInfo(); TRI = TM->getRegisterInfo(); MRI = &MF.getRegInfo(); @@ -387,8 +389,6 @@ static bool matchPair(MachineBasicBlock::const_succ_iterator i, void MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { - const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); - // Count the number of landing pad successors. SmallPtrSet<MachineBasicBlock*, 4> LandingPadSuccs; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), @@ -541,19 +541,19 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { } void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { - const TargetInstrDesc &TI = MI->getDesc(); - if (MI->getNumOperands() < TI.getNumOperands()) { + const MCInstrDesc &MCID = MI->getDesc(); + if (MI->getNumOperands() < MCID.getNumOperands()) { report("Too few operands", MI); - *OS << TI.getNumOperands() << " operands expected, but " + *OS << MCID.getNumOperands() << " operands expected, but " << MI->getNumExplicitOperands() << " given.\n"; } // Check the MachineMemOperands for basic consistency. for (MachineInstr::mmo_iterator I = MI->memoperands_begin(), E = MI->memoperands_end(); I != E; ++I) { - if ((*I)->isLoad() && !TI.mayLoad()) + if ((*I)->isLoad() && !MCID.mayLoad()) report("Missing mayLoad flag", MI); - if ((*I)->isStore() && !TI.mayStore()) + if ((*I)->isStore() && !MCID.mayStore()) report("Missing mayStore flag", MI); } @@ -575,29 +575,30 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { void MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { const MachineInstr *MI = MO->getParent(); - const TargetInstrDesc &TI = MI->getDesc(); - const TargetOperandInfo &TOI = TI.OpInfo[MONum]; + const MCInstrDesc &MCID = MI->getDesc(); + const MCOperandInfo &MCOI = MCID.OpInfo[MONum]; - // The first TI.NumDefs operands must be explicit register defines - if (MONum < TI.getNumDefs()) { + // The first MCID.NumDefs operands must be explicit register defines + if (MONum < MCID.getNumDefs()) { if (!MO->isReg()) report("Explicit definition must be a register", MO, MONum); else if (!MO->isDef()) report("Explicit definition marked as use", MO, MONum); else if (MO->isImplicit()) report("Explicit definition marked as implicit", MO, MONum); - } else if (MONum < TI.getNumOperands()) { + } else if (MONum < MCID.getNumOperands()) { // Don't check if it's the last operand in a variadic instruction. See, // e.g., LDM_RET in the arm back end. - if (MO->isReg() && !(TI.isVariadic() && MONum == TI.getNumOperands()-1)) { - if (MO->isDef() && !TOI.isOptionalDef()) + if (MO->isReg() && + !(MCID.isVariadic() && MONum == MCID.getNumOperands()-1)) { + if (MO->isDef() && !MCOI.isOptionalDef()) report("Explicit operand marked as def", MO, MONum); if (MO->isImplicit()) report("Explicit operand marked as implicit", MO, MONum); } } else { // ARM adds %reg0 operands to indicate predicates. 
We'll allow that. - if (MO->isReg() && !MO->isImplicit() && !TI.isVariadic() && MO->getReg()) + if (MO->isReg() && !MO->isImplicit() && !MCID.isVariadic() && MO->getReg()) report("Extra explicit operand on non-variadic instruction", MO, MONum); } @@ -709,7 +710,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } // Check register classes. - if (MONum < TI.getNumOperands() && !MO->isImplicit()) { + if (MONum < MCID.getNumOperands() && !MO->isImplicit()) { unsigned SubIdx = MO->getSubReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { @@ -723,7 +724,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } sr = s; } - if (const TargetRegisterClass *DRC = TOI.getRegClass(TRI)) { + if (const TargetRegisterClass *DRC = TII->getRegClass(MCID,MONum,TRI)) { if (!DRC->contains(sr)) { report("Illegal physical register for instruction", MO, MONum); *OS << TRI->getName(sr) << " is not a " @@ -743,7 +744,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } RC = SRC; } - if (const TargetRegisterClass *DRC = TOI.getRegClass(TRI)) { + if (const TargetRegisterClass *DRC = TII->getRegClass(MCID,MONum,TRI)) { if (!RC->hasSuperClassEq(DRC)) { report("Illegal virtual register for instruction", MO, MONum); *OS << "Expected a " << DRC->getName() << " register, but got a " @@ -765,11 +766,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { LiveInts && !LiveInts->isNotInMIMap(MI)) { LiveInterval &LI = LiveStks->getInterval(MO->getIndex()); SlotIndex Idx = LiveInts->getInstructionIndex(MI); - if (TI.mayLoad() && !LI.liveAt(Idx.getUseIndex())) { + if (MCID.mayLoad() && !LI.liveAt(Idx.getUseIndex())) { report("Instruction loads from dead spill slot", MO, MONum); *OS << "Live stack: " << LI << '\n'; } - if (TI.mayStore() && !LI.liveAt(Idx.getDefIndex())) { + if (MCID.mayStore() && !LI.liveAt(Idx.getDefIndex())) { report("Instruction stores to dead spill slot", MO, MONum); *OS << "Live stack: " << LI << '\n'; } diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index c105bb0..c523e39 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -353,10 +353,10 @@ bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI, bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI, SmallSet<unsigned, 4> &ImmDefRegs, DenseMap<unsigned, MachineInstr*> &ImmDefMIs) { - const TargetInstrDesc &TID = MI->getDesc(); - if (!TID.isMoveImmediate()) + const MCInstrDesc &MCID = MI->getDesc(); + if (!MCID.isMoveImmediate()) return false; - if (TID.getNumDefs() != 1) + if (MCID.getNumDefs() != 1) return false; unsigned Reg = MI->getOperand(0).getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) { @@ -429,16 +429,16 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { continue; } - const TargetInstrDesc &TID = MI->getDesc(); + const MCInstrDesc &MCID = MI->getDesc(); - if (TID.isBitcast()) { + if (MCID.isBitcast()) { if (OptimizeBitcastInstr(MI, MBB)) { // MI is deleted. Changed = true; MII = First ? I->begin() : llvm::next(PMII); continue; } - } else if (TID.isCompare()) { + } else if (MCID.isCompare()) { if (OptimizeCmpInstr(MI, MBB)) { // MI is deleted. 
Changed = true; diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp deleted file mode 100644 index d6e31da..0000000 --- a/lib/CodeGen/PreAllocSplitting.cpp +++ /dev/null @@ -1,1430 +0,0 @@ -//===-- PreAllocSplitting.cpp - Pre-allocation Interval Spltting Pass. ----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the machine instruction level pre-register allocation -// live interval splitting pass. It finds live interval barriers, i.e. -// instructions which will kill all physical registers in certain register -// classes, and split all live intervals which cross the barrier. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "pre-alloc-split" -#include "VirtRegMap.h" -#include "llvm/CodeGen/CalcSpillWeights.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/LiveStackAnalysis.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/RegisterCoalescer.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" -using namespace llvm; - -static cl::opt<int> PreSplitLimit("pre-split-limit", cl::init(-1), cl::Hidden); -static cl::opt<int> DeadSplitLimit("dead-split-limit", cl::init(-1), - cl::Hidden); -static cl::opt<int> RestoreFoldLimit("restore-fold-limit", cl::init(-1), - cl::Hidden); - -STATISTIC(NumSplits, "Number of intervals split"); -STATISTIC(NumRemats, "Number of intervals split by rematerialization"); -STATISTIC(NumFolds, "Number of intervals split with spill folding"); -STATISTIC(NumRestoreFolds, "Number of intervals split with restore folding"); -STATISTIC(NumRenumbers, "Number of intervals renumbered into new registers"); -STATISTIC(NumDeadSpills, "Number of dead spills removed"); - -namespace { - class PreAllocSplitting : public MachineFunctionPass { - MachineFunction *CurrMF; - const TargetMachine *TM; - const TargetInstrInfo *TII; - const TargetRegisterInfo* TRI; - MachineFrameInfo *MFI; - MachineRegisterInfo *MRI; - SlotIndexes *SIs; - LiveIntervals *LIs; - LiveStacks *LSs; - VirtRegMap *VRM; - - // Barrier - Current barrier being processed. - MachineInstr *Barrier; - - // BarrierMBB - Basic block where the barrier resides in. - MachineBasicBlock *BarrierMBB; - - // Barrier - Current barrier index. - SlotIndex BarrierIdx; - - // CurrLI - Current live interval being split. - LiveInterval *CurrLI; - - // CurrSLI - Current stack slot live interval. - LiveInterval *CurrSLI; - - // CurrSValNo - Current val# for the stack slot live interval. - VNInfo *CurrSValNo; - - // IntervalSSMap - A map from live interval to spill slots. - DenseMap<unsigned, int> IntervalSSMap; - - // Def2SpillMap - A map from a def instruction index to spill index. 
- DenseMap<SlotIndex, SlotIndex> Def2SpillMap; - - public: - static char ID; - PreAllocSplitting() : MachineFunctionPass(ID) { - initializePreAllocSplittingPass(*PassRegistry::getPassRegistry()); - } - - virtual bool runOnMachineFunction(MachineFunction &MF); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addRequired<SlotIndexes>(); - AU.addPreserved<SlotIndexes>(); - AU.addRequired<LiveIntervals>(); - AU.addPreserved<LiveIntervals>(); - AU.addRequired<LiveStacks>(); - AU.addPreserved<LiveStacks>(); - AU.addPreserved<RegisterCoalescer>(); - AU.addPreserved<CalculateSpillWeights>(); - AU.addPreservedID(StrongPHIEliminationID); - AU.addPreservedID(PHIEliminationID); - AU.addRequired<MachineDominatorTree>(); - AU.addRequired<MachineLoopInfo>(); - AU.addRequired<VirtRegMap>(); - AU.addPreserved<MachineDominatorTree>(); - AU.addPreserved<MachineLoopInfo>(); - AU.addPreserved<VirtRegMap>(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - virtual void releaseMemory() { - IntervalSSMap.clear(); - Def2SpillMap.clear(); - } - - virtual const char *getPassName() const { - return "Pre-Register Allocaton Live Interval Splitting"; - } - - /// print - Implement the dump method. - virtual void print(raw_ostream &O, const Module* M = 0) const { - LIs->print(O, M); - } - - - private: - - MachineBasicBlock::iterator - findSpillPoint(MachineBasicBlock*, MachineInstr*, MachineInstr*, - SmallPtrSet<MachineInstr*, 4>&); - - MachineBasicBlock::iterator - findRestorePoint(MachineBasicBlock*, MachineInstr*, SlotIndex, - SmallPtrSet<MachineInstr*, 4>&); - - int CreateSpillStackSlot(unsigned, const TargetRegisterClass *); - - bool IsAvailableInStack(MachineBasicBlock*, unsigned, - SlotIndex, SlotIndex, - SlotIndex&, int&) const; - - void UpdateSpillSlotInterval(VNInfo*, SlotIndex, SlotIndex); - - bool SplitRegLiveInterval(LiveInterval*); - - bool SplitRegLiveIntervals(const TargetRegisterClass **, - SmallPtrSet<LiveInterval*, 8>&); - - bool createsNewJoin(LiveRange* LR, MachineBasicBlock* DefMBB, - MachineBasicBlock* BarrierMBB); - bool Rematerialize(unsigned vreg, VNInfo* ValNo, - MachineInstr* DefMI, - MachineBasicBlock::iterator RestorePt, - SmallPtrSet<MachineInstr*, 4>& RefsInMBB); - MachineInstr* FoldSpill(unsigned vreg, const TargetRegisterClass* RC, - MachineInstr* DefMI, - MachineInstr* Barrier, - MachineBasicBlock* MBB, - int& SS, - SmallPtrSet<MachineInstr*, 4>& RefsInMBB); - MachineInstr* FoldRestore(unsigned vreg, - const TargetRegisterClass* RC, - MachineInstr* Barrier, - MachineBasicBlock* MBB, - int SS, - SmallPtrSet<MachineInstr*, 4>& RefsInMBB); - void RenumberValno(VNInfo* VN); - void ReconstructLiveInterval(LiveInterval* LI); - bool removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split); - unsigned getNumberOfNonSpills(SmallPtrSet<MachineInstr*, 4>& MIs, - unsigned Reg, int FrameIndex, bool& TwoAddr); - VNInfo* PerformPHIConstruction(MachineBasicBlock::iterator Use, - MachineBasicBlock* MBB, LiveInterval* LI, - SmallPtrSet<MachineInstr*, 4>& Visited, - DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs, - DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses, - DenseMap<MachineInstr*, VNInfo*>& NewVNs, - DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut, - DenseMap<MachineBasicBlock*, VNInfo*>& Phis, - bool IsTopLevel, bool IsIntraBlock); - VNInfo* PerformPHIConstructionFallBack(MachineBasicBlock::iterator Use, - MachineBasicBlock* MBB, LiveInterval* LI, - SmallPtrSet<MachineInstr*, 4>& Visited, - 
DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs, - DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses, - DenseMap<MachineInstr*, VNInfo*>& NewVNs, - DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut, - DenseMap<MachineBasicBlock*, VNInfo*>& Phis, - bool IsTopLevel, bool IsIntraBlock); -}; -} // end anonymous namespace - -char PreAllocSplitting::ID = 0; - -INITIALIZE_PASS_BEGIN(PreAllocSplitting, "pre-alloc-splitting", - "Pre-Register Allocation Live Interval Splitting", - false, false) -INITIALIZE_PASS_DEPENDENCY(SlotIndexes) -INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_DEPENDENCY(LiveStacks) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_DEPENDENCY(VirtRegMap) -INITIALIZE_PASS_END(PreAllocSplitting, "pre-alloc-splitting", - "Pre-Register Allocation Live Interval Splitting", - false, false) - -char &llvm::PreAllocSplittingID = PreAllocSplitting::ID; - -/// findSpillPoint - Find a gap as far away from the given MI that's suitable -/// for spilling the current live interval. The index must be before any -/// defs and uses of the live interval register in the mbb. Return begin() if -/// none is found. -MachineBasicBlock::iterator -PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI, - MachineInstr *DefMI, - SmallPtrSet<MachineInstr*, 4> &RefsInMBB) { - MachineBasicBlock::iterator Pt = MBB->begin(); - - MachineBasicBlock::iterator MII = MI; - MachineBasicBlock::iterator EndPt = DefMI - ? MachineBasicBlock::iterator(DefMI) : MBB->begin(); - - while (MII != EndPt && !RefsInMBB.count(MII) && - MII->getOpcode() != TRI->getCallFrameSetupOpcode()) - --MII; - if (MII == EndPt || RefsInMBB.count(MII)) return Pt; - - while (MII != EndPt && !RefsInMBB.count(MII)) { - // We can't insert the spill between the barrier (a call), and its - // corresponding call frame setup. - if (MII->getOpcode() == TRI->getCallFrameDestroyOpcode()) { - while (MII->getOpcode() != TRI->getCallFrameSetupOpcode()) { - --MII; - if (MII == EndPt) { - return Pt; - } - } - continue; - } else { - Pt = MII; - } - - if (RefsInMBB.count(MII)) - return Pt; - - - --MII; - } - - return Pt; -} - -/// findRestorePoint - Find a gap in the instruction index map that's suitable -/// for restoring the current live interval value. The index must be before any -/// uses of the live interval register in the mbb. Return end() if none is -/// found. -MachineBasicBlock::iterator -PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI, - SlotIndex LastIdx, - SmallPtrSet<MachineInstr*, 4> &RefsInMBB) { - // FIXME: Allow spill to be inserted to the beginning of the mbb. Update mbb - // begin index accordingly. - MachineBasicBlock::iterator Pt = MBB->end(); - MachineBasicBlock::iterator EndPt = MBB->getFirstTerminator(); - - // We start at the call, so walk forward until we find the call frame teardown - // since we can't insert restores before that. Bail if we encounter a use - // during this time. - MachineBasicBlock::iterator MII = MI; - if (MII == EndPt) return Pt; - - while (MII != EndPt && !RefsInMBB.count(MII) && - MII->getOpcode() != TRI->getCallFrameDestroyOpcode()) - ++MII; - if (MII == EndPt || RefsInMBB.count(MII)) return Pt; - ++MII; - - // FIXME: Limit the number of instructions to examine to reduce - // compile time? 
- while (MII != EndPt) { - SlotIndex Index = LIs->getInstructionIndex(MII); - if (Index > LastIdx) - break; - - // We can't insert a restore between the barrier (a call) and its - // corresponding call frame teardown. - if (MII->getOpcode() == TRI->getCallFrameSetupOpcode()) { - do { - if (MII == EndPt || RefsInMBB.count(MII)) return Pt; - ++MII; - } while (MII->getOpcode() != TRI->getCallFrameDestroyOpcode()); - } else { - Pt = MII; - } - - if (RefsInMBB.count(MII)) - return Pt; - - ++MII; - } - - return Pt; -} - -/// CreateSpillStackSlot - Create a stack slot for the live interval being -/// split. If the live interval was previously split, just reuse the same -/// slot. -int PreAllocSplitting::CreateSpillStackSlot(unsigned Reg, - const TargetRegisterClass *RC) { - int SS; - DenseMap<unsigned, int>::iterator I = IntervalSSMap.find(Reg); - if (I != IntervalSSMap.end()) { - SS = I->second; - } else { - SS = MFI->CreateSpillStackObject(RC->getSize(), RC->getAlignment()); - IntervalSSMap[Reg] = SS; - } - - // Create live interval for stack slot. - CurrSLI = &LSs->getOrCreateInterval(SS, RC); - if (CurrSLI->hasAtLeastOneValue()) - CurrSValNo = CurrSLI->getValNumInfo(0); - else - CurrSValNo = CurrSLI->getNextValue(SlotIndex(), 0, - LSs->getVNInfoAllocator()); - return SS; -} - -/// IsAvailableInStack - Return true if register is available in a split stack -/// slot at the specified index. -bool -PreAllocSplitting::IsAvailableInStack(MachineBasicBlock *DefMBB, - unsigned Reg, SlotIndex DefIndex, - SlotIndex RestoreIndex, - SlotIndex &SpillIndex, - int& SS) const { - if (!DefMBB) - return false; - - DenseMap<unsigned, int>::const_iterator I = IntervalSSMap.find(Reg); - if (I == IntervalSSMap.end()) - return false; - DenseMap<SlotIndex, SlotIndex>::const_iterator - II = Def2SpillMap.find(DefIndex); - if (II == Def2SpillMap.end()) - return false; - - // If last spill of def is in the same mbb as barrier mbb (where restore will - // be), make sure it's not below the intended restore index. - // FIXME: Undo the previous spill? - assert(LIs->getMBBFromIndex(II->second) == DefMBB); - if (DefMBB == BarrierMBB && II->second >= RestoreIndex) - return false; - - SS = I->second; - SpillIndex = II->second; - return true; -} - -/// UpdateSpillSlotInterval - Given the specified val# of the register live -/// interval being split, and the spill and restore indicies, update the live -/// interval of the spill stack slot. -void -PreAllocSplitting::UpdateSpillSlotInterval(VNInfo *ValNo, SlotIndex SpillIndex, - SlotIndex RestoreIndex) { - assert(LIs->getMBBFromIndex(RestoreIndex) == BarrierMBB && - "Expect restore in the barrier mbb"); - - MachineBasicBlock *MBB = LIs->getMBBFromIndex(SpillIndex); - if (MBB == BarrierMBB) { - // Intra-block spill + restore. We are done. - LiveRange SLR(SpillIndex, RestoreIndex, CurrSValNo); - CurrSLI->addRange(SLR); - return; - } - - SmallPtrSet<MachineBasicBlock*, 4> Processed; - SlotIndex EndIdx = LIs->getMBBEndIdx(MBB); - LiveRange SLR(SpillIndex, EndIdx, CurrSValNo); - CurrSLI->addRange(SLR); - Processed.insert(MBB); - - // Start from the spill mbb, figure out the extend of the spill slot's - // live interval. - SmallVector<MachineBasicBlock*, 4> WorkList; - const LiveRange *LR = CurrLI->getLiveRangeContaining(SpillIndex); - if (LR->end > EndIdx) - // If live range extend beyond end of mbb, add successors to work list. 
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) - WorkList.push_back(*SI); - - while (!WorkList.empty()) { - MachineBasicBlock *MBB = WorkList.back(); - WorkList.pop_back(); - if (Processed.count(MBB)) - continue; - SlotIndex Idx = LIs->getMBBStartIdx(MBB); - LR = CurrLI->getLiveRangeContaining(Idx); - if (LR && LR->valno == ValNo) { - EndIdx = LIs->getMBBEndIdx(MBB); - if (Idx <= RestoreIndex && RestoreIndex < EndIdx) { - // Spill slot live interval stops at the restore. - LiveRange SLR(Idx, RestoreIndex, CurrSValNo); - CurrSLI->addRange(SLR); - } else if (LR->end > EndIdx) { - // Live range extends beyond end of mbb, process successors. - LiveRange SLR(Idx, EndIdx.getNextIndex(), CurrSValNo); - CurrSLI->addRange(SLR); - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) - WorkList.push_back(*SI); - } else { - LiveRange SLR(Idx, LR->end, CurrSValNo); - CurrSLI->addRange(SLR); - } - Processed.insert(MBB); - } - } -} - -/// PerformPHIConstruction - From properly set up use and def lists, use a PHI -/// construction algorithm to compute the ranges and valnos for an interval. -VNInfo* -PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI, - MachineBasicBlock* MBB, LiveInterval* LI, - SmallPtrSet<MachineInstr*, 4>& Visited, - DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs, - DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses, - DenseMap<MachineInstr*, VNInfo*>& NewVNs, - DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut, - DenseMap<MachineBasicBlock*, VNInfo*>& Phis, - bool IsTopLevel, bool IsIntraBlock) { - // Return memoized result if it's available. - if (IsTopLevel && Visited.count(UseI) && NewVNs.count(UseI)) - return NewVNs[UseI]; - else if (!IsTopLevel && IsIntraBlock && NewVNs.count(UseI)) - return NewVNs[UseI]; - else if (!IsIntraBlock && LiveOut.count(MBB)) - return LiveOut[MBB]; - - // Check if our block contains any uses or defs. - bool ContainsDefs = Defs.count(MBB); - bool ContainsUses = Uses.count(MBB); - - VNInfo* RetVNI = 0; - - // Enumerate the cases of use/def contaning blocks. - if (!ContainsDefs && !ContainsUses) { - return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs, Uses, - NewVNs, LiveOut, Phis, - IsTopLevel, IsIntraBlock); - } else if (ContainsDefs && !ContainsUses) { - SmallPtrSet<MachineInstr*, 2>& BlockDefs = Defs[MBB]; - - // Search for the def in this block. If we don't find it before the - // instruction we care about, go to the fallback case. Note that that - // should never happen: this cannot be intrablock, so use should - // always be an end() iterator. - assert(UseI == MBB->end() && "No use marked in intrablock"); - - MachineBasicBlock::iterator Walker = UseI; - --Walker; - while (Walker != MBB->begin()) { - if (BlockDefs.count(Walker)) - break; - --Walker; - } - - // Once we've found it, extend its VNInfo to our instruction. - SlotIndex DefIndex = LIs->getInstructionIndex(Walker); - DefIndex = DefIndex.getDefIndex(); - SlotIndex EndIndex = LIs->getMBBEndIdx(MBB); - - RetVNI = NewVNs[Walker]; - LI->addRange(LiveRange(DefIndex, EndIndex, RetVNI)); - } else if (!ContainsDefs && ContainsUses) { - SmallPtrSet<MachineInstr*, 2>& BlockUses = Uses[MBB]; - - // Search for the use in this block that precedes the instruction we care - // about, going to the fallback case if we don't find it. 
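UpdateSpillSlotInterval above extends the stack slot's live range block by block with a worklist plus a Processed set. The same shape, standalone and simplified (the real code also stops the range at the restore point and at live-range boundaries):

```cpp
#include <set>
#include <vector>

// succ[b] lists the successors of block b; liveIn says whether the split
// value is still live on entry to a block. Starting from the spill block,
// visit every block the value stays live through, exactly once.
std::set<int> blocksLiveThrough(const std::vector<std::vector<int>> &succ,
                                int spillBlock, const std::set<int> &liveIn) {
  std::set<int> processed{spillBlock};
  std::vector<int> worklist(succ[spillBlock].begin(),
                            succ[spillBlock].end());
  while (!worklist.empty()) {
    int b = worklist.back();
    worklist.pop_back();
    if (processed.count(b) || !liveIn.count(b))
      continue;                  // seen already, or value not live into b
    processed.insert(b);
    for (int s : succ[b])        // range still extends past this block:
      worklist.push_back(s);     // keep following the CFG
  }
  return processed;
}
```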
- MachineBasicBlock::iterator Walker = UseI; - bool found = false; - while (Walker != MBB->begin()) { - --Walker; - if (BlockUses.count(Walker)) { - found = true; - break; - } - } - - if (!found) - return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs, - Uses, NewVNs, LiveOut, Phis, - IsTopLevel, IsIntraBlock); - - SlotIndex UseIndex = LIs->getInstructionIndex(Walker); - UseIndex = UseIndex.getUseIndex(); - SlotIndex EndIndex; - if (IsIntraBlock) { - EndIndex = LIs->getInstructionIndex(UseI).getDefIndex(); - } else - EndIndex = LIs->getMBBEndIdx(MBB); - - // Now, recursively phi construct the VNInfo for the use we found, - // and then extend it to include the instruction we care about - RetVNI = PerformPHIConstruction(Walker, MBB, LI, Visited, Defs, Uses, - NewVNs, LiveOut, Phis, false, true); - - LI->addRange(LiveRange(UseIndex, EndIndex, RetVNI)); - - // FIXME: Need to set kills properly for inter-block stuff. - } else if (ContainsDefs && ContainsUses) { - SmallPtrSet<MachineInstr*, 2>& BlockDefs = Defs[MBB]; - SmallPtrSet<MachineInstr*, 2>& BlockUses = Uses[MBB]; - - // This case is basically a merging of the two preceding case, with the - // special note that checking for defs must take precedence over checking - // for uses, because of two-address instructions. - MachineBasicBlock::iterator Walker = UseI; - bool foundDef = false; - bool foundUse = false; - while (Walker != MBB->begin()) { - --Walker; - if (BlockDefs.count(Walker)) { - foundDef = true; - break; - } else if (BlockUses.count(Walker)) { - foundUse = true; - break; - } - } - - if (!foundDef && !foundUse) - return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs, - Uses, NewVNs, LiveOut, Phis, - IsTopLevel, IsIntraBlock); - - SlotIndex StartIndex = LIs->getInstructionIndex(Walker); - StartIndex = foundDef ? StartIndex.getDefIndex() : StartIndex.getUseIndex(); - SlotIndex EndIndex; - if (IsIntraBlock) { - EndIndex = LIs->getInstructionIndex(UseI).getDefIndex(); - } else - EndIndex = LIs->getMBBEndIdx(MBB); - - if (foundDef) - RetVNI = NewVNs[Walker]; - else - RetVNI = PerformPHIConstruction(Walker, MBB, LI, Visited, Defs, Uses, - NewVNs, LiveOut, Phis, false, true); - - LI->addRange(LiveRange(StartIndex, EndIndex, RetVNI)); - } - - // Memoize results so we don't have to recompute them. - if (!IsIntraBlock) LiveOut[MBB] = RetVNI; - else { - if (!NewVNs.count(UseI)) - NewVNs[UseI] = RetVNI; - Visited.insert(UseI); - } - - return RetVNI; -} - -/// PerformPHIConstructionFallBack - PerformPHIConstruction fall back path. -/// -VNInfo* -PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator UseI, - MachineBasicBlock* MBB, LiveInterval* LI, - SmallPtrSet<MachineInstr*, 4>& Visited, - DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs, - DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses, - DenseMap<MachineInstr*, VNInfo*>& NewVNs, - DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut, - DenseMap<MachineBasicBlock*, VNInfo*>& Phis, - bool IsTopLevel, bool IsIntraBlock) { - // NOTE: Because this is the fallback case from other cases, we do NOT - // assume that we are not intrablock here. 
- if (Phis.count(MBB)) return Phis[MBB]; - - SlotIndex StartIndex = LIs->getMBBStartIdx(MBB); - VNInfo *RetVNI = Phis[MBB] = - LI->getNextValue(SlotIndex(), /*FIXME*/ 0, - LIs->getVNInfoAllocator()); - - if (!IsIntraBlock) LiveOut[MBB] = RetVNI; - - // If there are no uses or defs between our starting point and the - // beginning of the block, then recursive perform phi construction - // on our predecessors. - DenseMap<MachineBasicBlock*, VNInfo*> IncomingVNs; - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - VNInfo* Incoming = PerformPHIConstruction((*PI)->end(), *PI, LI, - Visited, Defs, Uses, NewVNs, - LiveOut, Phis, false, false); - if (Incoming != 0) - IncomingVNs[*PI] = Incoming; - } - - if (MBB->pred_size() == 1 && !RetVNI->hasPHIKill()) { - VNInfo* OldVN = RetVNI; - VNInfo* NewVN = IncomingVNs.begin()->second; - VNInfo* MergedVN = LI->MergeValueNumberInto(OldVN, NewVN); - if (MergedVN == OldVN) std::swap(OldVN, NewVN); - - for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator LOI = LiveOut.begin(), - LOE = LiveOut.end(); LOI != LOE; ++LOI) - if (LOI->second == OldVN) - LOI->second = MergedVN; - for (DenseMap<MachineInstr*, VNInfo*>::iterator NVI = NewVNs.begin(), - NVE = NewVNs.end(); NVI != NVE; ++NVI) - if (NVI->second == OldVN) - NVI->second = MergedVN; - for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator PI = Phis.begin(), - PE = Phis.end(); PI != PE; ++PI) - if (PI->second == OldVN) - PI->second = MergedVN; - RetVNI = MergedVN; - } else { - // Otherwise, merge the incoming VNInfos with a phi join. Create a new - // VNInfo to represent the joined value. - for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator I = - IncomingVNs.begin(), E = IncomingVNs.end(); I != E; ++I) { - I->second->setHasPHIKill(true); - } - } - - SlotIndex EndIndex; - if (IsIntraBlock) { - EndIndex = LIs->getInstructionIndex(UseI).getDefIndex(); - } else - EndIndex = LIs->getMBBEndIdx(MBB); - LI->addRange(LiveRange(StartIndex, EndIndex, RetVNI)); - - // Memoize results so we don't have to recompute them. - if (!IsIntraBlock) - LiveOut[MBB] = RetVNI; - else { - if (!NewVNs.count(UseI)) - NewVNs[UseI] = RetVNI; - Visited.insert(UseI); - } - - return RetVNI; -} - -/// ReconstructLiveInterval - Recompute a live interval from scratch. -void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) { - VNInfo::Allocator& Alloc = LIs->getVNInfoAllocator(); - - // Clear the old ranges and valnos; - LI->clear(); - - // Cache the uses and defs of the register - typedef DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> > RegMap; - RegMap Defs, Uses; - - // Keep track of the new VNs we're creating. - DenseMap<MachineInstr*, VNInfo*> NewVNs; - SmallPtrSet<VNInfo*, 2> PhiVNs; - - // Cache defs, and create a new VNInfo for each def. - for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(LI->reg), - DE = MRI->def_end(); DI != DE; ++DI) { - Defs[(*DI).getParent()].insert(&*DI); - - SlotIndex DefIdx = LIs->getInstructionIndex(&*DI); - DefIdx = DefIdx.getDefIndex(); - - assert(!DI->isPHI() && "PHI instr in code during pre-alloc splitting."); - VNInfo* NewVN = LI->getNextValue(DefIdx, 0, Alloc); - - // If the def is a move, set the copy field. - if (DI->isCopyLike() && DI->getOperand(0).getReg() == LI->reg) - NewVN->setCopy(&*DI); - - NewVNs[&*DI] = NewVN; - } - - // Cache uses as a separate pass from actually processing them. 
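The fallback path above is the heart of the reconstruction: hand out a placeholder value per block before recursing (so CFG cycles terminate), then collapse the placeholder when the block turns out to have a single predecessor, rewriting every memoized reference to it. A much-simplified standalone model, with plain ints standing in for VNInfo* and one memo map standing in for LiveOut/NewVNs/Phis:

```cpp
#include <map>
#include <vector>

struct PhiBuilder {
  std::vector<std::vector<int>> preds; // preds[b]: predecessors of block b
  std::vector<int> defs;               // defs[b]: value defined in b, or -1
  std::map<int, int> liveOut;          // memo: block -> live-out value id
  int nextPhi = 1000;                  // phi ids, assumed disjoint from defs

  int valueOut(int b) {
    auto it = liveOut.find(b);
    if (it != liveOut.end())
      return it->second;               // memoized; also breaks CFG cycles
    if (defs[b] != -1)
      return liveOut[b] = defs[b];     // block defines the value itself
    int phi = nextPhi++;               // placeholder, recorded *before*
    liveOut[b] = phi;                  // recursing (like Phis[MBB] above)
    std::vector<int> in;
    for (int p : preds[b])
      in.push_back(valueOut(p));
    if (in.size() == 1 && in[0] != phi) {
      // Single predecessor: no phi needed. Redirect every memoized entry
      // from the placeholder to the survivor (MergeValueNumberInto's job).
      for (auto &kv : liveOut)
        if (kv.second == phi)
          kv.second = in[0];
      return in[0];
    }
    return phi;                        // genuine join point
  }
};
```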
- for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(LI->reg), - UE = MRI->use_end(); UI != UE; ++UI) - Uses[(*UI).getParent()].insert(&*UI); - - // Now, actually process every use and use a phi construction algorithm - // to walk from it to its reaching definitions, building VNInfos along - // the way. - DenseMap<MachineBasicBlock*, VNInfo*> LiveOut; - DenseMap<MachineBasicBlock*, VNInfo*> Phis; - SmallPtrSet<MachineInstr*, 4> Visited; - for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(LI->reg), - UE = MRI->use_end(); UI != UE; ++UI) { - PerformPHIConstruction(&*UI, UI->getParent(), LI, Visited, Defs, - Uses, NewVNs, LiveOut, Phis, true, true); - } - - // Add ranges for dead defs - for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(LI->reg), - DE = MRI->def_end(); DI != DE; ++DI) { - SlotIndex DefIdx = LIs->getInstructionIndex(&*DI); - DefIdx = DefIdx.getDefIndex(); - - if (LI->liveAt(DefIdx)) continue; - - VNInfo* DeadVN = NewVNs[&*DI]; - LI->addRange(LiveRange(DefIdx, DefIdx.getNextSlot(), DeadVN)); - } -} - -/// RenumberValno - Split the given valno out into a new vreg, allowing it to -/// be allocated to a different register. This function creates a new vreg, -/// copies the valno and its live ranges over to the new vreg's interval, -/// removes them from the old interval, and rewrites all uses and defs of -/// the original reg to the new vreg within those ranges. -void PreAllocSplitting::RenumberValno(VNInfo* VN) { - SmallVector<VNInfo*, 4> Stack; - SmallVector<VNInfo*, 4> VNsToCopy; - Stack.push_back(VN); - - // Walk through and copy the valno we care about, and any other valnos - // that are two-address redefinitions of the one we care about. These - // will need to be rewritten as well. We also check for safety of the - // renumbering here, by making sure that none of the valno involved has - // phi kills. - while (!Stack.empty()) { - VNInfo* OldVN = Stack.back(); - Stack.pop_back(); - - // Bail out if we ever encounter a valno that has a PHI kill. We can't - // renumber these. - if (OldVN->hasPHIKill()) return; - - VNsToCopy.push_back(OldVN); - - // Locate two-address redefinitions - for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(CurrLI->reg), - DE = MRI->def_end(); DI != DE; ++DI) { - if (!DI->isRegTiedToUseOperand(DI.getOperandNo())) continue; - SlotIndex DefIdx = LIs->getInstructionIndex(&*DI).getDefIndex(); - VNInfo* NextVN = CurrLI->findDefinedVNInfoForRegInt(DefIdx); - if (std::find(VNsToCopy.begin(), VNsToCopy.end(), NextVN) != - VNsToCopy.end()) - Stack.push_back(NextVN); - } - } - - // Create the new vreg - unsigned NewVReg = MRI->createVirtualRegister(MRI->getRegClass(CurrLI->reg)); - - // Create the new live interval - LiveInterval& NewLI = LIs->getOrCreateInterval(NewVReg); - - for (SmallVector<VNInfo*, 4>::iterator OI = VNsToCopy.begin(), OE = - VNsToCopy.end(); OI != OE; ++OI) { - VNInfo* OldVN = *OI; - - // Copy the valno over - VNInfo* NewVN = NewLI.createValueCopy(OldVN, LIs->getVNInfoAllocator()); - NewLI.MergeValueInAsValue(*CurrLI, OldVN, NewVN); - - // Remove the valno from the old interval - CurrLI->removeValNo(OldVN); - } - - // Rewrite defs and uses. This is done in two stages to avoid invalidating - // the reg_iterator. 
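RenumberValno cannot move a value number alone: two-address redefs tie further values to it, so the whole tied closure moves to the new vreg together, and the walk bails if any member has a phi kill. A standalone model of that closure collection:

```cpp
#include <algorithm>
#include <vector>

struct ValNo {
  bool hasPhiKill;
  std::vector<int> tiedRedefs; // values defined by two-addr redefs of this
};

// Returns the ids to copy to the new vreg, or empty on the bail-out case.
std::vector<int> collectTiedClosure(const std::vector<ValNo> &vals,
                                    int start) {
  std::vector<int> stack{start}, toCopy;
  while (!stack.empty()) {
    int v = stack.back();
    stack.pop_back();
    if (std::find(toCopy.begin(), toCopy.end(), v) != toCopy.end())
      continue;                 // already collected via another path
    if (vals[v].hasPhiKill)
      return {};                // unsafe to renumber: give up entirely
    toCopy.push_back(v);
    for (int t : vals[v].tiedRedefs)
      stack.push_back(t);
  }
  return toCopy;
}
```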
- SmallVector<std::pair<MachineInstr*, unsigned>, 8> OpsToChange; - - for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(CurrLI->reg), - E = MRI->reg_end(); I != E; ++I) { - MachineOperand& MO = I.getOperand(); - SlotIndex InstrIdx = LIs->getInstructionIndex(&*I); - - if ((MO.isUse() && NewLI.liveAt(InstrIdx.getUseIndex())) || - (MO.isDef() && NewLI.liveAt(InstrIdx.getDefIndex()))) - OpsToChange.push_back(std::make_pair(&*I, I.getOperandNo())); - } - - for (SmallVector<std::pair<MachineInstr*, unsigned>, 8>::iterator I = - OpsToChange.begin(), E = OpsToChange.end(); I != E; ++I) { - MachineInstr* Inst = I->first; - unsigned OpIdx = I->second; - MachineOperand& MO = Inst->getOperand(OpIdx); - MO.setReg(NewVReg); - } - - // Grow the VirtRegMap, since we've created a new vreg. - VRM->grow(); - - // The renumbered vreg shares a stack slot with the old register. - if (IntervalSSMap.count(CurrLI->reg)) - IntervalSSMap[NewVReg] = IntervalSSMap[CurrLI->reg]; - - ++NumRenumbers; -} - -bool PreAllocSplitting::Rematerialize(unsigned VReg, VNInfo* ValNo, - MachineInstr* DefMI, - MachineBasicBlock::iterator RestorePt, - SmallPtrSet<MachineInstr*, 4>& RefsInMBB) { - MachineBasicBlock& MBB = *RestorePt->getParent(); - - MachineBasicBlock::iterator KillPt = BarrierMBB->end(); - if (!DefMI || DefMI->getParent() == BarrierMBB) - KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB); - else - KillPt = llvm::next(MachineBasicBlock::iterator(DefMI)); - - if (KillPt == DefMI->getParent()->end()) - return false; - - TII->reMaterialize(MBB, RestorePt, VReg, 0, DefMI, *TRI); - SlotIndex RematIdx = LIs->InsertMachineInstrInMaps(prior(RestorePt)); - - ReconstructLiveInterval(CurrLI); - RematIdx = RematIdx.getDefIndex(); - RenumberValno(CurrLI->findDefinedVNInfoForRegInt(RematIdx)); - - ++NumSplits; - ++NumRemats; - return true; -} - -MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg, - const TargetRegisterClass* RC, - MachineInstr* DefMI, - MachineInstr* Barrier, - MachineBasicBlock* MBB, - int& SS, - SmallPtrSet<MachineInstr*, 4>& RefsInMBB) { - // Go top down if RefsInMBB is empty. - if (RefsInMBB.empty()) - return 0; - - MachineBasicBlock::iterator FoldPt = Barrier; - while (&*FoldPt != DefMI && FoldPt != MBB->begin() && - !RefsInMBB.count(FoldPt)) - --FoldPt; - - int OpIdx = FoldPt->findRegisterDefOperandIdx(vreg); - if (OpIdx == -1) - return 0; - - SmallVector<unsigned, 1> Ops; - Ops.push_back(OpIdx); - - if (!TII->canFoldMemoryOperand(FoldPt, Ops)) - return 0; - - DenseMap<unsigned, int>::iterator I = IntervalSSMap.find(vreg); - if (I != IntervalSSMap.end()) { - SS = I->second; - } else { - SS = MFI->CreateSpillStackObject(RC->getSize(), RC->getAlignment()); - } - - MachineInstr* FMI = TII->foldMemoryOperand(FoldPt, Ops, SS); - - if (FMI) { - LIs->ReplaceMachineInstrInMaps(FoldPt, FMI); - FoldPt->eraseFromParent(); - ++NumFolds; - - IntervalSSMap[vreg] = SS; - CurrSLI = &LSs->getOrCreateInterval(SS, RC); - if (CurrSLI->hasAtLeastOneValue()) - CurrSValNo = CurrSLI->getValNumInfo(0); - else - CurrSValNo = CurrSLI->getNextValue(SlotIndex(), 0, - LSs->getVNInfoAllocator()); - } - - return FMI; -} - -MachineInstr* PreAllocSplitting::FoldRestore(unsigned vreg, - const TargetRegisterClass* RC, - MachineInstr* Barrier, - MachineBasicBlock* MBB, - int SS, - SmallPtrSet<MachineInstr*, 4>& RefsInMBB) { - if ((int)RestoreFoldLimit != -1 && RestoreFoldLimit == (int)NumRestoreFolds) - return 0; - - // Go top down if RefsInMBB is empty. 
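The OpsToChange rewrite above is two-phase because setReg() re-links the operand in the register's use list, invalidating the reg_iterator being walked. The collect-then-mutate pattern in miniature (a plain vector is not actually invalidated by assignment; it stands in for the intrusive use lists):

```cpp
#include <string>
#include <vector>

struct Operand { std::string reg; };

void renameAll(std::vector<Operand> &ops, const std::string &from,
               const std::string &to) {
  std::vector<size_t> toChange;            // phase 1: record, don't mutate
  for (size_t i = 0; i != ops.size(); ++i)
    if (ops[i].reg == from)
      toChange.push_back(i);
  for (size_t i : toChange)                // phase 2: apply after the walk
    ops[i].reg = to;
}
```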
- if (RefsInMBB.empty()) - return 0; - - // Can't fold a restore between a call stack setup and teardown. - MachineBasicBlock::iterator FoldPt = Barrier; - - // Advance from barrier to call frame teardown. - while (FoldPt != MBB->getFirstTerminator() && - FoldPt->getOpcode() != TRI->getCallFrameDestroyOpcode()) { - if (RefsInMBB.count(FoldPt)) - return 0; - - ++FoldPt; - } - - if (FoldPt == MBB->getFirstTerminator()) - return 0; - else - ++FoldPt; - - // Now find the restore point. - while (FoldPt != MBB->getFirstTerminator() && !RefsInMBB.count(FoldPt)) { - if (FoldPt->getOpcode() == TRI->getCallFrameSetupOpcode()) { - while (FoldPt != MBB->getFirstTerminator() && - FoldPt->getOpcode() != TRI->getCallFrameDestroyOpcode()) { - if (RefsInMBB.count(FoldPt)) - return 0; - - ++FoldPt; - } - - if (FoldPt == MBB->getFirstTerminator()) - return 0; - } - - ++FoldPt; - } - - if (FoldPt == MBB->getFirstTerminator()) - return 0; - - int OpIdx = FoldPt->findRegisterUseOperandIdx(vreg, true); - if (OpIdx == -1) - return 0; - - SmallVector<unsigned, 1> Ops; - Ops.push_back(OpIdx); - - if (!TII->canFoldMemoryOperand(FoldPt, Ops)) - return 0; - - MachineInstr* FMI = TII->foldMemoryOperand(FoldPt, Ops, SS); - - if (FMI) { - LIs->ReplaceMachineInstrInMaps(FoldPt, FMI); - FoldPt->eraseFromParent(); - ++NumRestoreFolds; - } - - return FMI; -} - -/// SplitRegLiveInterval - Split (spill and restore) the given live interval -/// so it would not cross the barrier that's being processed. Shrink wrap -/// (minimize) the live interval to the last uses. -bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { - DEBUG(dbgs() << "Pre-alloc splitting " << LI->reg << " for " << *Barrier - << " result: "); - - CurrLI = LI; - - // Find live range where current interval cross the barrier. - LiveInterval::iterator LR = - CurrLI->FindLiveRangeContaining(BarrierIdx.getUseIndex()); - VNInfo *ValNo = LR->valno; - - assert(!ValNo->isUnused() && "Val# is defined by a dead def?"); - - MachineInstr *DefMI = LIs->getInstructionFromIndex(ValNo->def); - - // If this would create a new join point, do not split. - if (DefMI && createsNewJoin(LR, DefMI->getParent(), Barrier->getParent())) { - DEBUG(dbgs() << "FAILED (would create a new join point).\n"); - return false; - } - - // Find all references in the barrier mbb. - SmallPtrSet<MachineInstr*, 4> RefsInMBB; - for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(CurrLI->reg), - E = MRI->reg_end(); I != E; ++I) { - MachineInstr *RefMI = &*I; - if (RefMI->getParent() == BarrierMBB) - RefsInMBB.insert(RefMI); - } - - // Find a point to restore the value after the barrier. - MachineBasicBlock::iterator RestorePt = - findRestorePoint(BarrierMBB, Barrier, LR->end, RefsInMBB); - if (RestorePt == BarrierMBB->end()) { - DEBUG(dbgs() << "FAILED (could not find a suitable restore point).\n"); - return false; - } - - if (DefMI && LIs->isReMaterializable(*LI, ValNo, DefMI)) - if (Rematerialize(LI->reg, ValNo, DefMI, RestorePt, RefsInMBB)) { - DEBUG(dbgs() << "success (remat).\n"); - return true; - } - - // Add a spill either before the barrier or after the definition. - MachineBasicBlock *DefMBB = DefMI ? DefMI->getParent() : NULL; - const TargetRegisterClass *RC = MRI->getRegClass(CurrLI->reg); - SlotIndex SpillIndex; - MachineInstr *SpillMI = NULL; - int SS = -1; - if (!DefMI) { - // If we don't know where the def is we must split just before the barrier. 
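FoldRestore above is gated by RestoreFoldLimit, and the pass keeps similar PreSplitLimit/DeadSplitLimit counters: a standard LLVM debugging idiom that caps how many times a transform fires so a miscompile can be bisected to one application. A hedged sketch of the idiom using llvm::cl::opt (option and function names are illustrative, not from the patch; builds against LLVM of this vintage):

```cpp
#include "llvm/Support/CommandLine.h"
using namespace llvm;

// -my-transform-limit=N : apply the transform at most N times; -1 = no cap.
// Bisecting on N pinpoints the single application that breaks a test case.
static cl::opt<int> TransformLimit("my-transform-limit", cl::init(-1),
                                   cl::Hidden,
                                   cl::desc("Max applications, for bisection"));
static unsigned NumApplied = 0;

static bool shouldApply() {
  if (TransformLimit != -1 && (int)NumApplied >= TransformLimit)
    return false;              // cap reached: skip remaining opportunities
  ++NumApplied;
  return true;
}
```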
- if ((SpillMI = FoldSpill(LI->reg, RC, 0, Barrier, - BarrierMBB, SS, RefsInMBB))) { - SpillIndex = LIs->getInstructionIndex(SpillMI); - } else { - MachineBasicBlock::iterator SpillPt = - findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB); - if (SpillPt == BarrierMBB->begin()) { - DEBUG(dbgs() << "FAILED (could not find a suitable spill point).\n"); - return false; // No gap to insert spill. - } - // Add spill. - - SS = CreateSpillStackSlot(CurrLI->reg, RC); - TII->storeRegToStackSlot(*BarrierMBB, SpillPt, CurrLI->reg, true, SS, RC, - TRI); - SpillMI = prior(SpillPt); - SpillIndex = LIs->InsertMachineInstrInMaps(SpillMI); - } - } else if (!IsAvailableInStack(DefMBB, CurrLI->reg, ValNo->def, - LIs->getZeroIndex(), SpillIndex, SS)) { - // If it's already split, just restore the value. There is no need to spill - // the def again. - if (!DefMI) { - DEBUG(dbgs() << "FAILED (def is dead).\n"); - return false; // Def is dead. Do nothing. - } - - if ((SpillMI = FoldSpill(LI->reg, RC, DefMI, Barrier, - BarrierMBB, SS, RefsInMBB))) { - SpillIndex = LIs->getInstructionIndex(SpillMI); - } else { - // Check if it's possible to insert a spill after the def MI. - MachineBasicBlock::iterator SpillPt; - if (DefMBB == BarrierMBB) { - // Add spill after the def and the last use before the barrier. - SpillPt = findSpillPoint(BarrierMBB, Barrier, DefMI, - RefsInMBB); - if (SpillPt == DefMBB->begin()) { - DEBUG(dbgs() << "FAILED (could not find a suitable spill point).\n"); - return false; // No gap to insert spill. - } - } else { - SpillPt = llvm::next(MachineBasicBlock::iterator(DefMI)); - if (SpillPt == DefMBB->end()) { - DEBUG(dbgs() << "FAILED (could not find a suitable spill point).\n"); - return false; // No gap to insert spill. - } - } - // Add spill. - SS = CreateSpillStackSlot(CurrLI->reg, RC); - TII->storeRegToStackSlot(*DefMBB, SpillPt, CurrLI->reg, false, SS, RC, - TRI); - SpillMI = prior(SpillPt); - SpillIndex = LIs->InsertMachineInstrInMaps(SpillMI); - } - } - - // Remember def instruction index to spill index mapping. - if (DefMI && SpillMI) - Def2SpillMap[ValNo->def] = SpillIndex; - - // Add restore. - bool FoldedRestore = false; - SlotIndex RestoreIndex; - if (MachineInstr* LMI = FoldRestore(CurrLI->reg, RC, Barrier, - BarrierMBB, SS, RefsInMBB)) { - RestorePt = LMI; - RestoreIndex = LIs->getInstructionIndex(RestorePt); - FoldedRestore = true; - } else { - TII->loadRegFromStackSlot(*BarrierMBB, RestorePt, CurrLI->reg, SS, RC, TRI); - MachineInstr *LoadMI = prior(RestorePt); - RestoreIndex = LIs->InsertMachineInstrInMaps(LoadMI); - } - - // Update spill stack slot live interval. - UpdateSpillSlotInterval(ValNo, SpillIndex.getUseIndex().getNextSlot(), - RestoreIndex.getDefIndex()); - - ReconstructLiveInterval(CurrLI); - - if (!FoldedRestore) { - SlotIndex RestoreIdx = LIs->getInstructionIndex(prior(RestorePt)); - RestoreIdx = RestoreIdx.getDefIndex(); - RenumberValno(CurrLI->findDefinedVNInfoForRegInt(RestoreIdx)); - } - - ++NumSplits; - DEBUG(dbgs() << "success.\n"); - return true; -} - -/// SplitRegLiveIntervals - Split all register live intervals that cross the -/// barrier that's being processed. -bool -PreAllocSplitting::SplitRegLiveIntervals(const TargetRegisterClass **RCs, - SmallPtrSet<LiveInterval*, 8>& Split) { - // First find all the virtual registers whose live intervals are intercepted - // by the current barrier. 
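The spill placement logic above collapses to a three-way decision; everything else is finding a legal gap (findSpillPoint) or folding the store into an existing instruction (FoldSpill). Distilled:

```cpp
enum class SpillAt { BeforeBarrier, AfterDefSameBlock, AfterDefOtherBlock };

SpillAt chooseSpillPoint(bool haveDef, bool defInBarrierBlock) {
  if (!haveDef)
    return SpillAt::BeforeBarrier;     // unknown def: split at the barrier
  if (defInBarrierBlock)
    return SpillAt::AfterDefSameBlock; // between the def and the barrier
  return SpillAt::AfterDefOtherBlock;  // just after the def, in its block
}
```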
- SmallVector<LiveInterval*, 8> Intervals; - for (const TargetRegisterClass **RC = RCs; *RC; ++RC) { - // FIXME: If it's not safe to move any instruction that defines the barrier - // register class, then it means there are some special dependencies which - // codegen is not modelling. Ignore these barriers for now. - if (!TII->isSafeToMoveRegClassDefs(*RC)) - continue; - const std::vector<unsigned> &VRs = MRI->getRegClassVirtRegs(*RC); - for (unsigned i = 0, e = VRs.size(); i != e; ++i) { - unsigned Reg = VRs[i]; - if (!LIs->hasInterval(Reg)) - continue; - LiveInterval *LI = &LIs->getInterval(Reg); - if (LI->liveAt(BarrierIdx) && !Barrier->readsRegister(Reg)) - // Virtual register live interval is intercepted by the barrier. We - // should split and shrink wrap its interval if possible. - Intervals.push_back(LI); - } - } - - // Process the affected live intervals. - bool Change = false; - while (!Intervals.empty()) { - if (PreSplitLimit != -1 && (int)NumSplits == PreSplitLimit) - break; - LiveInterval *LI = Intervals.back(); - Intervals.pop_back(); - bool result = SplitRegLiveInterval(LI); - if (result) Split.insert(LI); - Change |= result; - } - - return Change; -} - -unsigned PreAllocSplitting::getNumberOfNonSpills( - SmallPtrSet<MachineInstr*, 4>& MIs, - unsigned Reg, int FrameIndex, - bool& FeedsTwoAddr) { - unsigned NonSpills = 0; - for (SmallPtrSet<MachineInstr*, 4>::iterator UI = MIs.begin(), UE = MIs.end(); - UI != UE; ++UI) { - int StoreFrameIndex; - unsigned StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex); - if (StoreVReg != Reg || StoreFrameIndex != FrameIndex) - ++NonSpills; - - int DefIdx = (*UI)->findRegisterDefOperandIdx(Reg); - if (DefIdx != -1 && (*UI)->isRegTiedToUseOperand(DefIdx)) - FeedsTwoAddr = true; - } - - return NonSpills; -} - -/// removeDeadSpills - After doing splitting, filter through all intervals we've -/// split, and see if any of the spills are unnecessary. If so, remove them. -bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) { - bool changed = false; - - // Walk over all of the live intervals that were touched by the splitter, - // and see if we can do any DCE and/or folding. - for (SmallPtrSet<LiveInterval*, 8>::iterator LI = split.begin(), - LE = split.end(); LI != LE; ++LI) { - DenseMap<VNInfo*, SmallPtrSet<MachineInstr*, 4> > VNUseCount; - - // First, collect all the uses of the vreg, and sort them by their - // reaching definition (VNInfo). - for (MachineRegisterInfo::use_iterator UI = MRI->use_begin((*LI)->reg), - UE = MRI->use_end(); UI != UE; ++UI) { - SlotIndex index = LIs->getInstructionIndex(&*UI); - index = index.getUseIndex(); - - const LiveRange* LR = (*LI)->getLiveRangeContaining(index); - VNUseCount[LR->valno].insert(&*UI); - } - - // Now, take the definitions (VNInfo's) one at a time and try to DCE - // and/or fold them away. - for (LiveInterval::vni_iterator VI = (*LI)->vni_begin(), - VE = (*LI)->vni_end(); VI != VE; ++VI) { - - if (DeadSplitLimit != -1 && (int)NumDeadSpills == DeadSplitLimit) - return changed; - - VNInfo* CurrVN = *VI; - - // We don't currently try to handle definitions with PHI kills, because - // it would involve processing more than one VNInfo at once. - if (CurrVN->hasPHIKill()) continue; - - // We also don't try to handle the results of PHI joins, since there's - // no defining instruction to analyze. 
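removeDeadSpills above first buckets every use under its reaching definition (VNUseCount, keyed by the VNInfo of the live range containing the use). A simplified standalone model in which positions are ints and a use belongs to the nearest def at or before it:

```cpp
#include <iterator>
#include <map>
#include <set>

std::map<int, std::set<int>> groupUsesByDef(const std::set<int> &defs,
                                            const std::set<int> &uses) {
  std::map<int, std::set<int>> byDef;   // def position -> its uses
  for (int u : uses) {
    auto it = defs.upper_bound(u);      // first def strictly after u
    if (it == defs.begin())
      continue;                         // use before any def: skip in sketch
    byDef[*std::prev(it)].insert(u);    // nearest def at or before the use
  }
  return byDef;
}
```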
- MachineInstr* DefMI = LIs->getInstructionFromIndex(CurrVN->def); - if (!DefMI || CurrVN->isUnused()) continue; - - // We're only interested in eliminating cruft introduced by the splitter, - // is of the form load-use or load-use-store. First, check that the - // definition is a load, and remember what stack slot we loaded it from. - int FrameIndex; - if (!TII->isLoadFromStackSlot(DefMI, FrameIndex)) continue; - - // If the definition has no uses at all, just DCE it. - if (VNUseCount[CurrVN].size() == 0) { - LIs->RemoveMachineInstrFromMaps(DefMI); - (*LI)->removeValNo(CurrVN); - DefMI->eraseFromParent(); - VNUseCount.erase(CurrVN); - ++NumDeadSpills; - changed = true; - continue; - } - - // Second, get the number of non-store uses of the definition, as well as - // a flag indicating whether it feeds into a later two-address definition. - bool FeedsTwoAddr = false; - unsigned NonSpillCount = getNumberOfNonSpills(VNUseCount[CurrVN], - (*LI)->reg, FrameIndex, - FeedsTwoAddr); - - // If there's one non-store use and it doesn't feed a two-addr, then - // this is a load-use-store case that we can try to fold. - if (NonSpillCount == 1 && !FeedsTwoAddr) { - // Start by finding the non-store use MachineInstr. - SmallPtrSet<MachineInstr*, 4>::iterator UI = VNUseCount[CurrVN].begin(); - int StoreFrameIndex; - unsigned StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex); - while (UI != VNUseCount[CurrVN].end() && - (StoreVReg == (*LI)->reg && StoreFrameIndex == FrameIndex)) { - ++UI; - if (UI != VNUseCount[CurrVN].end()) - StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex); - } - if (UI == VNUseCount[CurrVN].end()) continue; - - MachineInstr* use = *UI; - - // Attempt to fold it away! - int OpIdx = use->findRegisterUseOperandIdx((*LI)->reg, false); - if (OpIdx == -1) continue; - SmallVector<unsigned, 1> Ops; - Ops.push_back(OpIdx); - if (!TII->canFoldMemoryOperand(use, Ops)) continue; - - MachineInstr* NewMI = TII->foldMemoryOperand(use, Ops, FrameIndex); - - if (!NewMI) continue; - - // Update relevant analyses. - LIs->RemoveMachineInstrFromMaps(DefMI); - LIs->ReplaceMachineInstrInMaps(use, NewMI); - (*LI)->removeValNo(CurrVN); - - DefMI->eraseFromParent(); - use->eraseFromParent(); - VNUseCount[CurrVN].erase(use); - - // Remove deleted instructions. Note that we need to remove them from - // the VNInfo->use map as well, just to be safe. - for (SmallPtrSet<MachineInstr*, 4>::iterator II = - VNUseCount[CurrVN].begin(), IE = VNUseCount[CurrVN].end(); - II != IE; ++II) { - for (DenseMap<VNInfo*, SmallPtrSet<MachineInstr*, 4> >::iterator - VNI = VNUseCount.begin(), VNE = VNUseCount.end(); VNI != VNE; - ++VNI) - if (VNI->first != CurrVN) - VNI->second.erase(*II); - LIs->RemoveMachineInstrFromMaps(*II); - (*II)->eraseFromParent(); - } - - VNUseCount.erase(CurrVN); - - for (DenseMap<VNInfo*, SmallPtrSet<MachineInstr*, 4> >::iterator - VI = VNUseCount.begin(), VE = VNUseCount.end(); VI != VE; ++VI) - if (VI->second.erase(use)) - VI->second.insert(NewMI); - - ++NumDeadSpills; - changed = true; - continue; - } - - // If there's more than one non-store instruction, we can't profitably - // fold it, so bail. - if (NonSpillCount) continue; - - // Otherwise, this is a load-store case, so DCE them. 
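The dead-spill cleanup above boils down to one decision per reloaded definition, driven by the counts that getNumberOfNonSpills computes:

```cpp
#include <cstddef>

enum class Action { DeleteLoad, TryFoldUse, DeleteLoadAndStores, Keep };

Action classify(std::size_t numUses, std::size_t nonStoreUses,
                bool feedsTwoAddr) {
  if (numUses == 0)
    return Action::DeleteLoad;          // reload with no uses: plain DCE
  if (nonStoreUses == 1 && !feedsTwoAddr)
    return Action::TryFoldUse;          // load-use(-store): try to fold
  if (nonStoreUses == 0)
    return Action::DeleteLoadAndStores; // pure load/store round trip
  return Action::Keep;                  // several real uses: not profitable
}
```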
- for (SmallPtrSet<MachineInstr*, 4>::iterator UI = - VNUseCount[CurrVN].begin(), UE = VNUseCount[CurrVN].end(); - UI != UE; ++UI) { - LIs->RemoveMachineInstrFromMaps(*UI); - (*UI)->eraseFromParent(); - } - - VNUseCount.erase(CurrVN); - - LIs->RemoveMachineInstrFromMaps(DefMI); - (*LI)->removeValNo(CurrVN); - DefMI->eraseFromParent(); - ++NumDeadSpills; - changed = true; - } - } - - return changed; -} - -bool PreAllocSplitting::createsNewJoin(LiveRange* LR, - MachineBasicBlock* DefMBB, - MachineBasicBlock* BarrierMBB) { - if (DefMBB == BarrierMBB) - return false; - - if (LR->valno->hasPHIKill()) - return false; - - SlotIndex MBBEnd = LIs->getMBBEndIdx(BarrierMBB); - if (LR->end < MBBEnd) - return false; - - MachineLoopInfo& MLI = getAnalysis<MachineLoopInfo>(); - if (MLI.getLoopFor(DefMBB) != MLI.getLoopFor(BarrierMBB)) - return true; - - MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>(); - SmallPtrSet<MachineBasicBlock*, 4> Visited; - typedef std::pair<MachineBasicBlock*, - MachineBasicBlock::succ_iterator> ItPair; - SmallVector<ItPair, 4> Stack; - Stack.push_back(std::make_pair(BarrierMBB, BarrierMBB->succ_begin())); - - while (!Stack.empty()) { - ItPair P = Stack.back(); - Stack.pop_back(); - - MachineBasicBlock* PredMBB = P.first; - MachineBasicBlock::succ_iterator S = P.second; - - if (S == PredMBB->succ_end()) - continue; - else if (Visited.count(*S)) { - Stack.push_back(std::make_pair(PredMBB, ++S)); - continue; - } else - Stack.push_back(std::make_pair(PredMBB, S+1)); - - MachineBasicBlock* MBB = *S; - Visited.insert(MBB); - - if (MBB == BarrierMBB) - return true; - - MachineDomTreeNode* DefMDTN = MDT.getNode(DefMBB); - MachineDomTreeNode* BarrierMDTN = MDT.getNode(BarrierMBB); - MachineDomTreeNode* MDTN = MDT.getNode(MBB)->getIDom(); - while (MDTN) { - if (MDTN == DefMDTN) - return true; - else if (MDTN == BarrierMDTN) - break; - MDTN = MDTN->getIDom(); - } - - MBBEnd = LIs->getMBBEndIdx(MBB); - if (LR->end > MBBEnd) - Stack.push_back(std::make_pair(MBB, MBB->succ_begin())); - } - - return false; -} - - -bool PreAllocSplitting::runOnMachineFunction(MachineFunction &MF) { - CurrMF = &MF; - TM = &MF.getTarget(); - TRI = TM->getRegisterInfo(); - TII = TM->getInstrInfo(); - MFI = MF.getFrameInfo(); - MRI = &MF.getRegInfo(); - SIs = &getAnalysis<SlotIndexes>(); - LIs = &getAnalysis<LiveIntervals>(); - LSs = &getAnalysis<LiveStacks>(); - VRM = &getAnalysis<VirtRegMap>(); - - bool MadeChange = false; - - // Make sure blocks are numbered in order. 
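createsNewJoin above drives an explicit-stack DFS holding (block, successor iterator) pairs, precisely so it can resume a block's successor walk after descending. The same shape, standalone, collecting the blocks reachable from a start block:

```cpp
#include <utility>
#include <vector>

std::vector<bool> reachableFrom(const std::vector<std::vector<int>> &succ,
                                int start) {
  std::vector<bool> visited(succ.size(), false);
  visited[start] = true;
  std::vector<std::pair<int, size_t>> stack;
  stack.emplace_back(start, 0);
  while (!stack.empty()) {
    int b = stack.back().first;
    size_t i = stack.back().second;
    if (i == succ[b].size()) {   // this frame's successors are exhausted
      stack.pop_back();
      continue;
    }
    stack.back().second = i + 1; // resume with the next successor later
    int s = succ[b][i];
    if (!visited[s]) {
      visited[s] = true;
      stack.emplace_back(s, 0);  // descend into the unvisited block
    }
  }
  return visited;
}
```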
- MF.RenumberBlocks(); - - MachineBasicBlock *Entry = MF.begin(); - SmallPtrSet<MachineBasicBlock*,16> Visited; - - SmallPtrSet<LiveInterval*, 8> Split; - - for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> > - DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited); - DFI != E; ++DFI) { - BarrierMBB = *DFI; - for (MachineBasicBlock::iterator I = BarrierMBB->begin(), - E = BarrierMBB->end(); I != E; ++I) { - Barrier = &*I; - const TargetRegisterClass **BarrierRCs = - Barrier->getDesc().getRegClassBarriers(); - if (!BarrierRCs) - continue; - BarrierIdx = LIs->getInstructionIndex(Barrier); - MadeChange |= SplitRegLiveIntervals(BarrierRCs, Split); - } - } - - MadeChange |= removeDeadSpills(Split); - - return MadeChange; -} diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index f1f3c99..a901c5f 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -145,6 +145,7 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const { /// pseudo instructions. void PEI::calculateCallsInformation(MachineFunction &Fn) { const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); + const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering(); MachineFrameInfo *MFI = Fn.getFrameInfo(); @@ -152,8 +153,8 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) { bool AdjustsStack = MFI->adjustsStack(); // Get the function call frame set-up and tear-down instruction opcode - int FrameSetupOpcode = RegInfo->getCallFrameSetupOpcode(); - int FrameDestroyOpcode = RegInfo->getCallFrameDestroyOpcode(); + int FrameSetupOpcode = TII.getCallFrameSetupOpcode(); + int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); // Early exit for targets which have no call frame setup/destroy pseudo // instructions. 
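The two PrologEpilogInserter hunks here, like the RegAlloc* hunks below, belong to this merge's refactor moving the call-frame pseudo opcodes from TargetRegisterInfo to TargetInstrInfo. A sketch of the updated query pattern (2011-era in-tree API, so it only builds against LLVM; countCallFrames is an illustrative helper, not from the patch):

```cpp
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;

// Count ADJCALLSTACK brackets; note both opcodes now come from TII, where
// the old code asked RegInfo->getCallFrameSetupOpcode().
static unsigned countCallFrames(MachineFunction &Fn) {
  const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
  int SetupOp = TII.getCallFrameSetupOpcode();
  if (SetupOp == -1)
    return 0;             // target defines no call frame pseudos: early exit
  unsigned N = 0;
  for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB)
    for (MachineBasicBlock::iterator I = BB->begin(), IE = BB->end();
         I != IE; ++I)
      if ((int)I->getOpcode() == SetupOp)
        ++N;
  return N;
}
```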
@@ -705,12 +706,13 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { const TargetMachine &TM = Fn.getTarget(); assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!"); + const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); const TargetFrameLowering *TFI = TM.getFrameLowering(); bool StackGrowsDown = TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; - int FrameSetupOpcode = TRI.getCallFrameSetupOpcode(); - int FrameDestroyOpcode = TRI.getCallFrameDestroyOpcode(); + int FrameSetupOpcode = TII.getCallFrameSetupOpcode(); + int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp index 1d77b29..bcb38d7 100644 --- a/lib/CodeGen/RegAllocBasic.cpp +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -20,6 +20,7 @@ #include "RenderMachineFunction.h" #include "Spiller.h" #include "VirtRegMap.h" +#include "RegisterCoalescer.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -34,7 +35,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" -#include "llvm/CodeGen/RegisterCoalescer.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -141,7 +141,7 @@ RABasic::RABasic(): MachineFunctionPass(ID) { initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); - initializeRegisterCoalescerAnalysisGroup(*PassRegistry::getPassRegistry()); + initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index 0f27dfc..ee23194 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -118,7 +118,7 @@ namespace { // SkippedInstrs - Descriptors of instructions whose clobber list was // ignored because all registers were spilled. It is still necessary to // mark all the clobbered registers as used by the function. - SmallPtrSet<const TargetInstrDesc*, 4> SkippedInstrs; + SmallPtrSet<const MCInstrDesc*, 4> SkippedInstrs; // isBulkSpilling - This flag is set when LiveRegMap will be cleared // completely after spilling all live registers. LiveRegMap entries should @@ -423,7 +423,7 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg, // Returns spillImpossible when PhysReg or an alias can't be spilled. 
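Before the calcSpillCost hunk that follows (which only improves the debug output via PrintReg): RAFast ranks candidate physical registers with a tiny cost model, and allocVirtReg takes the first free register or, failing that, the cheapest eviction. A standalone miniature, with illustrative cost constants:

```cpp
#include <vector>

enum : unsigned { spillClean = 1, spillDirty = 3, spillImpossible = ~0u };

enum class RegState { Free, Reserved, LiveClean, LiveDirty };

unsigned spillCost(RegState s) {
  switch (s) {
  case RegState::Free:      return 0;
  case RegState::Reserved:  return spillImpossible;  // can never evict
  case RegState::LiveClean: return spillClean;       // just discard
  case RegState::LiveDirty: return spillDirty;       // must store first
  }
  return spillImpossible;
}

// Scan an allocation order; a zero-cost hit wins immediately, otherwise
// keep the cheapest eviction seen so far.
int pickReg(const std::vector<RegState> &regs) {
  int best = -1;
  unsigned bestCost = spillImpossible;
  for (int r = 0; r != (int)regs.size(); ++r) {
    unsigned c = spillCost(regs[r]);
    if (c == 0)
      return r;                        // free register: take it now
    if (c < bestCost) { bestCost = c; best = r; }
  }
  return best;                         // -1 if everything is impossible
}
```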
unsigned RAFast::calcSpillCost(unsigned PhysReg) const { if (UsedInInstr.test(PhysReg)) { - DEBUG(dbgs() << "PhysReg: " << PhysReg << " is already used in instr.\n"); + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is already used in instr.\n"); return spillImpossible; } switch (unsigned VirtReg = PhysRegState[PhysReg]) { @@ -432,15 +432,15 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const { case regFree: return 0; case regReserved: - DEBUG(dbgs() << "VirtReg: " << VirtReg << " corresponding to PhysReg: " - << PhysReg << " is reserved already.\n"); + DEBUG(dbgs() << PrintReg(VirtReg, TRI) << " corresponding " + << PrintReg(PhysReg, TRI) << " is reserved already.\n"); return spillImpossible; default: return LiveVirtRegs.lookup(VirtReg).Dirty ? spillDirty : spillClean; } // This is a disabled register, add up cost of aliases. - DEBUG(dbgs() << "\tRegister: " << PhysReg << " is disabled.\n"); + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is disabled.\n"); unsigned Cost = 0; for (const unsigned *AS = TRI->getAliasSet(PhysReg); unsigned Alias = *AS; ++AS) { @@ -515,7 +515,7 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) { unsigned BestReg = 0, BestCost = spillImpossible; for (ArrayRef<unsigned>::iterator I = AO.begin(), E = AO.end(); I != E; ++I) { unsigned Cost = calcSpillCost(*I); - DEBUG(dbgs() << "\tRegister: " << *I << "\n"); + DEBUG(dbgs() << "\tRegister: " << PrintReg(*I, TRI) << "\n"); DEBUG(dbgs() << "\tCost: " << Cost << "\n"); DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n"); // Cost is 0 when all aliases are already disabled. @@ -726,7 +726,8 @@ void RAFast::handleThroughOperands(MachineInstr *MI, if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue; unsigned Reg = MO.getReg(); if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; - DEBUG(dbgs() << "\tSetting reg " << Reg << " as used in instr\n"); + DEBUG(dbgs() << "\tSetting " << PrintReg(Reg, TRI) + << " as used in instr\n"); UsedInInstr.set(Reg); } @@ -776,7 +777,7 @@ void RAFast::AllocateBasicBlock() { // Otherwise, sequentially allocate each instruction in the MBB. while (MII != MBB->end()) { MachineInstr *MI = MII++; - const TargetInstrDesc &TID = MI->getDesc(); + const MCInstrDesc &MCID = MI->getDesc(); DEBUG({ dbgs() << "\n>> " << *MI << "Regs:"; for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) { @@ -889,7 +890,7 @@ void RAFast::AllocateBasicBlock() { VirtOpEnd = i+1; if (MO.isUse()) { hasTiedOps = hasTiedOps || - TID.getOperandConstraint(i, TOI::TIED_TO) != -1; + MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1; } else { if (MO.isEarlyClobber()) hasEarlyClobbers = true; @@ -919,7 +920,7 @@ void RAFast::AllocateBasicBlock() { // We didn't detect inline asm tied operands above, so just make this extra // pass for all inline asm. if (MI->isInlineAsm() || hasEarlyClobbers || hasPartialRedefs || - (hasTiedOps && (hasPhysDefs || TID.getNumDefs() > 1))) { + (hasTiedOps && (hasPhysDefs || MCID.getNumDefs() > 1))) { handleThroughOperands(MI, VirtDead); // Don't attempt coalescing when we have funny stuff going on. CopyDst = 0; @@ -964,7 +965,7 @@ void RAFast::AllocateBasicBlock() { } unsigned DefOpEnd = MI->getNumOperands(); - if (TID.isCall()) { + if (MCID.isCall()) { // Spill all virtregs before a call. This serves two purposes: 1. If an // exception is thrown, the landing pad is going to expect to find // registers in their spill slots, and 2. 
we don't have to wade through @@ -975,7 +976,7 @@ void RAFast::AllocateBasicBlock() { // The imp-defs are skipped below, but we still need to mark those // registers as used by the function. - SkippedInstrs.insert(&TID); + SkippedInstrs.insert(&MCID); } // Third scan. @@ -1061,7 +1062,7 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) { MRI->closePhysRegsUsed(*TRI); // Add the clobber lists for all the instructions we skipped earlier. - for (SmallPtrSet<const TargetInstrDesc*, 4>::const_iterator + for (SmallPtrSet<const MCInstrDesc*, 4>::const_iterator I = SkippedInstrs.begin(), E = SkippedInstrs.end(); I != E; ++I) if (const unsigned *Defs = (*I)->getImplicitDefs()) while (*Defs) diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 8d06325..912d899 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -22,6 +22,7 @@ #include "SpillPlacement.h" #include "SplitKit.h" #include "VirtRegMap.h" +#include "RegisterCoalescer.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Function.h" @@ -37,7 +38,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" -#include "llvm/CodeGen/RegisterCoalescer.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -234,7 +234,7 @@ RAGreedy::RAGreedy(): MachineFunctionPass(ID), LRStage(RS_New) { initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); - initializeRegisterCoalescerAnalysisGroup(*PassRegistry::getPassRegistry()); + initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); @@ -763,32 +763,46 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, // Create the main cross-block interval. const unsigned MainIntv = SE->openIntv(); - // First add all defs that are live out of a block. + // First handle all the blocks with uses. ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); for (unsigned i = 0; i != UseBlocks.size(); ++i) { const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; - bool RegIn = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)]; - bool RegOut = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)]; + bool RegIn = BI.LiveIn && + LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)]; + bool RegOut = BI.LiveOut && + LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)]; // Create separate intervals for isolated blocks with multiple uses. - if (!RegIn && !RegOut && BI.FirstUse != BI.LastUse) { + // + // |---o---o---| Enter and leave on the stack. + // ____-----____ Create local interval for uses. + // + // | o---o---| Defined in block, leave on stack. + // -----____ Create local interval for uses. + // + // |---o---x | Enter on stack, killed in block. + // ____----- Create local interval for uses. + // + if (!RegIn && !RegOut) { DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n"); - SE->splitSingleBlock(BI); - SE->selectIntv(MainIntv); + if (!BI.isOneInstr()) { + SE->splitSingleBlock(BI); + SE->selectIntv(MainIntv); + } continue; } - // Should the register be live out? 
- if (!BI.LiveOut || !RegOut) - continue; - SlotIndex Start, Stop; tie(Start, Stop) = Indexes->getMBBRange(BI.MBB); Intf.moveToBlock(BI.MBB->getNumber()); - DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " -> EB#" - << Bundles->getBundle(BI.MBB->getNumber(), 1) + DEBUG(dbgs() << "EB#" << Bundles->getBundle(BI.MBB->getNumber(), 0) + << (RegIn ? " => " : " -- ") + << "BB#" << BI.MBB->getNumber() + << (RegOut ? " => " : " -- ") + << " EB#" << Bundles->getBundle(BI.MBB->getNumber(), 1) << " [" << Start << ';' << SA->getLastSplitPoint(BI.MBB->getNumber()) << '-' << Stop + << ") uses [" << BI.FirstUse << ';' << BI.LastUse << ") intf [" << Intf.first() << ';' << Intf.last() << ')'); // The interference interval should either be invalid or overlap MBB. @@ -797,150 +811,266 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, assert((!Intf.hasInterference() || Intf.last() > Start) && "Bad interference"); - // Check interference leaving the block. + // We are now ready to decide where to split in the current block. There + // are many variables guiding the decision: + // + // - RegIn / RegOut: The global splitting algorithm's decisions for our + // ingoing and outgoing bundles. + // + // - BI.BlockIn / BI.BlockOut: Is the live range live-in and/or live-out + // from this block. + // + // - Intf.hasInterference(): Is there interference in this block. + // + // - Intf.first() / Inft.last(): The range of interference. + // + // The live range should be split such that MainIntv is live-in when RegIn + // is set, and live-out when RegOut is set. MainIntv should never overlap + // the interference, and the stack interval should never have more than one + // use per block. + + // No splits can be inserted after LastSplitPoint, overlap instead. + SlotIndex LastSplitPoint = Stop; + if (BI.LiveOut) + LastSplitPoint = SA->getLastSplitPoint(BI.MBB->getNumber()); + + // At this point, we know that either RegIn or RegOut is set. We dealt with + // the all-stack case above. + + // Blocks without interference are relatively easy. if (!Intf.hasInterference()) { - // Block is interference-free. - DEBUG(dbgs() << ", no interference"); - if (!BI.LiveThrough) { - DEBUG(dbgs() << ", not live-through.\n"); - SE->useIntv(SE->enterIntvBefore(BI.FirstUse), Stop); - continue; - } - if (!RegIn) { - // Block is live-through, but entry bundle is on the stack. - // Reload just before the first use. - DEBUG(dbgs() << ", not live-in, enter before first use.\n"); - SE->useIntv(SE->enterIntvBefore(BI.FirstUse), Stop); - continue; + DEBUG(dbgs() << ", no interference.\n"); + SE->selectIntv(MainIntv); + // The easiest case has MainIntv live through. + // + // |---o---o---| Live-in, live-out. + // ============= Use MainIntv everywhere. + // + SlotIndex From = Start, To = Stop; + + // Block entry. Reload before the first use if MainIntv is not live-in. + // + // |---o-- Enter on stack. + // ____=== Reload before first use. + // + // | o-- Defined in block. + // === Use MainIntv from def. + // + if (!RegIn) + From = SE->enterIntvBefore(BI.FirstUse); + + // Block exit. Handle cases where MainIntv is not live-out. + if (!BI.LiveOut) + // + // --x | Killed in block. + // === Use MainIntv up to kill. + // + To = SE->leaveIntvAfter(BI.LastUse); + else if (!RegOut) { + // + // --o---| Live-out on stack. + // ===____ Use MainIntv up to last use, switch to stack. + // + // -----o| Live-out on stack, last use after last split point. + // ====== Extend MainIntv to last use, overlapping. 
+ // \____ Copy to stack interval before last split point. + // + if (BI.LastUse < LastSplitPoint) + To = SE->leaveIntvAfter(BI.LastUse); + else { + // The last use is after the last split point, it is probably an + // indirect branch. + To = SE->leaveIntvBefore(LastSplitPoint); + // Run a double interval from the split to the last use. This makes + // it possible to spill the complement without affecting the indirect + // branch. + SE->overlapIntv(To, BI.LastUse); + } } - DEBUG(dbgs() << ", live-through.\n"); - continue; - } - - // Block has interference. - DEBUG(dbgs() << ", interference to " << Intf.last()); - if (!BI.LiveThrough && Intf.last() <= BI.FirstUse) { - // The interference doesn't reach the outgoing segment. - DEBUG(dbgs() << " doesn't affect def from " << BI.FirstUse << '\n'); - SE->useIntv(BI.FirstUse, Stop); + // Paint in MainIntv liveness for this block. + SE->useIntv(From, To); continue; } - SlotIndex LastSplitPoint = SA->getLastSplitPoint(BI.MBB->getNumber()); - if (Intf.last().getBoundaryIndex() < BI.LastUse) { - // There are interference-free uses at the end of the block. - // Find the first use that can get the live-out register. - SmallVectorImpl<SlotIndex>::const_iterator UI = - std::lower_bound(SA->UseSlots.begin(), SA->UseSlots.end(), - Intf.last().getBoundaryIndex()); - assert(UI != SA->UseSlots.end() && "Couldn't find last use"); - SlotIndex Use = *UI; - assert(Use <= BI.LastUse && "Couldn't find last use"); - // Only attempt a split befroe the last split point. - if (Use.getBaseIndex() <= LastSplitPoint) { - DEBUG(dbgs() << ", free use at " << Use << ".\n"); - SlotIndex SegStart = SE->enterIntvBefore(Use); - assert(SegStart >= Intf.last() && "Couldn't avoid interference"); - assert(SegStart < LastSplitPoint && "Impossible split point"); - SE->useIntv(SegStart, Stop); - continue; - } - } + // We are now looking at a block with interference, and we know that either + // RegIn or RegOut is set. + assert(Intf.hasInterference() && (RegIn || RegOut) && "Bad invariant"); - // Interference is after the last use. - DEBUG(dbgs() << " after last use.\n"); - SlotIndex SegStart = SE->enterIntvAtEnd(*BI.MBB); - assert(SegStart >= Intf.last() && "Couldn't avoid interference"); - } + // If the live range is not live through the block, it is possible that the + // interference doesn't even overlap. Deal with those cases first. Since + // no copy instructions are required, we can tolerate interference starting + // or ending at the same instruction that kills or defines our live range. - // Now all defs leading to live bundles are handled, do everything else. - for (unsigned i = 0; i != UseBlocks.size(); ++i) { - const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; - bool RegIn = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)]; - bool RegOut = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)]; + // Live-in, killed before interference. + // + // ~~~ Interference after kill. + // |---o---x | Killed in block. + // ========= Use MainIntv everywhere. + // + if (RegIn && !BI.LiveOut && BI.LastUse <= Intf.first()) { + DEBUG(dbgs() << ", live-in, killed before interference.\n"); + SE->selectIntv(MainIntv); + SlotIndex To = SE->leaveIntvAfter(BI.LastUse); + SE->useIntv(Start, To); + continue; + } - // Is the register live-in? - if (!BI.LiveIn || !RegIn) + // Live-out, defined after interference. + // + // ~~~ Interference before def. + // | o---o---| Defined in block. + // ========= Use MainIntv everywhere. 
+ // + if (RegOut && !BI.LiveIn && BI.FirstUse >= Intf.last()) { + DEBUG(dbgs() << ", live-out, defined after interference.\n"); + SE->selectIntv(MainIntv); + SlotIndex From = SE->enterIntvBefore(BI.FirstUse); + SE->useIntv(From, Stop); continue; + } - // We have an incoming register. Check for interference. - SlotIndex Start, Stop; - tie(Start, Stop) = Indexes->getMBBRange(BI.MBB); - Intf.moveToBlock(BI.MBB->getNumber()); - DEBUG(dbgs() << "EB#" << Bundles->getBundle(BI.MBB->getNumber(), 0) - << " -> BB#" << BI.MBB->getNumber() << " [" << Start << ';' - << SA->getLastSplitPoint(BI.MBB->getNumber()) << '-' << Stop - << ')'); + // The interference is now known to overlap the live range, but it may + // still be easy to avoid if all the interference is on one side of the + // uses, and we enter or leave on the stack. - // Check interference entering the block. - if (!Intf.hasInterference()) { - // Block is interference-free. - DEBUG(dbgs() << ", no interference"); - if (!BI.LiveThrough) { - DEBUG(dbgs() << ", killed in block.\n"); - SE->useIntv(Start, SE->leaveIntvAfter(BI.LastUse)); - continue; - } - if (!RegOut) { - SlotIndex LastSplitPoint = SA->getLastSplitPoint(BI.MBB->getNumber()); - // Block is live-through, but exit bundle is on the stack. - // Spill immediately after the last use. - if (BI.LastUse < LastSplitPoint) { - DEBUG(dbgs() << ", uses, stack-out.\n"); - SE->useIntv(Start, SE->leaveIntvAfter(BI.LastUse)); - continue; - } - // The last use is after the last split point, it is probably an - // indirect jump. - DEBUG(dbgs() << ", uses at " << BI.LastUse << " after split point " - << LastSplitPoint << ", stack-out.\n"); - SlotIndex SegEnd = SE->leaveIntvBefore(LastSplitPoint); - SE->useIntv(Start, SegEnd); - // Run a double interval from the split to the last use. - // This makes it possible to spill the complement without affecting the - // indirect branch. - SE->overlapIntv(SegEnd, BI.LastUse); - continue; + // Live-out on stack, interference after last use. + // + // ~~~ Interference after last use. + // |---o---o---| Live-out on stack. + // =========____ Leave MainIntv after last use. + // + // ~ Interference after last use. + // |---o---o--o| Live-out on stack, late last use. + // =========____ Copy to stack after LSP, overlap MainIntv. + // + if (!RegOut && Intf.first() > BI.LastUse.getBoundaryIndex()) { + assert(RegIn && "Stack-in, stack-out should already be handled"); + if (BI.LastUse < LastSplitPoint) { + DEBUG(dbgs() << ", live-in, stack-out, interference after last use.\n"); + SE->selectIntv(MainIntv); + SlotIndex To = SE->leaveIntvAfter(BI.LastUse); + assert(To <= Intf.first() && "Expected to avoid interference"); + SE->useIntv(Start, To); + } else { + DEBUG(dbgs() << ", live-in, stack-out, avoid last split point\n"); + SE->selectIntv(MainIntv); + SlotIndex To = SE->leaveIntvBefore(LastSplitPoint); + assert(To <= Intf.first() && "Expected to avoid interference"); + SE->overlapIntv(To, BI.LastUse); + SE->useIntv(Start, To); } - // Register is live-through. - DEBUG(dbgs() << ", uses, live-through.\n"); - SE->useIntv(Start, Stop); continue; } - // Block has interference. - DEBUG(dbgs() << ", interference from " << Intf.first()); - - if (!BI.LiveThrough && Intf.first() >= BI.LastUse) { - // The interference doesn't reach the outgoing segment. - DEBUG(dbgs() << " doesn't affect kill at " << BI.LastUse << '\n'); - SE->useIntv(Start, BI.LastUse); + // Live-in on stack, interference before first use. + // + // ~~~ Interference before first use. 
+ // |---o---o---| Live-in on stack. + // ____========= Enter MainIntv before first use. + // + if (!RegIn && Intf.last() < BI.FirstUse.getBaseIndex()) { + assert(RegOut && "Stack-in, stack-out should already be handled"); + DEBUG(dbgs() << ", stack-in, interference before first use.\n"); + SE->selectIntv(MainIntv); + SlotIndex From = SE->enterIntvBefore(BI.FirstUse); + assert(From >= Intf.last() && "Expected to avoid interference"); + SE->useIntv(From, Stop); continue; } - if (Intf.first().getBaseIndex() > BI.FirstUse) { - // There are interference-free uses at the beginning of the block. - // Find the last use that can get the register. - SmallVectorImpl<SlotIndex>::const_iterator UI = - std::lower_bound(SA->UseSlots.begin(), SA->UseSlots.end(), - Intf.first().getBaseIndex()); - assert(UI != SA->UseSlots.begin() && "Couldn't find first use"); - SlotIndex Use = (--UI)->getBoundaryIndex(); - DEBUG(dbgs() << ", free use at " << *UI << ".\n"); - SlotIndex SegEnd = SE->leaveIntvAfter(Use); - assert(SegEnd <= Intf.first() && "Couldn't avoid interference"); - SE->useIntv(Start, SegEnd); - continue; + // The interference is overlapping somewhere we wanted to use MainIntv. That + // means we need to create a local interval that can be allocated a + // different register. + DEBUG(dbgs() << ", creating local interval.\n"); + unsigned LocalIntv = SE->openIntv(); + + // We may be creating copies directly between MainIntv and LocalIntv, + // bypassing the stack interval. When we do that, we should never use the + // leaveIntv* methods as they define values in the stack interval. By + // starting from the end of the block and working our way backwards, we can + // get by with only enterIntv* methods. + // + // When selecting split points, we generally try to maximize the stack + // interval as long as it contains no uses, maximize the main interval as + // long as it doesn't overlap interference, and minimize the local interval + // that we don't know how to allocate yet. + + // Handle the block exit, set Pos to the first handled slot. + SlotIndex Pos = BI.LastUse; + if (RegOut) { + assert(Intf.last() < LastSplitPoint && "Cannot be live-out in register"); + // Create a snippet of MainIntv that is live-out. + // + // ~~~ Interference overlapping uses. + // --o---| Live-out in MainIntv. + // ----=== Switch from LocalIntv to MainIntv after interference. + // + SE->selectIntv(MainIntv); + Pos = SE->enterIntvAfter(Intf.last()); + assert(Pos >= Intf.last() && "Expected to avoid interference"); + SE->useIntv(Pos, Stop); + SE->selectIntv(LocalIntv); + } else if (BI.LiveOut) { + if (BI.LastUse < LastSplitPoint) { + // Live-out on the stack. + // + // ~~~ Interference overlapping uses. + // --o---| Live-out on stack. + // ---____ Switch from LocalIntv to stack after last use. + // + Pos = SE->leaveIntvAfter(BI.LastUse); + } else { + // Live-out on the stack, last use after last split point. + // + // ~~~ Interference overlapping uses. + // --o--o| Live-out on stack, late use. + // ------ Copy to stack before LSP, overlap LocalIntv. + // \__ + // + Pos = SE->leaveIntvBefore(LastSplitPoint); + // We need to overlap LocalIntv so it can reach LastUse. + SE->overlapIntv(Pos, BI.LastUse); + } } - // Interference is before the first use. - DEBUG(dbgs() << " before first use.\n"); - SlotIndex SegEnd = SE->leaveIntvAtTop(*BI.MBB); - assert(SegEnd <= Intf.first() && "Couldn't avoid interference"); + // When not live-out, leave Pos at LastUse. We have handled everything from + // Pos to Stop.
Find the starting point for LocalIntv. + assert(SE->currentIntv() == LocalIntv && "Expecting local interval"); + + if (RegIn) { + assert(Start < Intf.first() && "Cannot be live-in with interference"); + // Live-in in MainIntv, only use LocalIntv for interference. + // + // ~~~ Interference overlapping uses. + // |---o-- Live-in in MainIntv. + // ====--- Switch to LocalIntv before interference. + // + SlotIndex Switch = SE->enterIntvBefore(Intf.first()); + assert(Switch <= Intf.first() && "Expected to avoid interference"); + SE->useIntv(Switch, Pos); + SE->selectIntv(MainIntv); + SE->useIntv(Start, Switch); + } else { + // Live-in on stack, enter LocalIntv before first use. + // + // ~~~ Interference overlapping uses. + // |---o-- Live-in in MainIntv. + // ____--- Reload to LocalIntv before interference. + // + // Defined in block. + // + // ~~~ Interference overlapping uses. + // | o-- Defined in block. + // --- Begin LocalIntv at first use. + // + SlotIndex Switch = SE->enterIntvBefore(BI.FirstUse); + SE->useIntv(Switch, Pos); + } } // Handle live-through blocks. + SE->selectIntv(MainIntv); for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) { unsigned Number = Cand.ActiveBlocks[i]; bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)]; diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index 7a2ea6c..0dd3c59 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -18,6 +18,7 @@ #include "VirtRegRewriter.h" #include "RegisterClassInfo.h" #include "Spiller.h" +#include "RegisterCoalescer.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Function.h" #include "llvm/CodeGen/CalcSpillWeights.h" @@ -28,7 +29,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" -#include "llvm/CodeGen/RegisterCoalescer.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -58,11 +58,6 @@ NewHeuristic("new-spilling-heuristic", cl::init(false), cl::Hidden); static cl::opt<bool> -PreSplitIntervals("pre-alloc-split", - cl::desc("Pre-register allocation live interval splitting"), - cl::init(false), cl::Hidden); - -static cl::opt<bool> TrivCoalesceEnds("trivial-coalesce-ends", cl::desc("Attempt trivial coalescing of interval ends"), cl::init(false), cl::Hidden); @@ -101,10 +96,9 @@ namespace { initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); - initializeRegisterCoalescerAnalysisGroup( + initializeRegisterCoalescerPass( *PassRegistry::getPassRegistry()); initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); - initializePreAllocSplittingPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); @@ -217,8 +211,6 @@ namespace { // to coalescing and which analyses coalescing invalidates. 
AU.addRequiredTransitive<RegisterCoalescer>(); AU.addRequired<CalculateSpillWeights>(); - if (PreSplitIntervals) - AU.addRequiredID(PreAllocSplittingID); AU.addRequiredID(LiveStacksID); AU.addPreservedID(LiveStacksID); AU.addRequired<MachineLoopInfo>(); @@ -401,11 +393,10 @@ INITIALIZE_PASS_BEGIN(RALinScan, "linearscan-regalloc", INITIALIZE_PASS_DEPENDENCY(LiveIntervals) INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination) INITIALIZE_PASS_DEPENDENCY(CalculateSpillWeights) -INITIALIZE_PASS_DEPENDENCY(PreAllocSplitting) INITIALIZE_PASS_DEPENDENCY(LiveStacks) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(VirtRegMap) -INITIALIZE_AG_DEPENDENCY(RegisterCoalescer) +INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(RALinScan, "linearscan-regalloc", "Linear Scan Register Allocator", false, false) diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 49f8fb4..72230d4 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -35,6 +35,7 @@ #include "Splitter.h" #include "VirtRegMap.h" #include "VirtRegRewriter.h" +#include "RegisterCoalescer.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" @@ -46,7 +47,6 @@ #include "llvm/CodeGen/PBQP/Graph.h" #include "llvm/CodeGen/PBQP/Heuristics/Briggs.h" #include "llvm/CodeGen/RegAllocRegistry.h" -#include "llvm/CodeGen/RegisterCoalescer.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" @@ -88,7 +88,7 @@ public: : MachineFunctionPass(ID), builder(b), customPassID(cPassID) { initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); - initializeRegisterCoalescerAnalysisGroup(*PassRegistry::getPassRegistry()); + initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); diff --git a/lib/CodeGen/RegisterClassInfo.h b/lib/CodeGen/RegisterClassInfo.h index 6f7d9c9..d21fd67 100644 --- a/lib/CodeGen/RegisterClassInfo.h +++ b/lib/CodeGen/RegisterClassInfo.h @@ -112,7 +112,7 @@ public: /// register, so a register allocator needs to track its liveness and /// availability. 
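/// For example (an illustrative aside, not part of this patch): on x86 the
/// stack pointer %ESP sits in the allocatable GR32 class but is a reserved
/// register, so isAllocatable(%ESP) below returns false even though
/// isInAllocatableClass(%ESP) is true.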
bool isAllocatable(unsigned PhysReg) const { - return TRI->get(PhysReg).inAllocatableClass && !isReserved(PhysReg); + return TRI->isInAllocatableClass(PhysReg) && !isReserved(PhysReg); } }; } // end namespace llvm diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 407559a..d5025b9 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -13,38 +13,92 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/RegisterCoalescer.h" +#define DEBUG_TYPE "regcoalescing" +#include "RegisterCoalescer.h" +#include "VirtRegMap.h" +#include "LiveDebugVariables.h" + +#include "llvm/Pass.h" +#include "llvm/Value.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Pass.h" - +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include <algorithm> +#include <cmath> using namespace llvm; -// Register the RegisterCoalescer interface, providing a nice name to refer to. -INITIALIZE_ANALYSIS_GROUP(RegisterCoalescer, "Register Coalescer", - SimpleRegisterCoalescing) -char RegisterCoalescer::ID = 0; +STATISTIC(numJoins , "Number of interval joins performed"); +STATISTIC(numCrossRCs , "Number of cross class joins performed"); +STATISTIC(numCommutes , "Number of instruction commuting performed"); +STATISTIC(numExtends , "Number of copies extended"); +STATISTIC(NumReMats , "Number of instructions re-materialized"); +STATISTIC(numPeep , "Number of identity moves eliminated after coalescing"); +STATISTIC(numAborts , "Number of times interval joining aborted"); -// RegisterCoalescer destructor: DO NOT move this to the header file -// for RegisterCoalescer or else clients of the RegisterCoalescer -// class may not depend on the RegisterCoalescer.o file in the current -// .a file, causing alias analysis support to not be included in the -// tool correctly! 
-// -RegisterCoalescer::~RegisterCoalescer() {} +static cl::opt<bool> +EnableJoining("join-liveintervals", + cl::desc("Coalesce copies (default=true)"), + cl::init(true)); + +static cl::opt<bool> +DisableCrossClassJoin("disable-cross-class-join", + cl::desc("Avoid coalescing cross register class copies"), + cl::init(false), cl::Hidden); -unsigned CoalescerPair::compose(unsigned a, unsigned b) const { +static cl::opt<bool> +EnablePhysicalJoin("join-physregs", + cl::desc("Join physical register copies"), + cl::init(false), cl::Hidden); + +static cl::opt<bool> +VerifyCoalescing("verify-coalescing", + cl::desc("Verify machine instrs before and after register coalescing"), + cl::Hidden); + +INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing", + "Simple Register Coalescing", false, false) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination) +INITIALIZE_PASS_DEPENDENCY(PHIElimination) +INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing", + "Simple Register Coalescing", false, false) + +char RegisterCoalescer::ID = 0; + +static unsigned compose(const TargetRegisterInfo &tri, unsigned a, unsigned b) { if (!a) return b; if (!b) return a; - return tri_.composeSubRegIndices(a, b); + return tri.composeSubRegIndices(a, b); } -bool CoalescerPair::isMoveInstr(const MachineInstr *MI, - unsigned &Src, unsigned &Dst, - unsigned &SrcSub, unsigned &DstSub) const { +static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI, + unsigned &Src, unsigned &Dst, + unsigned &SrcSub, unsigned &DstSub) { if (MI->isCopy()) { Dst = MI->getOperand(0).getReg(); DstSub = MI->getOperand(0).getSubReg(); @@ -52,7 +106,8 @@ bool CoalescerPair::isMoveInstr(const MachineInstr *MI, SrcSub = MI->getOperand(1).getSubReg(); } else if (MI->isSubregToReg()) { Dst = MI->getOperand(0).getReg(); - DstSub = compose(MI->getOperand(0).getSubReg(), MI->getOperand(3).getImm()); + DstSub = compose(tri, MI->getOperand(0).getSubReg(), + MI->getOperand(3).getImm()); Src = MI->getOperand(2).getReg(); SrcSub = MI->getOperand(2).getSubReg(); } else @@ -66,7 +121,7 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) { flipped_ = crossClass_ = false; unsigned Src, Dst, SrcSub, DstSub; - if (!isMoveInstr(MI, Src, Dst, SrcSub, DstSub)) + if (!isMoveInstr(tri_, MI, Src, Dst, SrcSub, DstSub)) return false; partial_ = SrcSub || DstSub; @@ -156,7 +211,7 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const { if (!MI) return false; unsigned Src, Dst, SrcSub, DstSub; - if (!isMoveInstr(MI, Src, Dst, SrcSub, DstSub)) + if (!isMoveInstr(tri_, MI, Src, Dst, SrcSub, DstSub)) return false; // Find the virtual register that is srcReg_. @@ -185,13 +240,1550 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const { if (dstReg_ != Dst) return false; // Registers match, do the subregisters line up? 
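// Illustration (not part of the patch, assuming ARM's subregister indices):
// if this pair joined srcReg_ as the dsub_1 slice of dstReg_ and MI copies
// Src:ssub_0, the copy is coalescable only when compose(dsub_1, ssub_0),
// i.e. ssub_2, equals DstSub, as checked below.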
- return compose(subIdx_, SrcSub) == DstSub; + return compose(tri_, subIdx_, SrcSub) == DstSub; + } +} + +void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<AliasAnalysis>(); + AU.addRequired<LiveIntervals>(); + AU.addPreserved<LiveIntervals>(); + AU.addRequired<LiveDebugVariables>(); + AU.addPreserved<LiveDebugVariables>(); + AU.addPreserved<SlotIndexes>(); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + AU.addPreservedID(MachineDominatorsID); + AU.addPreservedID(StrongPHIEliminationID); + AU.addPreservedID(PHIEliminationID); + AU.addPreservedID(TwoAddressInstructionPassID); + MachineFunctionPass::getAnalysisUsage(AU); +} + +void RegisterCoalescer::markAsJoined(MachineInstr *CopyMI) { + /// Joined copies are not deleted immediately, but kept in JoinedCopies. + JoinedCopies.insert(CopyMI); + + /// Mark all register operands of CopyMI as <undef> so they won't affect dead + /// code elimination. + for (MachineInstr::mop_iterator I = CopyMI->operands_begin(), + E = CopyMI->operands_end(); I != E; ++I) + if (I->isReg()) + I->setIsUndef(true); +} + +/// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA +/// being the source and IntB being the dest, thus this defines a value number +/// in IntB. If the source value number (in IntA) is defined by a copy from B, +/// see if we can merge these two pieces of B into a single value number, +/// eliminating a copy. For example: +/// +/// A3 = B0 +/// ... +/// B1 = A3 <- this copy +/// +/// In this case, B0 can be extended to where the B1 copy lives, allowing the B1 +/// value number to be replaced with B0 (which simplifies the B live interval). +/// +/// This returns true if an interval was modified. +/// +bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, + MachineInstr *CopyMI) { + // Bail if there is no dst interval - can happen when merging physical subreg + // operations. + if (!li_->hasInterval(CP.getDstReg())) + return false; + + LiveInterval &IntA = + li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); + LiveInterval &IntB = + li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); + SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex(); + + // BValNo is a value number in B that is defined by a copy from A. 'B1' in + // the example above. + LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx); + if (BLR == IntB.end()) return false; + VNInfo *BValNo = BLR->valno; + + // Get the location that B is defined at. Two options: either this value has + // an unknown definition point or it is defined at CopyIdx. If unknown, we + // can't process it. + if (!BValNo->isDefByCopy()) return false; + assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); + + // AValNo is the value number in A that defines the copy, A3 in the example. + SlotIndex CopyUseIdx = CopyIdx.getUseIndex(); + LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx); + // The live range might not exist after fun with physreg coalescing. + if (ALR == IntA.end()) return false; + VNInfo *AValNo = ALR->valno; + // If it's re-defined by an early clobber somewhere in the live range, then + // it's not safe to eliminate the copy. FIXME: This is a temporary workaround.
+ // See PR3149: + // 172 %ECX<def> = MOV32rr %reg1039<kill> + // 180 INLINEASM <es:subl $5,$1 + // sbbl $3,$0>, 10, %EAX<def>, 14, %ECX<earlyclobber,def>, 9, + // %EAX<kill>, + // 36, <fi#0>, 1, %reg0, 0, 9, %ECX<kill>, 36, <fi#1>, 1, %reg0, 0 + // 188 %EAX<def> = MOV32rr %EAX<kill> + // 196 %ECX<def> = MOV32rr %ECX<kill> + // 204 %ECX<def> = MOV32rr %ECX<kill> + // 212 %EAX<def> = MOV32rr %EAX<kill> + // 220 %EAX<def> = MOV32rr %EAX + // 228 %reg1039<def> = MOV32rr %ECX<kill> + // The early clobber operand ties ECX input to the ECX def. + // + // The live interval of ECX is represented as this: + // %reg20,inf = [46,47:1)[174,230:0) 0@174-(230) 1@46-(47) + // The coalescer has no idea there was a def in the middle of [174,230]. + if (AValNo->hasRedefByEC()) + return false; + + // If AValNo is defined as a copy from IntB, we can potentially process this. + // Get the instruction that defines this value number. + if (!CP.isCoalescable(AValNo->getCopy())) + return false; + + // Get the LiveRange in IntB that this value number starts with. + LiveInterval::iterator ValLR = + IntB.FindLiveRangeContaining(AValNo->def.getPrevSlot()); + if (ValLR == IntB.end()) + return false; + + // Make sure that the end of the live range is inside the same block as + // CopyMI. + MachineInstr *ValLREndInst = + li_->getInstructionFromIndex(ValLR->end.getPrevSlot()); + if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent()) + return false; + + // Okay, we now know that ValLR ends in the same block that the CopyMI + // live-range starts. If there are no intervening live ranges between them in + // IntB, we can merge them. + if (ValLR+1 != BLR) return false; + + // If a live interval is a physical register, conservatively check if any + // of its aliases is overlapping the live interval of the virtual register. + // If so, do not coalesce. + if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { + for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS) + if (li_->hasInterval(*AS) && IntA.overlaps(li_->getInterval(*AS))) { + DEBUG({ + dbgs() << "\t\tInterfere with alias "; + li_->getInterval(*AS).print(dbgs(), tri_); + }); + return false; + } + } + + DEBUG({ + dbgs() << "Extending: "; + IntB.print(dbgs(), tri_); + }); + + SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start; + // We are about to delete CopyMI, so need to remove it as the 'instruction + // that defines this value #'. Update the valnum with the new defining + // instruction #. + BValNo->def = FillerStart; + BValNo->setCopy(0); + + // Okay, we can merge them. We need to insert a new liverange: + // [ValLR.end, BLR.begin) of either value number, then we merge the + // two value numbers. + IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo)); + + // If the IntB live range is assigned to a physical register, and if that + // physreg has sub-registers, update their live intervals as well. + if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { + for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) { + if (!li_->hasInterval(*SR)) + continue; + LiveInterval &SRLI = li_->getInterval(*SR); + SRLI.addRange(LiveRange(FillerStart, FillerEnd, + SRLI.getNextValue(FillerStart, 0, + li_->getVNInfoAllocator()))); + } + } + + // Okay, merge "B1" into the same value number as "B0". + if (BValNo != ValLR->valno) { + // If B1 is killed by a PHI, then the merged live range must also be killed + // by the same PHI, as B0 and B1 can not overlap. 
+ bool HasPHIKill = BValNo->hasPHIKill(); + IntB.MergeValueNumberInto(BValNo, ValLR->valno); + if (HasPHIKill) + ValLR->valno->setHasPHIKill(true); + } + DEBUG({ + dbgs() << " result = "; + IntB.print(dbgs(), tri_); + dbgs() << "\n"; + }); + + // If the source instruction was killing the source register before the + // merge, unset the isKill marker given the live range has been extended. + int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true); + if (UIdx != -1) { + ValLREndInst->getOperand(UIdx).setIsKill(false); + } + + // If the copy instruction was killing the destination register before the + // merge, find the last use and trim the live range. That will also add the + // isKill marker. + if (ALR->end == CopyIdx) + li_->shrinkToUses(&IntA); + + ++numExtends; + return true; +} + +/// HasOtherReachingDefs - Return true if there are definitions of IntB +/// other than BValNo val# that can reach uses of AValNo val# of IntA. +bool RegisterCoalescer::HasOtherReachingDefs(LiveInterval &IntA, + LiveInterval &IntB, + VNInfo *AValNo, + VNInfo *BValNo) { + for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); + AI != AE; ++AI) { + if (AI->valno != AValNo) continue; + LiveInterval::Ranges::iterator BI = + std::upper_bound(IntB.ranges.begin(), IntB.ranges.end(), AI->start); + if (BI != IntB.ranges.begin()) + --BI; + for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) { + if (BI->valno == BValNo) + continue; + if (BI->start <= AI->start && BI->end > AI->start) + return true; + if (BI->start > AI->start && BI->start < AI->end) + return true; + } + } + return false; +} + +/// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with +/// IntA being the source and IntB being the dest, thus this defines a value +/// number in IntB. If the source value number (in IntA) is defined by a +/// commutable instruction and its other operand is coalesced to the copy dest +/// register, see if we can transform the copy into a noop by commuting the +/// definition. For example, +/// +/// A3 = op A2 B0<kill> +/// ... +/// B1 = A3 <- this copy +/// ... +/// = op A3 <- more uses +/// +/// ==> +/// +/// B2 = op B0 A2<kill> +/// ... +/// B1 = B2 <- now an identity copy +/// ... +/// = op B2 <- more uses +/// +/// This returns true if an interval was modified. +/// +bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, + MachineInstr *CopyMI) { + // FIXME: For now, only eliminate the copy by commuting its def when the + // source register is a virtual register. We want to guard against cases + // where the copy is a back edge copy and commuting the def lengthens the + // live interval of the source register to the entire loop. + if (CP.isPhys() && CP.isFlipped()) + return false; + + // Bail if there is no dst interval. + if (!li_->hasInterval(CP.getDstReg())) + return false; + + SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex(); + + LiveInterval &IntA = + li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); + LiveInterval &IntB = + li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); + + // BValNo is a value number in B that is defined by a copy from A. 'B1' in + // the example above. + VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx); + if (!BValNo || !BValNo->isDefByCopy()) + return false; + + assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); + + // AValNo is the value number in A that defines the copy, A3 in the example.
+ VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getUseIndex()); + assert(AValNo && "COPY source not live"); + + // If other defs can reach uses of this def, then it's not safe to perform + // the optimization. + if (AValNo->isPHIDef() || AValNo->isUnused() || AValNo->hasPHIKill()) + return false; + MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def); + if (!DefMI) + return false; + const MCInstrDesc &MCID = DefMI->getDesc(); + if (!MCID.isCommutable()) + return false; + // If DefMI is a two-address instruction then commuting it will change the + // destination register. + int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg); + assert(DefIdx != -1); + unsigned UseOpIdx; + if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx)) + return false; + unsigned Op1, Op2, NewDstIdx; + if (!tii_->findCommutedOpIndices(DefMI, Op1, Op2)) + return false; + if (Op1 == UseOpIdx) + NewDstIdx = Op2; + else if (Op2 == UseOpIdx) + NewDstIdx = Op1; + else + return false; + + MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); + unsigned NewReg = NewDstMO.getReg(); + if (NewReg != IntB.reg || !NewDstMO.isKill()) + return false; + + // Make sure there are no other definitions of IntB that would reach the + // uses which the new definition can reach. + if (HasOtherReachingDefs(IntA, IntB, AValNo, BValNo)) + return false; + + // Abort if the aliases of IntB.reg have values that are not simply the + // clobbers from the superreg. + if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) + for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS) + if (li_->hasInterval(*AS) && + HasOtherReachingDefs(IntA, li_->getInterval(*AS), AValNo, 0)) + return false; + + // If some of the uses of IntA.reg are already coalesced away, return false. + // It's not possible to determine whether it's safe to perform the coalescing. + for (MachineRegisterInfo::use_nodbg_iterator UI = + mri_->use_nodbg_begin(IntA.reg), + UE = mri_->use_nodbg_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + SlotIndex UseIdx = li_->getInstructionIndex(UseMI); + LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); + if (ULR == IntA.end()) + continue; + if (ULR->valno == AValNo && JoinedCopies.count(UseMI)) + return false; + } + + DEBUG(dbgs() << "\tRemoveCopyByCommutingDef: " << AValNo->def << '\t' + << *DefMI); + + // At this point we have decided that it is legal to do this + // transformation. Start by commuting the instruction. + MachineBasicBlock *MBB = DefMI->getParent(); + MachineInstr *NewMI = tii_->commuteInstruction(DefMI); + if (!NewMI) + return false; + if (TargetRegisterInfo::isVirtualRegister(IntA.reg) && + TargetRegisterInfo::isVirtualRegister(IntB.reg) && + !mri_->constrainRegClass(IntB.reg, mri_->getRegClass(IntA.reg))) + return false; + if (NewMI != DefMI) { + li_->ReplaceMachineInstrInMaps(DefMI, NewMI); + MBB->insert(DefMI, NewMI); + MBB->erase(DefMI); + } + unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false); + NewMI->getOperand(OpIdx).setIsKill(); + + // If ALR and BLR overlap and the end of BLR extends beyond the end of ALR, e.g. + // A = or A, B + // ... + // B = A + // ... + // C = A<kill> + // ... + // = B + + // Update uses of IntA of the specific Val# with IntB. + for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg), + UE = mri_->use_end(); UI != UE;) { + MachineOperand &UseMO = UI.getOperand(); + MachineInstr *UseMI = &*UI; + ++UI; + if (JoinedCopies.count(UseMI)) + continue; + if (UseMI->isDebugValue()) { + // FIXME These don't have an instruction index.
Not clear we have enough + // info to decide whether to do this replacement or not. For now do it. + UseMO.setReg(NewReg); + continue; + } + SlotIndex UseIdx = li_->getInstructionIndex(UseMI).getUseIndex(); + LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); + if (ULR == IntA.end() || ULR->valno != AValNo) + continue; + if (TargetRegisterInfo::isPhysicalRegister(NewReg)) + UseMO.substPhysReg(NewReg, *tri_); + else + UseMO.setReg(NewReg); + if (UseMI == CopyMI) + continue; + if (!UseMI->isCopy()) + continue; + if (UseMI->getOperand(0).getReg() != IntB.reg || + UseMI->getOperand(0).getSubReg()) + continue; + + // This copy will become a noop. If it's defining a new val#, merge it into + // BValNo. + SlotIndex DefIdx = UseIdx.getDefIndex(); + VNInfo *DVNI = IntB.getVNInfoAt(DefIdx); + if (!DVNI) + continue; + DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI); + assert(DVNI->def == DefIdx); + BValNo = IntB.MergeValueNumberInto(BValNo, DVNI); + markAsJoined(UseMI); + } + + // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition + // is updated. + VNInfo *ValNo = BValNo; + ValNo->def = AValNo->def; + ValNo->setCopy(0); + for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); + AI != AE; ++AI) { + if (AI->valno != AValNo) continue; + IntB.addRange(LiveRange(AI->start, AI->end, ValNo)); + } + DEBUG(dbgs() << "\t\textended: " << IntB << '\n'); + + IntA.removeValNo(AValNo); + DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n'); + ++numCommutes; + return true; +} + +/// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial +/// computation, replace the copy by rematerializing the definition. +bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt, + bool preserveSrcInt, + unsigned DstReg, + unsigned DstSubIdx, + MachineInstr *CopyMI) { + SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getUseIndex(); + LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx); + assert(SrcLR != SrcInt.end() && "Live range not found!"); + VNInfo *ValNo = SrcLR->valno; + // If other defs can reach uses of this def, then it's not safe to perform + // the optimization. + if (ValNo->isPHIDef() || ValNo->isUnused() || ValNo->hasPHIKill()) + return false; + MachineInstr *DefMI = li_->getInstructionFromIndex(ValNo->def); + if (!DefMI) + return false; + assert(DefMI && "Defining instruction disappeared"); + const MCInstrDesc &MCID = DefMI->getDesc(); + if (!MCID.isAsCheapAsAMove()) + return false; + if (!tii_->isTriviallyReMaterializable(DefMI, AA)) + return false; + bool SawStore = false; + if (!DefMI->isSafeToMove(tii_, AA, SawStore)) + return false; + if (MCID.getNumDefs() != 1) + return false; + if (!DefMI->isImplicitDef()) { + // Make sure the copy destination register class fits the instruction + // definition register class. The mismatch can happen as a result of earlier + // extract_subreg, insert_subreg, subreg_to_reg coalescing. + const TargetRegisterClass *RC = tii_->getRegClass(MCID, 0, tri_); + if (TargetRegisterInfo::isVirtualRegister(DstReg)) { + if (mri_->getRegClass(DstReg) != RC) + return false; + } else if (!RC->contains(DstReg)) + return false; } + + // If the destination register has a sub-register index on it, make sure it + // matches the instruction register class.
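// Illustration (not part of the patch, using x86-64 names): with DstReg in
// GR64 and DstSubIdx == sub_32bit, DstSubRC below is GR32; a candidate
// DefMI that defines a GR32 value passes, a def of the full GR64 class
// simply drops the sub-index, and anything else rejects the remat.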
+ if (DstSubIdx) { + const MCInstrDesc &MCID = DefMI->getDesc(); + if (MCID.getNumDefs() != 1) + return false; + const TargetRegisterClass *DstRC = mri_->getRegClass(DstReg); + const TargetRegisterClass *DstSubRC = + DstRC->getSubRegisterRegClass(DstSubIdx); + const TargetRegisterClass *DefRC = tii_->getRegClass(MCID, 0, tri_); + if (DefRC == DstRC) + DstSubIdx = 0; + else if (DefRC != DstSubRC) + return false; + } + + RemoveCopyFlag(DstReg, CopyMI); + + MachineBasicBlock *MBB = CopyMI->getParent(); + MachineBasicBlock::iterator MII = + llvm::next(MachineBasicBlock::iterator(CopyMI)); + tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, *tri_); + MachineInstr *NewMI = prior(MII); + + // CopyMI may have implicit operands, transfer them over to the newly + // rematerialized instruction. And update implicit def interval valnos. + for (unsigned i = CopyMI->getDesc().getNumOperands(), + e = CopyMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = CopyMI->getOperand(i); + if (MO.isReg() && MO.isImplicit()) + NewMI->addOperand(MO); + if (MO.isDef()) + RemoveCopyFlag(MO.getReg(), CopyMI); + } + + NewMI->copyImplicitOps(CopyMI); + li_->ReplaceMachineInstrInMaps(CopyMI, NewMI); + CopyMI->eraseFromParent(); + ReMatCopies.insert(CopyMI); + ReMatDefs.insert(DefMI); + DEBUG(dbgs() << "Remat: " << *NewMI); + ++NumReMats; + + // The source interval can become smaller because we removed a use. + if (preserveSrcInt) + li_->shrinkToUses(&SrcInt); + + return true; } -// Because of the way .a files work, we must force the SimpleRC -// implementation to be pulled in if the RegisterCoalescer classes are -// pulled in. Otherwise we run the risk of RegisterCoalescer being -// used, but the default implementation not being linked into the tool -// that uses it. -DEFINING_FILE_FOR(RegisterCoalescer) +/// UpdateRegDefsUses - Replace all defs and uses of SrcReg with DstReg and +/// update the subregister number if it is not zero. If DstReg is a +/// physical register and the existing subregister number of the def / use +/// being updated is not zero, make sure to set it to the correct physical +/// subregister. +void +RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) { + bool DstIsPhys = CP.isPhys(); + unsigned SrcReg = CP.getSrcReg(); + unsigned DstReg = CP.getDstReg(); + unsigned SubIdx = CP.getSubIdx(); + + // Update LiveDebugVariables. + ldv_->renameRegister(SrcReg, DstReg, SubIdx); + + for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg); + MachineInstr *UseMI = I.skipInstruction();) { + // A PhysReg copy that won't be coalesced can perhaps be rematerialized + // instead. + if (DstIsPhys) { + if (UseMI->isCopy() && + !UseMI->getOperand(1).getSubReg() && + !UseMI->getOperand(0).getSubReg() && + UseMI->getOperand(1).getReg() == SrcReg && + UseMI->getOperand(0).getReg() != SrcReg && + UseMI->getOperand(0).getReg() != DstReg && + !JoinedCopies.count(UseMI) && + ReMaterializeTrivialDef(li_->getInterval(SrcReg), false, + UseMI->getOperand(0).getReg(), 0, UseMI)) + continue; + } + + SmallVector<unsigned,8> Ops; + bool Reads, Writes; + tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops); + bool Kills = false, Deads = false; + + // Replace SrcReg with DstReg in all UseMI operands.
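// Illustration (not part of the patch): a use such as ADD32rr ..., %src
// simply becomes ADD32rr ..., %dst; for a virtual DstReg with a nonzero
// SubIdx, substVirtReg also folds the sub-register index into the operand,
// while substPhysReg resolves it to the concrete physical sub-register.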
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + MachineOperand &MO = UseMI->getOperand(Ops[i]); + Kills |= MO.isKill(); + Deads |= MO.isDead(); + + if (DstIsPhys) + MO.substPhysReg(DstReg, *tri_); + else + MO.substVirtReg(DstReg, SubIdx, *tri_); + } + + // This instruction is a copy that will be removed. + if (JoinedCopies.count(UseMI)) + continue; + + if (SubIdx) { + // If UseMI was a simple SrcReg def, make sure we didn't turn it into a + // read-modify-write of DstReg. + if (Deads) + UseMI->addRegisterDead(DstReg, tri_); + else if (!Reads && Writes) + UseMI->addRegisterDefined(DstReg, tri_); + + // Kill flags apply to the whole physical register. + if (DstIsPhys && Kills) + UseMI->addRegisterKilled(DstReg, tri_); + } + + DEBUG({ + dbgs() << "\t\tupdated: "; + if (!UseMI->isDebugValue()) + dbgs() << li_->getInstructionIndex(UseMI) << "\t"; + dbgs() << *UseMI; + }); + } +} + +/// removeIntervalIfEmpty - Check if the live interval of a physical register +/// is empty, if so remove it and also remove the empty intervals of its +/// sub-registers. Return true if live interval is removed. +static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *li_, + const TargetRegisterInfo *tri_) { + if (li.empty()) { + if (TargetRegisterInfo::isPhysicalRegister(li.reg)) + for (const unsigned* SR = tri_->getSubRegisters(li.reg); *SR; ++SR) { + if (!li_->hasInterval(*SR)) + continue; + LiveInterval &sli = li_->getInterval(*SR); + if (sli.empty()) + li_->removeInterval(*SR); + } + li_->removeInterval(li.reg); + return true; + } + return false; +} + +/// RemoveDeadDef - If a def of a live interval is now determined dead, remove +/// the val# it defines. If the live interval becomes empty, remove it as well. +bool RegisterCoalescer::RemoveDeadDef(LiveInterval &li, + MachineInstr *DefMI) { + SlotIndex DefIdx = li_->getInstructionIndex(DefMI).getDefIndex(); + LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx); + if (DefIdx != MLR->valno->def) + return false; + li.removeValNo(MLR->valno); + return removeIntervalIfEmpty(li, li_, tri_); +} + +void RegisterCoalescer::RemoveCopyFlag(unsigned DstReg, + const MachineInstr *CopyMI) { + SlotIndex DefIdx = li_->getInstructionIndex(CopyMI).getDefIndex(); + if (li_->hasInterval(DstReg)) { + LiveInterval &LI = li_->getInterval(DstReg); + if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) + if (LR->valno->def == DefIdx) + LR->valno->setCopy(0); + } + if (!TargetRegisterInfo::isPhysicalRegister(DstReg)) + return; + for (const unsigned* AS = tri_->getAliasSet(DstReg); *AS; ++AS) { + if (!li_->hasInterval(*AS)) + continue; + LiveInterval &LI = li_->getInterval(*AS); + if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) + if (LR->valno->def == DefIdx) + LR->valno->setCopy(0); + } +} + +/// shouldJoinPhys - Return true if a copy involving a physreg should be joined. +/// We need to be careful about coalescing a source physical register with a +/// virtual register. Once the coalescing is done, it cannot be broken and these +/// are not spillable! If the destination interval uses are far away, think +/// twice about coalescing them! +bool RegisterCoalescer::shouldJoinPhys(CoalescerPair &CP) { + bool Allocatable = li_->isAllocatable(CP.getDstReg()); + LiveInterval &JoinVInt = li_->getInterval(CP.getSrcReg()); + + /// Always join simple intervals that are defined by a single copy from a + /// reserved register. This doesn't increase register pressure, so it is + /// always beneficial. 
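// Illustration (not part of the patch): a virtual register defined only by
// %vreg = COPY %ESP can be joined back into the reserved %ESP; since %ESP
// is never allocated anyway, the join removes the copy without costing any
// allocatable register.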
+ if (!Allocatable && CP.isFlipped() && JoinVInt.containsOneValue()) + return true; + + if (!EnablePhysicalJoin) { + DEBUG(dbgs() << "\tPhysreg joins disabled.\n"); + return false; + } + + // Only coalesce to an allocatable physreg; we don't want to risk modifying + // reserved registers. + if (!Allocatable) { + DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n"); + return false; // Not coalescable. + } + + // Don't join with physregs that have a ridiculous number of live + // ranges. The data structure performance is really bad when that + // happens. + if (li_->hasInterval(CP.getDstReg()) && + li_->getInterval(CP.getDstReg()).ranges.size() > 1000) { + ++numAborts; + DEBUG(dbgs() + << "\tPhysical register live interval too complicated, abort!\n"); + return false; + } + + // FIXME: Why are we skipping this test for partial copies? + // CodeGen/X86/phys_subreg_coalesce-3.ll needs it. + if (!CP.isPartial()) { + const TargetRegisterClass *RC = mri_->getRegClass(CP.getSrcReg()); + unsigned Threshold = RegClassInfo.getNumAllocatableRegs(RC) * 2; + unsigned Length = li_->getApproximateInstructionCount(JoinVInt); + if (Length > Threshold) { + ++numAborts; + DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n"); + return false; + } + } + return true; +} + +/// isWinToJoinCrossClass - Return true if it's profitable to coalesce +/// two virtual registers from different register classes. +bool +RegisterCoalescer::isWinToJoinCrossClass(unsigned SrcReg, + unsigned DstReg, + const TargetRegisterClass *SrcRC, + const TargetRegisterClass *DstRC, + const TargetRegisterClass *NewRC) { + unsigned NewRCCount = RegClassInfo.getNumAllocatableRegs(NewRC); + // This heuristic is good enough in practice, but it's obviously not *right*. + // 4 is a magic number that works well enough for x86, ARM, etc. It filters + // out all but the most restrictive register classes. + if (NewRCCount > 4 || + // Early exit if the function is fairly small, coalesce aggressively if + // that's the case. For really special register classes with 3 or + // fewer registers, be a bit more careful. + (li_->getFuncInstructionCount() / NewRCCount) < 8) + return true; + LiveInterval &SrcInt = li_->getInterval(SrcReg); + LiveInterval &DstInt = li_->getInterval(DstReg); + unsigned SrcSize = li_->getApproximateInstructionCount(SrcInt); + unsigned DstSize = li_->getApproximateInstructionCount(DstInt); + + // Coalesce aggressively if the intervals are small compared to the number of + // registers in the new class. The number 4 is fairly arbitrary, chosen to be + // less aggressive than the 8 used for the whole function size. + const unsigned ThresSize = 4 * NewRCCount; + if (SrcSize <= ThresSize && DstSize <= ThresSize) + return true; + + // Estimate *register use density*. If it doubles or more, abort.
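// Worked example (hypothetical numbers, not part of the patch): SrcUses=10
// over SrcSize=40 in a class with SrcRCCount=8 gives a per-register density
// of 10/(40*8); if the merged interval had NewUses=30 over NewSize=60 with
// NewRCCount=4, its density 30/(60*4) is four times higher, so
// NewUses*SrcSize*SrcRCCount (9600) exceeds 2*SrcUses*NewSize*NewRCCount
// (4800) and the join is rejected below.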
+ unsigned SrcUses = std::distance(mri_->use_nodbg_begin(SrcReg), + mri_->use_nodbg_end()); + unsigned DstUses = std::distance(mri_->use_nodbg_begin(DstReg), + mri_->use_nodbg_end()); + unsigned NewUses = SrcUses + DstUses; + unsigned NewSize = SrcSize + DstSize; + if (SrcRC != NewRC && SrcSize > ThresSize) { + unsigned SrcRCCount = RegClassInfo.getNumAllocatableRegs(SrcRC); + if (NewUses*SrcSize*SrcRCCount > 2*SrcUses*NewSize*NewRCCount) + return false; + } + if (DstRC != NewRC && DstSize > ThresSize) { + unsigned DstRCCount = RegClassInfo.getNumAllocatableRegs(DstRC); + if (NewUses*DstSize*DstRCCount > 2*DstUses*NewSize*NewRCCount) + return false; + } + return true; +} + + +/// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg, +/// which are the src/dst of the copy instruction CopyMI. This returns true +/// if the copy was successfully coalesced away. If it is not currently +/// possible to coalesce this interval, but it may be possible if other +/// things get coalesced, then it returns true by reference in 'Again'. +bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) { + + Again = false; + if (JoinedCopies.count(CopyMI) || ReMatCopies.count(CopyMI)) + return false; // Already done. + + DEBUG(dbgs() << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI); + + CoalescerPair CP(*tii_, *tri_); + if (!CP.setRegisters(CopyMI)) { + DEBUG(dbgs() << "\tNot coalescable.\n"); + return false; + } + + // If they are already joined we continue. + if (CP.getSrcReg() == CP.getDstReg()) { + markAsJoined(CopyMI); + DEBUG(dbgs() << "\tCopy already coalesced.\n"); + return false; // Not coalescable. + } + + DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), tri_) + << " with " << PrintReg(CP.getDstReg(), tri_, CP.getSubIdx()) + << "\n"); + + // Enforce policies. + if (CP.isPhys()) { + if (!shouldJoinPhys(CP)) { + // Before giving up coalescing, if definition of source is defined by + // trivial computation, try rematerializing it. + if (!CP.isFlipped() && + ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true, + CP.getDstReg(), 0, CopyMI)) + return true; + return false; + } + } else { + // Avoid constraining virtual register regclass too much. + if (CP.isCrossClass()) { + DEBUG(dbgs() << "\tCross-class to " << CP.getNewRC()->getName() << ".\n"); + if (DisableCrossClassJoin) { + DEBUG(dbgs() << "\tCross-class joins disabled.\n"); + return false; + } + if (!isWinToJoinCrossClass(CP.getSrcReg(), CP.getDstReg(), + mri_->getRegClass(CP.getSrcReg()), + mri_->getRegClass(CP.getDstReg()), + CP.getNewRC())) { + DEBUG(dbgs() << "\tAvoid coalescing to constrained register class.\n"); + Again = true; // May be possible to coalesce later. + return false; + } + } + + // When possible, let DstReg be the larger interval. + if (!CP.getSubIdx() && li_->getInterval(CP.getSrcReg()).ranges.size() > + li_->getInterval(CP.getDstReg()).ranges.size()) + CP.flip(); + } + + // Okay, attempt to join these two intervals. On failure, this returns false. + // Otherwise, if one of the intervals being joined is a physreg, this method + // always canonicalizes DstInt to be it. The output "SrcInt" will not have + // been modified, so we can use this information below to update aliases. + if (!JoinIntervals(CP)) { + // Coalescing failed. + + // If definition of source is defined by trivial computation, try + // rematerializing it. 
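// Illustration (not part of the patch): if the source was defined by an
// as-cheap-as-a-move instruction, e.g. %src = MOV32ri 42 ... %dst = COPY
// %src, the copy can be replaced outright by %dst = MOV32ri 42, so nothing
// needs to be joined at all.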
+ if (!CP.isFlipped() && + ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true, + CP.getDstReg(), 0, CopyMI)) + return true; + + // If we can eliminate the copy without merging the live ranges, do so now. + if (!CP.isPartial()) { + if (AdjustCopiesBackFrom(CP, CopyMI) || + RemoveCopyByCommutingDef(CP, CopyMI)) { + markAsJoined(CopyMI); + DEBUG(dbgs() << "\tTrivial!\n"); + return true; + } + } + + // Otherwise, we are unable to join the intervals. + DEBUG(dbgs() << "\tInterference!\n"); + Again = true; // May be possible to coalesce later. + return false; + } + + // Coalescing to a virtual register that is of a sub-register class of the + // other. Make sure the resulting register is set to the right register class. + if (CP.isCrossClass()) { + ++numCrossRCs; + mri_->setRegClass(CP.getDstReg(), CP.getNewRC()); + } + + // Remember to delete the copy instruction. + markAsJoined(CopyMI); + + UpdateRegDefsUses(CP); + + // If we have extended the live range of a physical register, make sure we + // update live-in lists as well. + if (CP.isPhys()) { + SmallVector<MachineBasicBlock*, 16> BlockSeq; + // JoinIntervals invalidates the VNInfos in SrcInt, but we only need the + // ranges for this, and they are preserved. + LiveInterval &SrcInt = li_->getInterval(CP.getSrcReg()); + for (LiveInterval::const_iterator I = SrcInt.begin(), E = SrcInt.end(); + I != E; ++I ) { + li_->findLiveInMBBs(I->start, I->end, BlockSeq); + for (unsigned idx = 0, size = BlockSeq.size(); idx != size; ++idx) { + MachineBasicBlock &block = *BlockSeq[idx]; + if (!block.isLiveIn(CP.getDstReg())) + block.addLiveIn(CP.getDstReg()); + } + BlockSeq.clear(); + } + } + + // SrcReg is guaranteed to be the register whose live interval is + // being merged. + li_->removeInterval(CP.getSrcReg()); + + // Update regalloc hint. + tri_->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *mf_); + + DEBUG({ + LiveInterval &DstInt = li_->getInterval(CP.getDstReg()); + dbgs() << "\tJoined. Result = "; + DstInt.print(dbgs(), tri_); + dbgs() << "\n"; + }); + + ++numJoins; + return true; +} + +/// ComputeUltimateVN - Assuming we are going to join two live intervals, +/// compute what the resultant value numbers for each value in the two input +/// ranges will be. This is complicated by copies between the two which can +/// and will commonly cause multiple value numbers to be merged into one. +/// +/// VN is the value number that we're trying to resolve. InstDefiningValue +/// keeps track of the new InstDefiningValue assignment for the result +/// LiveInterval. ThisFromOther/OtherFromThis are sets that keep track of +/// whether a value in this or other is a copy from the opposite set. +/// ThisValNoAssignments/OtherValNoAssignments keep track of value #'s that have +/// already been assigned. +/// +/// ThisFromOther[x] - If x is defined as a copy from the other interval, this +/// contains the value number the copy is from. +/// +static unsigned ComputeUltimateVN(VNInfo *VNI, + SmallVector<VNInfo*, 16> &NewVNInfo, + DenseMap<VNInfo*, VNInfo*> &ThisFromOther, + DenseMap<VNInfo*, VNInfo*> &OtherFromThis, + SmallVector<int, 16> &ThisValNoAssignments, + SmallVector<int, 16> &OtherValNoAssignments) { + unsigned VN = VNI->id; + + // If the VN has already been computed, just return it. + if (ThisValNoAssignments[VN] >= 0) + return ThisValNoAssignments[VN]; + assert(ThisValNoAssignments[VN] != -2 && "Cyclic value numbers"); + + // If this val is not a copy from the other val, then it must be a new value + // number in the destination.
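// Illustration of the recursion (hypothetical value numbers, not part of
// the patch): if LHS:VN2 is a copy of RHS:VN7, and RHS:VN7 is in turn a
// copy of LHS:VN1, resolving VN2 recurses through VN7 down to VN1, which
// hits this base case; all three then share VN1's slot in NewVNInfo.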
+ DenseMap<VNInfo*, VNInfo*>::iterator I = ThisFromOther.find(VNI); + if (I == ThisFromOther.end()) { + NewVNInfo.push_back(VNI); + return ThisValNoAssignments[VN] = NewVNInfo.size()-1; + } + VNInfo *OtherValNo = I->second; + + // Otherwise, this *is* a copy from the RHS. If the other side has already + // been computed, return it. + if (OtherValNoAssignments[OtherValNo->id] >= 0) + return ThisValNoAssignments[VN] = OtherValNoAssignments[OtherValNo->id]; + + // Mark this value number as currently being computed, then ask what the + // ultimate value # of the other value is. + ThisValNoAssignments[VN] = -2; + unsigned UltimateVN = + ComputeUltimateVN(OtherValNo, NewVNInfo, OtherFromThis, ThisFromOther, + OtherValNoAssignments, ThisValNoAssignments); + return ThisValNoAssignments[VN] = UltimateVN; +} + + +// Find out if we have something like +// A = X +// B = X +// if so, we can pretend this is actually +// A = X +// B = A +// which allows us to coalesce A and B. +// VNI is the definition of B. LR is the live range of A that includes +// the slot just before B. If we return true, we add "B = X" to DupCopies. +static bool RegistersDefinedFromSameValue(LiveIntervals &li, + const TargetRegisterInfo &tri, + CoalescerPair &CP, + VNInfo *VNI, + LiveRange *LR, + SmallVector<MachineInstr*, 8> &DupCopies) { + return false; // To see if this fixes the i386 dragonegg buildbot miscompile. + // FIXME: This is very conservative. For example, we don't handle + // physical registers. + + MachineInstr *MI = VNI->getCopy(); + + if (!MI->isFullCopy() || CP.isPartial() || CP.isPhys()) + return false; + + unsigned Dst = MI->getOperand(0).getReg(); + unsigned Src = MI->getOperand(1).getReg(); + + // FIXME: If "B = X" kills X, we have to move the kill back to its + // previous use. For now we just avoid the optimization in that case. + LiveInterval &SrcInt = li.getInterval(Src); + if (SrcInt.killedAt(VNI->def)) + return false; + + if (!TargetRegisterInfo::isVirtualRegister(Src) || + !TargetRegisterInfo::isVirtualRegister(Dst)) + return false; + + unsigned A = CP.getDstReg(); + unsigned B = CP.getSrcReg(); + + if (B == Dst) + std::swap(A, B); + assert(Dst == A); + + VNInfo *Other = LR->valno; + if (!Other->isDefByCopy()) + return false; + const MachineInstr *OtherMI = Other->getCopy(); + + if (!OtherMI->isFullCopy()) + return false; + + unsigned OtherDst = OtherMI->getOperand(0).getReg(); + unsigned OtherSrc = OtherMI->getOperand(1).getReg(); + + if (!TargetRegisterInfo::isVirtualRegister(OtherSrc) || + !TargetRegisterInfo::isVirtualRegister(OtherDst)) + return false; + + assert(OtherDst == B); + + if (Src != OtherSrc) + return false; + + DupCopies.push_back(MI); + + return true; +} + +/// JoinIntervals - Attempt to join these two intervals. On failure, this +/// returns false. +bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { + LiveInterval &RHS = li_->getInterval(CP.getSrcReg()); + DEBUG({ dbgs() << "\t\tRHS = "; RHS.print(dbgs(), tri_); dbgs() << "\n"; }); + + // If a live interval is a physical register, check for interference with any + // aliases. The interference check implemented here is a bit more conservative + // than the full interference check below. We allow overlapping live ranges + // only when one is a copy of the other.
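// Illustration (not part of the patch, x86 names): when joining a virtual
// register into %EAX, the live intervals of its aliases %AX, %AH and %AL
// are inspected as well; an overlapping %AX range is tolerated only where
// it begins at a def that is itself a copy coalescable with this pair.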
+ if (CP.isPhys()) { + for (const unsigned *AS = tri_->getAliasSet(CP.getDstReg()); *AS; ++AS){ + if (!li_->hasInterval(*AS)) + continue; + const LiveInterval &LHS = li_->getInterval(*AS); + LiveInterval::const_iterator LI = LHS.begin(); + for (LiveInterval::const_iterator RI = RHS.begin(), RE = RHS.end(); + RI != RE; ++RI) { + LI = std::lower_bound(LI, LHS.end(), RI->start); + // Does LHS have an overlapping live range starting before RI? + if ((LI != LHS.begin() && LI[-1].end > RI->start) && + (RI->start != RI->valno->def || + !CP.isCoalescable(li_->getInstructionFromIndex(RI->start)))) { + DEBUG({ + dbgs() << "\t\tInterference from alias: "; + LHS.print(dbgs(), tri_); + dbgs() << "\n\t\tOverlap at " << RI->start << " and no copy.\n"; + }); + return false; + } + + // Check that LHS ranges beginning in this range are copies. + for (; LI != LHS.end() && LI->start < RI->end; ++LI) { + if (LI->start != LI->valno->def || + !CP.isCoalescable(li_->getInstructionFromIndex(LI->start))) { + DEBUG({ + dbgs() << "\t\tInterference from alias: "; + LHS.print(dbgs(), tri_); + dbgs() << "\n\t\tDef at " << LI->start << " is not a copy.\n"; + }); + return false; + } + } + } + } + } + + // Compute the final value assignment, assuming that the live ranges can be + // coalesced. + SmallVector<int, 16> LHSValNoAssignments; + SmallVector<int, 16> RHSValNoAssignments; + DenseMap<VNInfo*, VNInfo*> LHSValsDefinedFromRHS; + DenseMap<VNInfo*, VNInfo*> RHSValsDefinedFromLHS; + SmallVector<VNInfo*, 16> NewVNInfo; + + SmallVector<MachineInstr*, 8> DupCopies; + + LiveInterval &LHS = li_->getOrCreateInterval(CP.getDstReg()); + DEBUG({ dbgs() << "\t\tLHS = "; LHS.print(dbgs(), tri_); dbgs() << "\n"; }); + + // Loop over the value numbers of the LHS, seeing if any are defined from + // the RHS. + for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy? + continue; + + // Never join with a register that has EarlyClobber redefs. + if (VNI->hasRedefByEC()) + return false; + + // Figure out the value # from the RHS. + LiveRange *lr = RHS.getLiveRangeContaining(VNI->def.getPrevSlot()); + // The copy could be to an aliased physreg. + if (!lr) continue; + + // DstReg is known to be a register in the LHS interval. If the src is + // from the RHS interval, we can use its value #. + MachineInstr *MI = VNI->getCopy(); + if (!CP.isCoalescable(MI) && + !RegistersDefinedFromSameValue(*li_, *tri_, CP, VNI, lr, DupCopies)) + continue; + + LHSValsDefinedFromRHS[VNI] = lr->valno; + } + + // Loop over the value numbers of the RHS, seeing if any are defined from + // the LHS. + for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy? + continue; + + // Never join with a register that has EarlyClobber redefs. + if (VNI->hasRedefByEC()) + return false; + + // Figure out the value # from the LHS. + LiveRange *lr = LHS.getLiveRangeContaining(VNI->def.getPrevSlot()); + // The copy could be to an aliased physreg. + if (!lr) continue; + + // DstReg is known to be a register in the RHS interval. If the src is + // from the LHS interval, we can use its value #. 
+ MachineInstr *MI = VNI->getCopy(); + if (!CP.isCoalescable(MI) && + !RegistersDefinedFromSameValue(*li_, *tri_, CP, VNI, lr, DupCopies)) + continue; + + RHSValsDefinedFromLHS[VNI] = lr->valno; + } + + LHSValNoAssignments.resize(LHS.getNumValNums(), -1); + RHSValNoAssignments.resize(RHS.getNumValNums(), -1); + NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums()); + + for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + unsigned VN = VNI->id; + if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused()) + continue; + ComputeUltimateVN(VNI, NewVNInfo, + LHSValsDefinedFromRHS, RHSValsDefinedFromLHS, + LHSValNoAssignments, RHSValNoAssignments); + } + for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + unsigned VN = VNI->id; + if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused()) + continue; + // If this value number isn't a copy from the LHS, it's a new number. + if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) { + NewVNInfo.push_back(VNI); + RHSValNoAssignments[VN] = NewVNInfo.size()-1; + continue; + } + + ComputeUltimateVN(VNI, NewVNInfo, + RHSValsDefinedFromLHS, LHSValsDefinedFromRHS, + RHSValNoAssignments, LHSValNoAssignments); + } + + // Armed with the mappings of LHS/RHS values to ultimate values, walk the + // interval lists to see if these intervals are coalescable. + LiveInterval::const_iterator I = LHS.begin(); + LiveInterval::const_iterator IE = LHS.end(); + LiveInterval::const_iterator J = RHS.begin(); + LiveInterval::const_iterator JE = RHS.end(); + + // Skip ahead until the first place of potential sharing. + if (I != IE && J != JE) { + if (I->start < J->start) { + I = std::upper_bound(I, IE, J->start); + if (I != LHS.begin()) --I; + } else if (J->start < I->start) { + J = std::upper_bound(J, JE, I->start); + if (J != RHS.begin()) --J; + } + } + + while (I != IE && J != JE) { + // Determine if these two live ranges overlap. + bool Overlaps; + if (I->start < J->start) { + Overlaps = I->end > J->start; + } else { + Overlaps = J->end > I->start; + } + + // If so, check value # info to determine if they are really different. + if (Overlaps) { + // If the overlapping live ranges map to the same value number in the + // result live range, we can still coalesce them. If not, we can't. + if (LHSValNoAssignments[I->valno->id] != + RHSValNoAssignments[J->valno->id]) + return false; + // If it's re-defined by an early clobber somewhere in the live range, + // then conservatively abort coalescing. + if (NewVNInfo[LHSValNoAssignments[I->valno->id]]->hasRedefByEC()) + return false; + } + + if (I->end < J->end) + ++I; + else + ++J; + } + + // Update kill info. Some live ranges are extended due to copy coalescing. + for (DenseMap<VNInfo*, VNInfo*>::iterator I = LHSValsDefinedFromRHS.begin(), + E = LHSValsDefinedFromRHS.end(); I != E; ++I) { + VNInfo *VNI = I->first; + unsigned LHSValID = LHSValNoAssignments[VNI->id]; + if (VNI->hasPHIKill()) + NewVNInfo[LHSValID]->setHasPHIKill(true); + } + + // Update kill info. Some live ranges are extended due to copy coalescing.
+  for (DenseMap<VNInfo*, VNInfo*>::iterator I = RHSValsDefinedFromLHS.begin(),
+         E = RHSValsDefinedFromLHS.end(); I != E; ++I) {
+    VNInfo *VNI = I->first;
+    unsigned RHSValID = RHSValNoAssignments[VNI->id];
+    if (VNI->hasPHIKill())
+      NewVNInfo[RHSValID]->setHasPHIKill(true);
+  }
+
+  if (LHSValNoAssignments.empty())
+    LHSValNoAssignments.push_back(-1);
+  if (RHSValNoAssignments.empty())
+    RHSValNoAssignments.push_back(-1);
+
+  for (SmallVector<MachineInstr*, 8>::iterator I = DupCopies.begin(),
+         E = DupCopies.end(); I != E; ++I) {
+    MachineInstr *MI = *I;
+
+    // We have pretended that the assignment to B in
+    // A = X
+    // B = X
+    // was actually a copy from A. Now that we decided to coalesce A and B,
+    // transform the code into
+    // A = X
+    // X = X
+    // and mark the X as coalesced to keep the illusion.
+    unsigned Src = MI->getOperand(1).getReg();
+    MI->getOperand(0).substVirtReg(Src, 0, *tri_);
+
+    markAsJoined(MI);
+  }
+
+  // If we get here, we know that we can coalesce the live ranges. Ask the
+  // intervals to coalesce themselves now.
+  LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo,
+           mri_);
+  return true;
+}
+
+namespace {
+  // DepthMBBCompare - Comparison predicate that sorts first based on the loop
+  // depth of the basic block (the unsigned), and then on the MBB number.
+  struct DepthMBBCompare {
+    typedef std::pair<unsigned, MachineBasicBlock*> DepthMBBPair;
+    bool operator()(const DepthMBBPair &LHS, const DepthMBBPair &RHS) const {
+      // Deeper loops first
+      if (LHS.first != RHS.first)
+        return LHS.first > RHS.first;
+
+      // Prefer blocks that are more connected in the CFG. This takes care of
+      // the most difficult copies first while intervals are short.
+      unsigned cl = LHS.second->pred_size() + LHS.second->succ_size();
+      unsigned cr = RHS.second->pred_size() + RHS.second->succ_size();
+      if (cl != cr)
+        return cl > cr;
+
+      // As a last resort, sort by block number.
+      return LHS.second->getNumber() < RHS.second->getNumber();
+    }
+  };
+}
+
+void RegisterCoalescer::CopyCoalesceInMBB(MachineBasicBlock *MBB,
+                                          std::vector<MachineInstr*> &TryAgain) {
+  DEBUG(dbgs() << MBB->getName() << ":\n");
+
+  SmallVector<MachineInstr*, 8> VirtCopies;
+  SmallVector<MachineInstr*, 8> PhysCopies;
+  SmallVector<MachineInstr*, 8> ImpDefCopies;
+  for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
+       MII != E;) {
+    MachineInstr *Inst = MII++;
+
+    // If this isn't a copy nor an extract_subreg, we can't join intervals.
+    unsigned SrcReg, DstReg;
+    if (Inst->isCopy()) {
+      DstReg = Inst->getOperand(0).getReg();
+      SrcReg = Inst->getOperand(1).getReg();
+    } else if (Inst->isSubregToReg()) {
+      DstReg = Inst->getOperand(0).getReg();
+      SrcReg = Inst->getOperand(2).getReg();
+    } else
+      continue;
+
+    bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
+    bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+    if (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty())
+      ImpDefCopies.push_back(Inst);
+    else if (SrcIsPhys || DstIsPhys)
+      PhysCopies.push_back(Inst);
+    else
+      VirtCopies.push_back(Inst);
+  }
+
+  // Try coalescing implicit copies and insert_subreg <undef> first,
+  // followed by copies to / from physical registers, then finally copies
+  // from virtual registers to virtual registers.
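DepthMBBCompare above is a strict weak ordering built as a tie-breaking ladder. The same pattern with plain data, as a compact sketch (BlockInfo is an illustrative stand-in for the (loop depth, MBB) pairs); the three passes the comment above describes then follow below:

    #include <algorithm>
    #include <vector>

    // Stand-in for the (loop depth, block) pairs sorted by DepthMBBCompare.
    struct BlockInfo {
      unsigned LoopDepth;    // deeper blocks get coalesced first
      unsigned Connectivity; // pred_size() + succ_size() equivalent
      int Number;            // deterministic final tie-breaker
    };

    // Each criterion fires only when all earlier ones tie, which keeps the
    // predicate a strict weak ordering (a requirement of std::sort).
    static bool depthOrder(const BlockInfo &L, const BlockInfo &R) {
      if (L.LoopDepth != R.LoopDepth)
        return L.LoopDepth > R.LoopDepth;
      if (L.Connectivity != R.Connectivity)
        return L.Connectivity > R.Connectivity;
      return L.Number < R.Number;
    }

    void sortForCoalescing(std::vector<BlockInfo> &Blocks) {
      std::sort(Blocks.begin(), Blocks.end(), depthOrder);
    }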
+ for (unsigned i = 0, e = ImpDefCopies.size(); i != e; ++i) { + MachineInstr *TheCopy = ImpDefCopies[i]; + bool Again = false; + if (!JoinCopy(TheCopy, Again)) + if (Again) + TryAgain.push_back(TheCopy); + } + for (unsigned i = 0, e = PhysCopies.size(); i != e; ++i) { + MachineInstr *TheCopy = PhysCopies[i]; + bool Again = false; + if (!JoinCopy(TheCopy, Again)) + if (Again) + TryAgain.push_back(TheCopy); + } + for (unsigned i = 0, e = VirtCopies.size(); i != e; ++i) { + MachineInstr *TheCopy = VirtCopies[i]; + bool Again = false; + if (!JoinCopy(TheCopy, Again)) + if (Again) + TryAgain.push_back(TheCopy); + } +} + +void RegisterCoalescer::joinIntervals() { + DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n"); + + std::vector<MachineInstr*> TryAgainList; + if (loopInfo->empty()) { + // If there are no loops in the function, join intervals in function order. + for (MachineFunction::iterator I = mf_->begin(), E = mf_->end(); + I != E; ++I) + CopyCoalesceInMBB(I, TryAgainList); + } else { + // Otherwise, join intervals in inner loops before other intervals. + // Unfortunately we can't just iterate over loop hierarchy here because + // there may be more MBB's than BB's. Collect MBB's for sorting. + + // Join intervals in the function prolog first. We want to join physical + // registers with virtual registers before the intervals got too long. + std::vector<std::pair<unsigned, MachineBasicBlock*> > MBBs; + for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();I != E;++I){ + MachineBasicBlock *MBB = I; + MBBs.push_back(std::make_pair(loopInfo->getLoopDepth(MBB), I)); + } + + // Sort by loop depth. + std::sort(MBBs.begin(), MBBs.end(), DepthMBBCompare()); + + // Finally, join intervals in loop nest order. + for (unsigned i = 0, e = MBBs.size(); i != e; ++i) + CopyCoalesceInMBB(MBBs[i].second, TryAgainList); + } + + // Joining intervals can allow other intervals to be joined. Iteratively join + // until we make no progress. + bool ProgressMade = true; + while (ProgressMade) { + ProgressMade = false; + + for (unsigned i = 0, e = TryAgainList.size(); i != e; ++i) { + MachineInstr *&TheCopy = TryAgainList[i]; + if (!TheCopy) + continue; + + bool Again = false; + bool Success = JoinCopy(TheCopy, Again); + if (Success || !Again) { + TheCopy= 0; // Mark this one as done. + ProgressMade = true; + } + } + } +} + +void RegisterCoalescer::releaseMemory() { + JoinedCopies.clear(); + ReMatCopies.clear(); + ReMatDefs.clear(); +} + +bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { + mf_ = &fn; + mri_ = &fn.getRegInfo(); + tm_ = &fn.getTarget(); + tri_ = tm_->getRegisterInfo(); + tii_ = tm_->getInstrInfo(); + li_ = &getAnalysis<LiveIntervals>(); + ldv_ = &getAnalysis<LiveDebugVariables>(); + AA = &getAnalysis<AliasAnalysis>(); + loopInfo = &getAnalysis<MachineLoopInfo>(); + + DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n" + << "********** Function: " + << ((Value*)mf_->getFunction())->getName() << '\n'); + + if (VerifyCoalescing) + mf_->verify(this, "Before register coalescing"); + + RegClassInfo.runOnMachineFunction(fn); + + // Join (coalesce) intervals if requested. + if (EnableJoining) { + joinIntervals(); + DEBUG({ + dbgs() << "********** INTERVALS POST JOINING **********\n"; + for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); + I != E; ++I){ + I->second->print(dbgs(), tri_); + dbgs() << "\n"; + } + }); + } + + // Perform a final pass over the instructions and compute spill weights + // and remove identity moves. 
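joinIntervals above ends with a retry loop that is a small fixpoint iteration: every successful join may unblock copies that failed earlier, so the TryAgain list is rescanned until one full pass makes no progress. The driver, reduced to a generic sketch (names are illustrative, not the pass's API); runOnMachineFunction then continues below with the post-join cleanup:

    #include <cstddef>
    #include <vector>

    // Retry-until-no-progress driver. TryJoin(Item, Again) returns true on
    // success and sets Again when a later retry might succeed. Termination:
    // every productive iteration permanently nulls out at least one entry.
    template <typename T, typename TryJoinFn>
    void joinToFixpoint(std::vector<T*> &TryAgain, TryJoinFn TryJoin) {
      bool ProgressMade = true;
      while (ProgressMade) {
        ProgressMade = false;
        for (std::size_t i = 0, e = TryAgain.size(); i != e; ++i) {
          T *&Item = TryAgain[i];
          if (!Item)
            continue;
          bool Again = false;
          if (TryJoin(Item, Again) || !Again) {
            Item = 0;          // joined, or permanently failed: never retry
            ProgressMade = true;
          }
        }
      }
    }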
+ SmallVector<unsigned, 4> DeadDefs; + for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end(); + mbbi != mbbe; ++mbbi) { + MachineBasicBlock* mbb = mbbi; + for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end(); + mii != mie; ) { + MachineInstr *MI = mii; + if (JoinedCopies.count(MI)) { + // Delete all coalesced copies. + bool DoDelete = true; + assert(MI->isCopyLike() && "Unrecognized copy instruction"); + unsigned SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg(); + if (TargetRegisterInfo::isPhysicalRegister(SrcReg) && + MI->getNumOperands() > 2) + // Do not delete extract_subreg, insert_subreg of physical + // registers unless the definition is dead. e.g. + // %DO<def> = INSERT_SUBREG %D0<undef>, %S0<kill>, 1 + // or else the scavenger may complain. LowerSubregs will + // delete them later. + DoDelete = false; + + if (MI->allDefsAreDead()) { + if (TargetRegisterInfo::isVirtualRegister(SrcReg) && + li_->hasInterval(SrcReg)) + li_->shrinkToUses(&li_->getInterval(SrcReg)); + DoDelete = true; + } + if (!DoDelete) { + // We need the instruction to adjust liveness, so make it a KILL. + if (MI->isSubregToReg()) { + MI->RemoveOperand(3); + MI->RemoveOperand(1); + } + MI->setDesc(tii_->get(TargetOpcode::KILL)); + mii = llvm::next(mii); + } else { + li_->RemoveMachineInstrFromMaps(MI); + mii = mbbi->erase(mii); + ++numPeep; + } + continue; + } + + // Now check if this is a remat'ed def instruction which is now dead. + if (ReMatDefs.count(MI)) { + bool isDead = true; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (TargetRegisterInfo::isVirtualRegister(Reg)) + DeadDefs.push_back(Reg); + if (MO.isDead()) + continue; + if (TargetRegisterInfo::isPhysicalRegister(Reg) || + !mri_->use_nodbg_empty(Reg)) { + isDead = false; + break; + } + } + if (isDead) { + while (!DeadDefs.empty()) { + unsigned DeadDef = DeadDefs.back(); + DeadDefs.pop_back(); + RemoveDeadDef(li_->getInterval(DeadDef), MI); + } + li_->RemoveMachineInstrFromMaps(mii); + mii = mbbi->erase(mii); + continue; + } else + DeadDefs.clear(); + } + + ++mii; + + // Check for now unnecessary kill flags. + if (li_->isNotInMIMap(MI)) continue; + SlotIndex DefIdx = li_->getInstructionIndex(MI).getDefIndex(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isKill()) continue; + unsigned reg = MO.getReg(); + if (!reg || !li_->hasInterval(reg)) continue; + if (!li_->getInterval(reg).killedAt(DefIdx)) { + MO.setIsKill(false); + continue; + } + // When leaving a kill flag on a physreg, check if any subregs should + // remain alive. + if (!TargetRegisterInfo::isPhysicalRegister(reg)) + continue; + for (const unsigned *SR = tri_->getSubRegisters(reg); + unsigned S = *SR; ++SR) + if (li_->hasInterval(S) && li_->getInterval(S).liveAt(DefIdx)) + MI->addRegisterDefined(S, tri_); + } + } + } + + DEBUG(dump()); + DEBUG(ldv_->dump()); + if (VerifyCoalescing) + mf_->verify(this, "After register coalescing"); + return true; +} + +/// print - Implement the dump method. 
+void RegisterCoalescer::print(raw_ostream &O, const Module* m) const { + li_->print(O, m); +} + +RegisterCoalescer *llvm::createRegisterCoalescer() { + return new RegisterCoalescer(); +} diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/RegisterCoalescer.h index 92f6c64..4131d91 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.h +++ b/lib/CodeGen/RegisterCoalescer.h @@ -1,4 +1,4 @@ -//===-- SimpleRegisterCoalescing.h - Register Coalescing --------*- C++ -*-===// +//===-- RegisterCoalescer.h - Register Coalescing Interface ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,37 +7,38 @@ // //===----------------------------------------------------------------------===// // -// This file implements a simple register copy coalescing phase. +// This file contains the abstract interface for register coalescers, +// allowing them to interact with and query register allocators. // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_SIMPLE_REGISTER_COALESCING_H -#define LLVM_CODEGEN_SIMPLE_REGISTER_COALESCING_H - -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/RegisterCoalescer.h" #include "RegisterClassInfo.h" +#include "llvm/Support/IncludeFile.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/ADT/SmallPtrSet.h" + +#ifndef LLVM_CODEGEN_REGISTER_COALESCER_H +#define LLVM_CODEGEN_REGISTER_COALESCER_H namespace llvm { - class SimpleRegisterCoalescing; - class LiveDebugVariables; + + class MachineFunction; + class RegallocQuery; + class AnalysisUsage; + class MachineInstr; class TargetRegisterInfo; + class TargetRegisterClass; class TargetInstrInfo; + class LiveDebugVariables; class VirtRegMap; class MachineLoopInfo; - /// CopyRec - Representation for copy instructions in coalescer queue. - /// - struct CopyRec { - MachineInstr *MI; - unsigned LoopDepth; - CopyRec(MachineInstr *mi, unsigned depth) - : MI(mi), LoopDepth(depth) {} - }; + class CoalescerPair; - class SimpleRegisterCoalescing : public MachineFunctionPass, - public RegisterCoalescer { + /// An abstract interface for register coalescers. Coalescers must + /// implement this interface to be part of the coalescer analysis + /// group. + class RegisterCoalescer : public MachineFunctionPass { MachineFunction* mf_; MachineRegisterInfo* mri_; const TargetMachine* tm_; @@ -61,41 +62,20 @@ namespace llvm { /// been remat'ed. SmallPtrSet<MachineInstr*, 8> ReMatDefs; - public: - static char ID; // Pass identifcation, replacement for typeid - SimpleRegisterCoalescing() : MachineFunctionPass(ID) { - initializeSimpleRegisterCoalescingPass(*PassRegistry::getPassRegistry()); - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual void releaseMemory(); - - /// runOnMachineFunction - pass entry point - virtual bool runOnMachineFunction(MachineFunction&); - - bool coalesceFunction(MachineFunction &mf, RegallocQuery &) { - // This runs as an independent pass, so don't do anything. - return false; - } - - /// print - Implement the dump method. - virtual void print(raw_ostream &O, const Module* = 0) const; - - private: /// joinIntervals - join compatible live intervals void joinIntervals(); /// CopyCoalesceInMBB - Coalesce copies in the specified MBB, putting /// copies that cannot yet be coalesced into the "TryAgain" list. 
     void CopyCoalesceInMBB(MachineBasicBlock *MBB,
-                           std::vector<CopyRec> &TryAgain);
+                           std::vector<MachineInstr*> &TryAgain);
 
     /// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
     /// which are the src/dst of the copy instruction CopyMI. This returns true
     /// if the copy was successfully coalesced away. If it is not currently
     /// possible to coalesce this interval, but it may be possible if other
     /// things get coalesced, then it returns true by reference in 'Again'.
-    bool JoinCopy(CopyRec &TheCopy, bool &Again);
+    bool JoinCopy(MachineInstr *TheCopy, bool &Again);
 
     /// JoinIntervals - Attempt to join these two intervals.  On failure, this
     /// returns false.  The output "SrcInt" will not have been modified, so we can
@@ -155,8 +135,109 @@ namespace llvm {
     /// markAsJoined - Remember that CopyMI has already been joined.
     void markAsJoined(MachineInstr *CopyMI);
+
+  public:
+    static char ID; // Class identification, replacement for typeinfo
+    RegisterCoalescer() : MachineFunctionPass(ID) {
+      initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
+    }
+
+    /// Register allocators must call this from their own
+    /// getAnalysisUsage to cover the case where the coalescer is not
+    /// a Pass in the proper sense and isn't managed by PassManager.
+    /// PassManager needs to know which analyses to make available and
+    /// which to invalidate when running the register allocator or any
+    /// pass that might call coalescing. The long-term solution is to
+    /// allow hierarchies of PassManagers.
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+    virtual void releaseMemory();
+
+    /// runOnMachineFunction - pass entry point
+    virtual bool runOnMachineFunction(MachineFunction&);
+
+    /// print - Implement the dump method.
+    virtual void print(raw_ostream &O, const Module* = 0) const;
   };
 
+  /// CoalescerPair - A helper class for register coalescers. When deciding if
+  /// two registers can be coalesced, CoalescerPair can determine if a copy
+  /// instruction would become an identity copy after coalescing.
+  class CoalescerPair {
+    const TargetInstrInfo &tii_;
+    const TargetRegisterInfo &tri_;
+
+    /// dstReg_ - The register that will be left after coalescing. It can be a
+    /// virtual or physical register.
+    unsigned dstReg_;
+
+    /// srcReg_ - The virtual register that will be coalesced into dstReg_.
+    unsigned srcReg_;
+
+    /// subIdx_ - The subregister index of srcReg_ in dstReg_. It is possible to
+    /// coalesce srcReg_ into a subreg of the larger dstReg_ when dstReg_ is a
+    /// virtual register.
+    unsigned subIdx_;
+
+    /// partial_ - True when the original copy was a partial subregister copy.
+    bool partial_;
+
+    /// crossClass_ - True when both regs are virtual, and newRC_ is constrained.
+    bool crossClass_;
+
+    /// flipped_ - True when DstReg and SrcReg are reversed from the original
+    /// copy instruction.
+    bool flipped_;
+
+    /// newRC_ - The register class of the coalesced register, or NULL if dstReg_
+    /// is a physreg.
+    const TargetRegisterClass *newRC_;
+
+  public:
+    CoalescerPair(const TargetInstrInfo &tii, const TargetRegisterInfo &tri)
+      : tii_(tii), tri_(tri), dstReg_(0), srcReg_(0), subIdx_(0),
+        partial_(false), crossClass_(false), flipped_(false), newRC_(0) {}
+
+    /// setRegisters - Set registers to match the copy instruction MI. Return
+    /// false if MI is not a coalescable copy instruction.
+    bool setRegisters(const MachineInstr*);
+
+    /// flip - Swap srcReg_ and dstReg_. Return false if swapping is impossible
+    /// because dstReg_ is a physical register, or subIdx_ is set.
+    bool flip();
+
+    /// isCoalescable - Return true if MI is a copy instruction that will become
+    /// an identity copy after coalescing.
+    bool isCoalescable(const MachineInstr*) const;
+
+    /// isPhys - Return true if DstReg is a physical register.
+    bool isPhys() const { return !newRC_; }
+
+    /// isPartial - Return true if the original copy instruction did not copy
+    /// the full register, but was a subreg operation.
+    bool isPartial() const { return partial_; }
+
+    /// isCrossClass - Return true if DstReg is virtual and NewRC is a smaller
+    /// register class than DstReg's.
+    bool isCrossClass() const { return crossClass_; }
+
+    /// isFlipped - Return true when getSrcReg is the register being defined by
+    /// the original copy instruction.
+    bool isFlipped() const { return flipped_; }
+
+    /// getDstReg - Return the register (virtual or physical) that will remain
+    /// after coalescing.
+    unsigned getDstReg() const { return dstReg_; }
+
+    /// getSrcReg - Return the virtual register that will be coalesced away.
+    unsigned getSrcReg() const { return srcReg_; }
+
+    /// getSubIdx - Return the subregister index in DstReg that SrcReg will be
+    /// coalesced into, or 0.
+    unsigned getSubIdx() const { return subIdx_; }
+
+    /// getNewRC - Return the register class of the coalesced register.
+    const TargetRegisterClass *getNewRC() const { return newRC_; }
+  };
 } // End llvm namespace
 
 #endif
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index f328493..21375b2 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -45,7 +45,7 @@ ScheduleDAG::ScheduleDAG(MachineFunction &mf)
 ScheduleDAG::~ScheduleDAG() {}
 
 /// getInstrDesc helper to handle SDNodes.
-const TargetInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
+const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
   if (!Node || !Node->isMachineOpcode()) return NULL;
   return &TII->get(Node->getMachineOpcode());
 }
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 2363df4..9cceb4e 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -21,6 +21,7 @@
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
@@ -236,13 +237,13 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
       continue;
     }
 
-    const TargetInstrDesc &TID = MI->getDesc();
-    assert(!TID.isTerminator() && !MI->isLabel() &&
+    const MCInstrDesc &MCID = MI->getDesc();
+    assert(!MCID.isTerminator() && !MI->isLabel() &&
            "Cannot schedule terminators or labels!");
 
     // Create the SUnit for this MI.
     SUnit *SU = NewSUnit(MI);
-    SU->isCall = TID.isCall();
-    SU->isCommutable = TID.isCommutable();
+    SU->isCall = MCID.isCall();
+    SU->isCommutable = MCID.isCommutable();
 
     // Assign the Latency field of SU using target-provided information.
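Client code drives the CoalescerPair helper declared in RegisterCoalescer.h above in two steps: configure it from a copy instruction, then query it about other instructions. A sketch of assumed usage only (the surrounding function and its policy are illustrative, not the pass's actual heuristics); the BuildSchedGraph hunk resumes below:

    // Would OtherMI turn into a no-op identity copy if we coalesced the two
    // registers that CopyMI copies between? Sketch of assumed usage only.
    bool wouldBecomeIdentity(const TargetInstrInfo &TII,
                             const TargetRegisterInfo &TRI,
                             const MachineInstr *CopyMI,
                             const MachineInstr *OtherMI) {
      CoalescerPair CP(TII, TRI);
      if (!CP.setRegisters(CopyMI))   // reject anything that isn't a plain copy
        return false;
      if (CP.isPhys()) {
        // Physreg destinations have no coalesced register class; callers
        // typically run extra interference checks before trusting the pair.
        return false;
      }
      return CP.isCoalescable(OtherMI);
    }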
if (UnitLatencies) @@ -309,13 +310,13 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { if (SpecialAddressLatency != 0 && !UnitLatencies && UseSU != &ExitSU) { MachineInstr *UseMI = UseSU->getInstr(); - const TargetInstrDesc &UseTID = UseMI->getDesc(); + const MCInstrDesc &UseMCID = UseMI->getDesc(); int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg); assert(RegUseIndex >= 0 && "UseMI doesn's use register!"); if (RegUseIndex >= 0 && - (UseTID.mayLoad() || UseTID.mayStore()) && - (unsigned)RegUseIndex < UseTID.getNumOperands() && - UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass()) + (UseMCID.mayLoad() || UseMCID.mayStore()) && + (unsigned)RegUseIndex < UseMCID.getNumOperands() && + UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass()) LDataLatency += SpecialAddressLatency; } // Adjust the dependence latency using operand def/use @@ -352,17 +353,17 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { unsigned Count = I->second.second; const MachineInstr *UseMI = UseMO->getParent(); unsigned UseMOIdx = UseMO - &UseMI->getOperand(0); - const TargetInstrDesc &UseTID = UseMI->getDesc(); + const MCInstrDesc &UseMCID = UseMI->getDesc(); // TODO: If we knew the total depth of the region here, we could // handle the case where the whole loop is inside the region but // is large enough that the isScheduleHigh trick isn't needed. - if (UseMOIdx < UseTID.getNumOperands()) { + if (UseMOIdx < UseMCID.getNumOperands()) { // Currently, we only support scheduling regions consisting of // single basic blocks. Check to see if the instruction is in // the same region by checking to see if it has the same parent. if (UseMI->getParent() != MI->getParent()) { unsigned Latency = SU->Latency; - if (UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass()) + if (UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) Latency += SpecialAddressLatency; // This is a wild guess as to the portion of the latency which // will be overlapped by work done outside the current @@ -374,7 +375,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { /*isMustAlias=*/false, /*isArtificial=*/true)); } else if (SpecialAddressLatency > 0 && - UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass()) { + UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) { // The entire loop body is within the current scheduling region // and the latency of this operation is assumed to be greater // than the latency of the loop. @@ -417,9 +418,9 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // produce more precise dependence information. #define STORE_LOAD_LATENCY 1 unsigned TrueMemOrderLatency = 0; - if (TID.isCall() || MI->hasUnmodeledSideEffects() || + if (MCID.isCall() || MI->hasUnmodeledSideEffects() || (MI->hasVolatileMemoryRef() && - (!TID.mayLoad() || !MI->isInvariantLoad(AA)))) { + (!MCID.mayLoad() || !MI->isInvariantLoad(AA)))) { // Be conservative with these and add dependencies on all memory // references, even those that are known to not alias. 
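That conservative barrier case is the top rung of a three-way ladder the scheduler applies to each instruction; the lower rungs (MCID.mayStore(), then MCID.mayLoad()) appear further down in this hunk. Reduced to a sketch, with the flag fields standing in for the MCInstrDesc/MachineInstr queries and illustrative enum names:

    // Three-way memory classification used by BuildSchedGraph, as a sketch.
    struct MemFlags {
      bool IsCall, UnmodeledSideEffects, VolatileRef, MayLoad, MayStore,
           InvariantLoad;
    };

    enum MemKind { MemBarrier, MemStore, MemLoad, MemNone };

    MemKind classifyForScheduling(const MemFlags &F) {
      // Barriers conservatively depend on every outstanding memory reference,
      // even ones known not to alias.
      if (F.IsCall || F.UnmodeledSideEffects ||
          (F.VolatileRef && (!F.MayLoad || !F.InvariantLoad)))
        return MemBarrier;
      if (F.MayStore)
        return MemStore;   // must order against earlier loads and stores
      if (F.MayLoad)
        return MemLoad;    // must order against earlier stores only
      return MemNone;
    }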
for (std::map<const Value *, SUnit *>::iterator I = @@ -458,7 +459,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { PendingLoads.clear(); AliasMemDefs.clear(); AliasMemUses.clear(); - } else if (TID.mayStore()) { + } else if (MCID.mayStore()) { bool MayAlias = true; TrueMemOrderLatency = STORE_LOAD_LATENCY; if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) { @@ -514,7 +515,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { /*Reg=*/0, /*isNormalMemory=*/false, /*isMustAlias=*/false, /*isArtificial=*/true)); - } else if (TID.mayLoad()) { + } else if (MCID.mayLoad()) { bool MayAlias = true; TrueMemOrderLatency = 0; if (MI->isInvariantLoad(AA)) { diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp index e6d7ded..0e005d3 100644 --- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp +++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -16,11 +16,11 @@ #define DEBUG_TYPE ::llvm::ScoreboardHazardRecognizer::DebugType #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetInstrItineraries.h" using namespace llvm; @@ -115,12 +115,12 @@ ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { // Use the itinerary for the underlying instruction to check for // free FU's in the scoreboard at the appropriate future cycles. - const TargetInstrDesc *TID = DAG->getInstrDesc(SU); - if (TID == NULL) { + const MCInstrDesc *MCID = DAG->getInstrDesc(SU); + if (MCID == NULL) { // Don't check hazards for non-machineinstr Nodes. return NoHazard; } - unsigned idx = TID->getSchedClass(); + unsigned idx = MCID->getSchedClass(); for (const InstrStage *IS = ItinData->beginStage(idx), *E = ItinData->endStage(idx); IS != E; ++IS) { // We must find one of the stage's units free for every cycle the @@ -173,16 +173,16 @@ void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) { // Use the itinerary for the underlying instruction to reserve FU's // in the scoreboard at the appropriate future cycles. - const TargetInstrDesc *TID = DAG->getInstrDesc(SU); - assert(TID && "The scheduler must filter non-machineinstrs"); - if (DAG->TII->isZeroCost(TID->Opcode)) + const MCInstrDesc *MCID = DAG->getInstrDesc(SU); + assert(MCID && "The scheduler must filter non-machineinstrs"); + if (DAG->TII->isZeroCost(MCID->Opcode)) return; ++IssueCount; unsigned cycle = 0; - unsigned idx = TID->getSchedClass(); + unsigned idx = MCID->getSchedClass(); for (const InstrStage *IS = ItinData->beginStage(idx), *E = ItinData->endStage(idx); IS != E; ++IS) { // We must reserve one of the stage's units for every cycle the diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index e3d3906..90e0cc7 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1310,16 +1310,6 @@ SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1, return SDValue(); } -/// isCarryMaterialization - Returns true if V is an ADDE node that is known to -/// return 0 or 1 depending on the carry flag. 
-static bool isCarryMaterialization(SDValue V) { - if (V.getOpcode() != ISD::ADDE) - return false; - - ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(0)); - return C && C->isNullValue() && V.getOperand(0) == V.getOperand(1); -} - SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -1483,18 +1473,6 @@ SDValue DAGCombiner::visitADD(SDNode *N) { return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt); } - // add (adde 0, 0, glue), X -> adde X, 0, glue - if (N0->hasOneUse() && isCarryMaterialization(N0)) - return DAG.getNode(ISD::ADDE, N->getDebugLoc(), - DAG.getVTList(VT, MVT::Glue), N1, N0.getOperand(0), - N0.getOperand(2)); - - // add X, (adde 0, 0, glue) -> adde X, 0, glue - if (N1->hasOneUse() && isCarryMaterialization(N1)) - return DAG.getNode(ISD::ADDE, N->getDebugLoc(), - DAG.getVTList(VT, MVT::Glue), N0, N1.getOperand(0), - N1.getOperand(2)); - return SDValue(); } @@ -1538,16 +1516,6 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { N->getDebugLoc(), MVT::Glue)); } - // addc (adde 0, 0, glue), X -> adde X, 0, glue - if (N0->hasOneUse() && isCarryMaterialization(N0)) - return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), N1, - DAG.getConstant(0, VT), N0.getOperand(2)); - - // addc X, (adde 0, 0, glue) -> adde X, 0, glue - if (N1->hasOneUse() && isCarryMaterialization(N1)) - return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), N0, - DAG.getConstant(0, VT), N1.getOperand(2)); - return SDValue(); } diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 797f174..ea7fead 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -547,7 +547,7 @@ bool FastISel::SelectCall(const User *I) { case Intrinsic::dbg_value: { // This form of DBG_VALUE is target-independent. 
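The FastISel hunk just below gates the dbg_value constant path on bit width: ConstantInt::getZExtValue() only works for values that fit in 64 bits (the underlying APInt asserts otherwise), so wider constants are attached via addCImm() instead of being truncated or dropped. The same dispatch reappears in InstrEmitter::EmitDbgValue further down. Its shape, as a stand-alone sketch with stand-in types (WideInt and OperandBuilder are illustrative, not LLVM API):

    #include <cstdint>

    struct WideInt {
      unsigned BitWidth;
      uint64_t Lo;          // value, only meaningful when BitWidth <= 64
    };

    struct OperandBuilder {
      void addImm(uint64_t /*Imm*/) {}        // plain 64-bit immediate
      void addCImm(const WideInt * /*CI*/) {} // reference to the wide constant
    };

    void addIntOperand(OperandBuilder &B, const WideInt &CI) {
      if (CI.BitWidth > 64)
        B.addCImm(&CI);     // keep full precision; no 64-bit truncation
      else
        B.addImm(CI.Lo);
    }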
const DbgValueInst *DI = cast<DbgValueInst>(Call); - const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); + const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); const Value *V = DI->getValue(); if (!V) { // Currently the optimizer can produce this; insert an undef to @@ -556,9 +556,14 @@ bool FastISel::SelectCall(const User *I) { .addReg(0U).addImm(DI->getOffset()) .addMetadata(DI->getVariable()); } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) - .addImm(CI->getZExtValue()).addImm(DI->getOffset()) - .addMetadata(DI->getVariable()); + if (CI->getBitWidth() > 64) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addCImm(CI).addImm(DI->getOffset()) + .addMetadata(DI->getVariable()); + else + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addImm(CI->getZExtValue()).addImm(DI->getOffset()) + .addMetadata(DI->getVariable()); } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addFPImm(CF).addImm(DI->getOffset()) @@ -1085,7 +1090,7 @@ unsigned FastISel::createResultReg(const TargetRegisterClass* RC) { unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode, const TargetRegisterClass* RC) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg); return ResultReg; @@ -1095,7 +1100,7 @@ unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -1115,7 +1120,7 @@ unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -1137,7 +1142,7 @@ unsigned FastISel::FastEmitInst_rrr(unsigned MachineInstOpcode, unsigned Op1, bool Op1IsKill, unsigned Op2, bool Op2IsKill) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -1160,7 +1165,7 @@ unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode, unsigned Op0, bool Op0IsKill, uint64_t Imm) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -1181,7 +1186,7 @@ unsigned FastISel::FastEmitInst_rii(unsigned MachineInstOpcode, unsigned Op0, bool Op0IsKill, uint64_t Imm1, uint64_t Imm2) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -1204,7 +1209,7 @@ unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode, unsigned Op0, bool Op0IsKill, const ConstantFP 
*FPImm) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -1226,7 +1231,7 @@ unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode, unsigned Op1, bool Op1IsKill, uint64_t Imm) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -1248,7 +1253,7 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, const TargetRegisterClass *RC, uint64_t Imm) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg).addImm(Imm); @@ -1264,7 +1269,7 @@ unsigned FastISel::FastEmitInst_ii(unsigned MachineInstOpcode, const TargetRegisterClass *RC, uint64_t Imm1, uint64_t Imm2) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 2a65d65..f0f4743 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -106,10 +106,10 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, continue; Match = false; if (User->isMachineOpcode()) { - const TargetInstrDesc &II = TII->get(User->getMachineOpcode()); + const MCInstrDesc &II = TII->get(User->getMachineOpcode()); const TargetRegisterClass *RC = 0; if (i+II.getNumDefs() < II.getNumOperands()) - RC = II.OpInfo[i+II.getNumDefs()].getRegClass(TRI); + RC = TII->getRegClass(II, i+II.getNumDefs(), TRI); if (!UseRC) UseRC = RC; else if (RC) { @@ -178,7 +178,7 @@ unsigned InstrEmitter::getDstOfOnlyCopyToRegUse(SDNode *Node, } void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, - const TargetInstrDesc &II, + const MCInstrDesc &II, bool IsClone, bool IsCloned, DenseMap<SDValue, unsigned> &VRBaseMap) { assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF && @@ -189,7 +189,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, // is a vreg in the same register class, use the CopyToReg'd destination // register instead of creating a new vreg. unsigned VRBase = 0; - const TargetRegisterClass *RC = II.OpInfo[i].getRegClass(TRI); + const TargetRegisterClass *RC = TII->getRegClass(II, i, TRI); if (II.OpInfo[i].isOptionalDef()) { // Optional def must be a physical register. unsigned NumResults = CountResults(Node); @@ -242,7 +242,7 @@ unsigned InstrEmitter::getVR(SDValue Op, Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { // Add an IMPLICIT_DEF instruction before every use. unsigned VReg = getDstOfOnlyCopyToRegUse(Op.getNode(), Op.getResNo()); - // IMPLICIT_DEF can produce any type of result so its TargetInstrDesc + // IMPLICIT_DEF can produce any type of result so its MCInstrDesc // does not include operand register class info. 
if (!VReg) { const TargetRegisterClass *RC = TLI->getRegClassFor(Op.getValueType()); @@ -265,7 +265,7 @@ unsigned InstrEmitter::getVR(SDValue Op, void InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, unsigned IIOpNum, - const TargetInstrDesc *II, + const MCInstrDesc *II, DenseMap<SDValue, unsigned> &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned) { assert(Op.getValueType() != MVT::Other && @@ -275,9 +275,9 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, unsigned VReg = getVR(Op, VRBaseMap); assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?"); - const TargetInstrDesc &TID = MI->getDesc(); - bool isOptDef = IIOpNum < TID.getNumOperands() && - TID.OpInfo[IIOpNum].isOptionalDef(); + const MCInstrDesc &MCID = MI->getDesc(); + bool isOptDef = IIOpNum < MCID.getNumOperands() && + MCID.OpInfo[IIOpNum].isOptionalDef(); // If the instruction requires a register in a different class, create // a new virtual register and copy the value into it. @@ -285,8 +285,8 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, const TargetRegisterClass *SrcRC = MRI->getRegClass(VReg); const TargetRegisterClass *DstRC = 0; if (IIOpNum < II->getNumOperands()) - DstRC = II->OpInfo[IIOpNum].getRegClass(TRI); - assert((DstRC || (TID.isVariadic() && IIOpNum >= TID.getNumOperands())) && + DstRC = TII->getRegClass(*II, IIOpNum, TRI); + assert((DstRC || (MCID.isVariadic() && IIOpNum >= MCID.getNumOperands())) && "Don't have operand info for this instruction!"); if (DstRC && !SrcRC->hasSuperClassEq(DstRC)) { unsigned NewVReg = MRI->createVirtualRegister(DstRC); @@ -312,7 +312,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, while (Idx > 0 && MI->getOperand(Idx-1).isReg() && MI->getOperand(Idx-1).isImplicit()) --Idx; - bool isTied = MI->getDesc().getOperandConstraint(Idx, TOI::TIED_TO) != -1; + bool isTied = MI->getDesc().getOperandConstraint(Idx, MCOI::TIED_TO) != -1; if (isTied) isKill = false; } @@ -330,7 +330,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, /// assertions only. void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, unsigned IIOpNum, - const TargetInstrDesc *II, + const MCInstrDesc *II, DenseMap<SDValue, unsigned> &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned) { if (Op.isMachineOpcode()) { @@ -556,7 +556,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, unsigned NumOps = Node->getNumOperands(); assert((NumOps & 1) == 1 && "REG_SEQUENCE must have an odd number of operands!"); - const TargetInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE); + const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE); for (unsigned i = 1; i != NumOps; ++i) { SDValue Op = Node->getOperand(i); if ((i & 1) == 0) { @@ -597,7 +597,7 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, return TII->emitFrameIndexDebugValue(*MF, FrameIx, Offset, MDPtr, DL); } // Otherwise, we're going to create an instruction here. - const TargetInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); + const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); MachineInstrBuilder MIB = BuildMI(*MF, DL, II); if (SD->getKind() == SDDbgValue::SDNODE) { SDNode *Node = SD->getSDNode(); @@ -616,12 +616,8 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, } else if (SD->getKind() == SDDbgValue::CONST) { const Value *V = SD->getConst(); if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { - // FIXME: SDDbgValue constants aren't updated with legalization, so it's - // possible to have i128 constants in them at this point. 
Dwarf writer - // does not handle i128 constants at the moment so, as a crude workaround, - // just drop the debug info if this happens. - if (!CI->getValue().isSignedIntN(64)) - MIB.addReg(0U); + if (CI->getBitWidth() > 64) + MIB.addCImm(CI); else MIB.addImm(CI->getSExtValue()); } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { @@ -672,7 +668,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // We want a unique VR for each IMPLICIT_DEF use. return; - const TargetInstrDesc &II = TII->get(Opc); + const MCInstrDesc &II = TII->get(Opc); unsigned NumResults = CountResults(Node); unsigned NodeOperands = CountOperands(Node); bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0; @@ -701,9 +697,9 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg()); else { // Collect declared implicit uses. - const TargetInstrDesc &TID = TII->get(F->getMachineOpcode()); - UsedRegs.append(TID.getImplicitUses(), - TID.getImplicitUses() + TID.getNumImplicitUses()); + const MCInstrDesc &MCID = TII->get(F->getMachineOpcode()); + UsedRegs.append(MCID.getImplicitUses(), + MCID.getImplicitUses() + MCID.getNumImplicitUses()); // In addition to declared implicit uses, we must also check for // direct RegisterSDNode operands. for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i) @@ -855,6 +851,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, } break; case InlineAsm::Kind_RegDefEarlyClobber: + case InlineAsm::Kind_Clobber: for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); MI->addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/ true, diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h index 02c044c..19fc044 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -22,7 +22,7 @@ namespace llvm { -class TargetInstrDesc; +class MCInstrDesc; class SDDbgValue; class InstrEmitter { @@ -49,7 +49,7 @@ class InstrEmitter { unsigned ResNo) const; void CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, - const TargetInstrDesc &II, + const MCInstrDesc &II, bool IsClone, bool IsCloned, DenseMap<SDValue, unsigned> &VRBaseMap); @@ -63,7 +63,7 @@ class InstrEmitter { /// not in the required register class. void AddRegisterOperand(MachineInstr *MI, SDValue Op, unsigned IIOpNum, - const TargetInstrDesc *II, + const MCInstrDesc *II, DenseMap<SDValue, unsigned> &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned); @@ -73,7 +73,7 @@ class InstrEmitter { /// assertions only. 
void AddOperand(MachineInstr *MI, SDValue Op, unsigned IIOpNum, - const TargetInstrDesc *II, + const MCInstrDesc *II, DenseMap<SDValue, unsigned> &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 7b560d1..b275c63 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -249,14 +249,14 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NewSU->NodeNum); - const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); - for (unsigned i = 0; i != TID.getNumOperands(); ++i) { - if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { + if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { NewSU->isTwoAddress = true; break; } } - if (TID.isCommutable()) + if (MCID.isCommutable()) NewSU->isCommutable = true; // LoadNode may already exist. This can happen when there is another @@ -422,10 +422,10 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, /// FIXME: Move to SelectionDAG? static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const TargetInstrInfo *TII) { - const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); - assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); - unsigned NumRes = TID.getNumDefs(); - for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) { + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); + unsigned NumRes = MCID.getNumDefs(); + for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { if (Reg == *ImpDef) break; ++NumRes; @@ -490,7 +490,8 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, ++i; // Skip the ID value. if (InlineAsm::isRegDefKind(Flags) || - InlineAsm::isRegDefEarlyClobberKind(Flags)) { + InlineAsm::isRegDefEarlyClobberKind(Flags) || + InlineAsm::isClobberKind(Flags)) { // Check for def of register or earlyclobber register. 
for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); @@ -504,10 +505,10 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, } if (!Node->isMachineOpcode()) continue; - const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode()); - if (!TID.ImplicitDefs) + const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); + if (!MCID.ImplicitDefs) continue; - for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) { + for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg) { CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); } } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index a827187..12b1838 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -302,8 +302,8 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, } unsigned Idx = RegDefPos.GetIdx(); - const TargetInstrDesc Desc = TII->get(Opcode); - const TargetRegisterClass *RC = Desc.getRegClass(Idx, TRI); + const MCInstrDesc Desc = TII->get(Opcode); + const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI); RegClass = RC->getID(); // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a // better way to determine it. @@ -837,14 +837,14 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NewSU->NodeNum); - const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); - for (unsigned i = 0; i != TID.getNumOperands(); ++i) { - if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { + if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { NewSU->isTwoAddress = true; break; } } - if (TID.isCommutable()) + if (MCID.isCommutable()) NewSU->isCommutable = true; InitNumRegDefsLeft(NewSU); @@ -1024,10 +1024,10 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, /// FIXME: Move to SelectionDAG? static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const TargetInstrInfo *TII) { - const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); - assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); - unsigned NumRes = TID.getNumDefs(); - for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) { + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); + unsigned NumRes = MCID.getNumDefs(); + for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { if (Reg == *ImpDef) break; ++NumRes; @@ -1092,7 +1092,8 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) { ++i; // Skip the ID value. if (InlineAsm::isRegDefKind(Flags) || - InlineAsm::isRegDefEarlyClobberKind(Flags)) { + InlineAsm::isRegDefEarlyClobberKind(Flags) || + InlineAsm::isClobberKind(Flags)) { // Check for def of register or earlyclobber register. 
for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); @@ -1107,10 +1108,10 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) { if (!Node->isMachineOpcode()) continue; - const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode()); - if (!TID.ImplicitDefs) + const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); + if (!MCID.ImplicitDefs) continue; - for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) + for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg) CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); } @@ -2028,13 +2029,9 @@ void RegReductionPQBase::UnscheduledNode(SUnit *SU) { unsigned POpc = PN->getMachineOpcode(); if (POpc == TargetOpcode::IMPLICIT_DEF) continue; - if (POpc == TargetOpcode::EXTRACT_SUBREG) { - EVT VT = PN->getOperand(0).getValueType(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); - continue; - } else if (POpc == TargetOpcode::INSERT_SUBREG || - POpc == TargetOpcode::SUBREG_TO_REG) { + if (POpc == TargetOpcode::EXTRACT_SUBREG || + POpc == TargetOpcode::INSERT_SUBREG || + POpc == TargetOpcode::SUBREG_TO_REG) { EVT VT = PN->getValueType(0); unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); @@ -2609,11 +2606,11 @@ void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) { if (SU->isTwoAddress) { unsigned Opc = SU->getNode()->getMachineOpcode(); - const TargetInstrDesc &TID = TII->get(Opc); - unsigned NumRes = TID.getNumDefs(); - unsigned NumOps = TID.getNumOperands() - NumRes; + const MCInstrDesc &MCID = TII->get(Opc); + unsigned NumRes = MCID.getNumDefs(); + unsigned NumOps = MCID.getNumOperands() - NumRes; for (unsigned i = 0; i != NumOps; ++i) { - if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) { + if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) { SDNode *DU = SU->getNode()->getOperand(i).getNode(); if (DU->getNodeId() != -1 && Op->OrigNode == &(*SUnits)[DU->getNodeId()]) @@ -2793,11 +2790,11 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() { bool isLiveOut = hasOnlyLiveOutUses(SU); unsigned Opc = Node->getMachineOpcode(); - const TargetInstrDesc &TID = TII->get(Opc); - unsigned NumRes = TID.getNumDefs(); - unsigned NumOps = TID.getNumOperands() - NumRes; + const MCInstrDesc &MCID = TII->get(Opc); + unsigned NumRes = MCID.getNumDefs(); + unsigned NumOps = MCID.getNumOperands() - NumRes; for (unsigned j = 0; j != NumOps; ++j) { - if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1) + if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1) continue; SDNode *DU = SU->getNode()->getOperand(j).getNode(); if (DU->getNodeId() == -1) diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index dbc623b..63ca326 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -17,6 +17,7 @@ #include "ScheduleDAGSDNodes.h" #include "InstrEmitter.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" @@ -111,7 +112,7 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, unsigned ResNo = User->getOperand(2).getResNo(); if (Def->isMachineOpcode()) { - const 
TargetInstrDesc &II = TII->get(Def->getMachineOpcode()); + const MCInstrDesc &II = TII->get(Def->getMachineOpcode()); if (ResNo >= II.getNumDefs() && II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) { PhysReg = Reg; @@ -255,8 +256,8 @@ void ScheduleDAGSDNodes::ClusterNodes() { continue; unsigned Opc = Node->getMachineOpcode(); - const TargetInstrDesc &TID = TII->get(Opc); - if (TID.mayLoad()) + const MCInstrDesc &MCID = TII->get(Opc); + if (MCID.mayLoad()) // Cluster loads from "near" addresses into combined SUnits. ClusterNeighboringLoads(Node); } @@ -390,14 +391,14 @@ void ScheduleDAGSDNodes::AddSchedEdges() { if (MainNode->isMachineOpcode()) { unsigned Opc = MainNode->getMachineOpcode(); - const TargetInstrDesc &TID = TII->get(Opc); - for (unsigned i = 0; i != TID.getNumOperands(); ++i) { - if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { + const MCInstrDesc &MCID = TII->get(Opc); + for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { + if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { SU->isTwoAddress = true; break; } } - if (TID.isCommutable()) + if (MCID.isCommutable()) SU->isCommutable = true; } @@ -520,14 +521,7 @@ void ScheduleDAGSDNodes::RegDefIter::Advance() { for (;DefIdx < NodeNumDefs; ++DefIdx) { if (!Node->hasAnyUseOfValue(DefIdx)) continue; - if (Node->isMachineOpcode() && - Node->getMachineOpcode() == TargetOpcode::EXTRACT_SUBREG) { - // Propagate the incoming (full-register) type. I doubt it's needed. - ValueType = Node->getOperand(0).getValueType(); - } - else { - ValueType = Node->getValueType(DefIdx); - } + ValueType = Node->getValueType(DefIdx); ++DefIdx; return; // Found a normal regdef. } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 3ad2bd6..9c27b2e 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -140,7 +140,7 @@ namespace llvm { } unsigned GetIdx() const { - return DefIdx; + return DefIdx-1; } void Advance(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 19bfa33..ea59ca1 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5428,55 +5428,6 @@ typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector; } // end anonymous namespace -/// isAllocatableRegister - If the specified register is safe to allocate, -/// i.e. it isn't a stack pointer or some other special register, return the -/// register class for the register. Otherwise, return null. -static const TargetRegisterClass * -isAllocatableRegister(unsigned Reg, MachineFunction &MF, - const TargetLowering &TLI, - const TargetRegisterInfo *TRI) { - EVT FoundVT = MVT::Other; - const TargetRegisterClass *FoundRC = 0; - for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(), - E = TRI->regclass_end(); RCI != E; ++RCI) { - EVT ThisVT = MVT::Other; - - const TargetRegisterClass *RC = *RCI; - if (!RC->isAllocatable()) - continue; - // If none of the value types for this register class are valid, we - // can't use it. For example, 64-bit reg classes on 32-bit targets. - for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); - I != E; ++I) { - if (TLI.isTypeLegal(*I)) { - // If we have already found this register in a different register class, - // choose the one with the largest VT specified. For example, on - // PowerPC, we favor f64 register classes over f32. 
- if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) { - ThisVT = *I; - break; - } - } - } - - if (ThisVT == MVT::Other) continue; - - // NOTE: This isn't ideal. In particular, this might allocate the - // frame pointer in functions that need it (due to them not being taken - // out of allocation, because a variable sized allocation hasn't been seen - // yet). This is a slight code pessimization, but should still work. - ArrayRef<unsigned> RawOrder = RC->getRawAllocationOrder(MF); - if (std::find(RawOrder.begin(), RawOrder.end(), Reg) != RawOrder.end()) { - // We found a matching register class. Keep looking at others in case - // we find one with larger registers that this physreg is also in. - FoundRC = RC; - FoundVT = ThisVT; - break; - } - } - return FoundRC; -} - /// GetRegistersForValue - Assign registers (virtual or physical) for the /// specified operand. We prefer to assign virtual registers, to allow the /// register allocator to handle the assignment process. However, if the asm @@ -5611,58 +5562,6 @@ static void GetRegistersForValue(SelectionDAG &DAG, return; } - // This is a reference to a register class that doesn't directly correspond - // to an LLVM register class. Allocate NumRegs consecutive, available, - // registers from the class. - std::vector<unsigned> RegClassRegs - = TLI.getRegClassForInlineAsmConstraint(OpInfo.ConstraintCode, - OpInfo.ConstraintVT); - - const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); - BitVector Reserved = TRI->getReservedRegs(MF); - unsigned NumAllocated = 0; - for (unsigned i = 0, e = RegClassRegs.size(); i != e; ++i) { - unsigned Reg = RegClassRegs[i]; - // Filter out the reserved registers, but note that reserved registers are - // not fully determined at this point. We may still decide we need a frame - // pointer. - if (Reserved.test(Reg)) - continue; - // See if this register is available. - if ((isOutReg && OutputRegs.count(Reg)) || // Already used. - (isInReg && InputRegs.count(Reg))) { // Already used. - // Make sure we find consecutive registers. - NumAllocated = 0; - continue; - } - - // Check to see if this register is allocatable (i.e. don't give out the - // stack pointer). - const TargetRegisterClass *RC = isAllocatableRegister(Reg, MF, TLI, TRI); - if (!RC) { // Couldn't allocate this register. - // Reset NumAllocated to make sure we return consecutive registers. - NumAllocated = 0; - continue; - } - - // Okay, this register is good, we can use it. - ++NumAllocated; - - // If we allocated enough consecutive registers, succeed. - if (NumAllocated == NumRegs) { - unsigned RegStart = (i-NumAllocated)+1; - unsigned RegEnd = i+1; - // Mark all of the allocated registers used. - for (unsigned i = RegStart; i != RegEnd; ++i) - Regs.push_back(RegClassRegs[i]); - - OpInfo.AssignedRegs = RegsForValue(Regs, *RC->vt_begin(), - OpInfo.ConstraintVT); - OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI); - return; - } - } - // Otherwise, we couldn't allocate enough registers for this. } @@ -6051,8 +5950,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Add the clobbered value to the operand list, so that the register // allocator is aware that the physreg got clobbered. 
if (!OpInfo.AssignedRegs.Regs.empty()) - OpInfo.AssignedRegs.AddInlineAsmOperands( - InlineAsm::Kind_RegDefEarlyClobber, + OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber, false, 0, DAG, AsmNodeOperands); break; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index dc8044b..87bb296 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -354,9 +354,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { const MachineBasicBlock *MBB = I; for (MachineBasicBlock::const_iterator II = MBB->begin(), IE = MBB->end(); II != IE; ++II) { - const TargetInstrDesc &TID = TM.getInstrInfo()->get(II->getOpcode()); + const MCInstrDesc &MCID = TM.getInstrInfo()->get(II->getOpcode()); - if ((TID.isCall() && !TID.isReturn()) || + if ((MCID.isCall() && !MCID.isReturn()) || II->isStackAligningInlineAsm()) { MFI->setHasCalls(true); goto done; @@ -681,7 +681,7 @@ void SelectionDAGISel::PrepareEHLandingPad() { // landing pad can thus be detected via the MachineModuleInfo. MCSymbol *Label = MF->getMMI().addLandingPad(FuncInfo->MBB); - const TargetInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL); + const MCInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL); BuildMI(*FuncInfo->MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II) .addSym(Label); @@ -2611,9 +2611,9 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (EmitNodeInfo & OPFL_MemRefs) { // Only attach load or store memory operands if the generated // instruction may load or store. - const TargetInstrDesc &TID = TM.getInstrInfo()->get(TargetOpc); - bool mayLoad = TID.mayLoad(); - bool mayStore = TID.mayStore(); + const MCInstrDesc &MCID = TM.getInstrInfo()->get(TargetOpc); + bool mayLoad = MCID.mayLoad(); + bool mayStore = MCID.mayStore(); unsigned NumMemRefs = 0; for (SmallVector<MachineMemOperand*, 2>::const_iterator I = diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 474dd7a..758296e 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -2737,13 +2737,6 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, } } -std::vector<unsigned> TargetLowering:: -getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const { - return std::vector<unsigned>(); -} - - std::pair<unsigned, const TargetRegisterClass*> TargetLowering:: getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp deleted file mode 100644 index 221bec5..0000000 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ /dev/null @@ -1,1539 +0,0 @@ -//===-- SimpleRegisterCoalescing.cpp - Register Coalescing ----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements a simple register coalescing pass that attempts to -// aggressively coalesce every register copy that it can. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "regcoalescing" -#include "SimpleRegisterCoalescing.h" -#include "VirtRegMap.h" -#include "LiveDebugVariables.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/Value.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/RegisterCoalescer.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" -#include <algorithm> -#include <cmath> -using namespace llvm; - -STATISTIC(numJoins , "Number of interval joins performed"); -STATISTIC(numCrossRCs , "Number of cross class joins performed"); -STATISTIC(numCommutes , "Number of instruction commuting performed"); -STATISTIC(numExtends , "Number of copies extended"); -STATISTIC(NumReMats , "Number of instructions re-materialized"); -STATISTIC(numPeep , "Number of identity moves eliminated after coalescing"); -STATISTIC(numAborts , "Number of times interval joining aborted"); - -char SimpleRegisterCoalescing::ID = 0; -static cl::opt<bool> -EnableJoining("join-liveintervals", - cl::desc("Coalesce copies (default=true)"), - cl::init(true)); - -static cl::opt<bool> -DisableCrossClassJoin("disable-cross-class-join", - cl::desc("Avoid coalescing cross register class copies"), - cl::init(false), cl::Hidden); - -static cl::opt<bool> -EnablePhysicalJoin("join-physregs", - cl::desc("Join physical register copies"), - cl::init(false), cl::Hidden); - -static cl::opt<bool> -VerifyCoalescing("verify-coalescing", - cl::desc("Verify machine instrs before and after register coalescing"), - cl::Hidden); - -INITIALIZE_AG_PASS_BEGIN(SimpleRegisterCoalescing, RegisterCoalescer, - "simple-register-coalescing", "Simple Register Coalescing", - false, false, true) -INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) -INITIALIZE_PASS_DEPENDENCY(SlotIndexes) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination) -INITIALIZE_PASS_DEPENDENCY(PHIElimination) -INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) -INITIALIZE_AG_PASS_END(SimpleRegisterCoalescing, RegisterCoalescer, - "simple-register-coalescing", "Simple Register Coalescing", - false, false, true) - -char &llvm::SimpleRegisterCoalescingID = SimpleRegisterCoalescing::ID; - -void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addRequired<AliasAnalysis>(); - AU.addRequired<LiveIntervals>(); - AU.addPreserved<LiveIntervals>(); - AU.addRequired<LiveDebugVariables>(); - AU.addPreserved<LiveDebugVariables>(); - AU.addPreserved<SlotIndexes>(); - AU.addRequired<MachineLoopInfo>(); - AU.addPreserved<MachineLoopInfo>(); - AU.addPreservedID(MachineDominatorsID); - AU.addPreservedID(StrongPHIEliminationID); - AU.addPreservedID(PHIEliminationID); - AU.addPreservedID(TwoAddressInstructionPassID); - MachineFunctionPass::getAnalysisUsage(AU); -} - -void 
SimpleRegisterCoalescing::markAsJoined(MachineInstr *CopyMI) { - /// Joined copies are not deleted immediately, but kept in JoinedCopies. - JoinedCopies.insert(CopyMI); - - /// Mark all register operands of CopyMI as <undef> so they won't affect dead - /// code elimination. - for (MachineInstr::mop_iterator I = CopyMI->operands_begin(), - E = CopyMI->operands_end(); I != E; ++I) - if (I->isReg()) - I->setIsUndef(true); -} - -/// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA -/// being the source and IntB being the dest, thus this defines a value number -/// in IntB. If the source value number (in IntA) is defined by a copy from B, -/// see if we can merge these two pieces of B into a single value number, -/// eliminating a copy. For example: -/// -/// A3 = B0 -/// ... -/// B1 = A3 <- this copy -/// -/// In this case, B0 can be extended to where the B1 copy lives, allowing the B1 -/// value number to be replaced with B0 (which simplifies the B live interval). -/// -/// This returns true if an interval was modified. -/// -bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(const CoalescerPair &CP, - MachineInstr *CopyMI) { - // Bail if there is no dst interval - can happen when merging physical subreg - // operations. - if (!li_->hasInterval(CP.getDstReg())) - return false; - - LiveInterval &IntA = - li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); - LiveInterval &IntB = - li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); - SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex(); - - // BValNo is a value number in B that is defined by a copy from A. 'B1' in - // the example above. - LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx); - if (BLR == IntB.end()) return false; - VNInfo *BValNo = BLR->valno; - - // Get the location that B is defined at. Two options: either this value has - // an unknown definition point or it is defined at CopyIdx. If unknown, we - // can't process it. - if (!BValNo->isDefByCopy()) return false; - assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); - - // AValNo is the value number in A that defines the copy, A3 in the example. - SlotIndex CopyUseIdx = CopyIdx.getUseIndex(); - LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx); - // The live range might not exist after fun with physreg coalescing. - if (ALR == IntA.end()) return false; - VNInfo *AValNo = ALR->valno; - // If it's re-defined by an early clobber somewhere in the live range, then - // it's not safe to eliminate the copy. FIXME: This is a temporary workaround. - // See PR3149: - // 172 %ECX<def> = MOV32rr %reg1039<kill> - // 180 INLINEASM <es:subl $5,$1 - // sbbl $3,$0>, 10, %EAX<def>, 14, %ECX<earlyclobber,def>, 9, - // %EAX<kill>, - // 36, <fi#0>, 1, %reg0, 0, 9, %ECX<kill>, 36, <fi#1>, 1, %reg0, 0 - // 188 %EAX<def> = MOV32rr %EAX<kill> - // 196 %ECX<def> = MOV32rr %ECX<kill> - // 204 %ECX<def> = MOV32rr %ECX<kill> - // 212 %EAX<def> = MOV32rr %EAX<kill> - // 220 %EAX<def> = MOV32rr %EAX - // 228 %reg1039<def> = MOV32rr %ECX<kill> - // The early clobber operand ties ECX input to the ECX def. - // - // The live interval of ECX is represented as this: - // %reg20,inf = [46,47:1)[174,230:0) 0@174-(230) 1@46-(47) - // The coalescer has no idea there was a def in the middle of [174,230]. - if (AValNo->hasRedefByEC()) - return false; - - // If AValNo is defined as a copy from IntB, we can potentially process this. - // Get the instruction that defines this value number.
- if (!CP.isCoalescable(AValNo->getCopy())) - return false; - - // Get the LiveRange in IntB that this value number starts with. - LiveInterval::iterator ValLR = - IntB.FindLiveRangeContaining(AValNo->def.getPrevSlot()); - if (ValLR == IntB.end()) - return false; - - // Make sure that the end of the live range is inside the same block as - // CopyMI. - MachineInstr *ValLREndInst = - li_->getInstructionFromIndex(ValLR->end.getPrevSlot()); - if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent()) - return false; - - // Okay, we now know that ValLR ends in the same block that the CopyMI - // live-range starts. If there are no intervening live ranges between them in - // IntB, we can merge them. - if (ValLR+1 != BLR) return false; - - // If a live interval is a physical register, conservatively check if any - // of its aliases is overlapping the live interval of the virtual register. - // If so, do not coalesce. - if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { - for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS) - if (li_->hasInterval(*AS) && IntA.overlaps(li_->getInterval(*AS))) { - DEBUG({ - dbgs() << "\t\tInterfere with alias "; - li_->getInterval(*AS).print(dbgs(), tri_); - }); - return false; - } - } - - DEBUG({ - dbgs() << "Extending: "; - IntB.print(dbgs(), tri_); - }); - - SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start; - // We are about to delete CopyMI, so need to remove it as the 'instruction - // that defines this value #'. Update the valnum with the new defining - // instruction #. - BValNo->def = FillerStart; - BValNo->setCopy(0); - - // Okay, we can merge them. We need to insert a new liverange: - // [ValLR.end, BLR.begin) of either value number, then we merge the - // two value numbers. - IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo)); - - // If the IntB live range is assigned to a physical register, and if that - // physreg has sub-registers, update their live intervals as well. - if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { - for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) { - if (!li_->hasInterval(*SR)) - continue; - LiveInterval &SRLI = li_->getInterval(*SR); - SRLI.addRange(LiveRange(FillerStart, FillerEnd, - SRLI.getNextValue(FillerStart, 0, - li_->getVNInfoAllocator()))); - } - } - - // Okay, merge "B1" into the same value number as "B0". - if (BValNo != ValLR->valno) { - // If B1 is killed by a PHI, then the merged live range must also be killed - // by the same PHI, as B0 and B1 can not overlap. - bool HasPHIKill = BValNo->hasPHIKill(); - IntB.MergeValueNumberInto(BValNo, ValLR->valno); - if (HasPHIKill) - ValLR->valno->setHasPHIKill(true); - } - DEBUG({ - dbgs() << " result = "; - IntB.print(dbgs(), tri_); - dbgs() << "\n"; - }); - - // If the source instruction was killing the source register before the - // merge, unset the isKill marker given the live range has been extended. - int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true); - if (UIdx != -1) { - ValLREndInst->getOperand(UIdx).setIsKill(false); - } - - // If the copy instruction was killing the destination register before the - // merge, find the last use and trim the live range. That will also add the - // isKill marker. - if (ALR->end == CopyIdx) - li_->shrinkToUses(&IntA); - - ++numExtends; - return true; -} - -/// HasOtherReachingDefs - Return true if there are definitions of IntB -/// other than BValNo val# that can reach uses of AValno val# of IntA. 
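//===--- Editor's aside: illustrative sketch, not part of the patch -------===//
// The overlap test implemented below uses the usual sorted-interval idiom:
// position an iterator with std::upper_bound, step back one, then walk
// forward while ranges still start before the query range ends. A reduced
// sketch over plain half-open ranges (Range and overlapsAny are invented
// names for illustration, not LLVM API):
#include <algorithm>
#include <vector>

struct Range { unsigned start, end; }; // half-open [start, end)

// B must be sorted by start and non-overlapping, as live ranges are.
static bool overlapsAny(const std::vector<Range> &B, const Range &A) {
  // First range in B starting after A.start; its predecessor may still
  // cover A.start, so step back one before scanning forward.
  auto BI = std::upper_bound(B.begin(), B.end(), A.start,
                             [](unsigned S, const Range &R) {
                               return S < R.start;
                             });
  if (BI != B.begin())
    --BI;
  for (; BI != B.end() && BI->start < A.end; ++BI)
    if (BI->end > A.start) // nonempty intersection with A
      return true;
  return false;
}
//===----------------------------------------------------------------------===//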
-bool SimpleRegisterCoalescing::HasOtherReachingDefs(LiveInterval &IntA, - LiveInterval &IntB, - VNInfo *AValNo, - VNInfo *BValNo) { - for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); - AI != AE; ++AI) { - if (AI->valno != AValNo) continue; - LiveInterval::Ranges::iterator BI = - std::upper_bound(IntB.ranges.begin(), IntB.ranges.end(), AI->start); - if (BI != IntB.ranges.begin()) - --BI; - for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) { - if (BI->valno == BValNo) - continue; - if (BI->start <= AI->start && BI->end > AI->start) - return true; - if (BI->start > AI->start && BI->start < AI->end) - return true; - } - } - return false; -} - -/// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with -/// IntA being the source and IntB being the dest, thus this defines a value -/// number in IntB. If the source value number (in IntA) is defined by a -/// commutable instruction and its other operand is coalesced to the copy dest -/// register, see if we can transform the copy into a noop by commuting the -/// definition. For example, -/// -/// A3 = op A2 B0<kill> -/// ... -/// B1 = A3 <- this copy -/// ... -/// = op A3 <- more uses -/// -/// ==> -/// -/// B2 = op B0 A2<kill> -/// ... -/// B1 = B2 <- now an identity copy -/// ... -/// = op B2 <- more uses -/// -/// This returns true if an interval was modified. -/// -bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP, - MachineInstr *CopyMI) { - // FIXME: For now, only eliminate the copy by commuting its def when the - // source register is a virtual register. We want to guard against cases - // where the copy is a back edge copy and commuting the def lengthens the - // live interval of the source register to the entire loop. - if (CP.isPhys() && CP.isFlipped()) - return false; - - // Bail if there is no dst interval. - if (!li_->hasInterval(CP.getDstReg())) - return false; - - SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex(); - - LiveInterval &IntA = - li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); - LiveInterval &IntB = - li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); - - // BValNo is a value number in B that is defined by a copy from A. 'B1' in - // the example above. - VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx); - if (!BValNo || !BValNo->isDefByCopy()) - return false; - - assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); - - // AValNo is the value number in A that defines the copy, A3 in the example. - VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getUseIndex()); - assert(AValNo && "COPY source not live"); - - // If other defs can reach uses of this def, then it's not safe to perform - // the optimization. - if (AValNo->isPHIDef() || AValNo->isUnused() || AValNo->hasPHIKill()) - return false; - MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def); - if (!DefMI) - return false; - const TargetInstrDesc &TID = DefMI->getDesc(); - if (!TID.isCommutable()) - return false; - // If DefMI is a two-address instruction then commuting it will change the - // destination register.
- int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg); - assert(DefIdx != -1); - unsigned UseOpIdx; - if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx)) - return false; - unsigned Op1, Op2, NewDstIdx; - if (!tii_->findCommutedOpIndices(DefMI, Op1, Op2)) - return false; - if (Op1 == UseOpIdx) - NewDstIdx = Op2; - else if (Op2 == UseOpIdx) - NewDstIdx = Op1; - else - return false; - - MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); - unsigned NewReg = NewDstMO.getReg(); - if (NewReg != IntB.reg || !NewDstMO.isKill()) - return false; - - // Make sure there are no other definitions of IntB that would reach the - // uses which the new definition can reach. - if (HasOtherReachingDefs(IntA, IntB, AValNo, BValNo)) - return false; - - // Abort if the aliases of IntB.reg have values that are not simply the - // clobbers from the superreg. - if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) - for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS) - if (li_->hasInterval(*AS) && - HasOtherReachingDefs(IntA, li_->getInterval(*AS), AValNo, 0)) - return false; - - // If some of the uses of IntA.reg are already coalesced away, return false. - // It's not possible to determine whether it's safe to perform the coalescing. - for (MachineRegisterInfo::use_nodbg_iterator UI = - mri_->use_nodbg_begin(IntA.reg), - UE = mri_->use_nodbg_end(); UI != UE; ++UI) { - MachineInstr *UseMI = &*UI; - SlotIndex UseIdx = li_->getInstructionIndex(UseMI); - LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); - if (ULR == IntA.end()) - continue; - if (ULR->valno == AValNo && JoinedCopies.count(UseMI)) - return false; - } - - DEBUG(dbgs() << "\tRemoveCopyByCommutingDef: " << AValNo->def << '\t' - << *DefMI); - - // At this point we have decided that it is legal to do this - // transformation. Start by commuting the instruction. - MachineBasicBlock *MBB = DefMI->getParent(); - MachineInstr *NewMI = tii_->commuteInstruction(DefMI); - if (!NewMI) - return false; - if (TargetRegisterInfo::isVirtualRegister(IntA.reg) && - TargetRegisterInfo::isVirtualRegister(IntB.reg) && - !mri_->constrainRegClass(IntB.reg, mri_->getRegClass(IntA.reg))) - return false; - if (NewMI != DefMI) { - li_->ReplaceMachineInstrInMaps(DefMI, NewMI); - MBB->insert(DefMI, NewMI); - MBB->erase(DefMI); - } - unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false); - NewMI->getOperand(OpIdx).setIsKill(); - - // If ALR and BLR overlap and the end of BLR extends beyond the end of ALR, e.g. - // A = or A, B - // ... - // B = A - // ... - // C = A<kill> - // ... - // = B - - // Update uses of IntA of the specific Val# with IntB. - for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg), - UE = mri_->use_end(); UI != UE;) { - MachineOperand &UseMO = UI.getOperand(); - MachineInstr *UseMI = &*UI; - ++UI; - if (JoinedCopies.count(UseMI)) - continue; - if (UseMI->isDebugValue()) { - // FIXME These don't have an instruction index. Not clear we have enough - // info to decide whether to do this replacement or not. For now do it.
- UseMO.setReg(NewReg); - continue; - } - SlotIndex UseIdx = li_->getInstructionIndex(UseMI).getUseIndex(); - LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); - if (ULR == IntA.end() || ULR->valno != AValNo) - continue; - if (TargetRegisterInfo::isPhysicalRegister(NewReg)) - UseMO.substPhysReg(NewReg, *tri_); - else - UseMO.setReg(NewReg); - if (UseMI == CopyMI) - continue; - if (!UseMI->isCopy()) - continue; - if (UseMI->getOperand(0).getReg() != IntB.reg || - UseMI->getOperand(0).getSubReg()) - continue; - - // This copy will become a noop. If it's defining a new val#, merge it into - // BValNo. - SlotIndex DefIdx = UseIdx.getDefIndex(); - VNInfo *DVNI = IntB.getVNInfoAt(DefIdx); - if (!DVNI) - continue; - DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI); - assert(DVNI->def == DefIdx); - BValNo = IntB.MergeValueNumberInto(BValNo, DVNI); - markAsJoined(UseMI); - } - - // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition - // is updated. - VNInfo *ValNo = BValNo; - ValNo->def = AValNo->def; - ValNo->setCopy(0); - for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); - AI != AE; ++AI) { - if (AI->valno != AValNo) continue; - IntB.addRange(LiveRange(AI->start, AI->end, ValNo)); - } - DEBUG(dbgs() << "\t\textended: " << IntB << '\n'); - - IntA.removeValNo(AValNo); - DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n'); - ++numCommutes; - return true; -} - -/// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial -/// computation, replace the copy by rematerializing the definition. -bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, - bool preserveSrcInt, - unsigned DstReg, - unsigned DstSubIdx, - MachineInstr *CopyMI) { - SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getUseIndex(); - LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx); - assert(SrcLR != SrcInt.end() && "Live range not found!"); - VNInfo *ValNo = SrcLR->valno; - // If other defs can reach uses of this def, then it's not safe to perform - // the optimization. - if (ValNo->isPHIDef() || ValNo->isUnused() || ValNo->hasPHIKill()) - return false; - MachineInstr *DefMI = li_->getInstructionFromIndex(ValNo->def); - if (!DefMI) - return false; - assert(DefMI && "Defining instruction disappeared"); - const TargetInstrDesc &TID = DefMI->getDesc(); - if (!TID.isAsCheapAsAMove()) - return false; - if (!tii_->isTriviallyReMaterializable(DefMI, AA)) - return false; - bool SawStore = false; - if (!DefMI->isSafeToMove(tii_, AA, SawStore)) - return false; - if (TID.getNumDefs() != 1) - return false; - if (!DefMI->isImplicitDef()) { - // Make sure the copy destination register class fits the instruction - // definition register class. The mismatch can happen as a result of earlier - // extract_subreg, insert_subreg, subreg_to_reg coalescing. - const TargetRegisterClass *RC = TID.OpInfo[0].getRegClass(tri_); - if (TargetRegisterInfo::isVirtualRegister(DstReg)) { - if (mri_->getRegClass(DstReg) != RC) - return false; - } else if (!RC->contains(DstReg)) - return false; - } - - // If destination register has a sub-register index on it, make sure it - // matches the instruction register class.
- if (DstSubIdx) { - const TargetInstrDesc &TID = DefMI->getDesc(); - if (TID.getNumDefs() != 1) - return false; - const TargetRegisterClass *DstRC = mri_->getRegClass(DstReg); - const TargetRegisterClass *DstSubRC = - DstRC->getSubRegisterRegClass(DstSubIdx); - const TargetRegisterClass *DefRC = TID.OpInfo[0].getRegClass(tri_); - if (DefRC == DstRC) - DstSubIdx = 0; - else if (DefRC != DstSubRC) - return false; - } - - RemoveCopyFlag(DstReg, CopyMI); - - MachineBasicBlock *MBB = CopyMI->getParent(); - MachineBasicBlock::iterator MII = - llvm::next(MachineBasicBlock::iterator(CopyMI)); - tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, *tri_); - MachineInstr *NewMI = prior(MII); - - // CopyMI may have implicit operands, transfer them over to the newly - // rematerialized instruction. And update implicit def interval valnos. - for (unsigned i = CopyMI->getDesc().getNumOperands(), - e = CopyMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = CopyMI->getOperand(i); - if (MO.isReg() && MO.isImplicit()) - NewMI->addOperand(MO); - if (MO.isDef()) - RemoveCopyFlag(MO.getReg(), CopyMI); - } - - NewMI->copyImplicitOps(CopyMI); - li_->ReplaceMachineInstrInMaps(CopyMI, NewMI); - CopyMI->eraseFromParent(); - ReMatCopies.insert(CopyMI); - ReMatDefs.insert(DefMI); - DEBUG(dbgs() << "Remat: " << *NewMI); - ++NumReMats; - - // The source interval can become smaller because we removed a use. - if (preserveSrcInt) - li_->shrinkToUses(&SrcInt); - - return true; -} - -/// UpdateRegDefsUses - Replace all defs and uses of SrcReg with DstReg and -/// update the subregister number if it is not zero. If DstReg is a -/// physical register and the existing subregister number of the def / use -/// being updated is not zero, make sure to set it to the correct physical -/// subregister. -void -SimpleRegisterCoalescing::UpdateRegDefsUses(const CoalescerPair &CP) { - bool DstIsPhys = CP.isPhys(); - unsigned SrcReg = CP.getSrcReg(); - unsigned DstReg = CP.getDstReg(); - unsigned SubIdx = CP.getSubIdx(); - - // Update LiveDebugVariables. - ldv_->renameRegister(SrcReg, DstReg, SubIdx); - - for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg); - MachineInstr *UseMI = I.skipInstruction();) { - // A PhysReg copy that won't be coalesced can perhaps be rematerialized - // instead. - if (DstIsPhys) { - if (UseMI->isCopy() && - !UseMI->getOperand(1).getSubReg() && - !UseMI->getOperand(0).getSubReg() && - UseMI->getOperand(1).getReg() == SrcReg && - UseMI->getOperand(0).getReg() != SrcReg && - UseMI->getOperand(0).getReg() != DstReg && - !JoinedCopies.count(UseMI) && - ReMaterializeTrivialDef(li_->getInterval(SrcReg), false, - UseMI->getOperand(0).getReg(), 0, UseMI)) - continue; - } - - SmallVector<unsigned,8> Ops; - bool Reads, Writes; - tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops); - bool Kills = false, Deads = false; - - // Replace SrcReg with DstReg in all UseMI operands. - for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - MachineOperand &MO = UseMI->getOperand(Ops[i]); - Kills |= MO.isKill(); - Deads |= MO.isDead(); - - if (DstIsPhys) - MO.substPhysReg(DstReg, *tri_); - else - MO.substVirtReg(DstReg, SubIdx, *tri_); - } - - // This instruction is a copy that will be removed. - if (JoinedCopies.count(UseMI)) - continue; - - if (SubIdx) { - // If UseMI was a simple SrcReg def, make sure we didn't turn it into a - // read-modify-write of DstReg.
- if (Deads) - UseMI->addRegisterDead(DstReg, tri_); - else if (!Reads && Writes) - UseMI->addRegisterDefined(DstReg, tri_); - - // Kill flags apply to the whole physical register. - if (DstIsPhys && Kills) - UseMI->addRegisterKilled(DstReg, tri_); - } - - DEBUG({ - dbgs() << "\t\tupdated: "; - if (!UseMI->isDebugValue()) - dbgs() << li_->getInstructionIndex(UseMI) << "\t"; - dbgs() << *UseMI; - }); - } -} - -/// removeIntervalIfEmpty - Check if the live interval of a physical register -/// is empty, if so remove it and also remove the empty intervals of its -/// sub-registers. Return true if live interval is removed. -static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *li_, - const TargetRegisterInfo *tri_) { - if (li.empty()) { - if (TargetRegisterInfo::isPhysicalRegister(li.reg)) - for (const unsigned* SR = tri_->getSubRegisters(li.reg); *SR; ++SR) { - if (!li_->hasInterval(*SR)) - continue; - LiveInterval &sli = li_->getInterval(*SR); - if (sli.empty()) - li_->removeInterval(*SR); - } - li_->removeInterval(li.reg); - return true; - } - return false; -} - -/// RemoveDeadDef - If a def of a live interval is now determined dead, remove -/// the val# it defines. If the live interval becomes empty, remove it as well. -bool SimpleRegisterCoalescing::RemoveDeadDef(LiveInterval &li, - MachineInstr *DefMI) { - SlotIndex DefIdx = li_->getInstructionIndex(DefMI).getDefIndex(); - LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx); - if (DefIdx != MLR->valno->def) - return false; - li.removeValNo(MLR->valno); - return removeIntervalIfEmpty(li, li_, tri_); -} - -void SimpleRegisterCoalescing::RemoveCopyFlag(unsigned DstReg, - const MachineInstr *CopyMI) { - SlotIndex DefIdx = li_->getInstructionIndex(CopyMI).getDefIndex(); - if (li_->hasInterval(DstReg)) { - LiveInterval &LI = li_->getInterval(DstReg); - if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) - if (LR->valno->def == DefIdx) - LR->valno->setCopy(0); - } - if (!TargetRegisterInfo::isPhysicalRegister(DstReg)) - return; - for (const unsigned* AS = tri_->getAliasSet(DstReg); *AS; ++AS) { - if (!li_->hasInterval(*AS)) - continue; - LiveInterval &LI = li_->getInterval(*AS); - if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) - if (LR->valno->def == DefIdx) - LR->valno->setCopy(0); - } -} - -/// shouldJoinPhys - Return true if a copy involving a physreg should be joined. -/// We need to be careful about coalescing a source physical register with a -/// virtual register. Once the coalescing is done, it cannot be broken and these -/// are not spillable! If the destination interval uses are far away, think -/// twice about coalescing them! -bool SimpleRegisterCoalescing::shouldJoinPhys(CoalescerPair &CP) { - bool Allocatable = li_->isAllocatable(CP.getDstReg()); - LiveInterval &JoinVInt = li_->getInterval(CP.getSrcReg()); - - /// Always join simple intervals that are defined by a single copy from a - /// reserved register. This doesn't increase register pressure, so it is - /// always beneficial. - if (!Allocatable && CP.isFlipped() && JoinVInt.containsOneValue()) - return true; - - if (!EnablePhysicalJoin) { - DEBUG(dbgs() << "\tPhysreg joins disabled.\n"); - return false; - } - - // Only coalesce to allocatable physreg, we don't want to risk modifying - // reserved registers. - if (!Allocatable) { - DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n"); - return false; // Not coalescable. - } - - // Don't join with physregs that have a ridiculous number of live - // ranges. 
The data structure performance is really bad when that - // happens. - if (li_->hasInterval(CP.getDstReg()) && - li_->getInterval(CP.getDstReg()).ranges.size() > 1000) { - ++numAborts; - DEBUG(dbgs() - << "\tPhysical register live interval too complicated, abort!\n"); - return false; - } - - // FIXME: Why are we skipping this test for partial copies? - // CodeGen/X86/phys_subreg_coalesce-3.ll needs it. - if (!CP.isPartial()) { - const TargetRegisterClass *RC = mri_->getRegClass(CP.getSrcReg()); - unsigned Threshold = RegClassInfo.getNumAllocatableRegs(RC) * 2; - unsigned Length = li_->getApproximateInstructionCount(JoinVInt); - if (Length > Threshold) { - ++numAborts; - DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n"); - return false; - } - } - return true; -} - -/// isWinToJoinCrossClass - Return true if it's profitable to coalesce -/// two virtual registers from different register classes. -bool -SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned SrcReg, - unsigned DstReg, - const TargetRegisterClass *SrcRC, - const TargetRegisterClass *DstRC, - const TargetRegisterClass *NewRC) { - unsigned NewRCCount = RegClassInfo.getNumAllocatableRegs(NewRC); - // This heuristic is good enough in practice, but it's obviously not *right*. - // 4 is a magic number that works well enough for x86, ARM, etc. It filters - // out all but the most restrictive register classes. - if (NewRCCount > 4 || - // Early exit if the function is fairly small, coalesce aggressively if - // that's the case. For really special register classes with 3 or - // fewer registers, be a bit more careful. - (li_->getFuncInstructionCount() / NewRCCount) < 8) - return true; - LiveInterval &SrcInt = li_->getInterval(SrcReg); - LiveInterval &DstInt = li_->getInterval(DstReg); - unsigned SrcSize = li_->getApproximateInstructionCount(SrcInt); - unsigned DstSize = li_->getApproximateInstructionCount(DstInt); - - // Coalesce aggressively if the intervals are small compared to the number of - // registers in the new class. The number 4 is fairly arbitrary, chosen to be - // less aggressive than the 8 used for the whole function size. - const unsigned ThresSize = 4 * NewRCCount; - if (SrcSize <= ThresSize && DstSize <= ThresSize) - return true; - - // Estimate *register use density*. If it doubles or more, abort. - unsigned SrcUses = std::distance(mri_->use_nodbg_begin(SrcReg), - mri_->use_nodbg_end()); - unsigned DstUses = std::distance(mri_->use_nodbg_begin(DstReg), - mri_->use_nodbg_end()); - unsigned NewUses = SrcUses + DstUses; - unsigned NewSize = SrcSize + DstSize; - if (SrcRC != NewRC && SrcSize > ThresSize) { - unsigned SrcRCCount = RegClassInfo.getNumAllocatableRegs(SrcRC); - if (NewUses*SrcSize*SrcRCCount > 2*SrcUses*NewSize*NewRCCount) - return false; - } - if (DstRC != NewRC && DstSize > ThresSize) { - unsigned DstRCCount = RegClassInfo.getNumAllocatableRegs(DstRC); - if (NewUses*DstSize*DstRCCount > 2*DstUses*NewSize*NewRCCount) - return false; - } - return true; -} - - -/// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg, -/// which are the src/dst of the copy instruction CopyMI. This returns true -/// if the copy was successfully coalesced away. If it is not currently -/// possible to coalesce this interval, but it may be possible if other -/// things get coalesced, then it returns true by reference in 'Again'.
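//===--- Editor's aside: worked example, not part of the patch ------------===//
// A worked instance of the density check in isWinToJoinCrossClass above, with
// invented numbers: let SrcSize = 40, SrcUses = 10, SrcRCCount = 16, and after
// joining NewSize = 60, NewUses = 25, NewRCCount = 4. The abort test
//   NewUses*SrcSize*SrcRCCount > 2*SrcUses*NewSize*NewRCCount
// evaluates to 25*40*16 = 16000 versus 2*10*60*4 = 4800, so the cross-class
// join is rejected: uses per instruction per allocatable register would more
// than double after constraining to the smaller class.
//===----------------------------------------------------------------------===//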
-bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { - MachineInstr *CopyMI = TheCopy.MI; - - Again = false; - if (JoinedCopies.count(CopyMI) || ReMatCopies.count(CopyMI)) - return false; // Already done. - - DEBUG(dbgs() << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI); - - CoalescerPair CP(*tii_, *tri_); - if (!CP.setRegisters(CopyMI)) { - DEBUG(dbgs() << "\tNot coalescable.\n"); - return false; - } - - // If they are already joined we continue. - if (CP.getSrcReg() == CP.getDstReg()) { - markAsJoined(CopyMI); - DEBUG(dbgs() << "\tCopy already coalesced.\n"); - return false; // Not coalescable. - } - - DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), tri_) - << " with " << PrintReg(CP.getDstReg(), tri_, CP.getSubIdx()) - << "\n"); - - // Enforce policies. - if (CP.isPhys()) { - if (!shouldJoinPhys(CP)) { - // Before giving up coalescing, if definition of source is defined by - // trivial computation, try rematerializing it. - if (!CP.isFlipped() && - ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true, - CP.getDstReg(), 0, CopyMI)) - return true; - return false; - } - } else { - // Avoid constraining virtual register regclass too much. - if (CP.isCrossClass()) { - DEBUG(dbgs() << "\tCross-class to " << CP.getNewRC()->getName() << ".\n"); - if (DisableCrossClassJoin) { - DEBUG(dbgs() << "\tCross-class joins disabled.\n"); - return false; - } - if (!isWinToJoinCrossClass(CP.getSrcReg(), CP.getDstReg(), - mri_->getRegClass(CP.getSrcReg()), - mri_->getRegClass(CP.getDstReg()), - CP.getNewRC())) { - DEBUG(dbgs() << "\tAvoid coalescing to constrained register class.\n"); - Again = true; // May be possible to coalesce later. - return false; - } - } - - // When possible, let DstReg be the larger interval. - if (!CP.getSubIdx() && li_->getInterval(CP.getSrcReg()).ranges.size() > - li_->getInterval(CP.getDstReg()).ranges.size()) - CP.flip(); - } - - // Okay, attempt to join these two intervals. On failure, this returns false. - // Otherwise, if one of the intervals being joined is a physreg, this method - // always canonicalizes DstInt to be it. The output "SrcInt" will not have - // been modified, so we can use this information below to update aliases. - if (!JoinIntervals(CP)) { - // Coalescing failed. - - // If definition of source is defined by trivial computation, try - // rematerializing it. - if (!CP.isFlipped() && - ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true, - CP.getDstReg(), 0, CopyMI)) - return true; - - // If we can eliminate the copy without merging the live ranges, do so now. - if (!CP.isPartial()) { - if (AdjustCopiesBackFrom(CP, CopyMI) || - RemoveCopyByCommutingDef(CP, CopyMI)) { - markAsJoined(CopyMI); - DEBUG(dbgs() << "\tTrivial!\n"); - return true; - } - } - - // Otherwise, we are unable to join the intervals. - DEBUG(dbgs() << "\tInterference!\n"); - Again = true; // May be possible to coalesce later. - return false; - } - - // Coalescing to a virtual register that is of a sub-register class of the - // other. Make sure the resulting register is set to the right register class. - if (CP.isCrossClass()) { - ++numCrossRCs; - mri_->setRegClass(CP.getDstReg(), CP.getNewRC()); - } - - // Remember to delete the copy instruction. - markAsJoined(CopyMI); - - UpdateRegDefsUses(CP); - - // If we have extended the live range of a physical register, make sure we - // update live-in lists as well. 
- if (CP.isPhys()) { - SmallVector<MachineBasicBlock*, 16> BlockSeq; - // JoinIntervals invalidates the VNInfos in SrcInt, but we only need the - // ranges for this, and they are preserved. - LiveInterval &SrcInt = li_->getInterval(CP.getSrcReg()); - for (LiveInterval::const_iterator I = SrcInt.begin(), E = SrcInt.end(); - I != E; ++I ) { - li_->findLiveInMBBs(I->start, I->end, BlockSeq); - for (unsigned idx = 0, size = BlockSeq.size(); idx != size; ++idx) { - MachineBasicBlock &block = *BlockSeq[idx]; - if (!block.isLiveIn(CP.getDstReg())) - block.addLiveIn(CP.getDstReg()); - } - BlockSeq.clear(); - } - } - - // SrcReg is guaranteed to be the register whose live interval is - // being merged. - li_->removeInterval(CP.getSrcReg()); - - // Update regalloc hint. - tri_->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *mf_); - - DEBUG({ - LiveInterval &DstInt = li_->getInterval(CP.getDstReg()); - dbgs() << "\tJoined. Result = "; - DstInt.print(dbgs(), tri_); - dbgs() << "\n"; - }); - - ++numJoins; - return true; -} - -/// ComputeUltimateVN - Assuming we are going to join two live intervals, -/// compute what the resultant value numbers for each value in the input two -/// ranges will be. This is complicated by copies between the two which can -/// and will commonly cause multiple value numbers to be merged into one. -/// -/// VN is the value number that we're trying to resolve. InstDefiningValue -/// keeps track of the new InstDefiningValue assignment for the result -/// LiveInterval. ThisFromOther/OtherFromThis are sets that keep track of -/// whether a value in this or other is a copy from the opposite set. -/// ThisValNoAssignments/OtherValNoAssignments keep track of value #'s that have -/// already been assigned. -/// -/// ThisFromOther[x] - If x is defined as a copy from the other interval, this -/// contains the value number the copy is from. -/// -static unsigned ComputeUltimateVN(VNInfo *VNI, - SmallVector<VNInfo*, 16> &NewVNInfo, - DenseMap<VNInfo*, VNInfo*> &ThisFromOther, - DenseMap<VNInfo*, VNInfo*> &OtherFromThis, - SmallVector<int, 16> &ThisValNoAssignments, - SmallVector<int, 16> &OtherValNoAssignments) { - unsigned VN = VNI->id; - - // If the VN has already been computed, just return it. - if (ThisValNoAssignments[VN] >= 0) - return ThisValNoAssignments[VN]; - assert(ThisValNoAssignments[VN] != -2 && "Cyclic value numbers"); - - // If this val is not a copy from the other val, then it must be a new value - // number in the destination. - DenseMap<VNInfo*, VNInfo*>::iterator I = ThisFromOther.find(VNI); - if (I == ThisFromOther.end()) { - NewVNInfo.push_back(VNI); - return ThisValNoAssignments[VN] = NewVNInfo.size()-1; - } - VNInfo *OtherValNo = I->second; - - // Otherwise, this *is* a copy from the RHS. If the other side has already - // been computed, return it. - if (OtherValNoAssignments[OtherValNo->id] >= 0) - return ThisValNoAssignments[VN] = OtherValNoAssignments[OtherValNo->id]; - - // Mark this value number as currently being computed, then ask what the - // ultimate value # of the other value is. - ThisValNoAssignments[VN] = -2; - unsigned UltimateVN = - ComputeUltimateVN(OtherValNo, NewVNInfo, OtherFromThis, ThisFromOther, - OtherValNoAssignments, ThisValNoAssignments); - return ThisValNoAssignments[VN] = UltimateVN; -} - -/// JoinIntervals - Attempt to join these two intervals. On failure, this -/// returns false.
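//===--- Editor's aside: illustrative sketch, not part of the patch -------===//
// The resolution scheme ComputeUltimateVN (above) implements, reduced to
// plain integers: each value starts unassigned (-1), -2 marks "in progress"
// to catch cycles, and copies between the two sides chase through to a
// single final id. All names here are invented for illustration, not LLVM
// API.
#include <cassert>
#include <map>
#include <vector>

static int resolve(int VN, std::vector<int> &ThisAssign,
                   std::vector<int> &OtherAssign,
                   const std::map<int, int> &ThisFromOther,
                   const std::map<int, int> &OtherFromThis,
                   int &NextId) {
  if (ThisAssign[VN] >= 0)
    return ThisAssign[VN];               // already computed
  assert(ThisAssign[VN] != -2 && "Cyclic value numbers");
  auto I = ThisFromOther.find(VN);
  if (I == ThisFromOther.end())
    return ThisAssign[VN] = NextId++;    // genuinely new value number
  ThisAssign[VN] = -2;                   // mark in progress
  // This value is a copy of a value on the other side; the roles of the two
  // maps and assignment tables swap for the recursive step.
  return ThisAssign[VN] = resolve(I->second, OtherAssign, ThisAssign,
                                  OtherFromThis, ThisFromOther, NextId);
}
//===----------------------------------------------------------------------===//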
-bool SimpleRegisterCoalescing::JoinIntervals(CoalescerPair &CP) { - LiveInterval &RHS = li_->getInterval(CP.getSrcReg()); - DEBUG({ dbgs() << "\t\tRHS = "; RHS.print(dbgs(), tri_); dbgs() << "\n"; }); - - // If a live interval is a physical register, check for interference with any - // aliases. The interference check implemented here is a bit more conservative - // than the full interference check below. We allow overlapping live ranges - // only when one is a copy of the other. - if (CP.isPhys()) { - for (const unsigned *AS = tri_->getAliasSet(CP.getDstReg()); *AS; ++AS){ - if (!li_->hasInterval(*AS)) - continue; - const LiveInterval &LHS = li_->getInterval(*AS); - LiveInterval::const_iterator LI = LHS.begin(); - for (LiveInterval::const_iterator RI = RHS.begin(), RE = RHS.end(); - RI != RE; ++RI) { - LI = std::lower_bound(LI, LHS.end(), RI->start); - // Does LHS have an overlapping live range starting before RI? - if ((LI != LHS.begin() && LI[-1].end > RI->start) && - (RI->start != RI->valno->def || - !CP.isCoalescable(li_->getInstructionFromIndex(RI->start)))) { - DEBUG({ - dbgs() << "\t\tInterference from alias: "; - LHS.print(dbgs(), tri_); - dbgs() << "\n\t\tOverlap at " << RI->start << " and no copy.\n"; - }); - return false; - } - - // Check that LHS ranges beginning in this range are copies. - for (; LI != LHS.end() && LI->start < RI->end; ++LI) { - if (LI->start != LI->valno->def || - !CP.isCoalescable(li_->getInstructionFromIndex(LI->start))) { - DEBUG({ - dbgs() << "\t\tInterference from alias: "; - LHS.print(dbgs(), tri_); - dbgs() << "\n\t\tDef at " << LI->start << " is not a copy.\n"; - }); - return false; - } - } - } - } - } - - // Compute the final value assignment, assuming that the live ranges can be - // coalesced. - SmallVector<int, 16> LHSValNoAssignments; - SmallVector<int, 16> RHSValNoAssignments; - DenseMap<VNInfo*, VNInfo*> LHSValsDefinedFromRHS; - DenseMap<VNInfo*, VNInfo*> RHSValsDefinedFromLHS; - SmallVector<VNInfo*, 16> NewVNInfo; - - LiveInterval &LHS = li_->getOrCreateInterval(CP.getDstReg()); - DEBUG({ dbgs() << "\t\tLHS = "; LHS.print(dbgs(), tri_); dbgs() << "\n"; }); - - // Loop over the value numbers of the LHS, seeing if any are defined from - // the RHS. - for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); - i != e; ++i) { - VNInfo *VNI = *i; - if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy? - continue; - - // Never join with a register that has EarlyClobber redefs. - if (VNI->hasRedefByEC()) - return false; - - // DstReg is known to be a register in the LHS interval. If the src is - // from the RHS interval, we can use its value #. - if (!CP.isCoalescable(VNI->getCopy())) - continue; - - // Figure out the value # from the RHS. - LiveRange *lr = RHS.getLiveRangeContaining(VNI->def.getPrevSlot()); - // The copy could be to an aliased physreg. - if (!lr) continue; - LHSValsDefinedFromRHS[VNI] = lr->valno; - } - - // Loop over the value numbers of the RHS, seeing if any are defined from - // the LHS. - for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); - i != e; ++i) { - VNInfo *VNI = *i; - if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy? - continue; - - // Never join with a register that has EarlyClobber redefs. - if (VNI->hasRedefByEC()) - return false; - - // DstReg is known to be a register in the RHS interval. If the src is - // from the LHS interval, we can use its value #.
- if (!CP.isCoalescable(VNI->getCopy())) - continue; - - // Figure out the value # from the LHS. - LiveRange *lr = LHS.getLiveRangeContaining(VNI->def.getPrevSlot()); - // The copy could be to an aliased physreg. - if (!lr) continue; - RHSValsDefinedFromLHS[VNI] = lr->valno; - } - - LHSValNoAssignments.resize(LHS.getNumValNums(), -1); - RHSValNoAssignments.resize(RHS.getNumValNums(), -1); - NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums()); - - for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); - i != e; ++i) { - VNInfo *VNI = *i; - unsigned VN = VNI->id; - if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused()) - continue; - ComputeUltimateVN(VNI, NewVNInfo, - LHSValsDefinedFromRHS, RHSValsDefinedFromLHS, - LHSValNoAssignments, RHSValNoAssignments); - } - for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); - i != e; ++i) { - VNInfo *VNI = *i; - unsigned VN = VNI->id; - if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused()) - continue; - // If this value number isn't a copy from the LHS, it's a new number. - if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) { - NewVNInfo.push_back(VNI); - RHSValNoAssignments[VN] = NewVNInfo.size()-1; - continue; - } - - ComputeUltimateVN(VNI, NewVNInfo, - RHSValsDefinedFromLHS, LHSValsDefinedFromRHS, - RHSValNoAssignments, LHSValNoAssignments); - } - - // Armed with the mappings of LHS/RHS values to ultimate values, walk the - // interval lists to see if these intervals are coalescable. - LiveInterval::const_iterator I = LHS.begin(); - LiveInterval::const_iterator IE = LHS.end(); - LiveInterval::const_iterator J = RHS.begin(); - LiveInterval::const_iterator JE = RHS.end(); - - // Skip ahead until the first place of potential sharing. - if (I != IE && J != JE) { - if (I->start < J->start) { - I = std::upper_bound(I, IE, J->start); - if (I != LHS.begin()) --I; - } else if (J->start < I->start) { - J = std::upper_bound(J, JE, I->start); - if (J != RHS.begin()) --J; - } - } - - while (I != IE && J != JE) { - // Determine if these two live ranges overlap. - bool Overlaps; - if (I->start < J->start) { - Overlaps = I->end > J->start; - } else { - Overlaps = J->end > I->start; - } - - // If so, check value # info to determine if they are really different. - if (Overlaps) { - // If the live range overlap will map to the same value number in the - // result liverange, we can still coalesce them. If not, we can't. - if (LHSValNoAssignments[I->valno->id] != - RHSValNoAssignments[J->valno->id]) - return false; - // If it's re-defined by an early clobber somewhere in the live range, - // then conservatively abort coalescing. - if (NewVNInfo[LHSValNoAssignments[I->valno->id]]->hasRedefByEC()) - return false; - } - - if (I->end < J->end) - ++I; - else - ++J; - } - - // Update kill info. Some live ranges are extended due to copy coalescing. - for (DenseMap<VNInfo*, VNInfo*>::iterator I = LHSValsDefinedFromRHS.begin(), - E = LHSValsDefinedFromRHS.end(); I != E; ++I) { - VNInfo *VNI = I->first; - unsigned LHSValID = LHSValNoAssignments[VNI->id]; - if (VNI->hasPHIKill()) - NewVNInfo[LHSValID]->setHasPHIKill(true); - } - - // Update kill info. Some live ranges are extended due to copy coalescing. 
- for (DenseMap<VNInfo*, VNInfo*>::iterator I = RHSValsDefinedFromLHS.begin(), - E = RHSValsDefinedFromLHS.end(); I != E; ++I) { - VNInfo *VNI = I->first; - unsigned RHSValID = RHSValNoAssignments[VNI->id]; - if (VNI->hasPHIKill()) - NewVNInfo[RHSValID]->setHasPHIKill(true); - } - - if (LHSValNoAssignments.empty()) - LHSValNoAssignments.push_back(-1); - if (RHSValNoAssignments.empty()) - RHSValNoAssignments.push_back(-1); - - // If we get here, we know that we can coalesce the live ranges. Ask the - // intervals to coalesce themselves now. - LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo, - mri_); - return true; -} - -namespace { - // DepthMBBCompare - Comparison predicate that sorts first based on the loop - // depth of the basic block (the unsigned), and then on the MBB number. - struct DepthMBBCompare { - typedef std::pair<unsigned, MachineBasicBlock*> DepthMBBPair; - bool operator()(const DepthMBBPair &LHS, const DepthMBBPair &RHS) const { - // Deeper loops first - if (LHS.first != RHS.first) - return LHS.first > RHS.first; - - // Prefer blocks that are more connected in the CFG. This takes care of - // the most difficult copies first while intervals are short. - unsigned cl = LHS.second->pred_size() + LHS.second->succ_size(); - unsigned cr = RHS.second->pred_size() + RHS.second->succ_size(); - if (cl != cr) - return cl > cr; - - // As a last resort, sort by block number. - return LHS.second->getNumber() < RHS.second->getNumber(); - } - }; -} - -void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB, - std::vector<CopyRec> &TryAgain) { - DEBUG(dbgs() << MBB->getName() << ":\n"); - - SmallVector<CopyRec, 8> VirtCopies; - SmallVector<CopyRec, 8> PhysCopies; - SmallVector<CopyRec, 8> ImpDefCopies; - for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); - MII != E;) { - MachineInstr *Inst = MII++; - - // If this is neither a copy nor a subreg_to_reg, we can't join intervals. - unsigned SrcReg, DstReg; - if (Inst->isCopy()) { - DstReg = Inst->getOperand(0).getReg(); - SrcReg = Inst->getOperand(1).getReg(); - } else if (Inst->isSubregToReg()) { - DstReg = Inst->getOperand(0).getReg(); - SrcReg = Inst->getOperand(2).getReg(); - } else - continue; - - bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg); - bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); - if (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty()) - ImpDefCopies.push_back(CopyRec(Inst, 0)); - else if (SrcIsPhys || DstIsPhys) - PhysCopies.push_back(CopyRec(Inst, 0)); - else - VirtCopies.push_back(CopyRec(Inst, 0)); - } - - // Try coalescing implicit copies and insert_subreg <undef> first, - // followed by copies to / from physical registers, then finally copies - // from virtual registers to virtual registers.
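//===--- Editor's aside: illustrative sketch, not part of the patch -------===//
// The visitation order DepthMBBCompare (above) produces, restated over plain
// data instead of MachineBasicBlocks (BlockKey and sortForCoalescing are
// invented names): deepest loops first, then better-connected blocks, then
// ascending block number.
#include <algorithm>
#include <tuple>
#include <vector>

struct BlockKey {
  unsigned LoopDepth;   // primary key: deeper loops first
  unsigned Connections; // secondary key: pred_size() + succ_size()
  int Number;           // tie-breaker: ascending block number
};

static void sortForCoalescing(std::vector<BlockKey> &Blocks) {
  std::sort(Blocks.begin(), Blocks.end(),
            [](const BlockKey &L, const BlockKey &R) {
              // Same ordering as DepthMBBCompare: the first two keys are
              // descending, so negate Number to keep it ascending under '>'.
              return std::make_tuple(L.LoopDepth, L.Connections, -L.Number) >
                     std::make_tuple(R.LoopDepth, R.Connections, -R.Number);
            });
}
//===----------------------------------------------------------------------===//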
- for (unsigned i = 0, e = ImpDefCopies.size(); i != e; ++i) { - CopyRec &TheCopy = ImpDefCopies[i]; - bool Again = false; - if (!JoinCopy(TheCopy, Again)) - if (Again) - TryAgain.push_back(TheCopy); - } - for (unsigned i = 0, e = PhysCopies.size(); i != e; ++i) { - CopyRec &TheCopy = PhysCopies[i]; - bool Again = false; - if (!JoinCopy(TheCopy, Again)) - if (Again) - TryAgain.push_back(TheCopy); - } - for (unsigned i = 0, e = VirtCopies.size(); i != e; ++i) { - CopyRec &TheCopy = VirtCopies[i]; - bool Again = false; - if (!JoinCopy(TheCopy, Again)) - if (Again) - TryAgain.push_back(TheCopy); - } -} - -void SimpleRegisterCoalescing::joinIntervals() { - DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n"); - - std::vector<CopyRec> TryAgainList; - if (loopInfo->empty()) { - // If there are no loops in the function, join intervals in function order. - for (MachineFunction::iterator I = mf_->begin(), E = mf_->end(); - I != E; ++I) - CopyCoalesceInMBB(I, TryAgainList); - } else { - // Otherwise, join intervals in inner loops before other intervals. - // Unfortunately we can't just iterate over loop hierarchy here because - // there may be more MBB's than BB's. Collect MBB's for sorting. - - // Join intervals in the function prolog first. We want to join physical - // registers with virtual registers before the intervals got too long. - std::vector<std::pair<unsigned, MachineBasicBlock*> > MBBs; - for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();I != E;++I){ - MachineBasicBlock *MBB = I; - MBBs.push_back(std::make_pair(loopInfo->getLoopDepth(MBB), I)); - } - - // Sort by loop depth. - std::sort(MBBs.begin(), MBBs.end(), DepthMBBCompare()); - - // Finally, join intervals in loop nest order. - for (unsigned i = 0, e = MBBs.size(); i != e; ++i) - CopyCoalesceInMBB(MBBs[i].second, TryAgainList); - } - - // Joining intervals can allow other intervals to be joined. Iteratively join - // until we make no progress. - bool ProgressMade = true; - while (ProgressMade) { - ProgressMade = false; - - for (unsigned i = 0, e = TryAgainList.size(); i != e; ++i) { - CopyRec &TheCopy = TryAgainList[i]; - if (!TheCopy.MI) - continue; - - bool Again = false; - bool Success = JoinCopy(TheCopy, Again); - if (Success || !Again) { - TheCopy.MI = 0; // Mark this one as done. - ProgressMade = true; - } - } - } -} - -void SimpleRegisterCoalescing::releaseMemory() { - JoinedCopies.clear(); - ReMatCopies.clear(); - ReMatDefs.clear(); -} - -bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { - mf_ = &fn; - mri_ = &fn.getRegInfo(); - tm_ = &fn.getTarget(); - tri_ = tm_->getRegisterInfo(); - tii_ = tm_->getInstrInfo(); - li_ = &getAnalysis<LiveIntervals>(); - ldv_ = &getAnalysis<LiveDebugVariables>(); - AA = &getAnalysis<AliasAnalysis>(); - loopInfo = &getAnalysis<MachineLoopInfo>(); - - DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n" - << "********** Function: " - << ((Value*)mf_->getFunction())->getName() << '\n'); - - if (VerifyCoalescing) - mf_->verify(this, "Before register coalescing"); - - RegClassInfo.runOnMachineFunction(fn); - - // Join (coalesce) intervals if requested. - if (EnableJoining) { - joinIntervals(); - DEBUG({ - dbgs() << "********** INTERVALS POST JOINING **********\n"; - for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); - I != E; ++I){ - I->second->print(dbgs(), tri_); - dbgs() << "\n"; - } - }); - } - - // Perform a final pass over the instructions and compute spill weights - // and remove identity moves. 
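//===--- Editor's aside: illustrative sketch, not part of the patch -------===//
// The retry loop in joinIntervals (above) is a classic fixpoint worklist:
// keep re-offering failed candidates until a full pass makes no progress,
// and retire an entry on success or when it can never succeed. A generic
// sketch under invented names (fixpoint, tryOnce are not LLVM API):
#include <vector>

template <typename Item, typename TryFn>
static void fixpoint(std::vector<Item*> &Worklist, TryFn tryOnce) {
  bool Progress = true;
  while (Progress) {
    Progress = false;
    for (Item *&Slot : Worklist) {
      if (!Slot)
        continue;                 // already retired
      bool Again = false;
      if (tryOnce(*Slot, Again) || !Again) {
        Slot = nullptr;           // retire: succeeded or permanently failed
        Progress = true;
      }
    }
  }
}
//===----------------------------------------------------------------------===//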
- SmallVector<unsigned, 4> DeadDefs; - for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end(); - mbbi != mbbe; ++mbbi) { - MachineBasicBlock* mbb = mbbi; - for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end(); - mii != mie; ) { - MachineInstr *MI = mii; - if (JoinedCopies.count(MI)) { - // Delete all coalesced copies. - bool DoDelete = true; - assert(MI->isCopyLike() && "Unrecognized copy instruction"); - unsigned SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(SrcReg) && - MI->getNumOperands() > 2) - // Do not delete extract_subreg, insert_subreg of physical - // registers unless the definition is dead. e.g. - // %DO<def> = INSERT_SUBREG %D0<undef>, %S0<kill>, 1 - // or else the scavenger may complain. LowerSubregs will - // delete them later. - DoDelete = false; - - if (MI->allDefsAreDead()) { - if (TargetRegisterInfo::isVirtualRegister(SrcReg) && - li_->hasInterval(SrcReg)) - li_->shrinkToUses(&li_->getInterval(SrcReg)); - DoDelete = true; - } - if (!DoDelete) { - // We need the instruction to adjust liveness, so make it a KILL. - if (MI->isSubregToReg()) { - MI->RemoveOperand(3); - MI->RemoveOperand(1); - } - MI->setDesc(tii_->get(TargetOpcode::KILL)); - mii = llvm::next(mii); - } else { - li_->RemoveMachineInstrFromMaps(MI); - mii = mbbi->erase(mii); - ++numPeep; - } - continue; - } - - // Now check if this is a remat'ed def instruction which is now dead. - if (ReMatDefs.count(MI)) { - bool isDead = true; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (!Reg) - continue; - if (TargetRegisterInfo::isVirtualRegister(Reg)) - DeadDefs.push_back(Reg); - if (MO.isDead()) - continue; - if (TargetRegisterInfo::isPhysicalRegister(Reg) || - !mri_->use_nodbg_empty(Reg)) { - isDead = false; - break; - } - } - if (isDead) { - while (!DeadDefs.empty()) { - unsigned DeadDef = DeadDefs.back(); - DeadDefs.pop_back(); - RemoveDeadDef(li_->getInterval(DeadDef), MI); - } - li_->RemoveMachineInstrFromMaps(mii); - mii = mbbi->erase(mii); - continue; - } else - DeadDefs.clear(); - } - - ++mii; - - // Check for now unnecessary kill flags. - if (li_->isNotInMIMap(MI)) continue; - SlotIndex DefIdx = li_->getInstructionIndex(MI).getDefIndex(); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isKill()) continue; - unsigned reg = MO.getReg(); - if (!reg || !li_->hasInterval(reg)) continue; - if (!li_->getInterval(reg).killedAt(DefIdx)) { - MO.setIsKill(false); - continue; - } - // When leaving a kill flag on a physreg, check if any subregs should - // remain alive. - if (!TargetRegisterInfo::isPhysicalRegister(reg)) - continue; - for (const unsigned *SR = tri_->getSubRegisters(reg); - unsigned S = *SR; ++SR) - if (li_->hasInterval(S) && li_->getInterval(S).liveAt(DefIdx)) - MI->addRegisterDefined(S, tri_); - } - } - } - - DEBUG(dump()); - DEBUG(ldv_->dump()); - if (VerifyCoalescing) - mf_->verify(this, "After register coalescing"); - return true; -} - -/// print - Implement the dump method. -void SimpleRegisterCoalescing::print(raw_ostream &O, const Module* m) const { - li_->print(O, m); -} - -RegisterCoalescer* llvm::createSimpleRegisterCoalescer() { - return new SimpleRegisterCoalescing(); -} - -// Make sure that anything that uses RegisterCoalescer pulls in this file... 
-DEFINING_FILE_FOR(SimpleRegisterCoalescing) diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index bf27cc8..a0952a0 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -76,12 +76,14 @@ SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) { return LSP.first; // There may not be a call instruction (?) in which case we ignore LPad. LSP.second = LSP.first; - for (MachineBasicBlock::const_iterator I = FirstTerm, E = MBB->begin(); - I != E; --I) + for (MachineBasicBlock::const_iterator I = MBB->end(), E = MBB->begin(); + I != E;) { + --I; if (I->getDesc().isCall()) { LSP.second = LIS.getInstructionIndex(I); break; } + } } // If CurLI is live into a landing pad successor, move the last split point @@ -122,7 +124,7 @@ void SplitAnalysis::analyzeUses() { // Compute per-live block info. if (!calcLiveBlockInfo()) { // FIXME: calcLiveBlockInfo found inconsistencies in the live range. - // I am looking at you, SimpleRegisterCoalescing! + // I am looking at you, RegisterCoalescer! DidRepairRange = true; ++NumRepairs; DEBUG(dbgs() << "*** Fixing inconsistent live interval! ***\n"); @@ -165,7 +167,7 @@ bool SplitAnalysis::calcLiveBlockInfo() { tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); // If the block contains no uses, the range must be live through. At one - // point, SimpleRegisterCoalescing could create dangling ranges that ended + // point, RegisterCoalescer could create dangling ranges that ended // mid-block. if (UseI == UseE || *UseI >= Stop) { ++NumThroughBlocks; @@ -634,6 +636,7 @@ unsigned SplitEditor::openIntv() { void SplitEditor::selectIntv(unsigned Idx) { assert(Idx != 0 && "Cannot select the complement interval"); assert(Idx < Edit->size() && "Can only select previously opened interval"); + DEBUG(dbgs() << " selectIntv " << OpenIdx << " -> " << Idx << '\n'); OpenIdx = Idx; } @@ -654,6 +657,24 @@ SlotIndex SplitEditor::enterIntvBefore(SlotIndex Idx) { return VNI->def; } +SlotIndex SplitEditor::enterIntvAfter(SlotIndex Idx) { + assert(OpenIdx && "openIntv not called before enterIntvAfter"); + DEBUG(dbgs() << " enterIntvAfter " << Idx); + Idx = Idx.getBoundaryIndex(); + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx); + if (!ParentVNI) { + DEBUG(dbgs() << ": not live\n"); + return Idx; + } + DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n'); + MachineInstr *MI = LIS.getInstructionFromIndex(Idx); + assert(MI && "enterIntvAfter called with invalid index"); + + VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Idx, *MI->getParent(), + llvm::next(MachineBasicBlock::iterator(MI))); + return VNI->def; +} + SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) { assert(OpenIdx && "openIntv not called before enterIntvAtEnd"); SlotIndex End = LIS.getMBBEndIdx(&MBB); @@ -1005,12 +1026,6 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { markComplexMapped(i, ParentVNI); } -#ifndef NDEBUG - // Every new interval must have a def by now, otherwise the split is bogus. - for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I) - assert((*I)->hasAtLeastOneValue() && "Split interval has no value"); -#endif - // Transfer the simply mapped values, check if any are skipped. bool Skipped = transferValues(); if (Skipped) diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h index 7174c0b..a9ccf40b 100644 --- a/lib/CodeGen/SplitKit.h +++ b/lib/CodeGen/SplitKit.h @@ -81,6 +81,12 @@ public: bool LiveThrough; ///< Live in whole block (Templ 5. above). bool LiveIn; ///< Current reg is live in. 
bool LiveOut;          ///< Current reg is live out.
+
+    /// isOneInstr - Returns true when this BlockInfo describes a single
+    /// instruction.
+    bool isOneInstr() const {
+      return SlotIndex::isSameInstr(FirstUse, LastUse);
+    }
  };

private:
@@ -360,6 +366,10 @@ public:
  /// Return the beginning of the new live range.
  SlotIndex enterIntvBefore(SlotIndex Idx);

+  /// enterIntvAfter - Enter the open interval after the instruction at Idx.
+  /// Return the beginning of the new live range.
+  SlotIndex enterIntvAfter(SlotIndex Idx);
+
  /// enterIntvAtEnd - Enter the open interval at the end of MBB.
  /// Use the open interval from the inserted copy to the MBB end.
  /// Return the beginning of the new live range.
diff --git a/lib/CodeGen/Splitter.cpp b/lib/CodeGen/Splitter.cpp
index 08aee82..ec75df4 100644
--- a/lib/CodeGen/Splitter.cpp
+++ b/lib/CodeGen/Splitter.cpp
@@ -11,7 +11,7 @@

#include "Splitter.h"

-#include "SimpleRegisterCoalescing.h"
+#include "RegisterCoalescer.h"
#include "llvm/Module.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index 01f5b56..57cbe1b 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -504,7 +504,7 @@ bool StackSlotColoring::PropagateBackward(MachineBasicBlock::iterator MII,
    bool FoundDef = false;  // Not counting 2address def.

    Uses.clear();
-    const TargetInstrDesc &TID = MII->getDesc();
+    const MCInstrDesc &MCID = MII->getDesc();
    for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MII->getOperand(i);
      if (!MO.isReg())
@@ -521,7 +521,7 @@ bool StackSlotColoring::PropagateBackward(MachineBasicBlock::iterator MII,
        if (MO.getSubReg() || MII->isSubregToReg())
          return false;

-        const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI);
+        const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI);
        if (RC && !RC->contains(NewReg))
          return false;

@@ -566,7 +566,7 @@ bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII,
  SmallVector<MachineOperand*, 4> Uses;
  while (++MII != MBB->end()) {
    bool FoundKill = false;
-    const TargetInstrDesc &TID = MII->getDesc();
+    const MCInstrDesc &MCID = MII->getDesc();
    for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MII->getOperand(i);
      if (!MO.isReg())
@@ -583,7 +583,7 @@ bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII,
        if (MO.getSubReg())
          return false;

-        const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI);
+        const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI);
        if (RC && !RC->contains(NewReg))
          return false;
        if (MO.isKill())
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index 3111d59..6fe4bd7 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -97,8 +97,8 @@ namespace {
    bool shouldTailDuplicate(const MachineFunction &MF,
                             bool IsSimple, MachineBasicBlock &TailBB);
    bool isSimpleBB(MachineBasicBlock *TailBB);
-    bool canCompletelyDuplicateBB(MachineBasicBlock &BB, bool IsSimple);
-    void duplicateSimpleBB(MachineBasicBlock *TailBB,
+    bool canCompletelyDuplicateBB(MachineBasicBlock &BB);
+    bool duplicateSimpleBB(MachineBasicBlock *TailBB,
                           SmallVector<MachineBasicBlock*, 8> &TDBBs,
                           const DenseSet<unsigned> &RegsUsedByPhi,
                           SmallVector<MachineInstr*, 16> &Copies);
@@ -529,8 +529,8 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,

  bool hasIndirectBR = false;
  if (PreRegAlloc &&
!TailBB.empty()) { - const TargetInstrDesc &TID = TailBB.back().getDesc(); - if (TID.isIndirectBranch()) { + const MCInstrDesc &MCID = TailBB.back().getDesc(); + if (MCID.isIndirectBranch()) { MaxDuplicateCount = 20; hasIndirectBR = true; } @@ -568,12 +568,12 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, return true; if (IsSimple) - return canCompletelyDuplicateBB(TailBB, IsSimple); + return true; if (!PreRegAlloc) return true; - return canCompletelyDuplicateBB(TailBB, IsSimple); + return canCompletelyDuplicateBB(TailBB); } /// isSimpleBB - True if this BB has only one unconditional jump. @@ -606,74 +606,59 @@ bothUsedInPHI(const MachineBasicBlock &A, } bool -TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB, - bool isSimple) { +TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) { SmallPtrSet<MachineBasicBlock*, 8> Succs(BB.succ_begin(), BB.succ_end()); for (MachineBasicBlock::pred_iterator PI = BB.pred_begin(), PE = BB.pred_end(); PI != PE; ++PI) { MachineBasicBlock *PredBB = *PI; - if (isSimple) { - if (PredBB->getLandingPadSuccessor()) - return false; - if (bothUsedInPHI(*PredBB, Succs)) - return false; - } else { - if (PredBB->succ_size() > 1) - return false; - } + if (PredBB->succ_size() > 1) + return false; MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL; SmallVector<MachineOperand, 4> PredCond; if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) return false; - if (!isSimple && !PredCond.empty()) + if (!PredCond.empty()) return false; } return true; } -void +bool TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB, SmallVector<MachineBasicBlock*, 8> &TDBBs, const DenseSet<unsigned> &UsedByPhi, SmallVector<MachineInstr*, 16> &Copies) { + SmallPtrSet<MachineBasicBlock*, 8> Succs(TailBB->succ_begin(), + TailBB->succ_end()); SmallVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(), TailBB->pred_end()); + bool Changed = false; for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), PE = Preds.end(); PI != PE; ++PI) { MachineBasicBlock *PredBB = *PI; + if (PredBB->getLandingPadSuccessor()) + continue; + + if (bothUsedInPHI(*PredBB, Succs)) + continue; + MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL; SmallVector<MachineOperand, 4> PredCond; - bool NotAnalyzable = - TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true); - (void)NotAnalyzable; - assert(!NotAnalyzable && "Cannot duplicate this!"); + if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) + continue; + Changed = true; DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB << "From simple Succ: " << *TailBB); MachineBasicBlock *NewTarget = *TailBB->succ_begin(); MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(PredBB)); - DenseMap<unsigned, unsigned> LocalVRMap; - SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; - for (MachineBasicBlock::iterator I = TailBB->begin(); - I != TailBB->end() && I->isPHI();) { - MachineInstr *MI = &*I; - ++I; - ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true); - } - MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); - for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { - Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(), - TII->get(TargetOpcode::COPY), - CopyInfos[i].first).addReg(CopyInfos[i].second)); - } - // Make PredFBB explicit. 
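// Note on the hunks above: duplicateSimpleBB now returns bool instead of
// void and no longer asserts that every predecessor can be rewritten.
// Predecessors with a landing-pad successor, predecessors sharing a
// PHI-used successor with TailBB, and predecessors whose terminators
// AnalyzeBranch cannot decompose are simply skipped, and Changed records
// whether any duplication actually happened. Making PredFBB explicit, as
// the comment above says, continues below.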
if (PredCond.empty()) PredFBB = PredTBB; @@ -715,6 +700,7 @@ TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB, TDBBs.push_back(PredBB); } + return Changed; } /// TailDuplicate - If it is profitable, duplicate TailBB's contents in each @@ -733,11 +719,8 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, DenseSet<unsigned> UsedByPhi; getRegsUsedByPHIs(*TailBB, &UsedByPhi); - if (IsSimple) { - duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies); - return true; - } - + if (IsSimple) + return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies); // Iterate through all the unique predecessors and tail-duplicate this // block into them, if possible. Copying the list ahead of time also diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index 34e2b33..86e71d8 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -59,8 +59,8 @@ TargetInstrInfoImpl::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, // the two operands returned by findCommutedOpIndices. MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, bool NewMI) const { - const TargetInstrDesc &TID = MI->getDesc(); - bool HasDef = TID.getNumDefs(); + const MCInstrDesc &MCID = MI->getDesc(); + bool HasDef = MCID.getNumDefs(); if (HasDef && !MI->getOperand(0).isReg()) // No idea how to commute this instruction. Target should implement its own. return 0; @@ -81,7 +81,7 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, bool ChangeReg0 = false; if (HasDef && MI->getOperand(0).getReg() == Reg1) { // Must be two address instruction! - assert(MI->getDesc().getOperandConstraint(0, TOI::TIED_TO) && + assert(MI->getDesc().getOperandConstraint(0, MCOI::TIED_TO) && "Expecting a two-address instruction!"); Reg2IsKill = false; ChangeReg0 = true; @@ -119,12 +119,12 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const { - const TargetInstrDesc &TID = MI->getDesc(); - if (!TID.isCommutable()) + const MCInstrDesc &MCID = MI->getDesc(); + if (!MCID.isCommutable()) return false; // This assumes v0 = op v1, v2 and commuting would swap v1 and v2. If this // is not true, then the target must implement this. - SrcOpIdx1 = TID.getNumDefs(); + SrcOpIdx1 = MCID.getNumDefs(); SrcOpIdx2 = SrcOpIdx1 + 1; if (!MI->getOperand(SrcOpIdx1).isReg() || !MI->getOperand(SrcOpIdx2).isReg()) @@ -137,12 +137,12 @@ bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI, bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI, const SmallVectorImpl<MachineOperand> &Pred) const { bool MadeChange = false; - const TargetInstrDesc &TID = MI->getDesc(); - if (!TID.isPredicable()) + const MCInstrDesc &MCID = MI->getDesc(); + if (!MCID.isPredicable()) return false; for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) { - if (TID.OpInfo[i].isPredicate()) { + if (MCID.OpInfo[i].isPredicate()) { MachineOperand &MO = MI->getOperand(i); if (MO.isReg()) { MO.setReg(Pred[j].getReg()); @@ -332,10 +332,10 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx)) return true; - const TargetInstrDesc &TID = MI->getDesc(); + const MCInstrDesc &MCID = MI->getDesc(); // Avoid instructions obviously unsafe for remat. 
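// Note on the TID -> MCID renames in this and the surrounding files:
// instruction descriptions moved from TargetInstrDesc (in Target) to
// MCInstrDesc (in MC), and per-operand register class lookup changed
// shape with them, from TID.OpInfo[i].getRegClass(TRI) to
// TII->getRegClass(MCID, i, TRI), as the StackSlotColoring hunks above
// show. The remat-safety checks flagged by the comment above continue
// below.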
- if (TID.isNotDuplicable() || TID.mayStore() || + if (MCID.isNotDuplicable() || MCID.mayStore() || MI->hasUnmodeledSideEffects()) return false; @@ -345,7 +345,7 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, return false; // Avoid instructions which load from potentially varying memory. - if (TID.mayLoad() && !MI->isInvariantLoad(AA)) + if (MCID.mayLoad() && !MI->isInvariantLoad(AA)) return false; // If any of the registers accessed are non-constant, conservatively assume diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 3860e0b..6d6244e 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -280,8 +280,8 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, /// isTwoAddrUse - Return true if the specified MI is using the specified /// register as a two-address operand. static bool isTwoAddrUse(MachineInstr *UseMI, unsigned Reg) { - const TargetInstrDesc &TID = UseMI->getDesc(); - for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) { + const MCInstrDesc &MCID = UseMI->getDesc(); + for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) { MachineOperand &MO = UseMI->getOperand(i); if (MO.isReg() && MO.getReg() == Reg && (MO.isDef() || UseMI->isRegTiedToDefOperand(i))) @@ -443,8 +443,9 @@ static bool isKilled(MachineInstr &MI, unsigned Reg, /// isTwoAddrUse - Return true if the specified MI uses the specified register /// as a two-address use. If so, return the destination register by reference. static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) { - const TargetInstrDesc &TID = MI.getDesc(); - unsigned NumOps = MI.isInlineAsm() ? MI.getNumOperands():TID.getNumOperands(); + const MCInstrDesc &MCID = MI.getDesc(); + unsigned NumOps = MI.isInlineAsm() + ? 
MI.getNumOperands() : MCID.getNumOperands(); for (unsigned i = 0; i != NumOps; ++i) { const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg) @@ -761,10 +762,10 @@ void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI, static bool isSafeToDelete(MachineInstr *MI, const TargetInstrInfo *TII, SmallVector<unsigned, 4> &Kills) { - const TargetInstrDesc &TID = MI->getDesc(); - if (TID.mayStore() || TID.isCall()) + const MCInstrDesc &MCID = MI->getDesc(); + if (MCID.mayStore() || MCID.isCall()) return false; - if (TID.isTerminator() || MI->hasUnmodeledSideEffects()) + if (MCID.isTerminator() || MI->hasUnmodeledSideEffects()) return false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -854,7 +855,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, MachineFunction::iterator &mbbi, unsigned SrcIdx, unsigned DstIdx, unsigned Dist, SmallPtrSet<MachineInstr*, 8> &Processed) { - const TargetInstrDesc &TID = mi->getDesc(); + const MCInstrDesc &MCID = mi->getDesc(); unsigned regA = mi->getOperand(DstIdx).getReg(); unsigned regB = mi->getOperand(SrcIdx).getReg(); @@ -876,7 +877,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, unsigned regCIdx = ~0U; bool TryCommute = false; bool AggressiveCommute = false; - if (TID.isCommutable() && mi->getNumOperands() >= 3 && + if (MCID.isCommutable() && mi->getNumOperands() >= 3 && TII->findCommutedOpIndices(mi, SrcOp1, SrcOp2)) { if (SrcIdx == SrcOp1) regCIdx = SrcOp2; @@ -907,7 +908,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, if (TargetRegisterInfo::isVirtualRegister(regA)) ScanUses(regA, &*mbbi, Processed); - if (TID.isConvertibleTo3Addr()) { + if (MCID.isConvertibleTo3Addr()) { // This instruction is potentially convertible to a true // three-address instruction. Check if it is profitable. if (!regBKilled || isProfitableToConv3Addr(regA, regB)) { @@ -927,7 +928,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, // movq (%rax), %rcx // addq %rdx, %rcx // because it's preferable to schedule a load than a register copy. - if (TID.mayLoad() && !regBKilled) { + if (MCID.mayLoad() && !regBKilled) { // Determine if a load can be unfolded. unsigned LoadRegIndex; unsigned NewOpc = @@ -936,14 +937,14 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, /*UnfoldStore=*/false, &LoadRegIndex); if (NewOpc != 0) { - const TargetInstrDesc &UnfoldTID = TII->get(NewOpc); - if (UnfoldTID.getNumDefs() == 1) { + const MCInstrDesc &UnfoldMCID = TII->get(NewOpc); + if (UnfoldMCID.getNumDefs() == 1) { MachineFunction &MF = *mbbi->getParent(); // Unfold the load. DEBUG(dbgs() << "2addr: UNFOLDING: " << *mi); const TargetRegisterClass *RC = - UnfoldTID.OpInfo[LoadRegIndex].getRegClass(TRI); + TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI); unsigned Reg = MRI->createVirtualRegister(RC); SmallVector<MachineInstr *, 2> NewMIs; if (!TII->unfoldMemoryOperand(MF, mi, Reg, @@ -1067,7 +1068,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { if (mi->isRegSequence()) RegSequences.push_back(&*mi); - const TargetInstrDesc &TID = mi->getDesc(); + const MCInstrDesc &MCID = mi->getDesc(); bool FirstTied = true; DistanceMap.insert(std::make_pair(mi, ++Dist)); @@ -1077,7 +1078,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { // First scan through all the tied register uses in this instruction // and record a list of pairs of tied operands for each register. unsigned NumOps = mi->isInlineAsm() - ? 
mi->getNumOperands() : TID.getNumOperands(); + ? mi->getNumOperands() : MCID.getNumOperands(); for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) { unsigned DstIdx = 0; if (!mi->isRegTiedToDefOperand(SrcIdx, &DstIdx)) diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp index 1850658..a5ec797 100644 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ b/lib/CodeGen/VirtRegRewriter.cpp @@ -679,8 +679,8 @@ static void ReMaterialize(MachineBasicBlock &MBB, VirtRegMap &VRM) { MachineInstr *ReMatDefMI = VRM.getReMaterializedMI(Reg); #ifndef NDEBUG - const TargetInstrDesc &TID = ReMatDefMI->getDesc(); - assert(TID.getNumDefs() == 1 && + const MCInstrDesc &MCID = ReMatDefMI->getDesc(); + assert(MCID.getNumDefs() == 1 && "Don't know how to remat instructions that define > 1 values!"); #endif TII->reMaterialize(MBB, MII, DestReg, 0, ReMatDefMI, *TRI); @@ -1483,11 +1483,11 @@ OptimizeByUnfold(MachineBasicBlock::iterator &MII, /// where SrcReg is r1 and it is tied to r0. Return true if after /// commuting this instruction it will be r0 = op r2, r1. static bool CommuteChangesDestination(MachineInstr *DefMI, - const TargetInstrDesc &TID, + const MCInstrDesc &MCID, unsigned SrcReg, const TargetInstrInfo *TII, unsigned &DstIdx) { - if (TID.getNumDefs() != 1 && TID.getNumOperands() != 3) + if (MCID.getNumDefs() != 1 && MCID.getNumOperands() != 3) return false; if (!DefMI->getOperand(1).isReg() || DefMI->getOperand(1).getReg() != SrcReg) @@ -1527,11 +1527,11 @@ CommuteToFoldReload(MachineBasicBlock::iterator &MII, MachineInstr &MI = *MII; MachineBasicBlock::iterator DefMII = prior(MII); MachineInstr *DefMI = DefMII; - const TargetInstrDesc &TID = DefMI->getDesc(); + const MCInstrDesc &MCID = DefMI->getDesc(); unsigned NewDstIdx; if (DefMII != MBB->begin() && - TID.isCommutable() && - CommuteChangesDestination(DefMI, TID, SrcReg, TII, NewDstIdx)) { + MCID.isCommutable() && + CommuteChangesDestination(DefMI, MCID, SrcReg, TII, NewDstIdx)) { MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); unsigned NewReg = NewDstMO.getReg(); if (!NewDstMO.isKill() || TRI->regsOverlap(NewReg, SrcReg)) @@ -1658,9 +1658,9 @@ SpillRegToStackSlot(MachineBasicBlock::iterator &MII, /// isSafeToDelete - Return true if this instruction doesn't produce any side /// effect and all of its defs are dead. 
static bool isSafeToDelete(MachineInstr &MI) { - const TargetInstrDesc &TID = MI.getDesc(); - if (TID.mayLoad() || TID.mayStore() || TID.isTerminator() || - TID.isCall() || TID.isBarrier() || TID.isReturn() || + const MCInstrDesc &MCID = MI.getDesc(); + if (MCID.mayLoad() || MCID.mayStore() || MCID.isTerminator() || + MCID.isCall() || MCID.isBarrier() || MCID.isReturn() || MI.isLabel() || MI.isDebugValue() || MI.hasUnmodeledSideEffects()) return false; diff --git a/lib/ExecutionEngine/TargetSelect.cpp b/lib/ExecutionEngine/TargetSelect.cpp index a8822e5..f51aff3 100644 --- a/lib/ExecutionEngine/TargetSelect.cpp +++ b/lib/ExecutionEngine/TargetSelect.cpp @@ -16,10 +16,10 @@ #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/Module.h" #include "llvm/ADT/Triple.h" +#include "llvm/MC/SubtargetFeature.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Host.h" -#include "llvm/Target/SubtargetFeature.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegistry.h" using namespace llvm; @@ -75,9 +75,8 @@ TargetMachine *EngineBuilder::selectTarget(Module *Mod, // Package up features to be passed to target/subtarget std::string FeaturesStr; - if (!MCPU.empty() || !MAttrs.empty()) { + if (!MAttrs.empty()) { SubtargetFeatures Features; - Features.setCPU(MCPU); for (unsigned i = 0; i != MAttrs.size(); ++i) Features.AddFeature(MAttrs[i]); FeaturesStr = Features.getString(); @@ -85,7 +84,7 @@ TargetMachine *EngineBuilder::selectTarget(Module *Mod, // Allocate a target... TargetMachine *Target = - TheTarget->createTargetMachine(TheTriple.getTriple(), FeaturesStr); + TheTarget->createTargetMachine(TheTriple.getTriple(), MCPU, FeaturesStr); assert(Target && "Could not allocate target machine!"); return Target; } diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt index a77ecd3..00e534f 100644 --- a/lib/MC/CMakeLists.txt +++ b/lib/MC/CMakeLists.txt @@ -34,6 +34,7 @@ add_llvm_library(LLVMMC MachObjectWriter.cpp WinCOFFStreamer.cpp WinCOFFObjectWriter.cpp + SubtargetFeature.cpp TargetAsmBackend.cpp ) diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp index 6e636f0..6d6777e 100644 --- a/lib/MC/MCDisassembler/Disassembler.cpp +++ b/lib/MC/MCDisassembler/Disassembler.cpp @@ -55,11 +55,13 @@ LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo, // Package up features to be passed to target/subtarget std::string FeaturesStr; + std::string CPU; // FIXME: We shouldn't need to do this (and link in codegen). // When we split this out, we should do it in a way that makes // it straightforward to switch subtargets on the fly. - TargetMachine *TM = TheTarget->createTargetMachine(TripleName, FeaturesStr); + TargetMachine *TM = TheTarget->createTargetMachine(TripleName, CPU, + FeaturesStr); assert(TM && "Unable to create target machine!"); // Get the target assembler info needed to setup the context. 
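Both the TargetSelect.cpp and Disassembler.cpp hunks above adapt to the same API change: Target::createTargetMachine now takes the CPU name as its own argument, so SubtargetFeatures::setCPU disappears and the feature string carries only explicit attributes. A minimal sketch of the updated calling convention (editorial; createTM is a hypothetical helper, error handling omitted):

    #include "llvm/MC/SubtargetFeature.h"
    #include "llvm/Target/TargetMachine.h"
    #include "llvm/Target/TargetRegistry.h"
    #include <string>
    #include <vector>

    static llvm::TargetMachine *
    createTM(const llvm::Target &T, const std::string &Triple,
             const std::string &CPU, const std::vector<std::string> &MAttrs) {
      // Only the +attr/-attr style attributes go into the feature string;
      // the CPU is no longer folded in with Features.setCPU(CPU).
      std::string FeaturesStr;
      if (!MAttrs.empty()) {
        llvm::SubtargetFeatures Features;
        for (unsigned i = 0, e = MAttrs.size(); i != e; ++i)
          Features.AddFeature(MAttrs[i]);
        FeaturesStr = Features.getString();
      }
      // The CPU travels as a separate parameter after this change.
      return T.createTargetMachine(Triple, CPU, FeaturesStr);
    }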
diff --git a/lib/MC/MCDisassembler/EDDisassembler.cpp b/lib/MC/MCDisassembler/EDDisassembler.cpp index 91c5284..2a46d37 100644 --- a/lib/MC/MCDisassembler/EDDisassembler.cpp +++ b/lib/MC/MCDisassembler/EDDisassembler.cpp @@ -167,9 +167,9 @@ EDDisassembler::EDDisassembler(CPUKey &key) : if (!Tgt) return; + std::string CPU; std::string featureString; - - TargetMachine.reset(Tgt->createTargetMachine(tripleString, + TargetMachine.reset(Tgt->createTargetMachine(tripleString, CPU, featureString)); const TargetRegisterInfo *registerInfo = TargetMachine->getRegisterInfo(); diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index d232d84..13164ed 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -30,23 +30,23 @@ using namespace llvm; #define SPECIAL_ADDR(op) (((op) - DWARF2_LINE_OPCODE_BASE)/DWARF2_LINE_RANGE) // The maximum address skip amount that can be encoded with a special op. -#define MAX_SPECIAL_ADDR_DELTA SPECIAL_ADDR(255) +#define MAX_SPECIAL_ADDR_DELTA SPECIAL_ADDR(255) // First special line opcode - leave room for the standard opcodes. // Note: If you want to change this, you'll have to update the // "standard_opcode_lengths" table that is emitted in DwarfFileTable::Emit(). -#define DWARF2_LINE_OPCODE_BASE 13 +#define DWARF2_LINE_OPCODE_BASE 13 // Minimum line offset in a special line info. opcode. This value // was chosen to give a reasonable range of values. -#define DWARF2_LINE_BASE -5 +#define DWARF2_LINE_BASE -5 // Range of line offsets in a special line info. opcode. -# define DWARF2_LINE_RANGE 14 +#define DWARF2_LINE_RANGE 14 // Define the architecture-dependent minimum instruction length (in bytes). // This value should be rather too small than too big. -# define DWARF2_LINE_MIN_INSN_LENGTH 1 +#define DWARF2_LINE_MIN_INSN_LENGTH 1 // Note: when DWARF2_LINE_MIN_INSN_LENGTH == 1 which is the current setting, // this routine is a nop and will be optimized away. 
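For reference, the line-table macros above combine into DWARF "special opcodes", single bytes that advance both the address and the line registers at once. A worked sketch of the arithmetic (editorial, using the constants defined above):

    #include <cstdint>

    static const int OpcodeBase = 13;  // DWARF2_LINE_OPCODE_BASE
    static const int LineBase   = -5;  // DWARF2_LINE_BASE
    static const int LineRange  = 14;  // DWARF2_LINE_RANGE

    // Returns the special opcode encoding the two deltas, or -1 when they
    // do not fit and standard opcodes must be used instead. The largest
    // encodable address skip is (255 - 13) / 14 == 17, which is exactly
    // MAX_SPECIAL_ADDR_DELTA, i.e. SPECIAL_ADDR(255), above.
    static int specialOpcode(int64_t LineDelta, uint64_t AddrDelta) {
      if (LineDelta < LineBase || LineDelta >= LineBase + LineRange)
        return -1;                                 // line delta out of range
      int64_t Op = (LineDelta - LineBase) + LineRange * (int64_t)AddrDelta
                 + OpcodeBase;
      return Op <= 255 ? (int)Op : -1;             // must fit in one byte
    }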
@@ -290,7 +290,7 @@ void MCDwarfFileTable::Emit(MCStreamer *MCOS) {
  const std::vector<const MCSection *> &MCLineSectionOrder =
    MCOS->getContext().getMCLineSectionOrder();
  for (std::vector<const MCSection*>::const_iterator it =
-       MCLineSectionOrder.begin(), ie = MCLineSectionOrder.end(); it != ie; 
+       MCLineSectionOrder.begin(), ie = MCLineSectionOrder.end(); it != ie;
       ++it) {
    const MCSection *Sec = *it;
    const MCLineSection *Line = MCLineSections.lookup(Sec);
@@ -460,13 +460,14 @@ static unsigned getSizeForEncoding(MCStreamer &streamer,
}

static void EmitSymbol(MCStreamer &streamer, const MCSymbol &symbol,
-                       unsigned symbolEncoding) {
+                       unsigned symbolEncoding, const char *comment = 0) {
  MCContext &context = streamer.getContext();
  const MCAsmInfo &asmInfo = context.getAsmInfo();
  const MCExpr *v = asmInfo.getExprForFDESymbol(&symbol,
                                                symbolEncoding,
                                                streamer);
  unsigned size = getSizeForEncoding(streamer, symbolEncoding);
+  if (streamer.isVerboseAsm() && comment) streamer.AddComment(comment);
  streamer.EmitAbsValue(v, size);
}

@@ -526,11 +527,46 @@ namespace {
  void EmitCFIInstruction(MCStreamer &Streamer,
                          const MCCFIInstruction &Instr);
};
+
+} // end anonymous namespace
+
+static void EmitEncodingByte(MCStreamer &Streamer, unsigned Encoding,
+                             StringRef Prefix) {
+  if (Streamer.isVerboseAsm()) {
+    const char *EncStr = 0;
+    switch (Encoding) {
+    default: EncStr = "<unknown encoding>"; break;
+    case dwarf::DW_EH_PE_absptr: EncStr = "absptr"; break;
+    case dwarf::DW_EH_PE_omit: EncStr = "omit"; break;
+    case dwarf::DW_EH_PE_pcrel: EncStr = "pcrel"; break;
+    case dwarf::DW_EH_PE_udata4: EncStr = "udata4"; break;
+    case dwarf::DW_EH_PE_udata8: EncStr = "udata8"; break;
+    case dwarf::DW_EH_PE_sdata4: EncStr = "sdata4"; break;
+    case dwarf::DW_EH_PE_sdata8: EncStr = "sdata8"; break;
+    case dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata4: EncStr = "pcrel udata4"; break;
+    case dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata4: EncStr = "pcrel sdata4"; break;
+    case dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata8: EncStr = "pcrel udata8"; break;
+    case dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata8: EncStr = "pcrel sdata8"; break;
+    case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_udata4:
+      EncStr = "indirect pcrel udata4"; break;
+    case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_sdata4:
+      EncStr = "indirect pcrel sdata4"; break;
+    case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_udata8:
+      EncStr = "indirect pcrel udata8"; break;
+    case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_sdata8:
+      EncStr = "indirect pcrel sdata8"; break;
+    }
+
+    Streamer.AddComment(Twine(Prefix) + " = " + EncStr);
+  }
+
+  Streamer.EmitIntValue(Encoding, 1);
}

void FrameEmitterImpl::EmitCFIInstruction(MCStreamer &Streamer,
                                          const MCCFIInstruction &Instr) {
  int dataAlignmentFactor = getDataAlignmentFactor(Streamer);
+  bool VerboseAsm = Streamer.isVerboseAsm();

  switch (Instr.getOperation()) {
  case MCCFIInstruction::Move:
@@ -542,9 +578,13 @@ void FrameEmitterImpl::EmitCFIInstruction(MCStreamer &Streamer,
    // If advancing cfa.
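// Note on EmitEncodingByte above: the composed cases work because
// DW_EH_PE values OR together a format nibble and modifier bits, e.g.
// DW_EH_PE_sdata4 (0x0B) | DW_EH_PE_pcrel (0x10) yields "pcrel sdata4"
// (0x1B), and adding DW_EH_PE_indirect (0x80) yields 0x9B. The
// cfa-advancing case the comment above flags follows.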
if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { if (Src.getReg() == MachineLocation::VirtualFP) { + if (VerboseAsm) Streamer.AddComment("DW_CFA_def_cfa_offset"); Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa_offset, 1); } else { + if (VerboseAsm) Streamer.AddComment("DW_CFA_def_cfa"); Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa, 1); + if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + + Twine(Src.getReg())); Streamer.EmitULEB128IntValue(Src.getReg()); } @@ -553,47 +593,62 @@ void FrameEmitterImpl::EmitCFIInstruction(MCStreamer &Streamer, else CFAOffset = -Src.getOffset(); + if (VerboseAsm) Streamer.AddComment(Twine("Offset " + Twine(CFAOffset))); Streamer.EmitULEB128IntValue(CFAOffset); return; } if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) { assert(Dst.isReg() && "Machine move not supported yet."); + if (VerboseAsm) Streamer.AddComment("DW_CFA_def_cfa_register"); Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa_register, 1); + if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + Twine(Dst.getReg())); Streamer.EmitULEB128IntValue(Dst.getReg()); return; } unsigned Reg = Src.getReg(); - int Offset = Dst.getOffset(); if (IsRelative) Offset -= CFAOffset; Offset = Offset / dataAlignmentFactor; if (Offset < 0) { + if (VerboseAsm) Streamer.AddComment("DW_CFA_offset_extended_sf"); Streamer.EmitIntValue(dwarf::DW_CFA_offset_extended_sf, 1); + if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + Twine(Reg)); Streamer.EmitULEB128IntValue(Reg); + if (VerboseAsm) Streamer.AddComment(Twine("Offset ") + Twine(Offset)); Streamer.EmitSLEB128IntValue(Offset); } else if (Reg < 64) { + if (VerboseAsm) Streamer.AddComment(Twine("DW_CFA_offset + Reg(") + + Twine(Reg) + ")"); Streamer.EmitIntValue(dwarf::DW_CFA_offset + Reg, 1); + if (VerboseAsm) Streamer.AddComment(Twine("Offset ") + Twine(Offset)); Streamer.EmitULEB128IntValue(Offset); } else { + if (VerboseAsm) Streamer.AddComment("DW_CFA_offset_extended"); Streamer.EmitIntValue(dwarf::DW_CFA_offset_extended, 1); + if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + Twine(Reg)); Streamer.EmitULEB128IntValue(Reg); + if (VerboseAsm) Streamer.AddComment(Twine("Offset ") + Twine(Offset)); Streamer.EmitULEB128IntValue(Offset); } return; } case MCCFIInstruction::Remember: + if (VerboseAsm) Streamer.AddComment("DW_CFA_remember_state"); Streamer.EmitIntValue(dwarf::DW_CFA_remember_state, 1); return; case MCCFIInstruction::Restore: + if (VerboseAsm) Streamer.AddComment("DW_CFA_restore_state"); Streamer.EmitIntValue(dwarf::DW_CFA_restore_state, 1); return; case MCCFIInstruction::SameValue: { unsigned Reg = Instr.getDestination().getReg(); + if (VerboseAsm) Streamer.AddComment("DW_CFA_same_value"); Streamer.EmitIntValue(dwarf::DW_CFA_same_value, 1); + if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + Twine(Reg)); Streamer.EmitULEB128IntValue(Reg); return; } @@ -616,6 +671,7 @@ void FrameEmitterImpl::EmitCFIInstructions(MCStreamer &streamer, if (BaseLabel && Label) { MCSymbol *ThisSym = Label; if (ThisSym != BaseLabel) { + if (streamer.isVerboseAsm()) streamer.AddComment("DW_CFA_advance_loc4"); streamer.EmitDwarfAdvanceFrameAddr(BaseLabel, ThisSym); BaseLabel = ThisSym; } @@ -635,10 +691,7 @@ bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer, #else MCContext &Context = Streamer.getContext(); const TargetAsmInfo &TAI = Context.getTargetAsmInfo(); - Streamer.SwitchSection(TAI.getCompactUnwindSection()); - - unsigned FDEEncoding = TAI.getFDEEncoding(UsingCFI); - unsigned Size = getSizeForEncoding(Streamer, FDEEncoding); + bool 
VerboseAsm = Streamer.isVerboseAsm(); // range-start range-length compact-unwind-enc personality-func lsda // _foo LfooEnd-_foo 0x00000023 0 0 @@ -662,13 +715,43 @@ bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer, // .quad __gxx_personality // .quad except_tab1 + Streamer.SwitchSection(TAI.getCompactUnwindSection()); + // Range Start - EmitSymbol(Streamer, *Frame.Begin, FDEEncoding); + unsigned FDEEncoding = TAI.getFDEEncoding(UsingCFI); + unsigned Size = getSizeForEncoding(Streamer, FDEEncoding); + if (VerboseAsm) Streamer.AddComment("Range Start"); + Streamer.EmitSymbolValue(Frame.Function, Size); // Range Length const MCExpr *Range = MakeStartMinusEndExpr(Streamer, *Frame.Begin, *Frame.End, 0); - Streamer.EmitAbsValue(Range, Size); + if (VerboseAsm) Streamer.AddComment("Range Length"); + Streamer.EmitAbsValue(Range, 4); + + // FIXME: + // Compact Encoding + const std::vector<MachineMove> &Moves = TAI.getInitialFrameState(); + uint32_t Encoding = 0; + Size = getSizeForEncoding(Streamer, dwarf::DW_EH_PE_udata4); + if (VerboseAsm) Streamer.AddComment("Compact Unwind Encoding"); + Streamer.EmitIntValue(Encoding, Size); + + // Personality Function + Size = getSizeForEncoding(Streamer, Frame.PersonalityEncoding); + if (VerboseAsm) Streamer.AddComment("Personality Function"); + if (Frame.Personality) + Streamer.EmitSymbolValue(Frame.Personality, Size); + else + Streamer.EmitIntValue(0, Size); // No personality fn + + // LSDA + Size = getSizeForEncoding(Streamer, Frame.LsdaEncoding); + if (VerboseAsm) Streamer.AddComment("LSDA"); + if (Frame.Lsda) + Streamer.EmitSymbolValue(Frame.Lsda, Size); + else + Streamer.EmitIntValue(0, Size); // No LSDA return true; #endif @@ -681,6 +764,7 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer, unsigned lsdaEncoding) { MCContext &context = streamer.getContext(); const TargetAsmInfo &asmInfo = context.getTargetAsmInfo(); + bool verboseAsm = streamer.isVerboseAsm(); MCSymbol *sectionStart; if (asmInfo.isFunctionEHFrameSymbolPrivate() || !IsEH) @@ -688,6 +772,7 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer, else sectionStart = context.GetOrCreateSymbol(Twine("EH_frame") + Twine(CIENum)); + streamer.EmitLabel(sectionStart); CIENum++; MCSymbol *sectionEnd = streamer.getContext().CreateTempSymbol(); @@ -695,19 +780,22 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer, // Length const MCExpr *Length = MakeStartMinusEndExpr(streamer, *sectionStart, *sectionEnd, 4); - streamer.EmitLabel(sectionStart); + if (verboseAsm) streamer.AddComment("CIE Length"); streamer.EmitAbsValue(Length, 4); // CIE ID unsigned CIE_ID = IsEH ? 
0 : -1; + if (verboseAsm) streamer.AddComment("CIE ID Tag"); streamer.EmitIntValue(CIE_ID, 4); // Version + if (verboseAsm) streamer.AddComment("DW_CIE_VERSION"); streamer.EmitIntValue(dwarf::DW_CIE_VERSION, 1); // Augmentation String SmallString<8> Augmentation; if (IsEH) { + if (verboseAsm) streamer.AddComment("CIE Augmentation"); Augmentation += "z"; if (personality) Augmentation += "P"; @@ -719,12 +807,15 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer, streamer.EmitIntValue(0, 1); // Code Alignment Factor + if (verboseAsm) streamer.AddComment("CIE Code Alignment Factor"); streamer.EmitULEB128IntValue(1); // Data Alignment Factor + if (verboseAsm) streamer.AddComment("CIE Data Alignment Factor"); streamer.EmitSLEB128IntValue(getDataAlignmentFactor(streamer)); // Return Address Register + if (verboseAsm) streamer.AddComment("CIE Return Address Column"); streamer.EmitULEB128IntValue(asmInfo.getDwarfRARegNum(true)); // Augmentation Data Length (optional) @@ -742,19 +833,25 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer, // Encoding of the FDE pointers augmentationLength += 1; + if (verboseAsm) streamer.AddComment("Augmentation Size"); streamer.EmitULEB128IntValue(augmentationLength); // Augmentation Data (optional) if (personality) { // Personality Encoding - streamer.EmitIntValue(personalityEncoding, 1); + EmitEncodingByte(streamer, personalityEncoding, + "Personality Encoding"); // Personality + if (verboseAsm) streamer.AddComment("Personality"); EmitPersonality(streamer, *personality, personalityEncoding); } + if (lsda) - streamer.EmitIntValue(lsdaEncoding, 1); // LSDA Encoding + EmitEncodingByte(streamer, lsdaEncoding, "LSDA Encoding"); + // Encoding of the FDE pointers - streamer.EmitIntValue(asmInfo.getFDEEncoding(UsingCFI), 1); + EmitEncodingByte(streamer, asmInfo.getFDEEncoding(UsingCFI), + "FDE Encoding"); } // Initial Instructions @@ -788,16 +885,18 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer, MCSymbol *fdeStart = context.CreateTempSymbol(); MCSymbol *fdeEnd = context.CreateTempSymbol(); const TargetAsmInfo &TAsmInfo = context.getTargetAsmInfo(); + bool verboseAsm = streamer.isVerboseAsm(); if (!TAsmInfo.isFunctionEHFrameSymbolPrivate() && IsEH) { - MCSymbol *EHSym = context.GetOrCreateSymbol( - frame.Function->getName() + Twine(".eh")); + MCSymbol *EHSym = + context.GetOrCreateSymbol(frame.Function->getName() + Twine(".eh")); streamer.EmitEHSymAttributes(frame.Function, EHSym); streamer.EmitLabel(EHSym); } // Length const MCExpr *Length = MakeStartMinusEndExpr(streamer, *fdeStart, *fdeEnd, 0); + if (verboseAsm) streamer.AddComment("FDE Length"); streamer.EmitAbsValue(Length, 4); streamer.EmitLabel(fdeStart); @@ -807,6 +906,7 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer, if (IsEH) { const MCExpr *offset = MakeStartMinusEndExpr(streamer, cieStart, *fdeStart, 0); + if (verboseAsm) streamer.AddComment("FDE CIE Offset"); streamer.EmitAbsValue(offset, 4); } else if (!asmInfo.doesDwarfRequireRelocationForSectionOffset()) { const MCExpr *offset = MakeStartMinusEndExpr(streamer, *SectionStart, @@ -815,6 +915,7 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer, } else { streamer.EmitSymbolValue(&cieStart, 4); } + unsigned fdeEncoding = TAsmInfo.getFDEEncoding(UsingCFI); unsigned size = getSizeForEncoding(streamer, fdeEncoding); @@ -822,11 +923,12 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer, unsigned PCBeginEncoding = IsEH ? 
fdeEncoding : (unsigned)dwarf::DW_EH_PE_absptr; unsigned PCBeginSize = getSizeForEncoding(streamer, PCBeginEncoding); - EmitSymbol(streamer, *frame.Begin, PCBeginEncoding); + EmitSymbol(streamer, *frame.Begin, PCBeginEncoding, "FDE initial location"); // PC Range const MCExpr *Range = MakeStartMinusEndExpr(streamer, *frame.Begin, *frame.End, 0); + if (verboseAsm) streamer.AddComment("FDE address range"); streamer.EmitAbsValue(Range, size); if (IsEH) { @@ -836,11 +938,13 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer, if (frame.Lsda) augmentationLength += getSizeForEncoding(streamer, frame.LsdaEncoding); + if (verboseAsm) streamer.AddComment("Augmentation size"); streamer.EmitULEB128IntValue(augmentationLength); // Augmentation Data if (frame.Lsda) - EmitSymbol(streamer, *frame.Lsda, frame.LsdaEncoding); + EmitSymbol(streamer, *frame.Lsda, frame.LsdaEncoding, + "Language Specific Data Area"); } // Call Frame Instructions diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 7b62db2..db188f7 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -28,6 +28,7 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCDwarf.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" @@ -1612,13 +1613,18 @@ bool AsmParser::ParseDirectiveValue(unsigned Size) { for (;;) { const MCExpr *Value; + SMLoc ExprLoc = getLexer().getLoc(); if (ParseExpression(Value)) return true; // Special case constant expressions to match code generator. - if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) - getStreamer().EmitIntValue(MCE->getValue(), Size, DEFAULT_ADDRSPACE); - else + if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) { + assert(Size <= 8 && "Invalid size"); + uint64_t IntValue = MCE->getValue(); + if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue)) + return Error(ExprLoc, "literal value out of range for directive"); + getStreamer().EmitIntValue(IntValue, Size, DEFAULT_ADDRSPACE); + } else getStreamer().EmitValue(Value, Size, DEFAULT_ADDRSPACE); if (getLexer().is(AsmToken::EndOfStatement)) diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp index 0d80514..69efe23 100644 --- a/lib/MC/MachObjectWriter.cpp +++ b/lib/MC/MachObjectWriter.cpp @@ -23,34 +23,12 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetAsmBackend.h" -// FIXME: Gross. -#include "../Target/ARM/ARMFixupKinds.h" -#include "../Target/X86/X86FixupKinds.h" - #include <vector> using namespace llvm; using namespace llvm::object; -// FIXME: this has been copied from (or to) X86AsmBackend.cpp -static unsigned getFixupKindLog2Size(unsigned Kind) { - switch (Kind) { - default: - llvm_unreachable("invalid fixup kind!"); - case FK_PCRel_1: - case FK_Data_1: return 0; - case FK_PCRel_2: - case FK_Data_2: return 1; - case FK_PCRel_4: - // FIXME: Remove these!!! - case X86::reloc_riprel_4byte: - case X86::reloc_riprel_4byte_movq_load: - case X86::reloc_signed_4byte: - case FK_Data_4: return 2; - case FK_Data_8: return 3; - } -} - -static bool doesSymbolRequireExternRelocation(MCSymbolData *SD) { +bool MachObjectWriter:: +doesSymbolRequireExternRelocation(const MCSymbolData *SD) { // Undefined symbols are always extern. 
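// Note on this file's hunks: most of the change is mechanical hoisting.
// MachObjectWriter stops being an implementation detail hidden in an
// anonymous namespace here (the large deleted class body below) and its
// helpers, such as doesSymbolRequireExternRelocation, become out-of-line
// member definitions of a class now declared outside this file. The check
// announced by the comment above follows.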
if (SD->Symbol->isUndefined()) return true; @@ -64,207 +42,24 @@ static bool doesSymbolRequireExternRelocation(MCSymbolData *SD) { return false; } -namespace { - -class MachObjectWriter : public MCObjectWriter { - /// MachSymbolData - Helper struct for containing some precomputed information - /// on symbols. - struct MachSymbolData { - MCSymbolData *SymbolData; - uint64_t StringIndex; - uint8_t SectionIndex; - - // Support lexicographic sorting. - bool operator<(const MachSymbolData &RHS) const { - return SymbolData->getSymbol().getName() < - RHS.SymbolData->getSymbol().getName(); - } - }; - - /// The target specific Mach-O writer instance. - llvm::OwningPtr<MCMachObjectTargetWriter> TargetObjectWriter; - - /// @name Relocation Data - /// @{ - - llvm::DenseMap<const MCSectionData*, - std::vector<macho::RelocationEntry> > Relocations; - llvm::DenseMap<const MCSectionData*, unsigned> IndirectSymBase; - - /// @} - /// @name Symbol Table Data - /// @{ - - SmallString<256> StringTable; - std::vector<MachSymbolData> LocalSymbolData; - std::vector<MachSymbolData> ExternalSymbolData; - std::vector<MachSymbolData> UndefinedSymbolData; - - /// @} - -private: - /// @name Utility Methods - /// @{ - - bool isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) { - const MCFixupKindInfo &FKI = Asm.getBackend().getFixupKindInfo( - (MCFixupKind) Kind); - - return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel; - } - - /// @} - - SectionAddrMap SectionAddress; - uint64_t getSectionAddress(const MCSectionData* SD) const { - return SectionAddress.lookup(SD); - } - uint64_t getSymbolAddress(const MCSymbolData* SD, - const MCAsmLayout &Layout) const; - - uint64_t getFragmentAddress(const MCFragment *Fragment, - const MCAsmLayout &Layout) const { - return getSectionAddress(Fragment->getParent()) + - Layout.getFragmentOffset(Fragment); - } - - uint64_t getPaddingSize(const MCSectionData *SD, - const MCAsmLayout &Layout) const; -public: - MachObjectWriter(MCMachObjectTargetWriter *MOTW, raw_ostream &_OS, - bool _IsLittleEndian) - : MCObjectWriter(_OS, _IsLittleEndian), TargetObjectWriter(MOTW) { - } - - /// @name Target Writer Proxy Accessors - /// @{ +bool MachObjectWriter:: +MachSymbolData::operator<(const MachSymbolData &RHS) const { + return SymbolData->getSymbol().getName() < + RHS.SymbolData->getSymbol().getName(); +} - bool is64Bit() const { return TargetObjectWriter->is64Bit(); } - bool isARM() const { - uint32_t CPUType = TargetObjectWriter->getCPUType() & ~mach::CTFM_ArchMask; - return CPUType == mach::CTM_ARM; - } +bool MachObjectWriter::isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) { + const MCFixupKindInfo &FKI = Asm.getBackend().getFixupKindInfo( + (MCFixupKind) Kind); - /// @} - - void WriteHeader(unsigned NumLoadCommands, unsigned LoadCommandsSize, - bool SubsectionsViaSymbols); - - /// WriteSegmentLoadCommand - Write a segment load command. - /// - /// \arg NumSections - The number of sections in this segment. - /// \arg SectionDataSize - The total size of the sections. 
- void WriteSegmentLoadCommand(unsigned NumSections, - uint64_t VMSize, - uint64_t SectionDataStartOffset, - uint64_t SectionDataSize); - - void WriteSection(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCSectionData &SD, uint64_t FileOffset, - uint64_t RelocationsStart, unsigned NumRelocations); - - void WriteSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols, - uint32_t StringTableOffset, - uint32_t StringTableSize); - - void WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol, - uint32_t NumLocalSymbols, - uint32_t FirstExternalSymbol, - uint32_t NumExternalSymbols, - uint32_t FirstUndefinedSymbol, - uint32_t NumUndefinedSymbols, - uint32_t IndirectSymbolOffset, - uint32_t NumIndirectSymbols); - - void WriteNlist(MachSymbolData &MSD, const MCAsmLayout &Layout); - - // FIXME: We really need to improve the relocation validation. Basically, we - // want to implement a separate computation which evaluates the relocation - // entry as the linker would, and verifies that the resultant fixup value is - // exactly what the encoder wanted. This will catch several classes of - // problems: - // - // - Relocation entry bugs, the two algorithms are unlikely to have the same - // exact bug. - // - // - Relaxation issues, where we forget to relax something. - // - // - Input errors, where something cannot be correctly encoded. 'as' allows - // these through in many cases. + return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel; +} - static bool isFixupKindRIPRel(unsigned Kind) { - return Kind == X86::reloc_riprel_4byte || - Kind == X86::reloc_riprel_4byte_movq_load; - } - void RecordX86_64Relocation(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFragment *Fragment, - const MCFixup &Fixup, MCValue Target, - uint64_t &FixedValue); - - void RecordScatteredRelocation(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFragment *Fragment, - const MCFixup &Fixup, MCValue Target, - unsigned Log2Size, - uint64_t &FixedValue); - - void RecordARMScatteredRelocation(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFragment *Fragment, - const MCFixup &Fixup, MCValue Target, - unsigned Log2Size, - uint64_t &FixedValue); - - void RecordARMMovwMovtRelocation(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFragment *Fragment, - const MCFixup &Fixup, MCValue Target, - uint64_t &FixedValue); - - void RecordTLVPRelocation(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFragment *Fragment, - const MCFixup &Fixup, MCValue Target, - uint64_t &FixedValue); - - static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType, - unsigned &Log2Size); - - void RecordARMRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFragment *Fragment, const MCFixup &Fixup, - MCValue Target, uint64_t &FixedValue); - - void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFragment *Fragment, const MCFixup &Fixup, - MCValue Target, uint64_t &FixedValue); - - void BindIndirectSymbols(MCAssembler &Asm); - - /// ComputeSymbolTable - Compute the symbol table data - /// - /// \param StringTable [out] - The string table data. - /// \param StringIndexMap [out] - Map from symbol names to offsets in the - /// string table. 
- void ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable, - std::vector<MachSymbolData> &LocalSymbolData, - std::vector<MachSymbolData> &ExternalSymbolData, - std::vector<MachSymbolData> &UndefinedSymbolData); - - void computeSectionAddresses(const MCAssembler &Asm, - const MCAsmLayout &Layout); - - void ExecutePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout); - - virtual bool IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, - const MCSymbolData &DataA, - const MCFragment &FB, - bool InSet, - bool IsPCRel) const; - - void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout); -}; - -} // end anonymous namespace +uint64_t MachObjectWriter::getFragmentAddress(const MCFragment *Fragment, + const MCAsmLayout &Layout) const { + return getSectionAddress(Fragment->getParent()) + + Layout.getFragmentOffset(Fragment); +} uint64_t MachObjectWriter::getSymbolAddress(const MCSymbolData* SD, const MCAsmLayout &Layout) const { @@ -556,774 +351,14 @@ void MachObjectWriter::WriteNlist(MachSymbolData &MSD, Write32(Address); } -void MachObjectWriter::RecordX86_64Relocation(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFragment *Fragment, - const MCFixup &Fixup, - MCValue Target, - uint64_t &FixedValue) { - unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind()); - unsigned IsRIPRel = isFixupKindRIPRel(Fixup.getKind()); - unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); - - // See <reloc.h>. - uint32_t FixupOffset = - Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); - uint32_t FixupAddress = - getFragmentAddress(Fragment, Layout) + Fixup.getOffset(); - int64_t Value = 0; - unsigned Index = 0; - unsigned IsExtern = 0; - unsigned Type = 0; - - Value = Target.getConstant(); - - if (IsPCRel) { - // Compensate for the relocation offset, Darwin x86_64 relocations only have - // the addend and appear to have attempted to define it to be the actual - // expression addend without the PCrel bias. However, instructions with data - // following the relocation are not accommodated for (see comment below - // regarding SIGNED{1,2,4}), so it isn't exactly that either. - Value += 1LL << Log2Size; - } - - if (Target.isAbsolute()) { // constant - // SymbolNum of 0 indicates the absolute section. - Type = macho::RIT_X86_64_Unsigned; - Index = 0; - - // FIXME: I believe this is broken, I don't think the linker can understand - // it. I think it would require a local relocation, but I'm not sure if that - // would work either. The official way to get an absolute PCrel relocation - // is to use an absolute symbol (which we don't support yet). - if (IsPCRel) { - IsExtern = 1; - Type = macho::RIT_X86_64_Branch; - } - } else if (Target.getSymB()) { // A - B + constant - const MCSymbol *A = &Target.getSymA()->getSymbol(); - MCSymbolData &A_SD = Asm.getSymbolData(*A); - const MCSymbolData *A_Base = Asm.getAtom(&A_SD); - - const MCSymbol *B = &Target.getSymB()->getSymbol(); - MCSymbolData &B_SD = Asm.getSymbolData(*B); - const MCSymbolData *B_Base = Asm.getAtom(&B_SD); - - // Neither symbol can be modified. - if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None || - Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) - report_fatal_error("unsupported relocation of modified symbol"); - - // We don't support PCrel relocations of differences. Darwin 'as' doesn't - // implement most of these correctly. 
- if (IsPCRel) - report_fatal_error("unsupported pc-relative relocation of difference"); - - // The support for the situation where one or both of the symbols would - // require a local relocation is handled just like if the symbols were - // external. This is certainly used in the case of debug sections where the - // section has only temporary symbols and thus the symbols don't have base - // symbols. This is encoded using the section ordinal and non-extern - // relocation entries. - - // Darwin 'as' doesn't emit correct relocations for this (it ends up with a - // single SIGNED relocation); reject it for now. Except the case where both - // symbols don't have a base, equal but both NULL. - if (A_Base == B_Base && A_Base) - report_fatal_error("unsupported relocation with identical base"); - - Value += getSymbolAddress(&A_SD, Layout) - - (A_Base == NULL ? 0 : getSymbolAddress(A_Base, Layout)); - Value -= getSymbolAddress(&B_SD, Layout) - - (B_Base == NULL ? 0 : getSymbolAddress(B_Base, Layout)); - - if (A_Base) { - Index = A_Base->getIndex(); - IsExtern = 1; - } - else { - Index = A_SD.getFragment()->getParent()->getOrdinal() + 1; - IsExtern = 0; - } - Type = macho::RIT_X86_64_Unsigned; - - macho::RelocationEntry MRE; - MRE.Word0 = FixupOffset; - MRE.Word1 = ((Index << 0) | - (IsPCRel << 24) | - (Log2Size << 25) | - (IsExtern << 27) | - (Type << 28)); - Relocations[Fragment->getParent()].push_back(MRE); - - if (B_Base) { - Index = B_Base->getIndex(); - IsExtern = 1; - } - else { - Index = B_SD.getFragment()->getParent()->getOrdinal() + 1; - IsExtern = 0; - } - Type = macho::RIT_X86_64_Subtractor; - } else { - const MCSymbol *Symbol = &Target.getSymA()->getSymbol(); - MCSymbolData &SD = Asm.getSymbolData(*Symbol); - const MCSymbolData *Base = Asm.getAtom(&SD); - - // Relocations inside debug sections always use local relocations when - // possible. This seems to be done because the debugger doesn't fully - // understand x86_64 relocation entries, and expects to find values that - // have already been fixed up. - if (Symbol->isInSection()) { - const MCSectionMachO &Section = static_cast<const MCSectionMachO&>( - Fragment->getParent()->getSection()); - if (Section.hasAttribute(MCSectionMachO::S_ATTR_DEBUG)) - Base = 0; - } - - // x86_64 almost always uses external relocations, except when there is no - // symbol to use as a base address (a local symbol with no preceding - // non-local symbol). - if (Base) { - Index = Base->getIndex(); - IsExtern = 1; - - // Add the local offset, if needed. - if (Base != &SD) - Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base); - } else if (Symbol->isInSection() && !Symbol->isVariable()) { - // The index is the section ordinal (1-based). 
- Index = SD.getFragment()->getParent()->getOrdinal() + 1; - IsExtern = 0; - Value += getSymbolAddress(&SD, Layout); - - if (IsPCRel) - Value -= FixupAddress + (1 << Log2Size); - } else if (Symbol->isVariable()) { - const MCExpr *Value = Symbol->getVariableValue(); - int64_t Res; - bool isAbs = Value->EvaluateAsAbsolute(Res, Layout, SectionAddress); - if (isAbs) { - FixedValue = Res; - return; - } else { - report_fatal_error("unsupported relocation of variable '" + - Symbol->getName() + "'"); - } - } else { - report_fatal_error("unsupported relocation of undefined symbol '" + - Symbol->getName() + "'"); - } - - MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind(); - if (IsPCRel) { - if (IsRIPRel) { - if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) { - // x86_64 distinguishes movq foo@GOTPCREL so that the linker can - // rewrite the movq to an leaq at link time if the symbol ends up in - // the same linkage unit. - if (unsigned(Fixup.getKind()) == X86::reloc_riprel_4byte_movq_load) - Type = macho::RIT_X86_64_GOTLoad; - else - Type = macho::RIT_X86_64_GOT; - } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { - Type = macho::RIT_X86_64_TLV; - } else if (Modifier != MCSymbolRefExpr::VK_None) { - report_fatal_error("unsupported symbol modifier in relocation"); - } else { - Type = macho::RIT_X86_64_Signed; - - // The Darwin x86_64 relocation format has a problem where it cannot - // encode an address (L<foo> + <constant>) which is outside the atom - // containing L<foo>. Generally, this shouldn't occur but it does - // happen when we have a RIPrel instruction with data following the - // relocation entry (e.g., movb $012, L0(%rip)). Even with the PCrel - // adjustment Darwin x86_64 uses, the offset is still negative and the - // linker has no way to recognize this. - // - // To work around this, Darwin uses several special relocation types - // to indicate the offsets. However, the specification or - // implementation of these seems to also be incomplete; they should - // adjust the addend as well based on the actual encoded instruction - // (the additional bias), but instead appear to just look at the final - // offset. - switch (-(Target.getConstant() + (1LL << Log2Size))) { - case 1: Type = macho::RIT_X86_64_Signed1; break; - case 2: Type = macho::RIT_X86_64_Signed2; break; - case 4: Type = macho::RIT_X86_64_Signed4; break; - } - } - } else { - if (Modifier != MCSymbolRefExpr::VK_None) - report_fatal_error("unsupported symbol modifier in branch " - "relocation"); - - Type = macho::RIT_X86_64_Branch; - } - } else { - if (Modifier == MCSymbolRefExpr::VK_GOT) { - Type = macho::RIT_X86_64_GOT; - } else if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) { - // GOTPCREL is allowed as a modifier on non-PCrel instructions, in which - // case all we do is set the PCrel bit in the relocation entry; this is - // used with exception handling, for example. The source is required to - // include any necessary offset directly. - Type = macho::RIT_X86_64_GOT; - IsPCRel = 1; - } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { - report_fatal_error("TLVP symbol modifier should have been rip-rel"); - } else if (Modifier != MCSymbolRefExpr::VK_None) - report_fatal_error("unsupported symbol modifier in relocation"); - else - Type = macho::RIT_X86_64_Unsigned; - } - } - - // x86_64 always writes custom values into the fixups. 
- FixedValue = Value; - - // struct relocation_info (8 bytes) - macho::RelocationEntry MRE; - MRE.Word0 = FixupOffset; - MRE.Word1 = ((Index << 0) | - (IsPCRel << 24) | - (Log2Size << 25) | - (IsExtern << 27) | - (Type << 28)); - Relocations[Fragment->getParent()].push_back(MRE); -} - -void MachObjectWriter::RecordScatteredRelocation(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFragment *Fragment, - const MCFixup &Fixup, - MCValue Target, - unsigned Log2Size, - uint64_t &FixedValue) { - uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); - unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind()); - unsigned Type = macho::RIT_Vanilla; - - // See <reloc.h>. - const MCSymbol *A = &Target.getSymA()->getSymbol(); - MCSymbolData *A_SD = &Asm.getSymbolData(*A); - - if (!A_SD->getFragment()) - report_fatal_error("symbol '" + A->getName() + - "' can not be undefined in a subtraction expression"); - - uint32_t Value = getSymbolAddress(A_SD, Layout); - uint64_t SecAddr = getSectionAddress(A_SD->getFragment()->getParent()); - FixedValue += SecAddr; - uint32_t Value2 = 0; - - if (const MCSymbolRefExpr *B = Target.getSymB()) { - MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); - - if (!B_SD->getFragment()) - report_fatal_error("symbol '" + B->getSymbol().getName() + - "' can not be undefined in a subtraction expression"); - - // Select the appropriate difference relocation type. - // - // Note that there is no longer any semantic difference between these two - // relocation types from the linkers point of view, this is done solely for - // pedantic compatibility with 'as'. - Type = A_SD->isExternal() ? (unsigned)macho::RIT_Difference : - (unsigned)macho::RIT_Generic_LocalDifference; - Value2 = getSymbolAddress(B_SD, Layout); - FixedValue -= getSectionAddress(B_SD->getFragment()->getParent()); - } - - // Relocations are written out in reverse order, so the PAIR comes first. - if (Type == macho::RIT_Difference || - Type == macho::RIT_Generic_LocalDifference) { - macho::RelocationEntry MRE; - MRE.Word0 = ((0 << 0) | - (macho::RIT_Pair << 24) | - (Log2Size << 28) | - (IsPCRel << 30) | - macho::RF_Scattered); - MRE.Word1 = Value2; - Relocations[Fragment->getParent()].push_back(MRE); - } - - macho::RelocationEntry MRE; - MRE.Word0 = ((FixupOffset << 0) | - (Type << 24) | - (Log2Size << 28) | - (IsPCRel << 30) | - macho::RF_Scattered); - MRE.Word1 = Value; - Relocations[Fragment->getParent()].push_back(MRE); -} - -void MachObjectWriter::RecordARMScatteredRelocation(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFragment *Fragment, - const MCFixup &Fixup, - MCValue Target, - unsigned Log2Size, - uint64_t &FixedValue) { - uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); - unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind()); - unsigned Type = macho::RIT_Vanilla; - - // See <reloc.h>. 
- const MCSymbol *A = &Target.getSymA()->getSymbol();
- MCSymbolData *A_SD = &Asm.getSymbolData(*A);
-
- if (!A_SD->getFragment())
- report_fatal_error("symbol '" + A->getName() +
- "' can not be undefined in a subtraction expression");
-
- uint32_t Value = getSymbolAddress(A_SD, Layout);
- uint64_t SecAddr = getSectionAddress(A_SD->getFragment()->getParent());
- FixedValue += SecAddr;
- uint32_t Value2 = 0;
-
- if (const MCSymbolRefExpr *B = Target.getSymB()) {
- MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
-
- if (!B_SD->getFragment())
- report_fatal_error("symbol '" + B->getSymbol().getName() +
- "' can not be undefined in a subtraction expression");
-
- // Select the appropriate difference relocation type.
- Type = macho::RIT_Difference;
- Value2 = getSymbolAddress(B_SD, Layout);
- FixedValue -= getSectionAddress(B_SD->getFragment()->getParent());
- }
-
- // Relocations are written out in reverse order, so the PAIR comes first.
- if (Type == macho::RIT_Difference ||
- Type == macho::RIT_Generic_LocalDifference) {
- macho::RelocationEntry MRE;
- MRE.Word0 = ((0 << 0) |
- (macho::RIT_Pair << 24) |
- (Log2Size << 28) |
- (IsPCRel << 30) |
- macho::RF_Scattered);
- MRE.Word1 = Value2;
- Relocations[Fragment->getParent()].push_back(MRE);
- }
-
- macho::RelocationEntry MRE;
- MRE.Word0 = ((FixupOffset << 0) |
- (Type << 24) |
- (Log2Size << 28) |
- (IsPCRel << 30) |
- macho::RF_Scattered);
- MRE.Word1 = Value;
- Relocations[Fragment->getParent()].push_back(MRE);
-}
-
-void MachObjectWriter::RecordARMMovwMovtRelocation(const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFragment *Fragment,
- const MCFixup &Fixup,
- MCValue Target,
- uint64_t &FixedValue) {
- uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
- unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
- unsigned Type = macho::RIT_ARM_Half;
-
- // See <reloc.h>.
- const MCSymbol *A = &Target.getSymA()->getSymbol();
- MCSymbolData *A_SD = &Asm.getSymbolData(*A);
-
- if (!A_SD->getFragment())
- report_fatal_error("symbol '" + A->getName() +
- "' can not be undefined in a subtraction expression");
-
- uint32_t Value = getSymbolAddress(A_SD, Layout);
- uint32_t Value2 = 0;
- uint64_t SecAddr = getSectionAddress(A_SD->getFragment()->getParent());
- FixedValue += SecAddr;
-
- if (const MCSymbolRefExpr *B = Target.getSymB()) {
- MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
-
- if (!B_SD->getFragment())
- report_fatal_error("symbol '" + B->getSymbol().getName() +
- "' can not be undefined in a subtraction expression");
-
- // Select the appropriate difference relocation type.
- Type = macho::RIT_ARM_HalfDifference;
- Value2 = getSymbolAddress(B_SD, Layout);
- FixedValue -= getSectionAddress(B_SD->getFragment()->getParent());
- }
-
- // Relocations are written out in reverse order, so the PAIR comes first.
- // ARM_RELOC_HALF and ARM_RELOC_HALF_SECTDIFF abuse the r_length field:
- //
- // These two r_type relocations always have a pair relocation following
- // them, and their r_length bits are used differently. The encoding of the
- // r_length is as follows:
- // low bit of r_length:
- // 0 - :lower16: for movw instructions
- // 1 - :upper16: for movt instructions
- // high bit of r_length:
- // 0 - arm instructions
- // 1 - thumb instructions
- // The other half of the relocated expression is in the following pair
- // relocation entry, in the low 16 bits of the r_address field.
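The r_length encoding described above can be restated as a tiny decoder. This sketch assumes the scattered Word0 layout emitted below (movw/movt bit at position 28, arm/thumb bit at position 29); the struct and function names are invented for the illustration.

// Illustrative decoder for the two r_length bits described above; the
// names HalfKind and decodeHalfKind are invented for this sketch.
#include <cstdint>
#include <cstdio>

struct HalfKind {
  bool IsMovt;  // low r_length bit: 0 = :lower16: (movw), 1 = :upper16: (movt)
  bool IsThumb; // high r_length bit: 0 = ARM encoding, 1 = Thumb encoding
};

static HalfKind decodeHalfKind(uint32_t Word0) {
  HalfKind K;
  K.IsMovt = (Word0 >> 28) & 1;
  K.IsThumb = (Word0 >> 29) & 1;
  return K;
}

int main() {
  HalfKind K = decodeHalfKind((1u << 28) | (1u << 29)); // a Thumb movt entry
  std::printf("movt=%d thumb=%d\n", K.IsMovt, K.IsThumb);
  return 0;
}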
- unsigned ThumbBit = 0; - unsigned MovtBit = 0; - switch ((unsigned)Fixup.getKind()) { - default: break; - case ARM::fixup_arm_movt_hi16: - case ARM::fixup_arm_movt_hi16_pcrel: - MovtBit = 1; - break; - case ARM::fixup_t2_movt_hi16: - case ARM::fixup_t2_movt_hi16_pcrel: - MovtBit = 1; - // Fallthrough - case ARM::fixup_t2_movw_lo16: - case ARM::fixup_t2_movw_lo16_pcrel: - ThumbBit = 1; - break; - } - - - if (Type == macho::RIT_ARM_HalfDifference) { - uint32_t OtherHalf = MovtBit - ? (FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16); - - macho::RelocationEntry MRE; - MRE.Word0 = ((OtherHalf << 0) | - (macho::RIT_Pair << 24) | - (MovtBit << 28) | - (ThumbBit << 29) | - (IsPCRel << 30) | - macho::RF_Scattered); - MRE.Word1 = Value2; - Relocations[Fragment->getParent()].push_back(MRE); - } - - macho::RelocationEntry MRE; - MRE.Word0 = ((FixupOffset << 0) | - (Type << 24) | - (MovtBit << 28) | - (ThumbBit << 29) | - (IsPCRel << 30) | - macho::RF_Scattered); - MRE.Word1 = Value; - Relocations[Fragment->getParent()].push_back(MRE); -} - -void MachObjectWriter::RecordTLVPRelocation(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFragment *Fragment, - const MCFixup &Fixup, - MCValue Target, - uint64_t &FixedValue) { - assert(Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP && - !is64Bit() && - "Should only be called with a 32-bit TLVP relocation!"); - - unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); - uint32_t Value = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); - unsigned IsPCRel = 0; - - // Get the symbol data. - MCSymbolData *SD_A = &Asm.getSymbolData(Target.getSymA()->getSymbol()); - unsigned Index = SD_A->getIndex(); - - // We're only going to have a second symbol in pic mode and it'll be a - // subtraction from the picbase. For 32-bit pic the addend is the difference - // between the picbase and the next address. For 32-bit static the addend is - // zero. - if (Target.getSymB()) { - // If this is a subtraction then we're pcrel. - uint32_t FixupAddress = - getFragmentAddress(Fragment, Layout) + Fixup.getOffset(); - MCSymbolData *SD_B = &Asm.getSymbolData(Target.getSymB()->getSymbol()); - IsPCRel = 1; - FixedValue = (FixupAddress - getSymbolAddress(SD_B, Layout) + - Target.getConstant()); - FixedValue += 1ULL << Log2Size; - } else { - FixedValue = 0; - } - - // struct relocation_info (8 bytes) - macho::RelocationEntry MRE; - MRE.Word0 = Value; - MRE.Word1 = ((Index << 0) | - (IsPCRel << 24) | - (Log2Size << 25) | - (1 << 27) | // Extern - (macho::RIT_Generic_TLV << 28)); // Type - Relocations[Fragment->getParent()].push_back(MRE); -} - -bool MachObjectWriter::getARMFixupKindMachOInfo(unsigned Kind, - unsigned &RelocType, - unsigned &Log2Size) { - RelocType = unsigned(macho::RIT_Vanilla); - Log2Size = ~0U; - - switch (Kind) { - default: - return false; - - case FK_Data_1: - Log2Size = llvm::Log2_32(1); - return true; - case FK_Data_2: - Log2Size = llvm::Log2_32(2); - return true; - case FK_Data_4: - Log2Size = llvm::Log2_32(4); - return true; - case FK_Data_8: - Log2Size = llvm::Log2_32(8); - return true; - - // Handle 24-bit branch kinds. - case ARM::fixup_arm_ldst_pcrel_12: - case ARM::fixup_arm_pcrel_10: - case ARM::fixup_arm_adr_pcrel_12: - case ARM::fixup_arm_condbranch: - case ARM::fixup_arm_uncondbranch: - RelocType = unsigned(macho::RIT_ARM_Branch24Bit); - // Report as 'long', even though that is not quite accurate. - Log2Size = llvm::Log2_32(4); - return true; - - // Handle Thumb branches. 
- case ARM::fixup_arm_thumb_br: - RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit); - Log2Size = llvm::Log2_32(2); - return true; - - case ARM::fixup_arm_thumb_bl: - case ARM::fixup_arm_thumb_blx: - RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit); - Log2Size = llvm::Log2_32(4); - return true; - - case ARM::fixup_arm_movt_hi16: - case ARM::fixup_arm_movt_hi16_pcrel: - case ARM::fixup_t2_movt_hi16: - case ARM::fixup_t2_movt_hi16_pcrel: - RelocType = unsigned(macho::RIT_ARM_HalfDifference); - // Report as 'long', even though that is not quite accurate. - Log2Size = llvm::Log2_32(4); - return true; - - case ARM::fixup_arm_movw_lo16: - case ARM::fixup_arm_movw_lo16_pcrel: - case ARM::fixup_t2_movw_lo16: - case ARM::fixup_t2_movw_lo16_pcrel: - RelocType = unsigned(macho::RIT_ARM_Half); - // Report as 'long', even though that is not quite accurate. - Log2Size = llvm::Log2_32(4); - return true; - } -} -void MachObjectWriter::RecordARMRelocation(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFragment *Fragment, - const MCFixup &Fixup, - MCValue Target, - uint64_t &FixedValue) { - unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind()); - unsigned Log2Size; - unsigned RelocType = macho::RIT_Vanilla; - if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) { - report_fatal_error("unknown ARM fixup kind!"); - return; - } - - // If this is a difference or a defined symbol plus an offset, then we need a - // scattered relocation entry. Differences always require scattered - // relocations. - if (Target.getSymB()) { - if (RelocType == macho::RIT_ARM_Half || - RelocType == macho::RIT_ARM_HalfDifference) - return RecordARMMovwMovtRelocation(Asm, Layout, Fragment, Fixup, - Target, FixedValue); - return RecordARMScatteredRelocation(Asm, Layout, Fragment, Fixup, - Target, Log2Size, FixedValue); - } - - // Get the symbol data, if any. - MCSymbolData *SD = 0; - if (Target.getSymA()) - SD = &Asm.getSymbolData(Target.getSymA()->getSymbol()); - - // FIXME: For other platforms, we need to use scattered relocations for - // internal relocations with offsets. If this is an internal relocation with - // an offset, it also needs a scattered relocation entry. - // - // Is this right for ARM? - uint32_t Offset = Target.getConstant(); - if (IsPCRel && RelocType == macho::RIT_Vanilla) - Offset += 1 << Log2Size; - if (Offset && SD && !doesSymbolRequireExternRelocation(SD)) - return RecordARMScatteredRelocation(Asm, Layout, Fragment, Fixup, Target, - Log2Size, FixedValue); - - // See <reloc.h>. - uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); - unsigned Index = 0; - unsigned IsExtern = 0; - unsigned Type = 0; - - if (Target.isAbsolute()) { // constant - // FIXME! - report_fatal_error("FIXME: relocations to absolute targets " - "not yet implemented"); - } else { - // Resolve constant variables. - if (SD->getSymbol().isVariable()) { - int64_t Res; - if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute( - Res, Layout, SectionAddress)) { - FixedValue = Res; - return; - } - } - - // Check whether we need an external or internal relocation. - if (doesSymbolRequireExternRelocation(SD)) { - IsExtern = 1; - Index = SD->getIndex(); - - // For external relocations, make sure to offset the fixup value to - // compensate for the addend of the symbol address, if it was - // undefined. This occurs with weak definitions, for example. 
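The compensation described in the comment above is easiest to see with invented numbers: suppose a defined weak symbol sits at offset 0x40 in its section and is referenced with a constant addend of 8. The fixed-up value already contains the symbol's offset, but the linker will add the symbol's final address itself when it processes the external relocation, so the writer subtracts the offset to leave only the addend. A hedged sketch, all values assumed:

// Invented numbers showing why FixedValue is adjusted for defined symbols
// that still get external relocations (e.g. weak definitions).
#include <cstdint>
#include <cstdio>

int main() {
  uint64_t SymbolOffset = 0x40; // stands in for Layout.getSymbolOffset(SD)
  uint64_t Addend = 8;          // constant from the original expression
  uint64_t FixedValue = SymbolOffset + Addend; // value before compensation
  FixedValue -= SymbolOffset; // the linker re-adds the symbol address itself
  std::printf("stored addend = %llu\n", (unsigned long long)FixedValue); // 8
  return 0;
}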
- if (!SD->Symbol->isUndefined()) - FixedValue -= Layout.getSymbolOffset(SD); - } else { - // The index is the section ordinal (1-based). - const MCSectionData &SymSD = Asm.getSectionData( - SD->getSymbol().getSection()); - Index = SymSD.getOrdinal() + 1; - FixedValue += getSectionAddress(&SymSD); - } - if (IsPCRel) - FixedValue -= getSectionAddress(Fragment->getParent()); - - // The type is determined by the fixup kind. - Type = RelocType; - } - - // struct relocation_info (8 bytes) - macho::RelocationEntry MRE; - MRE.Word0 = FixupOffset; - MRE.Word1 = ((Index << 0) | - (IsPCRel << 24) | - (Log2Size << 25) | - (IsExtern << 27) | - (Type << 28)); - Relocations[Fragment->getParent()].push_back(MRE); -} - void MachObjectWriter::RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) { - // FIXME: These needs to be factored into the target Mach-O writer. - if (isARM()) { - RecordARMRelocation(Asm, Layout, Fragment, Fixup, Target, FixedValue); - return; - } - if (is64Bit()) { - RecordX86_64Relocation(Asm, Layout, Fragment, Fixup, Target, FixedValue); - return; - } - - unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind()); - unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); - - // If this is a 32-bit TLVP reloc it's handled a bit differently. - if (Target.getSymA() && - Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP) { - RecordTLVPRelocation(Asm, Layout, Fragment, Fixup, Target, FixedValue); - return; - } - - // If this is a difference or a defined symbol plus an offset, then we need a - // scattered relocation entry. Differences always require scattered - // relocations. - if (Target.getSymB()) - return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup, - Target, Log2Size, FixedValue); - - // Get the symbol data, if any. - MCSymbolData *SD = 0; - if (Target.getSymA()) - SD = &Asm.getSymbolData(Target.getSymA()->getSymbol()); - - // If this is an internal relocation with an offset, it also needs a scattered - // relocation entry. - uint32_t Offset = Target.getConstant(); - if (IsPCRel) - Offset += 1 << Log2Size; - if (Offset && SD && !doesSymbolRequireExternRelocation(SD)) - return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup, - Target, Log2Size, FixedValue); - - // See <reloc.h>. - uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); - unsigned Index = 0; - unsigned IsExtern = 0; - unsigned Type = 0; - - if (Target.isAbsolute()) { // constant - // SymbolNum of 0 indicates the absolute section. - // - // FIXME: Currently, these are never generated (see code below). I cannot - // find a case where they are actually emitted. - Type = macho::RIT_Vanilla; - } else { - // Resolve constant variables. - if (SD->getSymbol().isVariable()) { - int64_t Res; - if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute( - Res, Layout, SectionAddress)) { - FixedValue = Res; - return; - } - } - - // Check whether we need an external or internal relocation. - if (doesSymbolRequireExternRelocation(SD)) { - IsExtern = 1; - Index = SD->getIndex(); - // For external relocations, make sure to offset the fixup value to - // compensate for the addend of the symbol address, if it was - // undefined. This occurs with weak definitions, for example. - if (!SD->Symbol->isUndefined()) - FixedValue -= Layout.getSymbolOffset(SD); - } else { - // The index is the section ordinal (1-based). 
- const MCSectionData &SymSD = Asm.getSectionData( - SD->getSymbol().getSection()); - Index = SymSD.getOrdinal() + 1; - FixedValue += getSectionAddress(&SymSD); - } - if (IsPCRel) - FixedValue -= getSectionAddress(Fragment->getParent()); - - Type = macho::RIT_Vanilla; - } - - // struct relocation_info (8 bytes) - macho::RelocationEntry MRE; - MRE.Word0 = FixupOffset; - MRE.Word1 = ((Index << 0) | - (IsPCRel << 24) | - (Log2Size << 25) | - (IsExtern << 27) | - (Type << 28)); - Relocations[Fragment->getParent()].push_back(MRE); + TargetObjectWriter->RecordRelocation(this, Asm, Layout, Fragment, Fixup, + Target, FixedValue); } void MachObjectWriter::BindIndirectSymbols(MCAssembler &Asm) { diff --git a/lib/Target/SubtargetFeature.cpp b/lib/MC/SubtargetFeature.cpp index e0a9de8..d72c346 100644 --- a/lib/Target/SubtargetFeature.cpp +++ b/lib/MC/SubtargetFeature.cpp @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Target/SubtargetFeature.h" +#include "llvm/MC/SubtargetFeature.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/StringExtras.h" @@ -27,7 +27,7 @@ using namespace llvm; /// hasFlag - Determine if a feature has a flag; '+' or '-' /// -static inline bool hasFlag(const std::string &Feature) { +static inline bool hasFlag(const StringRef Feature) { assert(!Feature.empty() && "Empty string"); // Get first character char Ch = Feature[0]; @@ -37,13 +37,13 @@ static inline bool hasFlag(const std::string &Feature) { /// StripFlag - Return string stripped of flag. /// -static inline std::string StripFlag(const std::string &Feature) { +static inline std::string StripFlag(const StringRef Feature) { return hasFlag(Feature) ? Feature.substr(1) : Feature; } /// isEnabled - Return true if enable flag; '+'. /// -static inline bool isEnabled(const std::string &Feature) { +static inline bool isEnabled(const StringRef Feature) { assert(!Feature.empty() && "Empty string"); // Get first character char Ch = Feature[0]; @@ -53,16 +53,22 @@ static inline bool isEnabled(const std::string &Feature) { /// PrependFlag - Return a string with a prepended flag; '+' or '-'. /// -static inline std::string PrependFlag(const std::string &Feature, - bool IsEnabled) { +static inline std::string PrependFlag(const StringRef Feature, + bool IsEnabled) { assert(!Feature.empty() && "Empty string"); - if (hasFlag(Feature)) return Feature; - return std::string(IsEnabled ? "+" : "-") + Feature; + if (hasFlag(Feature)) + return Feature; + std::string Prefix = IsEnabled ? "+" : "-"; + Prefix += Feature; + return Prefix; } /// Split - Splits a string of comma separated items in to a vector of strings. /// -static void Split(std::vector<std::string> &V, const std::string &S) { +static void Split(std::vector<std::string> &V, const StringRef S) { + if (S.empty()) + return; + // Start at beginning of string. size_t Pos = 0; while (true) { @@ -88,7 +94,7 @@ static std::string Join(const std::vector<std::string> &V) { std::string Result; // If the vector is not empty if (!V.empty()) { - // Start with the CPU feature + // Start with the first feature Result = V[0]; // For each successive feature for (size_t i = 1; i < V.size(); i++) { @@ -103,7 +109,7 @@ static std::string Join(const std::vector<std::string> &V) { } /// Adding features. 
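As a reading aid for the helpers above: a subtarget feature string is a comma-separated list whose items carry a '+' (enable) or '-' (disable) prefix. The self-contained sketch below restates Split and runs it on an assumed example string; it is an illustration, not the library code itself.

// Self-contained restatement of Split; "+neon,-vfp3,+thumb2" is an
// assumed example feature string.
#include <cstdio>
#include <string>
#include <vector>

static void Split(std::vector<std::string> &V, const std::string &S) {
  if (S.empty())
    return;
  size_t Pos = 0;
  while (true) {
    size_t Comma = S.find(',', Pos);
    if (Comma == std::string::npos) { // last item
      V.push_back(S.substr(Pos));
      break;
    }
    V.push_back(S.substr(Pos, Comma - Pos));
    Pos = Comma + 1;
  }
}

int main() {
  std::vector<std::string> Features;
  Split(Features, "+neon,-vfp3,+thumb2");
  for (size_t i = 0; i != Features.size(); ++i)
    std::printf("%s\n", Features[i].c_str()); // +neon, -vfp3, +thumb2
  return 0;
}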
-void SubtargetFeatures::AddFeature(const std::string &String,
+void SubtargetFeatures::AddFeature(const StringRef String,
bool IsEnabled) {
// Don't add empty features
if (!String.empty()) {
@@ -113,10 +119,10 @@ void SubtargetFeatures::AddFeature(const StringRef String,
}
/// Find KV in array using binary search.
-template<typename T> const T *Find(const std::string &S, const T *A, size_t L) {
+template<typename T> const T *Find(const StringRef S, const T *A, size_t L) {
// Make the lower bound element we're looking for
T KV;
- KV.Key = S.c_str();
+ KV.Key = S.data();
// Determine the end of the array
const T *Hi = A + L;
// Binary search the array
@@ -170,7 +176,7 @@ static void Help(const SubtargetFeatureKV *CPUTable, size_t CPUTableSize,
// SubtargetFeatures Implementation
//===----------------------------------------------------------------------===//
-SubtargetFeatures::SubtargetFeatures(const std::string &Initial) {
+SubtargetFeatures::SubtargetFeatures(const StringRef Initial) {
// Break up string into separate features
Split(Features, Initial);
}
@@ -179,33 +185,6 @@ SubtargetFeatures::SubtargetFeatures(const StringRef Initial) {
std::string SubtargetFeatures::getString() const {
return Join(Features);
}
-void SubtargetFeatures::setString(const std::string &Initial) {
- // Throw out old features
- Features.clear();
- // Break up string into separate features
- Split(Features, LowercaseString(Initial));
-}
-
-
-/// setCPU - Set the CPU string. Replaces previous setting. Setting to ""
-/// clears CPU.
-void SubtargetFeatures::setCPU(const std::string &String) {
- Features[0] = LowercaseString(String);
-}
-
-
-/// setCPUIfNone - Setting CPU string only if no string is set.
-///
-void SubtargetFeatures::setCPUIfNone(const std::string &String) {
- if (Features[0].empty()) setCPU(String);
-}
-
-/// getCPU - Returns current CPU.
-///
-const std::string & SubtargetFeatures::getCPU() const {
- return Features[0];
-}
-
/// SetImpliedBits - For each feature that is (transitively) implied by this
/// feature, set it.
@@ -245,12 +224,13 @@ void ClearImpliedBits(uint64_t &Bits, const SubtargetFeatureKV *FeatureEntry,
}
}
-/// getBits - Get feature bits.
+/// getFeatureBits - Get feature bits of a CPU.
/// -uint64_t SubtargetFeatures::getBits(const SubtargetFeatureKV *CPUTable, - size_t CPUTableSize, - const SubtargetFeatureKV *FeatureTable, - size_t FeatureTableSize) { +uint64_t SubtargetFeatures::getFeatureBits(const StringRef CPU, + const SubtargetFeatureKV *CPUTable, + size_t CPUTableSize, + const SubtargetFeatureKV *FeatureTable, + size_t FeatureTableSize) { assert(CPUTable && "missing CPU table"); assert(FeatureTable && "missing features table"); #ifndef NDEBUG @@ -266,12 +246,11 @@ uint64_t SubtargetFeatures::getBits(const SubtargetFeatureKV *CPUTable, uint64_t Bits = 0; // Resulting bits // Check if help is needed - if (Features[0] == "help") + if (CPU == "help") Help(CPUTable, CPUTableSize, FeatureTable, FeatureTableSize); // Find CPU entry - const SubtargetFeatureKV *CPUEntry = - Find(Features[0], CPUTable, CPUTableSize); + const SubtargetFeatureKV *CPUEntry = Find(CPU, CPUTable, CPUTableSize); // If there is a match if (CPUEntry) { // Set base feature bits @@ -284,13 +263,13 @@ uint64_t SubtargetFeatures::getBits(const SubtargetFeatureKV *CPUTable, SetImpliedBits(Bits, &FE, FeatureTable, FeatureTableSize); } } else { - errs() << "'" << Features[0] + errs() << "'" << CPU << "' is not a recognized processor for this target" << " (ignoring processor)\n"; } // Iterate through each feature - for (size_t i = 1; i < Features.size(); i++) { - const std::string &Feature = Features[i]; + for (size_t i = 0, E = Features.size(); i < E; i++) { + const StringRef Feature = Features[i]; // Check for help if (Feature == "+help") @@ -323,9 +302,10 @@ uint64_t SubtargetFeatures::getBits(const SubtargetFeatureKV *CPUTable, return Bits; } -/// Get info pointer -void *SubtargetFeatures::getInfo(const SubtargetInfoKV *Table, - size_t TableSize) { +/// Get scheduling itinerary of a CPU. +void *SubtargetFeatures::getItinerary(const StringRef CPU, + const SubtargetInfoKV *Table, + size_t TableSize) { assert(Table && "missing table"); #ifndef NDEBUG for (size_t i = 1; i < TableSize; i++) { @@ -334,12 +314,12 @@ void *SubtargetFeatures::getInfo(const SubtargetInfoKV *Table, #endif // Find entry - const SubtargetInfoKV *Entry = Find(Features[0], Table, TableSize); + const SubtargetInfoKV *Entry = Find(CPU, Table, TableSize); if (Entry) { return Entry->Value; } else { - errs() << "'" << Features[0] + errs() << "'" << CPU << "' is not a recognized processor for this target" << " (ignoring processor)\n"; return NULL; @@ -367,10 +347,7 @@ void SubtargetFeatures::dump() const { /// subtarget. It would be better if we could encode this information /// into the IR. See <rdar://5972456>. /// -void SubtargetFeatures::getDefaultSubtargetFeatures(const std::string &CPU, - const Triple& Triple) { - setCPU(CPU); - +void SubtargetFeatures::getDefaultSubtargetFeatures(const Triple& Triple) { if (Triple.getVendor() == Triple::Apple) { if (Triple.getArch() == Triple::ppc) { // powerpc-apple-* diff --git a/lib/Object/Binary.cpp b/lib/Object/Binary.cpp new file mode 100644 index 0000000..4b31c75 --- /dev/null +++ b/lib/Object/Binary.cpp @@ -0,0 +1,96 @@ +//===- Binary.cpp - A generic binary file -----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Binary class. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/Binary.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" + +// Include headers for createBinary. +#include "llvm/Object/ObjectFile.h" +#include "llvm/Object/COFF.h" + +using namespace llvm; +using namespace object; + +Binary::~Binary() { + delete Data; +} + +Binary::Binary(unsigned int Type, MemoryBuffer *Source) + : TypeID(Type) + , Data(Source) {} + +StringRef Binary::getData() const { + return Data->getBuffer(); +} + +StringRef Binary::getFileName() const { + return Data->getBufferIdentifier(); +} + +error_code object::createBinary(MemoryBuffer *Source, + OwningPtr<Binary> &Result) { + OwningPtr<MemoryBuffer> scopedSource(Source); + if (!Source) + return make_error_code(errc::invalid_argument); + if (Source->getBufferSize() < 64) + return object_error::invalid_file_type; + sys::LLVMFileType type = sys::IdentifyFileType(Source->getBufferStart(), + static_cast<unsigned>(Source->getBufferSize())); + error_code ec; + switch (type) { + case sys::ELF_Relocatable_FileType: + case sys::ELF_Executable_FileType: + case sys::ELF_SharedObject_FileType: + case sys::ELF_Core_FileType: { + OwningPtr<Binary> ret( + ObjectFile::createELFObjectFile(scopedSource.take())); + if (!ret) + return object_error::invalid_file_type; + Result.swap(ret); + return object_error::success; + } + case sys::Mach_O_Object_FileType: + case sys::Mach_O_Executable_FileType: + case sys::Mach_O_FixedVirtualMemorySharedLib_FileType: + case sys::Mach_O_Core_FileType: + case sys::Mach_O_PreloadExecutable_FileType: + case sys::Mach_O_DynamicallyLinkedSharedLib_FileType: + case sys::Mach_O_DynamicLinker_FileType: + case sys::Mach_O_Bundle_FileType: + case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType: { + OwningPtr<Binary> ret( + ObjectFile::createMachOObjectFile(scopedSource.take())); + if (!ret) + return object_error::invalid_file_type; + Result.swap(ret); + return object_error::success; + } + case sys::COFF_FileType: { + OwningPtr<Binary> ret(new COFFObjectFile(scopedSource.take(), ec)); + if (ec) return ec; + Result.swap(ret); + return object_error::success; + } + default: // Unrecognized object file format. 
+ return object_error::invalid_file_type;
+ }
+}
+
+error_code object::createBinary(StringRef Path, OwningPtr<Binary> &Result) {
+ OwningPtr<MemoryBuffer> File;
+ if (error_code ec = MemoryBuffer::getFile(Path, File))
+ return ec;
+ return createBinary(File.take(), Result);
+}
diff --git a/lib/Object/CMakeLists.txt b/lib/Object/CMakeLists.txt
index 703d385..68e5e94 100644
--- a/lib/Object/CMakeLists.txt
+++ b/lib/Object/CMakeLists.txt
@@ -1,6 +1,8 @@
add_llvm_library(LLVMObject
+ Binary.cpp
COFFObjectFile.cpp
ELFObjectFile.cpp
+ Error.cpp
MachOObject.cpp
MachOObjectFile.cpp
Object.cpp
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
index 86bf44b..18aad9a 100644
--- a/lib/Object/COFFObjectFile.cpp
+++ b/lib/Object/COFFObjectFile.cpp
@@ -11,11 +11,9 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Object/COFF.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/Object/ObjectFile.h"
-#include "llvm/Support/COFF.h"
-#include "llvm/Support/Endian.h"
using namespace llvm;
using namespace object;
@@ -28,174 +26,164 @@ using support::little16_t;
}
namespace {
-struct coff_file_header {
- ulittle16_t Machine;
- ulittle16_t NumberOfSections;
- ulittle32_t TimeDateStamp;
- ulittle32_t PointerToSymbolTable;
- ulittle32_t NumberOfSymbols;
- ulittle16_t SizeOfOptionalHeader;
- ulittle16_t Characteristics;
-};
+// Returns false, and sets ec, if size is greater than the buffer size.
+bool checkSize(const MemoryBuffer *m, error_code &ec, uint64_t size) {
+ if (m->getBufferSize() < size) {
+ ec = object_error::unexpected_eof;
+ return false;
+ }
+ return true;
}
-extern char coff_file_header_layout_static_assert
- [sizeof(coff_file_header) == 20 ? 1 : -1];
-
-namespace {
-struct coff_symbol {
- struct StringTableOffset {
- ulittle32_t Zeroes;
- ulittle32_t Offset;
- };
-
- union {
- char ShortName[8];
- StringTableOffset Offset;
- } Name;
-
- ulittle32_t Value;
- little16_t SectionNumber;
-
- struct {
- ulittle8_t BaseType;
- ulittle8_t ComplexType;
- } Type;
-
- ulittle8_t StorageClass;
- ulittle8_t NumberOfAuxSymbols;
-};
+// Returns false if any bytes in [addr, addr + size) fall outside of m.
+bool checkAddr(const MemoryBuffer *m,
+ error_code &ec,
+ uintptr_t addr,
+ uint64_t size) {
+ if (addr + size < addr ||
+ addr + size < size ||
+ addr + size > uintptr_t(m->getBufferEnd())) {
+ ec = object_error::unexpected_eof;
+ return false;
+ }
+ return true;
+}
}
-extern char coff_coff_symbol_layout_static_assert
- [sizeof(coff_symbol) == 18 ? 1 : -1];
+const coff_symbol *COFFObjectFile::toSymb(DataRefImpl Symb) const {
+ const coff_symbol *addr = reinterpret_cast<const coff_symbol*>(Symb.p);
-namespace {
-struct coff_section {
- char Name[8];
- ulittle32_t VirtualSize;
- ulittle32_t VirtualAddress;
- ulittle32_t SizeOfRawData;
- ulittle32_t PointerToRawData;
- ulittle32_t PointerToRelocations;
- ulittle32_t PointerToLinenumbers;
- ulittle16_t NumberOfRelocations;
- ulittle16_t NumberOfLinenumbers;
- ulittle32_t Characteristics;
-};
+# ifndef NDEBUG
+ // Verify that the symbol points to a valid entry in the symbol table.
+ uintptr_t offset = uintptr_t(addr) - uintptr_t(base()); + if (offset < Header->PointerToSymbolTable + || offset >= Header->PointerToSymbolTable + + (Header->NumberOfSymbols * sizeof(coff_symbol))) + report_fatal_error("Symbol was outside of symbol table."); + + assert((offset - Header->PointerToSymbolTable) % sizeof(coff_symbol) + == 0 && "Symbol did not point to the beginning of a symbol"); +# endif + + return addr; } -extern char coff_coff_section_layout_static_assert - [sizeof(coff_section) == 40 ? 1 : -1]; +const coff_section *COFFObjectFile::toSec(DataRefImpl Sec) const { + const coff_section *addr = reinterpret_cast<const coff_section*>(Sec.p); -namespace { -class COFFObjectFile : public ObjectFile { -private: - uint64_t HeaderOff; - const coff_file_header *Header; - const coff_section *SectionTable; - const coff_symbol *SymbolTable; - const char *StringTable; - - const coff_section *getSection(std::size_t index) const; - const char *getString(std::size_t offset) const; - -protected: - virtual SymbolRef getSymbolNext(DataRefImpl Symb) const; - virtual StringRef getSymbolName(DataRefImpl Symb) const; - virtual uint64_t getSymbolAddress(DataRefImpl Symb) const; - virtual uint64_t getSymbolSize(DataRefImpl Symb) const; - virtual char getSymbolNMTypeChar(DataRefImpl Symb) const; - virtual bool isSymbolInternal(DataRefImpl Symb) const; - - virtual SectionRef getSectionNext(DataRefImpl Sec) const; - virtual StringRef getSectionName(DataRefImpl Sec) const; - virtual uint64_t getSectionAddress(DataRefImpl Sec) const; - virtual uint64_t getSectionSize(DataRefImpl Sec) const; - virtual StringRef getSectionContents(DataRefImpl Sec) const; - virtual bool isSectionText(DataRefImpl Sec) const; - -public: - COFFObjectFile(MemoryBuffer *Object); - virtual symbol_iterator begin_symbols() const; - virtual symbol_iterator end_symbols() const; - virtual section_iterator begin_sections() const; - virtual section_iterator end_sections() const; - - virtual uint8_t getBytesInAddress() const; - virtual StringRef getFileFormatName() const; - virtual unsigned getArch() const; -}; -} // end namespace - -SymbolRef COFFObjectFile::getSymbolNext(DataRefImpl Symb) const { - const coff_symbol *symb = reinterpret_cast<const coff_symbol*>(Symb.p); +# ifndef NDEBUG + // Verify that the section points to a valid entry in the section table. + if (addr < SectionTable + || addr >= (SectionTable + Header->NumberOfSections)) + report_fatal_error("Section was outside of section table."); + + uintptr_t offset = uintptr_t(addr) - uintptr_t(SectionTable); + assert(offset % sizeof(coff_section) == 0 && + "Section did not point to the beginning of a section"); +# endif + + return addr; +} + +error_code COFFObjectFile::getSymbolNext(DataRefImpl Symb, + SymbolRef &Result) const { + const coff_symbol *symb = toSymb(Symb); symb += 1 + symb->NumberOfAuxSymbols; - Symb.p = reinterpret_cast<intptr_t>(symb); - return SymbolRef(Symb, this); + Symb.p = reinterpret_cast<uintptr_t>(symb); + Result = SymbolRef(Symb, this); + return object_error::success; } -StringRef COFFObjectFile::getSymbolName(DataRefImpl Symb) const { - const coff_symbol *symb = reinterpret_cast<const coff_symbol*>(Symb.p); + error_code COFFObjectFile::getSymbolName(DataRefImpl Symb, + StringRef &Result) const { + const coff_symbol *symb = toSymb(Symb); // Check for string table entry. First 4 bytes are 0. 
if (symb->Name.Offset.Zeroes == 0) { uint32_t Offset = symb->Name.Offset.Offset; - return StringRef(getString(Offset)); + if (error_code ec = getString(Offset, Result)) + return ec; + return object_error::success; } if (symb->Name.ShortName[7] == 0) // Null terminated, let ::strlen figure out the length. - return StringRef(symb->Name.ShortName); - // Not null terminated, use all 8 bytes. - return StringRef(symb->Name.ShortName, 8); + Result = StringRef(symb->Name.ShortName); + else + // Not null terminated, use all 8 bytes. + Result = StringRef(symb->Name.ShortName, 8); + return object_error::success; } -uint64_t COFFObjectFile::getSymbolAddress(DataRefImpl Symb) const { - const coff_symbol *symb = reinterpret_cast<const coff_symbol*>(Symb.p); - const coff_section *Section = getSection(symb->SectionNumber); - char Type = getSymbolNMTypeChar(Symb); +error_code COFFObjectFile::getSymbolAddress(DataRefImpl Symb, + uint64_t &Result) const { + const coff_symbol *symb = toSymb(Symb); + const coff_section *Section; + if (error_code ec = getSection(symb->SectionNumber, Section)) + return ec; + char Type; + if (error_code ec = getSymbolNMTypeChar(Symb, Type)) + return ec; if (Type == 'U' || Type == 'w') - return UnknownAddressOrSize; - if (Section) - return Section->VirtualAddress + symb->Value; - return symb->Value; + Result = UnknownAddressOrSize; + else if (Section) + Result = Section->VirtualAddress + symb->Value; + else + Result = symb->Value; + return object_error::success; } -uint64_t COFFObjectFile::getSymbolSize(DataRefImpl Symb) const { +error_code COFFObjectFile::getSymbolSize(DataRefImpl Symb, + uint64_t &Result) const { // FIXME: Return the correct size. This requires looking at all the symbols // in the same section as this symbol, and looking for either the next // symbol, or the end of the section. - const coff_symbol *symb = reinterpret_cast<const coff_symbol*>(Symb.p); - const coff_section *Section = getSection(symb->SectionNumber); - char Type = getSymbolNMTypeChar(Symb); + const coff_symbol *symb = toSymb(Symb); + const coff_section *Section; + if (error_code ec = getSection(symb->SectionNumber, Section)) + return ec; + char Type; + if (error_code ec = getSymbolNMTypeChar(Symb, Type)) + return ec; if (Type == 'U' || Type == 'w') - return UnknownAddressOrSize; - if (Section) - return Section->SizeOfRawData - symb->Value; - return 0; + Result = UnknownAddressOrSize; + else if (Section) + Result = Section->SizeOfRawData - symb->Value; + else + Result = 0; + return object_error::success; } -char COFFObjectFile::getSymbolNMTypeChar(DataRefImpl Symb) const { - const coff_symbol *symb = reinterpret_cast<const coff_symbol*>(Symb.p); - char ret = StringSwitch<char>(getSymbolName(Symb)) +error_code COFFObjectFile::getSymbolNMTypeChar(DataRefImpl Symb, + char &Result) const { + const coff_symbol *symb = toSymb(Symb); + StringRef name; + if (error_code ec = getSymbolName(Symb, name)) + return ec; + char ret = StringSwitch<char>(name) .StartsWith(".debug", 'N') .StartsWith(".sxdata", 'N') .Default('?'); - if (ret != '?') - return ret; + if (ret != '?') { + Result = ret; + return object_error::success; + } uint32_t Characteristics = 0; - if (const coff_section *Section = getSection(symb->SectionNumber)) { + if (symb->SectionNumber > 0) { + const coff_section *Section; + if (error_code ec = getSection(symb->SectionNumber, Section)) + return ec; Characteristics = Section->Characteristics; } switch (symb->SectionNumber) { case COFF::IMAGE_SYM_UNDEFINED: // Check storage classes. 
- if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL) - return 'w'; // Don't do ::toupper. - else + if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL) { + Result = 'w'; + return object_error::success; // Don't do ::toupper. + } else ret = 'u'; break; case COFF::IMAGE_SYM_ABSOLUTE: @@ -227,22 +215,28 @@ char COFFObjectFile::getSymbolNMTypeChar(DataRefImpl Symb) const { if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_EXTERNAL) ret = ::toupper(ret); - return ret; + Result = ret; + return object_error::success; } -bool COFFObjectFile::isSymbolInternal(DataRefImpl Symb) const { - return false; +error_code COFFObjectFile::isSymbolInternal(DataRefImpl Symb, + bool &Result) const { + Result = false; + return object_error::success; } -SectionRef COFFObjectFile::getSectionNext(DataRefImpl Sec) const { - const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p); +error_code COFFObjectFile::getSectionNext(DataRefImpl Sec, + SectionRef &Result) const { + const coff_section *sec = toSec(Sec); sec += 1; - Sec.p = reinterpret_cast<intptr_t>(sec); - return SectionRef(Sec, this); + Sec.p = reinterpret_cast<uintptr_t>(sec); + Result = SectionRef(Sec, this); + return object_error::success; } -StringRef COFFObjectFile::getSectionName(DataRefImpl Sec) const { - const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p); +error_code COFFObjectFile::getSectionName(DataRefImpl Sec, + StringRef &Result) const { + const coff_section *sec = toSec(Sec); StringRef name; if (sec->Name[7] == 0) // Null terminated, let ::strlen figure out the length. @@ -255,64 +249,117 @@ StringRef COFFObjectFile::getSectionName(DataRefImpl Sec) const { if (name[0] == '/') { uint32_t Offset; name.substr(1).getAsInteger(10, Offset); - return StringRef(getString(Offset)); + if (error_code ec = getString(Offset, name)) + return ec; } - // It's just a normal name. - return name; + Result = name; + return object_error::success; } -uint64_t COFFObjectFile::getSectionAddress(DataRefImpl Sec) const { - const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p); - return sec->VirtualAddress; +error_code COFFObjectFile::getSectionAddress(DataRefImpl Sec, + uint64_t &Result) const { + const coff_section *sec = toSec(Sec); + Result = sec->VirtualAddress; + return object_error::success; } -uint64_t COFFObjectFile::getSectionSize(DataRefImpl Sec) const { - const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p); - return sec->SizeOfRawData; +error_code COFFObjectFile::getSectionSize(DataRefImpl Sec, + uint64_t &Result) const { + const coff_section *sec = toSec(Sec); + Result = sec->SizeOfRawData; + return object_error::success; } -StringRef COFFObjectFile::getSectionContents(DataRefImpl Sec) const { - const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p); - return StringRef(reinterpret_cast<const char *>(base + sec->PointerToRawData), - sec->SizeOfRawData); +error_code COFFObjectFile::getSectionContents(DataRefImpl Sec, + StringRef &Result) const { + const coff_section *sec = toSec(Sec); + // The only thing that we need to verify is that the contents is contained + // within the file bounds. We don't need to make sure it doesn't cover other + // data, as there's nothing that says that is not allowed. 
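The bounds test that follows uses the same idea as the checkAddr helper earlier in this file: an overflow-safe check that a byte range lies inside the buffer. Restated standalone below; the function name rangeInBuffer is invented for the sketch.

// Overflow-safe "does [Addr, Addr+Size) fit in the buffer?" check,
// mirroring the three guards used by checkAddr above.
#include <cstdint>
#include <cstdio>

static bool rangeInBuffer(uintptr_t Addr, uint64_t Size, uintptr_t BufEnd) {
  if (Addr + Size < Addr ||  // the sum wrapped around Addr
      Addr + Size < Size ||  // the sum wrapped around Size
      Addr + Size > BufEnd)  // the range runs past the end of the buffer
    return false;
  return true;
}

int main() {
  uintptr_t End = 0x1000;
  std::printf("%d\n", (int)rangeInBuffer(0xF00, 0x100, End));     // 1: fits
  std::printf("%d\n", (int)rangeInBuffer(0xF00, 0x200, End));     // 0: too long
  std::printf("%d\n", (int)rangeInBuffer(~uintptr_t(0), 2, End)); // 0: wraps
  return 0;
}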
+ uintptr_t con_start = uintptr_t(base()) + sec->PointerToRawData;
+ uintptr_t con_end = con_start + sec->SizeOfRawData;
+ if (con_end >= uintptr_t(Data->getBufferEnd()))
+ return object_error::parse_failed;
+ Result = StringRef(reinterpret_cast<const char*>(con_start),
+ sec->SizeOfRawData);
+ return object_error::success;
}
-bool COFFObjectFile::isSectionText(DataRefImpl Sec) const {
- const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p);
- return sec->Characteristics & COFF::IMAGE_SCN_CNT_CODE;
+error_code COFFObjectFile::isSectionText(DataRefImpl Sec,
+ bool &Result) const {
+ const coff_section *sec = toSec(Sec);
+ Result = sec->Characteristics & COFF::IMAGE_SCN_CNT_CODE;
+ return object_error::success;
}
-COFFObjectFile::COFFObjectFile(MemoryBuffer *Object)
- : ObjectFile(Object) {
+COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &ec)
+ : ObjectFile(Binary::isCOFF, Object, ec) {
+ // Check that we at least have enough room for a header.
+ if (!checkSize(Data, ec, sizeof(coff_file_header))) return;
- HeaderOff = 0;
+ // The actual starting location of the COFF header in the file. This can be
+ // non-zero in PE/COFF files.
+ uint64_t HeaderStart = 0;
- if (base[0] == 0x4d && base[1] == 0x5a) {
+ // Check if this is a PE/COFF file.
+ if (base()[0] == 0x4d && base()[1] == 0x5a) {
// PE/COFF, seek through MS-DOS compatibility stub and 4-byte
// PE signature to find 'normal' COFF header.
- HeaderOff += *reinterpret_cast<const ulittle32_t *>(base + 0x3c);
- HeaderOff += 4;
+ if (!checkSize(Data, ec, 0x3c + 8)) return;
+ HeaderStart += *reinterpret_cast<const ulittle32_t *>(base() + 0x3c);
+ // Check the PE header. ("PE\0\0")
+ if (StringRef(reinterpret_cast<const char *>(base() + HeaderStart), 4)
+ != "PE\0\0") {
+ ec = object_error::parse_failed;
+ return;
+ }
+ HeaderStart += 4; // Skip the PE Header.
}
- Header = reinterpret_cast<const coff_file_header *>(base + HeaderOff);
+ Header = reinterpret_cast<const coff_file_header *>(base() + HeaderStart);
+ if (!checkAddr(Data, ec, uintptr_t(Header), sizeof(coff_file_header)))
+ return;
+
SectionTable =
- reinterpret_cast<const coff_section *>( base
- + HeaderOff
+ reinterpret_cast<const coff_section *>( base()
+ + HeaderStart
+ sizeof(coff_file_header)
+ Header->SizeOfOptionalHeader);
+ if (!checkAddr(Data, ec, uintptr_t(SectionTable),
+ Header->NumberOfSections * sizeof(coff_section)))
+ return;
+
SymbolTable =
- reinterpret_cast<const coff_symbol *>(base + Header->PointerToSymbolTable);
+ reinterpret_cast<const coff_symbol *>(base()
+ + Header->PointerToSymbolTable);
+ if (!checkAddr(Data, ec, uintptr_t(SymbolTable),
+ Header->NumberOfSymbols * sizeof(coff_symbol)))
+ return;
// Find string table.
- StringTable = reinterpret_cast<const char *>(base)
- + Header->PointerToSymbolTable
- + Header->NumberOfSymbols * 18;
+ StringTable = reinterpret_cast<const char *>(base())
+ + Header->PointerToSymbolTable
+ + Header->NumberOfSymbols * sizeof(coff_symbol);
+ if (!checkAddr(Data, ec, uintptr_t(StringTable), sizeof(ulittle32_t)))
+ return;
+
+ StringTableSize = *reinterpret_cast<const ulittle32_t *>(StringTable);
+ if (!checkAddr(Data, ec, uintptr_t(StringTable), StringTableSize))
+ return;
+ // Check that the string table is null terminated if it has anything in it.
+ if (StringTableSize < 4 + || (StringTableSize > 4 && StringTable[StringTableSize - 1] != 0)) { + ec = object_error::parse_failed; + return; + } + + ec = object_error::success; } ObjectFile::symbol_iterator COFFObjectFile::begin_symbols() const { DataRefImpl ret; - memset(&ret, 0, sizeof(DataRefImpl)); + std::memset(&ret, 0, sizeof(DataRefImpl)); ret.p = reinterpret_cast<intptr_t>(SymbolTable); return symbol_iterator(SymbolRef(ret, this)); } @@ -320,21 +367,21 @@ ObjectFile::symbol_iterator COFFObjectFile::begin_symbols() const { ObjectFile::symbol_iterator COFFObjectFile::end_symbols() const { // The symbol table ends where the string table begins. DataRefImpl ret; - memset(&ret, 0, sizeof(DataRefImpl)); + std::memset(&ret, 0, sizeof(DataRefImpl)); ret.p = reinterpret_cast<intptr_t>(StringTable); return symbol_iterator(SymbolRef(ret, this)); } ObjectFile::section_iterator COFFObjectFile::begin_sections() const { DataRefImpl ret; - memset(&ret, 0, sizeof(DataRefImpl)); + std::memset(&ret, 0, sizeof(DataRefImpl)); ret.p = reinterpret_cast<intptr_t>(SectionTable); return section_iterator(SectionRef(ret, this)); } ObjectFile::section_iterator COFFObjectFile::end_sections() const { DataRefImpl ret; - memset(&ret, 0, sizeof(DataRefImpl)); + std::memset(&ret, 0, sizeof(DataRefImpl)); ret.p = reinterpret_cast<intptr_t>(SectionTable + Header->NumberOfSections); return section_iterator(SectionRef(ret, this)); } @@ -365,24 +412,37 @@ unsigned COFFObjectFile::getArch() const { } } -const coff_section *COFFObjectFile::getSection(std::size_t index) const { - if (index > 0 && index <= Header->NumberOfSections) - return SectionTable + (index - 1); - return 0; +error_code COFFObjectFile::getSection(int32_t index, + const coff_section *&Result) const { + // Check for special index values. + if (index == COFF::IMAGE_SYM_UNDEFINED || + index == COFF::IMAGE_SYM_ABSOLUTE || + index == COFF::IMAGE_SYM_DEBUG) + Result = NULL; + else if (index > 0 && index <= Header->NumberOfSections) + // We already verified the section table data, so no need to check again. + Result = SectionTable + (index - 1); + else + return object_error::parse_failed; + return object_error::success; } -const char *COFFObjectFile::getString(std::size_t offset) const { - const ulittle32_t *StringTableSize = - reinterpret_cast<const ulittle32_t *>(StringTable); - if (offset < *StringTableSize) - return StringTable + offset; - return 0; +error_code COFFObjectFile::getString(uint32_t offset, + StringRef &Result) const { + if (StringTableSize <= 4) + // Tried to get a string from an empty string table. 
+ return object_error::parse_failed; + if (offset >= StringTableSize) + return object_error::unexpected_eof; + Result = StringRef(StringTable + offset); + return object_error::success; } namespace llvm { ObjectFile *ObjectFile::createCOFFObjectFile(MemoryBuffer *Object) { - return new COFFObjectFile(Object); + error_code ec; + return new COFFObjectFile(Object, ec); } } // end namespace llvm diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp index d2a2726..edf9824 100644 --- a/lib/Object/ELFObjectFile.cpp +++ b/lib/Object/ELFObjectFile.cpp @@ -222,22 +222,22 @@ class ELFObjectFile : public ObjectFile { const char *getString(const Elf_Shdr *section, uint32_t offset) const; protected: - virtual SymbolRef getSymbolNext(DataRefImpl Symb) const; - virtual StringRef getSymbolName(DataRefImpl Symb) const; - virtual uint64_t getSymbolAddress(DataRefImpl Symb) const; - virtual uint64_t getSymbolSize(DataRefImpl Symb) const; - virtual char getSymbolNMTypeChar(DataRefImpl Symb) const; - virtual bool isSymbolInternal(DataRefImpl Symb) const; - - virtual SectionRef getSectionNext(DataRefImpl Sec) const; - virtual StringRef getSectionName(DataRefImpl Sec) const; - virtual uint64_t getSectionAddress(DataRefImpl Sec) const; - virtual uint64_t getSectionSize(DataRefImpl Sec) const; - virtual StringRef getSectionContents(DataRefImpl Sec) const; - virtual bool isSectionText(DataRefImpl Sec) const; + virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const; + virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const; + virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const; + virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const; + virtual error_code getSymbolNMTypeChar(DataRefImpl Symb, char &Res) const; + virtual error_code isSymbolInternal(DataRefImpl Symb, bool &Res) const; + + virtual error_code getSectionNext(DataRefImpl Sec, SectionRef &Res) const; + virtual error_code getSectionName(DataRefImpl Sec, StringRef &Res) const; + virtual error_code getSectionAddress(DataRefImpl Sec, uint64_t &Res) const; + virtual error_code getSectionSize(DataRefImpl Sec, uint64_t &Res) const; + virtual error_code getSectionContents(DataRefImpl Sec, StringRef &Res) const; + virtual error_code isSectionText(DataRefImpl Sec, bool &Res) const; public: - ELFObjectFile(MemoryBuffer *Object); + ELFObjectFile(MemoryBuffer *Object, error_code &ec); virtual symbol_iterator begin_symbols() const; virtual symbol_iterator end_symbols() const; virtual section_iterator begin_sections() const; @@ -259,9 +259,9 @@ void ELFObjectFile<target_endianness, is64Bits> // an error object around. if (!( symb && SymbolTableSection - && symb >= (const Elf_Sym*)(base + && symb >= (const Elf_Sym*)(base() + SymbolTableSection->sh_offset) - && symb < (const Elf_Sym*)(base + && symb < (const Elf_Sym*)(base() + SymbolTableSection->sh_offset + SymbolTableSection->sh_size))) // FIXME: Proper error handling. 
@@ -269,8 +269,9 @@ void ELFObjectFile<target_endianness, is64Bits> } template<support::endianness target_endianness, bool is64Bits> -SymbolRef ELFObjectFile<target_endianness, is64Bits> - ::getSymbolNext(DataRefImpl Symb) const { +error_code ELFObjectFile<target_endianness, is64Bits> + ::getSymbolNext(DataRefImpl Symb, + SymbolRef &Result) const { validateSymbol(Symb); const Elf_Shdr *SymbolTableSection = SymbolTableSections[Symb.d.b]; @@ -287,63 +288,80 @@ SymbolRef ELFObjectFile<target_endianness, is64Bits> } } - return SymbolRef(Symb, this); + Result = SymbolRef(Symb, this); + return object_error::success; } template<support::endianness target_endianness, bool is64Bits> -StringRef ELFObjectFile<target_endianness, is64Bits> - ::getSymbolName(DataRefImpl Symb) const { +error_code ELFObjectFile<target_endianness, is64Bits> + ::getSymbolName(DataRefImpl Symb, + StringRef &Result) const { validateSymbol(Symb); const Elf_Sym *symb = getSymbol(Symb); if (symb->st_name == 0) { const Elf_Shdr *section = getSection(symb->st_shndx); if (!section) - return ""; - return getString(dot_shstrtab_sec, section->sh_name); + Result = ""; + else + Result = getString(dot_shstrtab_sec, section->sh_name); + return object_error::success; } // Use the default symbol table name section. - return getString(dot_strtab_sec, symb->st_name); + Result = getString(dot_strtab_sec, symb->st_name); + return object_error::success; } template<support::endianness target_endianness, bool is64Bits> -uint64_t ELFObjectFile<target_endianness, is64Bits> - ::getSymbolAddress(DataRefImpl Symb) const { +error_code ELFObjectFile<target_endianness, is64Bits> + ::getSymbolAddress(DataRefImpl Symb, + uint64_t &Result) const { validateSymbol(Symb); const Elf_Sym *symb = getSymbol(Symb); const Elf_Shdr *Section; switch (symb->st_shndx) { case ELF::SHN_COMMON: // Undefined symbols have no address yet. - case ELF::SHN_UNDEF: return UnknownAddressOrSize; - case ELF::SHN_ABS: return symb->st_value; + case ELF::SHN_UNDEF: + Result = UnknownAddressOrSize; + return object_error::success; + case ELF::SHN_ABS: + Result = symb->st_value; + return object_error::success; default: Section = getSection(symb->st_shndx); } switch (symb->getType()) { - case ELF::STT_SECTION: return Section ? Section->sh_addr - : UnknownAddressOrSize; + case ELF::STT_SECTION: + Result = Section ? 
Section->sh_addr : UnknownAddressOrSize;
+ return object_error::success;
case ELF::STT_FUNC:
case ELF::STT_OBJECT:
case ELF::STT_NOTYPE:
- return symb->st_value;
- default: return UnknownAddressOrSize;
+ Result = symb->st_value;
+ return object_error::success;
+ default:
+ Result = UnknownAddressOrSize;
+ return object_error::success;
}
}
template<support::endianness target_endianness, bool is64Bits>
-uint64_t ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolSize(DataRefImpl Symb) const {
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolSize(DataRefImpl Symb,
+ uint64_t &Result) const {
validateSymbol(Symb);
const Elf_Sym *symb = getSymbol(Symb);
if (symb->st_size == 0)
- return UnknownAddressOrSize;
- return symb->st_size;
+ Result = UnknownAddressOrSize;
+ else
+ Result = symb->st_size;
+ return object_error::success;
}
template<support::endianness target_endianness, bool is64Bits>
-char ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolNMTypeChar(DataRefImpl Symb) const {
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolNMTypeChar(DataRefImpl Symb,
+ char &Result) const {
validateSymbol(Symb);
const Elf_Sym *symb = getSymbol(Symb);
const Elf_Shdr *Section = getSection(symb->st_shndx);
@@ -390,89 +408,110 @@ char ELFObjectFile<target_endianness, is64Bits>
ret = 'W';
}
- if (ret == '?' && symb->getType() == ELF::STT_SECTION)
- return StringSwitch<char>(getSymbolName(Symb))
+ if (ret == '?' && symb->getType() == ELF::STT_SECTION) {
+ StringRef name;
+ if (error_code ec = getSymbolName(Symb, name))
+ return ec;
+ Result = StringSwitch<char>(name)
.StartsWith(".debug", 'N')
.StartsWith(".note", 'n');
+ return object_error::success;
+ }
- return ret;
+ Result = ret;
+ return object_error::success;
}
template<support::endianness target_endianness, bool is64Bits>
-bool ELFObjectFile<target_endianness, is64Bits>
- ::isSymbolInternal(DataRefImpl Symb) const {
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::isSymbolInternal(DataRefImpl Symb,
+ bool &Result) const {
validateSymbol(Symb);
const Elf_Sym *symb = getSymbol(Symb);
if ( symb->getType() == ELF::STT_FILE
|| symb->getType() == ELF::STT_SECTION)
- return true;
- return false;
+ Result = true;
+ else
+ Result = false;
+ return object_error::success;
}
template<support::endianness target_endianness, bool is64Bits>
-SectionRef ELFObjectFile<target_endianness, is64Bits>
- ::getSectionNext(DataRefImpl Sec) const {
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSectionNext(DataRefImpl Sec, SectionRef &Result) const {
const uint8_t *sec = reinterpret_cast<const uint8_t *>(Sec.p);
sec += Header->e_shentsize;
Sec.p = reinterpret_cast<intptr_t>(sec);
- return SectionRef(Sec, this);
+ Result = SectionRef(Sec, this);
+ return object_error::success;
}
template<support::endianness target_endianness, bool is64Bits>
-StringRef ELFObjectFile<target_endianness, is64Bits>
- ::getSectionName(DataRefImpl Sec) const {
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSectionName(DataRefImpl Sec,
+ StringRef &Result) const {
const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
- return StringRef(getString(dot_shstrtab_sec, sec->sh_name));
+ Result = StringRef(getString(dot_shstrtab_sec, sec->sh_name));
+ return object_error::success;
}
template<support::endianness target_endianness, bool is64Bits>
-uint64_t ELFObjectFile<target_endianness, is64Bits>
- ::getSectionAddress(DataRefImpl Sec) const {
+error_code ELFObjectFile<target_endianness, is64Bits>
+ 
::getSectionAddress(DataRefImpl Sec,
+ uint64_t &Result) const {
const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
- return sec->sh_addr;
+ Result = sec->sh_addr;
+ return object_error::success;
}
template<support::endianness target_endianness, bool is64Bits>
-uint64_t ELFObjectFile<target_endianness, is64Bits>
- ::getSectionSize(DataRefImpl Sec) const {
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSectionSize(DataRefImpl Sec,
+ uint64_t &Result) const {
const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
- return sec->sh_size;
+ Result = sec->sh_size;
+ return object_error::success;
}
template<support::endianness target_endianness, bool is64Bits>
-StringRef ELFObjectFile<target_endianness, is64Bits>
- ::getSectionContents(DataRefImpl Sec) const {
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSectionContents(DataRefImpl Sec,
+ StringRef &Result) const {
const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
- const char *start = (char*)base + sec->sh_offset;
- return StringRef(start, sec->sh_size);
+ const char *start = (const char*)base() + sec->sh_offset;
+ Result = StringRef(start, sec->sh_size);
+ return object_error::success;
}
template<support::endianness target_endianness, bool is64Bits>
-bool ELFObjectFile<target_endianness, is64Bits>
- ::isSectionText(DataRefImpl Sec) const {
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::isSectionText(DataRefImpl Sec,
+ bool &Result) const {
const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
if (sec->sh_flags & ELF::SHF_EXECINSTR)
- return true;
- return false;
+ Result = true;
+ else
+ Result = false;
+ return object_error::success;
}
template<support::endianness target_endianness, bool is64Bits>
-ELFObjectFile<target_endianness, is64Bits>::ELFObjectFile(MemoryBuffer *Object)
- : ObjectFile(Object)
+ELFObjectFile<target_endianness, is64Bits>::ELFObjectFile(MemoryBuffer *Object
+ , error_code &ec)
+ : ObjectFile(Binary::isELF, Object, ec)
, SectionHeaderTable(0)
, dot_shstrtab_sec(0)
, dot_strtab_sec(0) {
- Header = reinterpret_cast<const Elf_Ehdr *>(base);
+ Header = reinterpret_cast<const Elf_Ehdr *>(base());
if (Header->e_shoff == 0)
return;
SectionHeaderTable =
- reinterpret_cast<const Elf_Shdr *>(base + Header->e_shoff);
+ reinterpret_cast<const Elf_Shdr *>(base() + Header->e_shoff);
uint32_t SectionTableSize = Header->e_shnum * Header->e_shentsize;
if (!( (const uint8_t *)SectionHeaderTable + SectionTableSize
- <= base + MapFile->getBufferSize()))
+ <= base() + Data->getBufferSize()))
// FIXME: Proper error handling.
report_fatal_error("Section table goes past end of file!");
@@ -491,7 +530,7 @@ ELFObjectFile<target_endianness, is64Bits>::ELFObjectFile(MemoryBuffer *Object)
dot_shstrtab_sec = getSection(Header->e_shstrndx);
if (dot_shstrtab_sec) {
// Verify that the last byte in the string table is a null.
- if (((const char*)base + dot_shstrtab_sec->sh_offset)
+ if (((const char*)base() + dot_shstrtab_sec->sh_offset)
[dot_shstrtab_sec->sh_size - 1] != 0)
// FIXME: Proper error handling.
report_fatal_error("String table must end with a null terminator!");
@@ -509,7 +548,7 @@ ELFObjectFile<target_endianness, is64Bits>::ELFObjectFile(MemoryBuffer *Object)
// FIXME: Proper error handling.
report_fatal_error("Already found section named .strtab!"); dot_strtab_sec = sh; - const char *dot_strtab = (const char*)base + sh->sh_offset; + const char *dot_strtab = (const char*)base() + sh->sh_offset; if (dot_strtab[sh->sh_size - 1] != 0) // FIXME: Proper error handling. report_fatal_error("String table must end with a null terminator!"); @@ -548,7 +587,7 @@ ObjectFile::section_iterator ELFObjectFile<target_endianness, is64Bits> ::begin_sections() const { DataRefImpl ret; memset(&ret, 0, sizeof(DataRefImpl)); - ret.p = reinterpret_cast<intptr_t>(base + Header->e_shoff); + ret.p = reinterpret_cast<intptr_t>(base() + Header->e_shoff); return section_iterator(SectionRef(ret, this)); } @@ -557,7 +596,7 @@ ObjectFile::section_iterator ELFObjectFile<target_endianness, is64Bits> ::end_sections() const { DataRefImpl ret; memset(&ret, 0, sizeof(DataRefImpl)); - ret.p = reinterpret_cast<intptr_t>(base + ret.p = reinterpret_cast<intptr_t>(base() + Header->e_shoff + (Header->e_shentsize * Header->e_shnum)); return section_iterator(SectionRef(ret, this)); @@ -613,7 +652,7 @@ const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Sym * ELFObjectFile<target_endianness, is64Bits>::getSymbol(DataRefImpl Symb) const { const Elf_Shdr *sec = SymbolTableSections[Symb.d.b]; return reinterpret_cast<const Elf_Sym *>( - base + base() + sec->sh_offset + (Symb.d.a * sec->sh_entsize)); } @@ -656,8 +695,8 @@ const char *ELFObjectFile<target_endianness, is64Bits> assert(section && section->sh_type == ELF::SHT_STRTAB && "Invalid section!"); if (offset >= section->sh_size) // FIXME: Proper error handling. - report_fatal_error("Sybol name offset outside of string table!"); - return (const char *)base + section->sh_offset + offset; + report_fatal_error("Symbol name offset outside of string table!"); + return (const char *)base() + section->sh_offset + offset; } // EI_CLASS, EI_DATA. @@ -673,14 +712,15 @@ namespace llvm { ObjectFile *ObjectFile::createELFObjectFile(MemoryBuffer *Object) { std::pair<unsigned char, unsigned char> Ident = getElfArchType(Object); + error_code ec; if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2LSB) - return new ELFObjectFile<support::little, false>(Object); + return new ELFObjectFile<support::little, false>(Object, ec); else if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2MSB) - return new ELFObjectFile<support::big, false>(Object); + return new ELFObjectFile<support::big, false>(Object, ec); else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2LSB) - return new ELFObjectFile<support::little, true>(Object); + return new ELFObjectFile<support::little, true>(Object, ec); else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2MSB) - return new ELFObjectFile<support::big, true>(Object); + return new ELFObjectFile<support::big, true>(Object, ec); // FIXME: Proper error handling. report_fatal_error("Not an ELF object file!"); } diff --git a/lib/Object/Error.cpp b/lib/Object/Error.cpp new file mode 100644 index 0000000..2594625 --- /dev/null +++ b/lib/Object/Error.cpp @@ -0,0 +1,57 @@ +//===- Error.cpp - system_error extensions for Object -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This defines a new error_category for the Object library. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/Error.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; +using namespace object; + +namespace { +class _object_error_category : public _do_message { +public: + virtual const char* name() const; + virtual std::string message(int ev) const; + virtual error_condition default_error_condition(int ev) const; +}; +} + +const char *_object_error_category::name() const { + return "llvm.object"; +} + +std::string _object_error_category::message(int ev) const { + switch (ev) { + case object_error::success: return "Success"; + case object_error::invalid_file_type: + return "The file was not recognized as a valid object file"; + case object_error::parse_failed: + return "Invalid data was encountered while parsing the file"; + case object_error::unexpected_eof: + return "The end of the file was unexpectedly encountered"; + default: + llvm_unreachable("An enumerator of object_error does not have a message " + "defined."); + } +} + +error_condition _object_error_category::default_error_condition(int ev) const { + if (ev == object_error::success) + return errc::success; + return errc::invalid_argument; +} + +const error_category &object::object_category() { + static _object_error_category o; + return o; +} diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 877cbfb..71f1f8c 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -32,8 +32,8 @@ typedef MachOObject::LoadCommandInfo LoadCommandInfo; class MachOObjectFile : public ObjectFile { public: - MachOObjectFile(MemoryBuffer *Object, MachOObject *MOO) - : ObjectFile(Object), + MachOObjectFile(MemoryBuffer *Object, MachOObject *MOO, error_code &ec) + : ObjectFile(Binary::isMachO, Object, ec), MachOObj(MOO), RegisteredStringTable(std::numeric_limits<uint32_t>::max()) {} @@ -47,19 +47,19 @@ public: virtual unsigned getArch() const; protected: - virtual SymbolRef getSymbolNext(DataRefImpl Symb) const; - virtual StringRef getSymbolName(DataRefImpl Symb) const; - virtual uint64_t getSymbolAddress(DataRefImpl Symb) const; - virtual uint64_t getSymbolSize(DataRefImpl Symb) const; - virtual char getSymbolNMTypeChar(DataRefImpl Symb) const; - virtual bool isSymbolInternal(DataRefImpl Symb) const; - - virtual SectionRef getSectionNext(DataRefImpl Sec) const; - virtual StringRef getSectionName(DataRefImpl Sec) const; - virtual uint64_t getSectionAddress(DataRefImpl Sec) const; - virtual uint64_t getSectionSize(DataRefImpl Sec) const; - virtual StringRef getSectionContents(DataRefImpl Sec) const; - virtual bool isSectionText(DataRefImpl Sec) const; + virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const; + virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const; + virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const; + virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const; + virtual error_code getSymbolNMTypeChar(DataRefImpl Symb, char &Res) const; + virtual error_code isSymbolInternal(DataRefImpl Symb, bool &Res) const; + + virtual error_code getSectionNext(DataRefImpl Sec, SectionRef &Res) const; + virtual error_code getSectionName(DataRefImpl Sec, StringRef &Res) const; + virtual error_code getSectionAddress(DataRefImpl Sec, uint64_t &Res) const; + virtual error_code getSectionSize(DataRefImpl Sec, uint64_t &Res) const; + virtual error_code getSectionContents(DataRefImpl Sec, StringRef &Res) const; + 
virtual error_code isSectionText(DataRefImpl Sec, bool &Res) const; private: MachOObject *MachOObj; @@ -73,11 +73,12 @@ private: }; ObjectFile *ObjectFile::createMachOObjectFile(MemoryBuffer *Buffer) { + error_code ec; std::string Err; MachOObject *MachOObj = MachOObject::LoadFromBuffer(Buffer, &Err); if (!MachOObj) return NULL; - return new MachOObjectFile(Buffer, MachOObj); + return new MachOObjectFile(Buffer, MachOObj, ec); } /*===-- Symbols -----------------------------------------------------------===*/ @@ -114,29 +115,38 @@ void MachOObjectFile::getSymbolTableEntry(DataRefImpl DRI, } -SymbolRef MachOObjectFile::getSymbolNext(DataRefImpl DRI) const { +error_code MachOObjectFile::getSymbolNext(DataRefImpl DRI, + SymbolRef &Result) const { DRI.d.b++; moveToNextSymbol(DRI); - return SymbolRef(DRI, this); + Result = SymbolRef(DRI, this); + return object_error::success; } -StringRef MachOObjectFile::getSymbolName(DataRefImpl DRI) const { +error_code MachOObjectFile::getSymbolName(DataRefImpl DRI, + StringRef &Result) const { InMemoryStruct<macho::SymbolTableEntry> Entry; getSymbolTableEntry(DRI, Entry); - return MachOObj->getStringAtIndex(Entry->StringIndex); + Result = MachOObj->getStringAtIndex(Entry->StringIndex); + return object_error::success; } -uint64_t MachOObjectFile::getSymbolAddress(DataRefImpl DRI) const { +error_code MachOObjectFile::getSymbolAddress(DataRefImpl DRI, + uint64_t &Result) const { InMemoryStruct<macho::SymbolTableEntry> Entry; getSymbolTableEntry(DRI, Entry); - return Entry->Value; + Result = Entry->Value; + return object_error::success; } -uint64_t MachOObjectFile::getSymbolSize(DataRefImpl DRI) const { - return UnknownAddressOrSize; +error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI, + uint64_t &Result) const { + Result = UnknownAddressOrSize; + return object_error::success; } -char MachOObjectFile::getSymbolNMTypeChar(DataRefImpl DRI) const { +error_code MachOObjectFile::getSymbolNMTypeChar(DataRefImpl DRI, + char &Result) const { InMemoryStruct<macho::SymbolTableEntry> Entry; getSymbolTableEntry(DRI, Entry); @@ -156,13 +166,16 @@ char MachOObjectFile::getSymbolNMTypeChar(DataRefImpl DRI) const { if (Entry->Flags & (macho::STF_External | macho::STF_PrivateExtern)) Char = toupper(Char); - return Char; + Result = Char; + return object_error::success; } -bool MachOObjectFile::isSymbolInternal(DataRefImpl DRI) const { +error_code MachOObjectFile::isSymbolInternal(DataRefImpl DRI, + bool &Result) const { InMemoryStruct<macho::SymbolTableEntry> Entry; getSymbolTableEntry(DRI, Entry); - return Entry->Flags & macho::STF_StabsEntryMask; + Result = Entry->Flags & macho::STF_StabsEntryMask; + return object_error::success; } ObjectFile::symbol_iterator MachOObjectFile::begin_symbols() const { @@ -204,10 +217,12 @@ void MachOObjectFile::moveToNextSection(DataRefImpl &DRI) const { } } -SectionRef MachOObjectFile::getSectionNext(DataRefImpl DRI) const { +error_code MachOObjectFile::getSectionNext(DataRefImpl DRI, + SectionRef &Result) const { DRI.d.b++; moveToNextSection(DRI); - return SectionRef(DRI, this); + Result = SectionRef(DRI, this); + return object_error::success; } void @@ -219,43 +234,53 @@ MachOObjectFile::getSection(DataRefImpl DRI, MachOObj->ReadSection(LCI, DRI.d.b, Res); } -StringRef MachOObjectFile::getSectionName(DataRefImpl DRI) const { +error_code MachOObjectFile::getSectionName(DataRefImpl DRI, + StringRef &Result) const { InMemoryStruct<macho::SegmentLoadCommand> SLC; LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a); 
MachOObj->ReadSegmentLoadCommand(LCI, SLC); InMemoryStruct<macho::Section> Sect; MachOObj->ReadSection(LCI, DRI.d.b, Sect); - static char Result[34]; - strcpy(Result, SLC->Name); - strcat(Result, ","); - strcat(Result, Sect->Name); - return StringRef(Result); + static char result[34]; + strcpy(result, SLC->Name); + strcat(result, ","); + strcat(result, Sect->Name); + Result = StringRef(result); + return object_error::success; } -uint64_t MachOObjectFile::getSectionAddress(DataRefImpl DRI) const { +error_code MachOObjectFile::getSectionAddress(DataRefImpl DRI, + uint64_t &Result) const { InMemoryStruct<macho::Section> Sect; getSection(DRI, Sect); - return Sect->Address; + Result = Sect->Address; + return object_error::success; } -uint64_t MachOObjectFile::getSectionSize(DataRefImpl DRI) const { +error_code MachOObjectFile::getSectionSize(DataRefImpl DRI, + uint64_t &Result) const { InMemoryStruct<macho::Section> Sect; getSection(DRI, Sect); - return Sect->Size; + Result = Sect->Size; + return object_error::success; } -StringRef MachOObjectFile::getSectionContents(DataRefImpl DRI) const { +error_code MachOObjectFile::getSectionContents(DataRefImpl DRI, + StringRef &Result) const { InMemoryStruct<macho::Section> Sect; getSection(DRI, Sect); - return MachOObj->getData(Sect->Offset, Sect->Size); + Result = MachOObj->getData(Sect->Offset, Sect->Size); + return object_error::success; } -bool MachOObjectFile::isSectionText(DataRefImpl DRI) const { +error_code MachOObjectFile::isSectionText(DataRefImpl DRI, + bool &Result) const { InMemoryStruct<macho::SegmentLoadCommand> SLC; LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a); MachOObj->ReadSegmentLoadCommand(LCI, SLC); - return !strcmp(SLC->Name, "__TEXT"); + Result = !strcmp(SLC->Name, "__TEXT"); + return object_error::success; } ObjectFile::section_iterator MachOObjectFile::begin_sections() const { diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp index 603b23c..9a373ad 100644 --- a/lib/Object/Object.cpp +++ b/lib/Object/Object.cpp @@ -41,19 +41,28 @@ LLVMBool LLVMIsSectionIteratorAtEnd(LLVMObjectFileRef ObjectFile, } void LLVMMoveToNextSection(LLVMSectionIteratorRef SI) { - // We can't use unwrap() here because the argument to ++ must be an lvalue. 
- ++*reinterpret_cast<ObjectFile::section_iterator*>(SI); + error_code ec; + unwrap(SI)->increment(ec); + if (ec) report_fatal_error("LLVMMoveToNextSection failed: " + ec.message()); } const char *LLVMGetSectionName(LLVMSectionIteratorRef SI) { - return (*unwrap(SI))->getName().data(); + StringRef ret; + if (error_code ec = (*unwrap(SI))->getName(ret)) + report_fatal_error(ec.message()); + return ret.data(); } uint64_t LLVMGetSectionSize(LLVMSectionIteratorRef SI) { - return (*unwrap(SI))->getSize(); + uint64_t ret; + if (error_code ec = (*unwrap(SI))->getSize(ret)) + report_fatal_error(ec.message()); + return ret; } const char *LLVMGetSectionContents(LLVMSectionIteratorRef SI) { - return (*unwrap(SI))->getContents().data(); + StringRef ret; + if (error_code ec = (*unwrap(SI))->getContents(ret)) + report_fatal_error(ec.message()); + return ret.data(); } - diff --git a/lib/Object/ObjectFile.cpp b/lib/Object/ObjectFile.cpp index 47b6311..a7798df 100644 --- a/lib/Object/ObjectFile.cpp +++ b/lib/Object/ObjectFile.cpp @@ -21,18 +21,8 @@ using namespace llvm; using namespace object; -ObjectFile::ObjectFile(MemoryBuffer *Object) - : MapFile(Object) { - assert(MapFile && "Must be a valid MemoryBuffer!"); - base = reinterpret_cast<const uint8_t *>(MapFile->getBufferStart()); -} - -ObjectFile::~ObjectFile() { - delete MapFile; -} - -StringRef ObjectFile::getFilename() const { - return MapFile->getBufferIdentifier(); +ObjectFile::ObjectFile(unsigned int Type, MemoryBuffer *source, error_code &ec) + : Binary(Type, source) { } ObjectFile *ObjectFile::createObjectFile(MemoryBuffer *Object) { diff --git a/lib/Target/ARM/ARMAsmBackend.cpp b/lib/Target/ARM/ARMAsmBackend.cpp index 79e9897..5e438a9 100644 --- a/lib/Target/ARM/ARMAsmBackend.cpp +++ b/lib/Target/ARM/ARMAsmBackend.cpp @@ -174,7 +174,8 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { Value >>= 16; // Fallthrough case ARM::fixup_t2_movw_lo16: - case ARM::fixup_t2_movt_hi16_pcrel: + case ARM::fixup_t2_movt_hi16_pcrel: //FIXME: Shouldn't this be shifted like + // the other hi16 fixup? case ARM::fixup_t2_movw_lo16_pcrel: { unsigned Hi4 = (Value & 0xF000) >> 12; unsigned i = (Value & 0x800) >> 11; @@ -184,8 +185,10 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { // inst{26} = i; // inst{14-12} = Mid3; // inst{7-0} = Lo8; - assert ((((int64_t)Value) >= -0x8000) && (((int64_t)Value) <= 0x7fff) && - "Out of range pc-relative fixup value!"); + // The value comes in as the whole thing, not just the portion required + // for this fixup, so we need to mask off the bits not handled by this + // portion (lo vs. hi). 
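+ // (fixup_t2_movt_hi16 falls through from above with Value already shifted
+ // right by 16; the pc-relative movt variant does not, hence the FIXME.)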
+ Value &= 0xffff; Value = (Hi4 << 16) | (i << 26) | (Mid3 << 12) | (Lo8); uint64_t swapped = (Value & 0xFFFF0000) >> 16; swapped |= (Value & 0x0000FFFF) << 16; diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index eb73902..7240837 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -1010,19 +1010,16 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { MI->dump(); assert(0 && "Unsupported opcode for unwinding information"); case ARM::MOVr: - case ARM::tMOVgpr2gpr: - case ARM::tMOVgpr2tgpr: Offset = 0; break; case ARM::ADDri: Offset = -MI->getOperand(2).getImm(); break; case ARM::SUBri: - case ARM::t2SUBrSPi: - Offset = MI->getOperand(2).getImm(); + Offset = MI->getOperand(2).getImm(); break; case ARM::tSUBspi: - Offset = MI->getOperand(2).getImm()*4; + Offset = MI->getOperand(2).getImm()*4; break; case ARM::tADDspi: case ARM::tADDrSPi: @@ -1097,13 +1094,22 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { OutStreamer.EmitInstruction(TmpInst); return; } - case ARM::t2ADDrSPi: - case ARM::t2ADDrSPi12: - case ARM::t2SUBrSPi: - case ARM::t2SUBrSPi12: - assert ((MI->getOperand(1).getReg() == ARM::SP) && - "Unexpected source register!"); - break; + case ARM::t2LDMIA_RET: { + // As above for LDMIA_RET. Map to the tPOP instruction. + MCInst TmpInst; + LowerARMMachineInstrToMCInst(MI, TmpInst, *this); + TmpInst.setOpcode(ARM::t2LDMIA_UPD); + OutStreamer.EmitInstruction(TmpInst); + return; + } + case ARM::tPOP_RET: { + // As above for LDMIA_RET. Map to the tPOP instruction. + MCInst TmpInst; + LowerARMMachineInstrToMCInst(MI, TmpInst, *this); + TmpInst.setOpcode(ARM::tPOP); + OutStreamer.EmitInstruction(TmpInst); + return; + } case ARM::t2MOVi32imm: assert(0 && "Should be lowered by thumb2it pass"); case ARM::DBG_VALUE: { @@ -1215,6 +1221,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { TmpInst.setOpcode(ARM::tMOVr); TmpInst.addOperand(MCOperand::CreateReg(ARM::LR)); TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + // Add predicate operands. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); OutStreamer.EmitInstruction(TmpInst); } { @@ -1445,7 +1454,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { case ARM::t2BR_JT: { // Lower and emit the instruction itself, then the jump table following it. MCInst TmpInst; - TmpInst.setOpcode(ARM::tMOVgpr2gpr); + TmpInst.setOpcode(ARM::tMOVr); TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); // Add predicate operands. @@ -1494,7 +1503,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // mov pc, target MCInst TmpInst; unsigned Opc = MI->getOpcode() == ARM::BR_JTr ? - ARM::MOVr : ARM::tMOVgpr2gpr; + ARM::MOVr : ARM::tMOVr; TmpInst.setOpcode(Opc); TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); @@ -1507,7 +1516,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { OutStreamer.EmitInstruction(TmpInst); // Make sure the Thumb jump table is 4-byte aligned. 
- if (Opc == ARM::tMOVgpr2gpr) + if (Opc == ARM::tMOVr) EmitAlignment(2); // Output the data for the jump table itself @@ -1599,11 +1608,12 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCSymbol *Label = GetARMSJLJEHLabel(); { MCInst TmpInst; - TmpInst.setOpcode(ARM::tMOVgpr2tgpr); + TmpInst.setOpcode(ARM::tMOVr); TmpInst.addOperand(MCOperand::CreateReg(ValReg)); TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); - // 's' bit operand - TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR)); + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); OutStreamer.AddComment("eh_setjmp begin"); OutStreamer.EmitInstruction(TmpInst); } @@ -1817,7 +1827,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } { MCInst TmpInst; - TmpInst.setOpcode(ARM::tMOVtgpr2gpr); + TmpInst.setOpcode(ARM::tMOVr); TmpInst.addOperand(MCOperand::CreateReg(ARM::SP)); TmpInst.addOperand(MCOperand::CreateReg(ScratchReg)); // Predicate. diff --git a/lib/Target/ARM/ARMBaseInfo.h b/lib/Target/ARM/ARMBaseInfo.h index 36edbad..4c9ecdf 100644 --- a/lib/Target/ARM/ARMBaseInfo.h +++ b/lib/Target/ARM/ARMBaseInfo.h @@ -25,11 +25,13 @@ // Defines symbolic names for ARM registers. This defines a mapping from // register name to register number. // -#include "ARMGenRegisterNames.inc" +#define GET_REGINFO_ENUM +#include "ARMGenRegisterInfo.inc" // Defines symbolic names for the ARM instructions. // -#include "ARMGenInstrNames.inc" +#define GET_INSTRINFO_ENUM +#include "ARMGenInstrInfo.inc" namespace llvm { diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 44a3976..9f56637 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -18,7 +18,6 @@ #include "ARMHazardRecognizer.h" #include "ARMMachineFunctionInfo.h" #include "ARMRegisterInfo.h" -#include "ARMGenInstrInfo.inc" #include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/GlobalValue.h" @@ -35,6 +34,10 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/ADT/STLExtras.h" + +#define GET_INSTRINFO_MC_DESC +#include "ARMGenInstrInfo.inc" + using namespace llvm; static cl::opt<bool> @@ -74,7 +77,8 @@ static const ARM_MLxEntry ARM_MLxTable[] = { }; ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI) - : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)), + : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts), + ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP), Subtarget(STI) { for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) { if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second) @@ -136,9 +140,9 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, MachineInstr *UpdateMI = NULL; MachineInstr *MemMI = NULL; unsigned AddrMode = (TSFlags & ARMII::AddrModeMask); - const TargetInstrDesc &TID = MI->getDesc(); - unsigned NumOps = TID.getNumOperands(); - bool isLoad = !TID.mayStore(); + const MCInstrDesc &MCID = MI->getDesc(); + unsigned NumOps = MCID.getNumOperands(); + bool isLoad = !MCID.mayStore(); const MachineOperand &WB = isLoad ? 
MI->getOperand(1) : MI->getOperand(0); const MachineOperand &Base = MI->getOperand(2); const MachineOperand &Offset = MI->getOperand(NumOps-3); @@ -475,8 +479,8 @@ SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI, std::vector<MachineOperand> &Pred) const { // FIXME: This confuses implicit_def with optional CPSR def. - const TargetInstrDesc &TID = MI->getDesc(); - if (!TID.getImplicitDefs() && !TID.hasOptionalDef()) + const MCInstrDesc &MCID = MI->getDesc(); + if (!MCID.getImplicitDefs() && !MCID.hasOptionalDef()) return false; bool Found = false; @@ -495,11 +499,11 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI, /// By default, this returns true for every instruction with a /// PredicateOperand. bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const { - const TargetInstrDesc &TID = MI->getDesc(); - if (!TID.isPredicable()) + const MCInstrDesc &MCID = MI->getDesc(); + if (!MCID.isPredicable()) return false; - if ((TID.TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) { + if ((MCID.TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) { ARMFunctionInfo *AFI = MI->getParent()->getParent()->getInfo<ARMFunctionInfo>(); return AFI->isThumb2Function(); @@ -525,8 +529,8 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo(); // Basic size info comes from the TSFlags field. - const TargetInstrDesc &TID = MI->getDesc(); - uint64_t TSFlags = TID.TSFlags; + const MCInstrDesc &MCID = MI->getDesc(); + uint64_t TSFlags = MCID.TSFlags; unsigned Opc = MI->getOpcode(); switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) { @@ -588,9 +592,9 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { // entry is one byte; TBH two byte each. unsigned EntrySize = (Opc == ARM::t2TBB_JT) ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4); - unsigned NumOps = TID.getNumOperands(); + unsigned NumOps = MCID.getNumOperands(); MachineOperand JTOP = - MI->getOperand(NumOps - (TID.isPredicable() ? 3 : 2)); + MI->getOperand(NumOps - (MCID.isPredicable() ? 
3 : 2)); unsigned JTI = JTOP.getIndex(); const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); assert(MJTI != 0); @@ -788,7 +792,7 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI, break; case ARM::STRi12: case ARM::t2STRi12: - case ARM::tSpill: + case ARM::tSTRspi: case ARM::VSTRD: case ARM::VSTRS: if (MI->getOperand(1).isFI() && @@ -923,7 +927,7 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, break; case ARM::LDRi12: case ARM::t2LDRi12: - case ARM::tRestore: + case ARM::tLDRspi: case ARM::VLDRD: case ARM::VLDRS: if (MI->getOperand(1).isFI() && @@ -1363,7 +1367,7 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, int &Offset, const ARMBaseInstrInfo &TII) { unsigned Opcode = MI.getOpcode(); - const TargetInstrDesc &Desc = MI.getDesc(); + const MCInstrDesc &Desc = MI.getDesc(); unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); bool isSub = false; @@ -1803,7 +1807,7 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, if (!ItinData || ItinData->isEmpty()) return 1; - const TargetInstrDesc &Desc = MI->getDesc(); + const MCInstrDesc &Desc = MI->getDesc(); unsigned Class = Desc.getSchedClass(); unsigned UOps = ItinData->Itineraries[Class].NumMicroOps; if (UOps) @@ -1906,10 +1910,10 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, int ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData, - const TargetInstrDesc &DefTID, + const MCInstrDesc &DefMCID, unsigned DefClass, unsigned DefIdx, unsigned DefAlign) const { - int RegNo = (int)(DefIdx+1) - DefTID.getNumOperands() + 1; + int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1; if (RegNo <= 0) // Def is the address writeback. return ItinData->getOperandCycle(DefClass, DefIdx); @@ -1924,7 +1928,7 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData, DefCycle = RegNo; bool isSLoad = false; - switch (DefTID.getOpcode()) { + switch (DefMCID.getOpcode()) { default: break; case ARM::VLDMSIA: case ARM::VLDMSIA_UPD: @@ -1947,10 +1951,10 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData, int ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData, - const TargetInstrDesc &DefTID, + const MCInstrDesc &DefMCID, unsigned DefClass, unsigned DefIdx, unsigned DefAlign) const { - int RegNo = (int)(DefIdx+1) - DefTID.getNumOperands() + 1; + int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1; if (RegNo <= 0) // Def is the address writeback. 
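// (A non-positive RegNo means DefIdx refers to the base-register writeback
// rather than one of the loaded registers.)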
return ItinData->getOperandCycle(DefClass, DefIdx); @@ -1982,10 +1986,10 @@ ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData, int ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData, - const TargetInstrDesc &UseTID, + const MCInstrDesc &UseMCID, unsigned UseClass, unsigned UseIdx, unsigned UseAlign) const { - int RegNo = (int)(UseIdx+1) - UseTID.getNumOperands() + 1; + int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1; if (RegNo <= 0) return ItinData->getOperandCycle(UseClass, UseIdx); @@ -1999,7 +2003,7 @@ ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData, UseCycle = RegNo; bool isSStore = false; - switch (UseTID.getOpcode()) { + switch (UseMCID.getOpcode()) { default: break; case ARM::VSTMSIA: case ARM::VSTMSIA_UPD: @@ -2022,10 +2026,10 @@ ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData, int ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData, - const TargetInstrDesc &UseTID, + const MCInstrDesc &UseMCID, unsigned UseClass, unsigned UseIdx, unsigned UseAlign) const { - int RegNo = (int)(UseIdx+1) - UseTID.getNumOperands() + 1; + int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1; if (RegNo <= 0) return ItinData->getOperandCycle(UseClass, UseIdx); @@ -2051,14 +2055,14 @@ ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData, int ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, - const TargetInstrDesc &DefTID, + const MCInstrDesc &DefMCID, unsigned DefIdx, unsigned DefAlign, - const TargetInstrDesc &UseTID, + const MCInstrDesc &UseMCID, unsigned UseIdx, unsigned UseAlign) const { - unsigned DefClass = DefTID.getSchedClass(); - unsigned UseClass = UseTID.getSchedClass(); + unsigned DefClass = DefMCID.getSchedClass(); + unsigned UseClass = UseMCID.getSchedClass(); - if (DefIdx < DefTID.getNumDefs() && UseIdx < UseTID.getNumOperands()) + if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands()) return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx); // This may be a def / use of a variable_ops instruction, the operand @@ -2066,7 +2070,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, // figure it out. 
int DefCycle = -1; bool LdmBypass = false; - switch (DefTID.getOpcode()) { + switch (DefMCID.getOpcode()) { default: DefCycle = ItinData->getOperandCycle(DefClass, DefIdx); break; @@ -2077,7 +2081,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VLDMSIA: case ARM::VLDMSIA_UPD: case ARM::VLDMSDB_UPD: - DefCycle = getVLDMDefCycle(ItinData, DefTID, DefClass, DefIdx, DefAlign); + DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign); break; case ARM::LDMIA_RET: @@ -2098,7 +2102,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::t2LDMIA_UPD: case ARM::t2LDMDB_UPD: LdmBypass = 1; - DefCycle = getLDMDefCycle(ItinData, DefTID, DefClass, DefIdx, DefAlign); + DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign); break; } @@ -2107,7 +2111,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, DefCycle = 2; int UseCycle = -1; - switch (UseTID.getOpcode()) { + switch (UseMCID.getOpcode()) { default: UseCycle = ItinData->getOperandCycle(UseClass, UseIdx); break; @@ -2118,7 +2122,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VSTMSIA: case ARM::VSTMSIA_UPD: case ARM::VSTMSDB_UPD: - UseCycle = getVSTMUseCycle(ItinData, UseTID, UseClass, UseIdx, UseAlign); + UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign); break; case ARM::STMIA: @@ -2137,7 +2141,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::t2STMDB: case ARM::t2STMIA_UPD: case ARM::t2STMDB_UPD: - UseCycle = getSTMUseCycle(ItinData, UseTID, UseClass, UseIdx, UseAlign); + UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign); break; } @@ -2150,7 +2154,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, if (LdmBypass) { // It's a variable_ops instruction so we can't use DefIdx here. Just use // first def operand. - if (ItinData->hasPipelineForwarding(DefClass, DefTID.getNumOperands()-1, + if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1, UseClass, UseIdx)) --UseCycle; } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx, @@ -2170,11 +2174,11 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, DefMI->isRegSequence() || DefMI->isImplicitDef()) return 1; - const TargetInstrDesc &DefTID = DefMI->getDesc(); + const MCInstrDesc &DefMCID = DefMI->getDesc(); if (!ItinData || ItinData->isEmpty()) - return DefTID.mayLoad() ? 3 : 1; + return DefMCID.mayLoad() ? 3 : 1; - const TargetInstrDesc &UseTID = UseMI->getDesc(); + const MCInstrDesc &UseMCID = UseMI->getDesc(); const MachineOperand &DefMO = DefMI->getOperand(DefIdx); if (DefMO.getReg() == ARM::CPSR) { if (DefMI->getOpcode() == ARM::FMSTAT) { @@ -2183,7 +2187,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, } // CPSR set and branch can be paired in the same cycle. - if (UseTID.isBranch()) + if (UseMCID.isBranch()) return 0; } @@ -2191,14 +2195,14 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, ? (*DefMI->memoperands_begin())->getAlignment() : 0; unsigned UseAlign = UseMI->hasOneMemOperand() ? 
(*UseMI->memoperands_begin())->getAlignment() : 0; - int Latency = getOperandLatency(ItinData, DefTID, DefIdx, DefAlign, - UseTID, UseIdx, UseAlign); + int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, + UseMCID, UseIdx, UseAlign); if (Latency > 1 && (Subtarget.isCortexA8() || Subtarget.isCortexA9())) { // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] // variants are one cycle cheaper. - switch (DefTID.getOpcode()) { + switch (DefMCID.getOpcode()) { default: break; case ARM::LDRrs: case ARM::LDRBrs: { @@ -2223,7 +2227,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, } if (DefAlign < 8 && Subtarget.isCortexA9()) - switch (DefTID.getOpcode()) { + switch (DefMCID.getOpcode()) { default: break; case ARM::VLD1q8: case ARM::VLD1q16: @@ -2327,37 +2331,37 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, if (!DefNode->isMachineOpcode()) return 1; - const TargetInstrDesc &DefTID = get(DefNode->getMachineOpcode()); + const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode()); - if (isZeroCost(DefTID.Opcode)) + if (isZeroCost(DefMCID.Opcode)) return 0; if (!ItinData || ItinData->isEmpty()) - return DefTID.mayLoad() ? 3 : 1; + return DefMCID.mayLoad() ? 3 : 1; if (!UseNode->isMachineOpcode()) { - int Latency = ItinData->getOperandCycle(DefTID.getSchedClass(), DefIdx); + int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx); if (Subtarget.isCortexA9()) return Latency <= 2 ? 1 : Latency - 1; else return Latency <= 3 ? 1 : Latency - 2; } - const TargetInstrDesc &UseTID = get(UseNode->getMachineOpcode()); + const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode()); const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode); unsigned DefAlign = !DefMN->memoperands_empty() ? (*DefMN->memoperands_begin())->getAlignment() : 0; const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode); unsigned UseAlign = !UseMN->memoperands_empty() ? (*UseMN->memoperands_begin())->getAlignment() : 0; - int Latency = getOperandLatency(ItinData, DefTID, DefIdx, DefAlign, - UseTID, UseIdx, UseAlign); + int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, + UseMCID, UseIdx, UseAlign); if (Latency > 1 && (Subtarget.isCortexA8() || Subtarget.isCortexA9())) { // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] // variants are one cycle cheaper. - switch (DefTID.getOpcode()) { + switch (DefMCID.getOpcode()) { default: break; case ARM::LDRrs: case ARM::LDRBrs: { @@ -2384,7 +2388,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, } if (DefAlign < 8 && Subtarget.isCortexA9()) - switch (DefTID.getOpcode()) { + switch (DefMCID.getOpcode()) { default: break; case ARM::VLD1q8Pseudo: case ARM::VLD1q16Pseudo: @@ -2503,10 +2507,10 @@ int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, if (!ItinData || ItinData->isEmpty()) return 1; - const TargetInstrDesc &TID = MI->getDesc(); - unsigned Class = TID.getSchedClass(); + const MCInstrDesc &MCID = MI->getDesc(); + unsigned Class = MCID.getSchedClass(); unsigned UOps = ItinData->Itineraries[Class].NumMicroOps; - if (PredCost && TID.hasImplicitDefOfPhysReg(ARM::CPSR)) + if (PredCost && MCID.hasImplicitDefOfPhysReg(ARM::CPSR)) // When predicated, CPSR is an additional source operand for CPSR updating // instructions, this apparently increases their latencies. 
*PredCost = 1; diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 96f0e76..ab93cde 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -353,25 +353,25 @@ public: SDNode *UseNode, unsigned UseIdx) const; private: int getVLDMDefCycle(const InstrItineraryData *ItinData, - const TargetInstrDesc &DefTID, + const MCInstrDesc &DefMCID, unsigned DefClass, unsigned DefIdx, unsigned DefAlign) const; int getLDMDefCycle(const InstrItineraryData *ItinData, - const TargetInstrDesc &DefTID, + const MCInstrDesc &DefMCID, unsigned DefClass, unsigned DefIdx, unsigned DefAlign) const; int getVSTMUseCycle(const InstrItineraryData *ItinData, - const TargetInstrDesc &UseTID, + const MCInstrDesc &UseMCID, unsigned UseClass, unsigned UseIdx, unsigned UseAlign) const; int getSTMUseCycle(const InstrItineraryData *ItinData, - const TargetInstrDesc &UseTID, + const MCInstrDesc &UseMCID, unsigned UseClass, unsigned UseIdx, unsigned UseAlign) const; int getOperandLatency(const InstrItineraryData *ItinData, - const TargetInstrDesc &DefTID, + const MCInstrDesc &DefMCID, unsigned DefIdx, unsigned DefAlign, - const TargetInstrDesc &UseTID, + const MCInstrDesc &UseMCID, unsigned UseIdx, unsigned UseAlign) const; int getInstrLatency(const InstrItineraryData *ItinData, diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 9dc51b8..e46082d 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -40,6 +40,10 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Support/CommandLine.h" +#define GET_REGINFO_MC_DESC +#define GET_REGINFO_TARGET_DESC +#include "ARMGenRegisterInfo.inc" + using namespace llvm; static cl::opt<bool> @@ -54,8 +58,7 @@ EnableBasePointer("arm-use-base-pointer", cl::Hidden, cl::init(true), ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &sti) - : ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP), - TII(tii), STI(sti), + : ARMGenRegisterInfo(), TII(tii), STI(sti), FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? 
ARM::R7 : ARM::R11), BasePtr(ARM::R6) { } @@ -955,7 +958,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, int64_t ARMBaseRegisterInfo:: getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const { - const TargetInstrDesc &Desc = MI->getDesc(); + const MCInstrDesc &Desc = MI->getDesc(); unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); int64_t InstrOffs = 0;; int Scale = 1; @@ -1105,11 +1108,11 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB, if (Ins != MBB->end()) DL = Ins->getDebugLoc(); - const TargetInstrDesc &TID = TII.get(ADDriOpc); + const MCInstrDesc &MCID = TII.get(ADDriOpc); MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - MRI.constrainRegClass(BaseReg, TID.OpInfo[0].getRegClass(this)); + MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this)); - MachineInstrBuilder MIB = BuildMI(*MBB, Ins, DL, TID, BaseReg) + MachineInstrBuilder MIB = BuildMI(*MBB, Ins, DL, MCID, BaseReg) .addFrameIndex(FrameIdx).addImm(Offset); if (!AFI->isThumb1OnlyFunction()) @@ -1145,7 +1148,7 @@ ARMBaseRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I, bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, int64_t Offset) const { - const TargetInstrDesc &Desc = MI->getDesc(); + const MCInstrDesc &Desc = MI->getDesc(); unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); unsigned i = 0; @@ -1281,11 +1284,5 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } // Update the original instruction to use the scratch register. MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true); - if (MI.getOpcode() == ARM::t2ADDrSPi) - MI.setDesc(TII.get(ARM::t2ADDri)); - else if (MI.getOpcode() == ARM::t2SUBrSPi) - MI.setDesc(TII.get(ARM::t2SUBri)); } } - -#include "ARMGenRegisterInfo.inc" diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index 70b6f01..b4b4059 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -16,7 +16,9 @@ #include "ARM.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "ARMGenRegisterInfo.h.inc" + +#define GET_REGINFO_HEADER +#include "ARMGenRegisterInfo.inc" namespace llvm { class ARMSubtarget; diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 97bac88..7ed07c2 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -98,13 +98,13 @@ namespace { void addPCLabel(unsigned LabelID); void emitPseudoInstruction(const MachineInstr &MI); unsigned getMachineSoRegOpValue(const MachineInstr &MI, - const TargetInstrDesc &TID, + const MCInstrDesc &MCID, const MachineOperand &MO, unsigned OpIdx); unsigned getMachineSoImmOpValue(unsigned SoImm); unsigned getAddrModeSBit(const MachineInstr &MI, - const TargetInstrDesc &TID) const; + const MCInstrDesc &MCID) const; void emitDataProcessingInstruction(const MachineInstr &MI, unsigned ImplicitRd = 0, @@ -461,9 +461,9 @@ unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI, else if (MO.isSymbol()) emitExternalSymbolAddress(MO.getSymbolName(), ARM::reloc_arm_branch); else if (MO.isCPI()) { - const TargetInstrDesc &TID = MI.getDesc(); + const MCInstrDesc &MCID = MI.getDesc(); // For VFP load, the immediate offset is multiplied by 4. - unsigned Reloc = ((TID.TSFlags & ARMII::FormMask) == ARMII::VFPLdStFrm) + unsigned Reloc = ((MCID.TSFlags & ARMII::FormMask) == ARMII::VFPLdStFrm) ? 
ARM::reloc_arm_vfp_cp_entry : ARM::reloc_arm_cp_entry; emitConstPoolAddress(MO.getIndex(), Reloc); } else if (MO.isJTI()) @@ -830,7 +830,7 @@ void ARMCodeEmitter::emitLEApcrelInstruction(const MachineInstr &MI) { void ARMCodeEmitter::emitLEApcrelJTInstruction(const MachineInstr &MI) { // It's basically add r, pc, (LJTI - $+8) - const TargetInstrDesc &TID = MI.getDesc(); + const MCInstrDesc &MCID = MI.getDesc(); // Emit the 'add' instruction. unsigned Binary = 0x4 << 21; // add: Insts{24-21} = 0b0100 @@ -839,7 +839,7 @@ void ARMCodeEmitter::emitLEApcrelJTInstruction(const MachineInstr &MI) { Binary |= II->getPredicate(&MI) << ARMII::CondShift; // Encode S bit if MI modifies CPSR. - Binary |= getAddrModeSBit(MI, TID); + Binary |= getAddrModeSBit(MI, MCID); // Encode Rd. Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift; @@ -999,7 +999,7 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) { } unsigned ARMCodeEmitter::getMachineSoRegOpValue(const MachineInstr &MI, - const TargetInstrDesc &TID, + const MCInstrDesc &MCID, const MachineOperand &MO, unsigned OpIdx) { unsigned Binary = getMachineOpValue(MI, MO); @@ -1069,8 +1069,8 @@ unsigned ARMCodeEmitter::getMachineSoImmOpValue(unsigned SoImm) { } unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI, - const TargetInstrDesc &TID) const { - for (unsigned i = MI.getNumOperands(), e = TID.getNumOperands(); i >= e; --i){ + const MCInstrDesc &MCID) const { + for (unsigned i = MI.getNumOperands(), e = MCID.getNumOperands(); i >= e; --i){ const MachineOperand &MO = MI.getOperand(i-1); if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) return 1 << ARMII::S_BitShift; @@ -1081,7 +1081,7 @@ unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI, void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, unsigned ImplicitRd, unsigned ImplicitRn) { - const TargetInstrDesc &TID = MI.getDesc(); + const MCInstrDesc &MCID = MI.getDesc(); // Part of binary is determined by TableGn. unsigned Binary = getBinaryCodeForInstr(MI); @@ -1095,10 +1095,10 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, Binary |= II->getPredicate(&MI) << ARMII::CondShift; // Encode S bit if MI modifies CPSR. - Binary |= getAddrModeSBit(MI, TID); + Binary |= getAddrModeSBit(MI, MCID); // Encode register def if there is one. - unsigned NumDefs = TID.getNumDefs(); + unsigned NumDefs = MCID.getNumDefs(); unsigned OpIdx = 0; if (NumDefs) Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; @@ -1106,7 +1106,23 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, // Special handling for implicit use (e.g. PC). Binary |= (getARMRegisterNumbering(ImplicitRd) << ARMII::RegRdShift); - if ((TID.Opcode == ARM::BFC) || (TID.Opcode == ARM::BFI)) { + if (MCID.Opcode == ARM::MOVi16) { + // Get immediate from MI. + unsigned Lo16 = getMovi32Value(MI, MI.getOperand(OpIdx), + ARM::reloc_arm_movw); + // Encode imm which is the same as in emitMOVi32immInstruction(). 
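+ // (MOVW splits imm16 as imm4:imm12, with imm4 in bits 19-16 and imm12 in
+ // bits 11-0 of the instruction word.)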
+ Binary |= Lo16 & 0xFFF; + Binary |= ((Lo16 >> 12) & 0xF) << 16; + emitWordLE(Binary); + return; + } else if(MCID.Opcode == ARM::MOVTi16) { + unsigned Hi16 = (getMovi32Value(MI, MI.getOperand(OpIdx), + ARM::reloc_arm_movt) >> 16); + Binary |= Hi16 & 0xFFF; + Binary |= ((Hi16 >> 12) & 0xF) << 16; + emitWordLE(Binary); + return; + } else if ((MCID.Opcode == ARM::BFC) || (MCID.Opcode == ARM::BFI)) { uint32_t v = ~MI.getOperand(2).getImm(); int32_t lsb = CountTrailingZeros_32(v); int32_t msb = (32 - CountLeadingZeros_32(v)) - 1; @@ -1115,7 +1131,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, Binary |= (lsb & 0x1F) << 7; emitWordLE(Binary); return; - } else if ((TID.Opcode == ARM::UBFX) || (TID.Opcode == ARM::SBFX)) { + } else if ((MCID.Opcode == ARM::UBFX) || (MCID.Opcode == ARM::SBFX)) { // Encode Rn in Instr{0-3} Binary |= getMachineOpValue(MI, OpIdx++); @@ -1130,11 +1146,11 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, } // If this is a two-address operand, skip it. e.g. MOVCCr operand 1. - if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) + if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) ++OpIdx; // Encode first non-shifter register operand if there is one. - bool isUnary = TID.TSFlags & ARMII::UnaryDP; + bool isUnary = MCID.TSFlags & ARMII::UnaryDP; if (!isUnary) { if (ImplicitRn) // Special handling for implicit use (e.g. PC). @@ -1147,9 +1163,9 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, // Encode shifter operand. const MachineOperand &MO = MI.getOperand(OpIdx); - if ((TID.TSFlags & ARMII::FormMask) == ARMII::DPSoRegFrm) { + if ((MCID.TSFlags & ARMII::FormMask) == ARMII::DPSoRegFrm) { // Encode SoReg. - emitWordLE(Binary | getMachineSoRegOpValue(MI, TID, MO, OpIdx)); + emitWordLE(Binary | getMachineSoRegOpValue(MI, MCID, MO, OpIdx)); return; } @@ -1168,9 +1184,9 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI, unsigned ImplicitRd, unsigned ImplicitRn) { - const TargetInstrDesc &TID = MI.getDesc(); - unsigned Form = TID.TSFlags & ARMII::FormMask; - bool IsPrePost = (TID.TSFlags & ARMII::IndexModeMask) != 0; + const MCInstrDesc &MCID = MI.getDesc(); + unsigned Form = MCID.TSFlags & ARMII::FormMask; + bool IsPrePost = (MCID.TSFlags & ARMII::IndexModeMask) != 0; // Part of binary is determined by TableGn. unsigned Binary = getBinaryCodeForInstr(MI); @@ -1216,7 +1232,7 @@ void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI, Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; // If this is a two-address operand, skip it. e.g. LDR_PRE. - if (!Skipped && TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) + if (!Skipped && MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) ++OpIdx; const MachineOperand &MO2 = MI.getOperand(OpIdx); @@ -1252,9 +1268,9 @@ void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI, void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI, unsigned ImplicitRn) { - const TargetInstrDesc &TID = MI.getDesc(); - unsigned Form = TID.TSFlags & ARMII::FormMask; - bool IsPrePost = (TID.TSFlags & ARMII::IndexModeMask) != 0; + const MCInstrDesc &MCID = MI.getDesc(); + unsigned Form = MCID.TSFlags & ARMII::FormMask; + bool IsPrePost = (MCID.TSFlags & ARMII::IndexModeMask) != 0; // Part of binary is determined by TableGn. 
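// (getBinaryCodeForInstr() is the TableGen-generated encoder; the operand
// fields below are ORed into that base encoding by hand.)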
unsigned Binary = getBinaryCodeForInstr(MI); @@ -1276,7 +1292,7 @@ void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI, Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; // Skip LDRD and STRD's second operand. - if (TID.Opcode == ARM::LDRD || TID.Opcode == ARM::STRD) + if (MCID.Opcode == ARM::LDRD || MCID.Opcode == ARM::STRD) ++OpIdx; // Set second operand @@ -1287,7 +1303,7 @@ void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI, Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; // If this is a two-address operand, skip it. e.g. LDRH_POST. - if (!Skipped && TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) + if (!Skipped && MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) ++OpIdx; const MachineOperand &MO2 = MI.getOperand(OpIdx); @@ -1337,8 +1353,8 @@ static unsigned getAddrModeUPBits(unsigned Mode) { } void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) { - const TargetInstrDesc &TID = MI.getDesc(); - bool IsUpdating = (TID.TSFlags & ARMII::IndexModeMask) != 0; + const MCInstrDesc &MCID = MI.getDesc(); + bool IsUpdating = (MCID.TSFlags & ARMII::IndexModeMask) != 0; // Part of binary is determined by TableGn. unsigned Binary = getBinaryCodeForInstr(MI); @@ -1382,7 +1398,7 @@ void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) { } void ARMCodeEmitter::emitMulFrmInstruction(const MachineInstr &MI) { - const TargetInstrDesc &TID = MI.getDesc(); + const MCInstrDesc &MCID = MI.getDesc(); // Part of binary is determined by TableGn. unsigned Binary = getBinaryCodeForInstr(MI); @@ -1391,12 +1407,12 @@ void ARMCodeEmitter::emitMulFrmInstruction(const MachineInstr &MI) { Binary |= II->getPredicate(&MI) << ARMII::CondShift; // Encode S bit if MI modifies CPSR. - Binary |= getAddrModeSBit(MI, TID); + Binary |= getAddrModeSBit(MI, MCID); // 32x32->64bit operations have two destination registers. The number // of register definitions will tell us if that's what we're dealing with. unsigned OpIdx = 0; - if (TID.getNumDefs() == 2) + if (MCID.getNumDefs() == 2) Binary |= getMachineOpValue (MI, OpIdx++) << ARMII::RegRdLoShift; // Encode Rd @@ -1410,16 +1426,16 @@ void ARMCodeEmitter::emitMulFrmInstruction(const MachineInstr &MI) { // Many multiple instructions (e.g. MLA) have three src operands. Encode // it as Rn (for multiply, that's in the same offset as RdLo. - if (TID.getNumOperands() > OpIdx && - !TID.OpInfo[OpIdx].isPredicate() && - !TID.OpInfo[OpIdx].isOptionalDef()) + if (MCID.getNumOperands() > OpIdx && + !MCID.OpInfo[OpIdx].isPredicate() && + !MCID.OpInfo[OpIdx].isOptionalDef()) Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRdLoShift; emitWordLE(Binary); } void ARMCodeEmitter::emitExtendInstruction(const MachineInstr &MI) { - const TargetInstrDesc &TID = MI.getDesc(); + const MCInstrDesc &MCID = MI.getDesc(); // Part of binary is determined by TableGn. unsigned Binary = getBinaryCodeForInstr(MI); @@ -1448,15 +1464,15 @@ void ARMCodeEmitter::emitExtendInstruction(const MachineInstr &MI) { // Encode rot imm (0, 8, 16, or 24) if it has a rotate immediate operand. 
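// (The rotate field is only two bits wide, so the byte-multiple amount is
// divided by 8 to map 0/8/16/24 onto 0-3.)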
if (MI.getOperand(OpIdx).isImm() && - !TID.OpInfo[OpIdx].isPredicate() && - !TID.OpInfo[OpIdx].isOptionalDef()) + !MCID.OpInfo[OpIdx].isPredicate() && + !MCID.OpInfo[OpIdx].isOptionalDef()) Binary |= (getMachineOpValue(MI, OpIdx) / 8) << ARMII::ExtRotImmShift; emitWordLE(Binary); } void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) { - const TargetInstrDesc &TID = MI.getDesc(); + const MCInstrDesc &MCID = MI.getDesc(); // Part of binary is determined by TableGn. unsigned Binary = getBinaryCodeForInstr(MI); @@ -1465,7 +1481,7 @@ void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) { Binary |= II->getPredicate(&MI) << ARMII::CondShift; // PKH instructions are finished at this point - if (TID.Opcode == ARM::PKHBT || TID.Opcode == ARM::PKHTB) { + if (MCID.Opcode == ARM::PKHBT || MCID.Opcode == ARM::PKHTB) { emitWordLE(Binary); return; } @@ -1476,9 +1492,9 @@ void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) { Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; const MachineOperand &MO = MI.getOperand(OpIdx++); - if (OpIdx == TID.getNumOperands() || - TID.OpInfo[OpIdx].isPredicate() || - TID.OpInfo[OpIdx].isOptionalDef()) { + if (OpIdx == MCID.getNumOperands() || + MCID.OpInfo[OpIdx].isPredicate() || + MCID.OpInfo[OpIdx].isOptionalDef()) { // Encode Rm and it's done. Binary |= getMachineOpValue(MI, MO); emitWordLE(Binary); @@ -1493,7 +1509,7 @@ void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) { // Encode shift_imm. unsigned ShiftAmt = MI.getOperand(OpIdx).getImm(); - if (TID.Opcode == ARM::PKHTB) { + if (MCID.Opcode == ARM::PKHTB) { assert(ShiftAmt != 0 && "PKHTB shift_imm is 0!"); if (ShiftAmt == 32) ShiftAmt = 0; @@ -1505,7 +1521,7 @@ void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) { } void ARMCodeEmitter::emitSaturateInstruction(const MachineInstr &MI) { - const TargetInstrDesc &TID = MI.getDesc(); + const MCInstrDesc &MCID = MI.getDesc(); // Part of binary is determined by TableGen. unsigned Binary = getBinaryCodeForInstr(MI); @@ -1518,11 +1534,11 @@ void ARMCodeEmitter::emitSaturateInstruction(const MachineInstr &MI) { // Encode saturate bit position. unsigned Pos = MI.getOperand(1).getImm(); - if (TID.Opcode == ARM::SSAT || TID.Opcode == ARM::SSAT16) + if (MCID.Opcode == ARM::SSAT || MCID.Opcode == ARM::SSAT16) Pos -= 1; assert((Pos < 16 || (Pos < 32 && - TID.Opcode != ARM::SSAT16 && - TID.Opcode != ARM::USAT16)) && + MCID.Opcode != ARM::SSAT16 && + MCID.Opcode != ARM::USAT16)) && "saturate bit position out of range"); Binary |= Pos << 16; @@ -1530,7 +1546,7 @@ void ARMCodeEmitter::emitSaturateInstruction(const MachineInstr &MI) { Binary |= getMachineOpValue(MI, 2); // Encode shift_imm. 
- if (TID.getNumOperands() == 4) { + if (MCID.getNumOperands() == 4) { unsigned ShiftOp = MI.getOperand(3).getImm(); ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp); if (Opc == ARM_AM::asr) @@ -1546,9 +1562,9 @@ void ARMCodeEmitter::emitSaturateInstruction(const MachineInstr &MI) { } void ARMCodeEmitter::emitBranchInstruction(const MachineInstr &MI) { - const TargetInstrDesc &TID = MI.getDesc(); + const MCInstrDesc &MCID = MI.getDesc(); - if (TID.Opcode == ARM::TPsoft) { + if (MCID.Opcode == ARM::TPsoft) { llvm_unreachable("ARM::TPsoft FIXME"); // FIXME } @@ -1589,20 +1605,20 @@ void ARMCodeEmitter::emitInlineJumpTable(unsigned JTIndex) { } void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) { - const TargetInstrDesc &TID = MI.getDesc(); + const MCInstrDesc &MCID = MI.getDesc(); // Handle jump tables. - if (TID.Opcode == ARM::BR_JTr || TID.Opcode == ARM::BR_JTadd) { + if (MCID.Opcode == ARM::BR_JTr || MCID.Opcode == ARM::BR_JTadd) { // First emit a ldr pc, [] instruction. emitDataProcessingInstruction(MI, ARM::PC); // Then emit the inline jump table. unsigned JTIndex = - (TID.Opcode == ARM::BR_JTr) + (MCID.Opcode == ARM::BR_JTr) ? MI.getOperand(1).getIndex() : MI.getOperand(2).getIndex(); emitInlineJumpTable(JTIndex); return; - } else if (TID.Opcode == ARM::BR_JTm) { + } else if (MCID.Opcode == ARM::BR_JTm) { // First emit a ldr pc, [] instruction. emitLoadStoreInstruction(MI, ARM::PC); @@ -1617,7 +1633,7 @@ void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) { // Set the conditional execution predicate Binary |= II->getPredicate(&MI) << ARMII::CondShift; - if (TID.Opcode == ARM::BX_RET || TID.Opcode == ARM::MOVPCLR) + if (MCID.Opcode == ARM::BX_RET || MCID.Opcode == ARM::MOVPCLR) // The return register is LR. Binary |= getARMRegisterNumbering(ARM::LR); else @@ -1673,7 +1689,7 @@ static unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) { } void ARMCodeEmitter::emitVFPArithInstruction(const MachineInstr &MI) { - const TargetInstrDesc &TID = MI.getDesc(); + const MCInstrDesc &MCID = MI.getDesc(); // Part of binary is determined by TableGn. unsigned Binary = getBinaryCodeForInstr(MI); @@ -1687,16 +1703,16 @@ void ARMCodeEmitter::emitVFPArithInstruction(const MachineInstr &MI) { Binary |= encodeVFPRd(MI, OpIdx++); // If this is a two-address operand, skip it, e.g. FMACD. - if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) + if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) ++OpIdx; // Encode Dn / Sn. - if ((TID.TSFlags & ARMII::FormMask) == ARMII::VFPBinaryFrm) + if ((MCID.TSFlags & ARMII::FormMask) == ARMII::VFPBinaryFrm) Binary |= encodeVFPRn(MI, OpIdx++); - if (OpIdx == TID.getNumOperands() || - TID.OpInfo[OpIdx].isPredicate() || - TID.OpInfo[OpIdx].isOptionalDef()) { + if (OpIdx == MCID.getNumOperands() || + MCID.OpInfo[OpIdx].isPredicate() || + MCID.OpInfo[OpIdx].isOptionalDef()) { // FCMPEZD etc. has only one operand. emitWordLE(Binary); return; @@ -1709,8 +1725,8 @@ void ARMCodeEmitter::emitVFPArithInstruction(const MachineInstr &MI) { } void ARMCodeEmitter::emitVFPConversionInstruction(const MachineInstr &MI) { - const TargetInstrDesc &TID = MI.getDesc(); - unsigned Form = TID.TSFlags & ARMII::FormMask; + const MCInstrDesc &MCID = MI.getDesc(); + unsigned Form = MCID.TSFlags & ARMII::FormMask; // Part of binary is determined by TableGn. 
unsigned Binary = getBinaryCodeForInstr(MI); @@ -1806,8 +1822,8 @@ void ARMCodeEmitter::emitVFPLoadStoreInstruction(const MachineInstr &MI) { void ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) { - const TargetInstrDesc &TID = MI.getDesc(); - bool IsUpdating = (TID.TSFlags & ARMII::IndexModeMask) != 0; + const MCInstrDesc &MCID = MI.getDesc(); + bool IsUpdating = (MCID.TSFlags & ARMII::IndexModeMask) != 0; // Part of binary is determined by TableGn. unsigned Binary = getBinaryCodeForInstr(MI); @@ -1912,8 +1928,8 @@ void ARMCodeEmitter::emitNEONLaneInstruction(const MachineInstr &MI) { unsigned Binary = getBinaryCodeForInstr(MI); unsigned RegTOpIdx, RegNOpIdx, LnOpIdx; - const TargetInstrDesc &TID = MI.getDesc(); - if ((TID.TSFlags & ARMII::FormMask) == ARMII::NGetLnFrm) { + const MCInstrDesc &MCID = MI.getDesc(); + if ((MCID.TSFlags & ARMII::FormMask) == ARMII::NGetLnFrm) { RegTOpIdx = 0; RegNOpIdx = 1; LnOpIdx = 2; @@ -1980,12 +1996,12 @@ void ARMCodeEmitter::emitNEON1RegModImmInstruction(const MachineInstr &MI) { } void ARMCodeEmitter::emitNEON2RegInstruction(const MachineInstr &MI) { - const TargetInstrDesc &TID = MI.getDesc(); + const MCInstrDesc &MCID = MI.getDesc(); unsigned Binary = getBinaryCodeForInstr(MI); // Destination register is encoded in Dd; source register in Dm. unsigned OpIdx = 0; Binary |= encodeNEONRd(MI, OpIdx++); - if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) + if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) ++OpIdx; Binary |= encodeNEONRm(MI, OpIdx); if (IsThumb) @@ -1995,15 +2011,15 @@ void ARMCodeEmitter::emitNEON2RegInstruction(const MachineInstr &MI) { } void ARMCodeEmitter::emitNEON3RegInstruction(const MachineInstr &MI) { - const TargetInstrDesc &TID = MI.getDesc(); + const MCInstrDesc &MCID = MI.getDesc(); unsigned Binary = getBinaryCodeForInstr(MI); // Destination register is encoded in Dd; source registers in Dn and Dm. unsigned OpIdx = 0; Binary |= encodeNEONRd(MI, OpIdx++); - if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) + if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) ++OpIdx; Binary |= encodeNEONRn(MI, OpIdx++); - if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) + if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) ++OpIdx; Binary |= encodeNEONRm(MI, OpIdx); if (IsThumb) diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index baf95a3..309caee 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -1692,9 +1692,9 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) { MachineInstr *MI = T2JumpTables[i]; - const TargetInstrDesc &TID = MI->getDesc(); - unsigned NumOps = TID.getNumOperands(); - unsigned JTOpIdx = NumOps - (TID.isPredicable() ? 3 : 2); + const MCInstrDesc &MCID = MI->getDesc(); + unsigned NumOps = MCID.getNumOperands(); + unsigned JTOpIdx = NumOps - (MCID.isPredicable() ? 
3 : 2); MachineOperand JTOP = MI->getOperand(JTOpIdx); unsigned JTI = JTOP.getIndex(); assert(JTI < JT.size()); @@ -1815,9 +1815,9 @@ bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) { const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) { MachineInstr *MI = T2JumpTables[i]; - const TargetInstrDesc &TID = MI->getDesc(); - unsigned NumOps = TID.getNumOperands(); - unsigned JTOpIdx = NumOps - (TID.isPredicable() ? 3 : 2); + const MCInstrDesc &MCID = MI->getDesc(); + unsigned NumOps = MCID.getNumOperands(); + unsigned JTOpIdx = NumOps - (MCID.isPredicable() ? 3 : 2); MachineOperand JTOP = MI->getOperand(JTOpIdx); unsigned JTI = JTOP.getIndex(); assert(JTI < JT.size()); diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index b6b3c75..53a5f7d 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -68,7 +68,7 @@ namespace { void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI) { - const TargetInstrDesc &Desc = OldMI.getDesc(); + const MCInstrDesc &Desc = OldMI.getDesc(); for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); i != e; ++i) { const MachineOperand &MO = OldMI.getOperand(i); @@ -856,10 +856,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MI.eraseFromParent(); return true; } + case ARM::tTPsoft: case ARM::TPsoft: { MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), - TII->get(ARM::BL)) + TII->get(Opcode == ARM::tTPsoft ? ARM::tBL : ARM::BL)) .addExternalSymbol("__aeabi_read_tp", 0); MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 5cf73c4..f469d7e 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -219,8 +219,8 @@ class ARMFastISel : public FastISel { // we don't care about implicit defs here, just places we'll need to add a // default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR. bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) { - const TargetInstrDesc &TID = MI->getDesc(); - if (!TID.hasOptionalDef()) + const MCInstrDesc &MCID = MI->getDesc(); + if (!MCID.hasOptionalDef()) return false; // Look to see if our OptionalDef is defining CPSR or CCR. @@ -234,15 +234,15 @@ bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) { } bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) { - const TargetInstrDesc &TID = MI->getDesc(); + const MCInstrDesc &MCID = MI->getDesc(); // If we're a thumb2 or not NEON function we were handled via isPredicable. 
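The two jump-table hunks in ARMConstantIslandPass above recompute the same operand position. The arithmetic relies on predicable instructions carrying a trailing (predicate, predicate-register) pair; isolated as a hypothetical helper:

    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/MC/MCInstrDesc.h"

    using namespace llvm;

    // The jump-table operand sits at a fixed distance from the end of the
    // operand list: two slots from the end normally, three when the two
    // predicate operands are appended (MCID.isPredicable()).
    static unsigned jumpTableOpIdx(const MachineInstr *MI) {
      const MCInstrDesc &MCID = MI->getDesc();
      unsigned NumOps = MCID.getNumOperands();
      return NumOps - (MCID.isPredicable() ? 3 : 2);
    }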
- if ((TID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON || + if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON || AFI->isThumb2Function()) return false; - for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) - if (TID.OpInfo[i].isPredicate()) + for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) + if (MCID.OpInfo[i].isPredicate()) return true; return false; @@ -278,7 +278,7 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) { unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode, const TargetRegisterClass* RC) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)); return ResultReg; @@ -288,7 +288,7 @@ unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -308,7 +308,7 @@ unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -331,7 +331,7 @@ unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode, unsigned Op1, bool Op1IsKill, unsigned Op2, bool Op2IsKill) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -355,7 +355,7 @@ unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode, unsigned Op0, bool Op0IsKill, uint64_t Imm) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -377,7 +377,7 @@ unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode, unsigned Op0, bool Op0IsKill, const ConstantFP *FPImm) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -400,7 +400,7 @@ unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode, unsigned Op1, bool Op1IsKill, uint64_t Imm) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -423,7 +423,7 @@ unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode, const TargetRegisterClass *RC, uint64_t Imm) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) 
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -442,7 +442,7 @@ unsigned ARMFastISel::FastEmitInst_ii(unsigned MachineInstOpcode, const TargetRegisterClass *RC, uint64_t Imm1, uint64_t Imm2) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -1549,7 +1549,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, NumBytes = CCInfo.getNextStackOffset(); // Issue CALLSEQ_START - unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode(); + unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackDown)) .addImm(NumBytes)); @@ -1647,7 +1647,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, const Instruction *I, CallingConv::ID CC, unsigned &NumBytes) { // Issue CALLSEQ_END - unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode(); + unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp)) .addImm(NumBytes).addImm(0)); diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 4ef2666..9e943e4 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -268,14 +268,14 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { // bic r4, r4, MaxAlign // mov sp, r4 // FIXME: It will be better just to find spare register here. - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R4) - .addReg(ARM::SP, RegState::Kill); + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4) + .addReg(ARM::SP, RegState::Kill)); AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2BICri), ARM::R4) .addReg(ARM::R4, RegState::Kill) .addImm(MaxAlign-1))); - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP) - .addReg(ARM::R4, RegState::Kill); + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) + .addReg(ARM::R4, RegState::Kill)); } AFI->setShouldRestoreSPFromFP(true); @@ -293,9 +293,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { .addReg(ARM::SP) .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); else - BuildMI(MBB, MBBI, dl, - TII.get(ARM::tMOVgpr2gpr), RegInfo->getBaseRegister()) - .addReg(ARM::SP); + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), + RegInfo->getBaseRegister()) + .addReg(ARM::SP)); } // If the frame has variable sized objects then the epilogue must restore @@ -364,8 +364,9 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, "No scratch register to restore SP from FP!"); emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, ARMCC::AL, 0, TII); - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP) - .addReg(ARM::R4); + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), + ARM::SP) + .addReg(ARM::R4)); } } else { // Thumb2 or ARM. 
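These frame-lowering hunks all make the same adjustment: tMOVr replaces the old tMOVgpr2tgpr/tMOVtgpr2gpr/tMOVgpr2gpr trio and is now predicable, so each BuildMI gains an AddDefaultPred wrapper. What that wrapper appends is visible inline in the neighbouring ARM::MOVr call; a reduced sketch of the idiom, with the ARMCC::AL value passed in so the fragment stays self-contained:

    #include "llvm/CodeGen/MachineInstrBuilder.h"

    using namespace llvm;

    // Equivalent of AddDefaultPred(...) from the ARM backend: append the
    // "always" condition code plus a null condition-code register, so the
    // newly predicable tMOVr has its full operand list.
    static const MachineInstrBuilder &
    addAlwaysPredicate(const MachineInstrBuilder &MIB,
                       int64_t AlwaysCC /* ARMCC::AL */) {
      return MIB.addImm(AlwaysCC).addReg(0);
    }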
@@ -373,8 +374,9 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP) .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); else - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP) - .addReg(FramePtr); + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), + ARM::SP) + .addReg(FramePtr)); } } else if (NumBytes) emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); diff --git a/lib/Target/ARM/ARMGlobalMerge.cpp b/lib/Target/ARM/ARMGlobalMerge.cpp index 3f02383..4bdd4f1 100644 --- a/lib/Target/ARM/ARMGlobalMerge.cpp +++ b/lib/Target/ARM/ARMGlobalMerge.cpp @@ -175,7 +175,9 @@ bool ARMGlobalMerge::doInitialization(Module &M) { continue; // Ignore fancy-aligned globals for now. - if (I->getAlignment() != 0) + unsigned Alignment = I->getAlignment(); + unsigned AllocSize = TD->getTypeAllocSize(I->getType()->getElementType()); + if (Alignment > AllocSize) continue; // Ignore all 'special' globals. @@ -183,7 +185,7 @@ bool ARMGlobalMerge::doInitialization(Module &M) { I->getName().startswith(".llvm.")) continue; - if (TD->getTypeAllocSize(I->getType()->getElementType()) < MaxOffset) { + if (AllocSize < MaxOffset) { const TargetLoweringObjectFile &TLOF = TLI->getObjFileLowering(); if (TLOF.getKindForGlobal(I, TLI->getTargetMachine()).isBSSLocal()) BSSGlobals.push_back(I); diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp index 517bba8..787f6a2 100644 --- a/lib/Target/ARM/ARMHazardRecognizer.cpp +++ b/lib/Target/ARM/ARMHazardRecognizer.cpp @@ -19,11 +19,11 @@ using namespace llvm; static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI, const TargetRegisterInfo &TRI) { // FIXME: Detect integer instructions properly. - const TargetInstrDesc &TID = MI->getDesc(); - unsigned Domain = TID.TSFlags & ARMII::DomainMask; - if (TID.mayStore()) + const MCInstrDesc &MCID = MI->getDesc(); + unsigned Domain = MCID.TSFlags & ARMII::DomainMask; + if (MCID.mayStore()) return false; - unsigned Opcode = TID.getOpcode(); + unsigned Opcode = MCID.getOpcode(); if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) return false; if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON)) @@ -43,15 +43,15 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { // Look for special VMLA / VMLS hazards. A VMUL / VADD / VSUB following // a VMLA / VMLS will cause 4 cycle stall. - const TargetInstrDesc &TID = MI->getDesc(); - if (LastMI && (TID.TSFlags & ARMII::DomainMask) != ARMII::DomainGeneral) { + const MCInstrDesc &MCID = MI->getDesc(); + if (LastMI && (MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainGeneral) { MachineInstr *DefMI = LastMI; - const TargetInstrDesc &LastTID = LastMI->getDesc(); + const MCInstrDesc &LastMCID = LastMI->getDesc(); // Skip over one non-VFP / NEON instruction. - if (!LastTID.isBarrier() && + if (!LastMCID.isBarrier() && // On A9, AGU and NEON/FPU are muxed. 
- !(STI.isCortexA9() && (LastTID.mayLoad() || LastTID.mayStore())) && - (LastTID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) { + !(STI.isCortexA9() && (LastMCID.mayLoad() || LastMCID.mayStore())) && + (LastMCID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) { MachineBasicBlock::iterator I = LastMI; if (I != LastMI->getParent()->begin()) { I = llvm::prior(I); diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 6f57a04..2c9481b 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -329,10 +329,10 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { if (Use->getOpcode() == ISD::CopyToReg) return true; if (Use->isMachineOpcode()) { - const TargetInstrDesc &TID = TII->get(Use->getMachineOpcode()); - if (TID.mayStore()) + const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode()); + if (MCID.mayStore()) return true; - unsigned Opcode = TID.getOpcode(); + unsigned Opcode = MCID.getOpcode(); if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) return true; // vmlx feeding into another vmlx. We actually want to unfold diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 4ae4af1..fb738cd 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -506,6 +506,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setTargetDAGCombine(ISD::VECTOR_SHUFFLE); setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); setTargetDAGCombine(ISD::STORE); + setTargetDAGCombine(ISD::FP_TO_SINT); + setTargetDAGCombine(ISD::FP_TO_UINT); + setTargetDAGCombine(ISD::FDIV); } computeRegisterProperties(); @@ -974,12 +977,12 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { // Load are scheduled for latency even if there instruction itinerary // is not available. const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); - if (TID.getNumDefs() == 0) + if (MCID.getNumDefs() == 0) return Sched::RegPressure; if (!Itins->isEmpty() && - Itins->getOperandCycle(TID.getSchedClass(), 0) > 2) + Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2) return Sched::Latency; return Sched::RegPressure; @@ -5523,7 +5526,7 @@ SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, return SDValue(); } -// AddCombineToVPADDL- For pair-wise add on neon, use the vpaddl instruction +// AddCombineToVPADDL- For pair-wise add on neon, use the vpaddl instruction // (only after legalization). static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, @@ -5554,25 +5557,25 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, SDNode *V = Vec.getNode(); unsigned nextIndex = 0; - // For each operands to the ADD which are BUILD_VECTORs, + // For each operands to the ADD which are BUILD_VECTORs, // check to see if each of their operands are an EXTRACT_VECTOR with // the same vector and appropriate index. for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) { if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) { - + SDValue ExtVec0 = N0->getOperand(i); SDValue ExtVec1 = N1->getOperand(i); - + // First operand is the vector, verify its the same. 
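The BUILD_VECTOR loop above encodes the pairing invariant behind vpaddl: element i of the first BUILD_VECTOR must read lane 2i of the source vector and element i of the second must read lane 2i+1 (the nextIndex and nextIndex+1 checks). A scalar model of the pattern being matched, for illustration only:

    #include <cassert>
    #include <cstddef>

    // With N0 = <v[0], v[2], ...> (even lanes) and N1 = <v[1], v[3], ...>
    // (odd lanes), the element-wise add N0 + N1 is exactly the pairwise
    // add that a single vpaddl computes over v.
    static void pairwiseAdd(const int *V, size_t N, int *Out) {
      assert(N % 2 == 0 && "expect an even element count");
      for (size_t I = 0; I != N / 2; ++I)
        Out[I] = V[2 * I] + V[2 * I + 1];
    }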
if (V != ExtVec0->getOperand(0).getNode() || V != ExtVec1->getOperand(0).getNode()) return SDValue(); - + // Second is the constant, verify its correct. ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1)); ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1)); - + // For the constant, we want to see all the even or all the odd. if (!C0 || !C1 || C0->getZExtValue() != nextIndex || C1->getZExtValue() != nextIndex+1) @@ -5580,7 +5583,7 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, // Increment index. nextIndex+=2; - } else + } else return SDValue(); } @@ -5595,7 +5598,7 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, // Input is the vector. Ops.push_back(Vec); - + // Get widened type and narrowed type. MVT widenType; unsigned numElem = VT.getVectorNumElements(); @@ -5624,7 +5627,7 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget); if (Result.getNode()) return Result; - + // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) { SDValue Result = combineSelectAndUse(N, N0, N1, DCI); @@ -6479,7 +6482,105 @@ static SDValue PerformVDUPLANECombine(SDNode *N, return DCI.DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op); } -/// getVShiftImm - Check if this is a valid build_vector for the immediate +// isConstVecPow2 - Return true if each vector element is a power of 2, all +// elements are the same constant, C, and Log2(C) ranges from 1 to 32. +static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C) +{ + integerPart cN; + integerPart c0 = 0; + for (unsigned I = 0, E = ConstVec.getValueType().getVectorNumElements(); + I != E; I++) { + ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(ConstVec.getOperand(I)); + if (!C) + return false; + + bool isExact; + APFloat APF = C->getValueAPF(); + if (APF.convertToInteger(&cN, 64, isSigned, APFloat::rmTowardZero, &isExact) + != APFloat::opOK || !isExact) + return false; + + c0 = (I == 0) ? cN : c0; + if (!isPowerOf2_64(cN) || c0 != cN || Log2_64(c0) < 1 || Log2_64(c0) > 32) + return false; + } + C = c0; + return true; +} + +/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD) +/// can replace combinations of VMUL and VCVT (floating-point to integer) +/// when the VMUL has a constant operand that is a power of 2. +/// +/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>): +/// vmul.f32 d16, d17, d16 +/// vcvt.s32.f32 d16, d16 +/// becomes: +/// vcvt.s32.f32 d16, d16, #3 +static SDValue PerformVCVTCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + SelectionDAG &DAG = DCI.DAG; + SDValue Op = N->getOperand(0); + + if (!Subtarget->hasNEON() || !Op.getValueType().isVector() || + Op.getOpcode() != ISD::FMUL) + return SDValue(); + + uint64_t C; + SDValue N0 = Op->getOperand(0); + SDValue ConstVec = Op->getOperand(1); + bool isSigned = N->getOpcode() == ISD::FP_TO_SINT; + + if (ConstVec.getOpcode() != ISD::BUILD_VECTOR || + !isConstVecPow2(ConstVec, isSigned, C)) + return SDValue(); + + unsigned IntrinsicOpcode = isSigned ? 
Intrinsic::arm_neon_vcvtfp2fxs : + Intrinsic::arm_neon_vcvtfp2fxu; + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), + N->getValueType(0), + DAG.getConstant(IntrinsicOpcode, MVT::i32), N0, + DAG.getConstant(Log2_64(C), MVT::i32)); +} + +/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD) +/// can replace combinations of VCVT (integer to floating-point) and VDIV +/// when the VDIV has a constant operand that is a power of 2. +/// +/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>): +/// vcvt.f32.s32 d16, d16 +/// vdiv.f32 d16, d17, d16 +/// becomes: +/// vcvt.f32.s32 d16, d16, #3 +static SDValue PerformVDIVCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + SelectionDAG &DAG = DCI.DAG; + SDValue Op = N->getOperand(0); + unsigned OpOpcode = Op.getNode()->getOpcode(); + + if (!Subtarget->hasNEON() || !N->getValueType(0).isVector() || + (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP)) + return SDValue(); + + uint64_t C; + SDValue ConstVec = N->getOperand(1); + bool isSigned = OpOpcode == ISD::SINT_TO_FP; + + if (ConstVec.getOpcode() != ISD::BUILD_VECTOR || + !isConstVecPow2(ConstVec, isSigned, C)) + return SDValue(); + + unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp : + Intrinsic::arm_neon_vcvtfxu2fp; + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), + Op.getValueType(), + DAG.getConstant(IntrinsicOpcode, MVT::i32), + Op.getOperand(0), DAG.getConstant(Log2_64(C), MVT::i32)); +} + +/// getVShiftImm - Check if this is a valid build_vector for the immediate /// operand of a vector shift operation, where all the elements of the /// build_vector must have the same constant integer value. static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { @@ -6868,6 +6969,9 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI); case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG); case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI); + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: return PerformVCVTCombine(N, DCI, Subtarget); + case ISD::FDIV: return PerformVDIVCombine(N, DCI, Subtarget); case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); case ISD::SHL: case ISD::SRA: @@ -7378,6 +7482,10 @@ ARMTargetLowering::getConstraintType(const std::string &Constraint) const { default: break; case 'l': return C_RegisterClass; case 'w': return C_RegisterClass; + case 'h': return C_RegisterClass; + case 'x': return C_RegisterClass; + case 't': return C_RegisterClass; + case 'j': return C_Other; // Constant for movw. } } else if (Constraint.size() == 2) { switch (Constraint[0]) { @@ -7423,26 +7531,43 @@ ARMTargetLowering::getSingleConstraintMatchWeight( return weight; } -std::pair<unsigned, const TargetRegisterClass*> +typedef std::pair<unsigned, const TargetRegisterClass*> RCPair; +RCPair ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { if (Constraint.size() == 1) { // GCC ARM Constraint Letters switch (Constraint[0]) { - case 'l': + case 'l': // Low regs or general regs. if (Subtarget->isThumb()) - return std::make_pair(0U, ARM::tGPRRegisterClass); + return RCPair(0U, ARM::tGPRRegisterClass); else - return std::make_pair(0U, ARM::GPRRegisterClass); + return RCPair(0U, ARM::GPRRegisterClass); + case 'h': // High regs or no regs.
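PerformVCVTCombine and PerformVDIVCombine both rest on the same fixed-point identity: with C = 2^n, (int)(x * C) is x converted to fixed point with n fraction bits, and (float)i / C is the inverse, which is what VCVT's #n immediate computes in a single instruction. A standalone scalar check of the identity (not from the patch):

    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    // Forward direction (PerformVCVTCombine): vmul by 2^N then fp-to-int
    // equals vcvt.s32.f32 with fixed-point immediate #N.
    static int32_t toFixed(float X, int N) {
      return (int32_t)(X * std::ldexp(1.0f, N)); // X * 2^N
    }

    // Inverse direction (PerformVDIVCombine): int-to-fp then vdiv by 2^N
    // equals vcvt.f32.s32 with #N.
    static float fromFixed(int32_t I, int N) {
      return (float)I / std::ldexp(1.0f, N);
    }

    int main() {
      std::printf("%d\n", toFixed(8.5f, 3)); // 68, matching the #3 example
      std::printf("%g\n", fromFixed(68, 3)); // 8.5
      return 0;
    }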
+ if (Subtarget->isThumb()) + return RCPair(0U, ARM::hGPRRegisterClass); + break; case 'r': - return std::make_pair(0U, ARM::GPRRegisterClass); + return RCPair(0U, ARM::GPRRegisterClass); case 'w': if (VT == MVT::f32) - return std::make_pair(0U, ARM::SPRRegisterClass); + return RCPair(0U, ARM::SPRRegisterClass); if (VT.getSizeInBits() == 64) - return std::make_pair(0U, ARM::DPRRegisterClass); + return RCPair(0U, ARM::DPRRegisterClass); if (VT.getSizeInBits() == 128) - return std::make_pair(0U, ARM::QPRRegisterClass); + return RCPair(0U, ARM::QPRRegisterClass); + break; + case 'x': + if (VT == MVT::f32) + return RCPair(0U, ARM::SPR_8RegisterClass); + if (VT.getSizeInBits() == 64) + return RCPair(0U, ARM::DPR_8RegisterClass); + if (VT.getSizeInBits() == 128) + return RCPair(0U, ARM::QPR_8RegisterClass); + break; + case 't': + if (VT == MVT::f32) + return RCPair(0U, ARM::SPRRegisterClass); break; } } @@ -7452,47 +7577,6 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); } -std::vector<unsigned> ARMTargetLowering:: -getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const { - if (Constraint.size() != 1) - return std::vector<unsigned>(); - - switch (Constraint[0]) { // GCC ARM Constraint Letters - default: break; - case 'l': - return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3, - ARM::R4, ARM::R5, ARM::R6, ARM::R7, - 0); - case 'r': - return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3, - ARM::R4, ARM::R5, ARM::R6, ARM::R7, - ARM::R8, ARM::R9, ARM::R10, ARM::R11, - ARM::R12, ARM::LR, 0); - case 'w': - if (VT == MVT::f32) - return make_vector<unsigned>(ARM::S0, ARM::S1, ARM::S2, ARM::S3, - ARM::S4, ARM::S5, ARM::S6, ARM::S7, - ARM::S8, ARM::S9, ARM::S10, ARM::S11, - ARM::S12,ARM::S13,ARM::S14,ARM::S15, - ARM::S16,ARM::S17,ARM::S18,ARM::S19, - ARM::S20,ARM::S21,ARM::S22,ARM::S23, - ARM::S24,ARM::S25,ARM::S26,ARM::S27, - ARM::S28,ARM::S29,ARM::S30,ARM::S31, 0); - if (VT.getSizeInBits() == 64) - return make_vector<unsigned>(ARM::D0, ARM::D1, ARM::D2, ARM::D3, - ARM::D4, ARM::D5, ARM::D6, ARM::D7, - ARM::D8, ARM::D9, ARM::D10,ARM::D11, - ARM::D12,ARM::D13,ARM::D14,ARM::D15, 0); - if (VT.getSizeInBits() == 128) - return make_vector<unsigned>(ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3, - ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7, 0); - break; - } - - return std::vector<unsigned>(); -} - /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, @@ -7507,6 +7591,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, char ConstraintLetter = Constraint[0]; switch (ConstraintLetter) { default: break; + case 'j': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); @@ -7521,6 +7606,13 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, return; switch (ConstraintLetter) { + case 'j': + // Constant suitable for movw, must be between 0 and + // 65535. 
+ if (Subtarget->hasV6T2Ops()) + if (CVal >= 0 && CVal <= 65535) + break; + return; case 'I': if (Subtarget->isThumb1Only()) { // This must be a constant between 0 and 255, for ADD diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 21a9a3a..dd9df0e 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -306,9 +306,6 @@ namespace llvm { std::pair<unsigned, const TargetRegisterClass*> getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; - std::vector<unsigned> - getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const; /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. If hasMemory is diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index 6f48d96..adcbf18 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -14,7 +14,6 @@ #include "ARMInstrInfo.h" #include "ARM.h" #include "ARMAddressingModes.h" -#include "ARMGenInstrInfo.inc" #include "ARMMachineFunctionInfo.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/LiveVariables.h" diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 5c013de..cdb1fe0 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -676,7 +676,7 @@ include "ARMInstrFormats.td" /// binop that produces a value. multiclass AsI1_bin_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, bit Commutable = 0> { + PatFrag opnode, string baseOpc, bit Commutable = 0> { // The register-immediate version is re-materializable. This is useful // in particular for taking the address of a local. let isReMaterializable = 1 in { @@ -716,6 +716,24 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc, let Inst{15-12} = Rd; let Inst{11-0} = shift; } + + // Assembly aliases for optional destination operand when it's the same + // as the source operand. + def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPR:$Rdn, GPR:$Rdn, + so_imm:$imm, pred:$p, + cc_out:$s)>, + Requires<[IsARM]>; + def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $Rm"), + (!cast<Instruction>(!strconcat(baseOpc, "rr")) GPR:$Rdn, GPR:$Rdn, + GPR:$Rm, pred:$p, + cc_out:$s)>, + Requires<[IsARM]>; + def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"), + (!cast<Instruction>(!strconcat(baseOpc, "rs")) GPR:$Rdn, GPR:$Rdn, + so_reg:$shift, pred:$p, + cc_out:$s)>, + Requires<[IsARM]>; } /// AI1_bin_s_irs - Similar to AsI1_bin_irs except it sets the 's' bit so the @@ -1988,6 +2006,8 @@ defm STM : arm_ldst_mult<"stm", 0, LdStMulFrm, IIC_iStore_m, IIC_iStore_mu>; } // neverHasSideEffects // Load / Store Multiple Mnemonic Aliases +def : MnemonicAlias<"ldmfd", "ldmia">; +def : MnemonicAlias<"stmfd", "stmdb">; def : MnemonicAlias<"ldm", "ldmia">; def : MnemonicAlias<"stm", "stmia">; @@ -2205,10 +2225,10 @@ def UBFX : I<(outs GPR:$Rd), defm ADD : AsI1_bin_irs<0b0100, "add", IIC_iALUi, IIC_iALUr, IIC_iALUsr, - BinOpFrag<(add node:$LHS, node:$RHS)>, 1>; + BinOpFrag<(add node:$LHS, node:$RHS)>, "ADD", 1>; defm SUB : AsI1_bin_irs<0b0010, "sub", IIC_iALUi, IIC_iALUr, IIC_iALUsr, - BinOpFrag<(sub node:$LHS, node:$RHS)>>; + BinOpFrag<(sub node:$LHS, node:$RHS)>, "SUB">; // ADD and SUB with 's' bit set. 
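The new baseOpc parameter threaded through AsI1_bin_irs exists only so each multiclass instance can emit InstAlias records for the two-operand assembly form, where the destination doubles as the first source. As assembler input (illustrative register choices), each pair below now matches the same instruction:

    @ alias form               @ canonical form it expands to
    add  r1, #4                @ add  r1, r1, #4           (ADDri)
    sub  r2, r3                @ sub  r2, r2, r3           (SUBrr)
    and  r4, r5, lsl #2        @ and  r4, r4, r5, lsl #2   (ANDrs)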
defm ADDS : AI1_bin_s_irs<0b0100, "adds", @@ -2531,16 +2551,16 @@ def : ARMV6Pat<(int_arm_usat GPR:$a, imm:$pos), (USAT imm:$pos, GPR:$a, 0)>; defm AND : AsI1_bin_irs<0b0000, "and", IIC_iBITi, IIC_iBITr, IIC_iBITsr, - BinOpFrag<(and node:$LHS, node:$RHS)>, 1>; + BinOpFrag<(and node:$LHS, node:$RHS)>, "AND", 1>; defm ORR : AsI1_bin_irs<0b1100, "orr", IIC_iBITi, IIC_iBITr, IIC_iBITsr, - BinOpFrag<(or node:$LHS, node:$RHS)>, 1>; + BinOpFrag<(or node:$LHS, node:$RHS)>, "ORR", 1>; defm EOR : AsI1_bin_irs<0b0001, "eor", IIC_iBITi, IIC_iBITr, IIC_iBITsr, - BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>; + BinOpFrag<(xor node:$LHS, node:$RHS)>, "EOR", 1>; defm BIC : AsI1_bin_irs<0b1110, "bic", IIC_iBITi, IIC_iBITr, IIC_iBITsr, - BinOpFrag<(and node:$LHS, (not node:$RHS))>>; + BinOpFrag<(and node:$LHS, (not node:$RHS))>, "BIC">; def BFC : I<(outs GPR:$Rd), (ins GPR:$src, bf_inv_mask_imm:$imm), AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi, diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 44fbc02..0b14976 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -34,9 +34,10 @@ def imm0_7_neg : PatLeaf<(i32 imm), [{ return (uint32_t)-N->getZExtValue() < 8; }], imm_neg_XFORM>; -def imm0_255 : ImmLeaf<i32, [{ - return Imm >= 0 && Imm < 256; -}]>; +def imm0_255_asmoperand : AsmOperandClass { let Name = "Imm0_255"; } +def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> { + let ParserMatchClass = imm0_255_asmoperand; +} def imm0_255_comp : PatLeaf<(i32 imm), [{ return ~((uint32_t)N->getZExtValue()) < 256; }]>; @@ -407,15 +408,8 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { // FIXME: remove when we have a way to marking a MI with these properties. let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1, hasExtraDefRegAllocReq = 1 in -def tPOP_RET : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops), - IIC_iPop_Br, - "pop${p}\t$regs", []>, - T1Misc<{1,1,0,?,?,?,?}> { - // A8.6.121 - bits<16> regs; - let Inst{8} = regs{15}; // registers = P:'0000000':register_list - let Inst{7-0} = regs{7-0}; -} +def tPOP_RET : tPseudoInst<(outs), (ins pred:$p, reglist:$regs, variable_ops), + Size2Bytes, IIC_iPop_Br, []>; // All calls clobber the non-callee saved registers. SP is marked as a use to // prevent stack-pointer assignments that appear immediately before calls from @@ -685,19 +679,6 @@ def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i, let Inst{7-0} = addr; } -// Special instruction for restore. It cannot clobber condition register -// when it's expanded by eliminateCallFramePseudoInstr(). -let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1 in -// FIXME: Pseudo for tLDRspi -def tRestore : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoad_i, - "ldr", "\t$dst, $addr", []>, - T1LdStSP<{1,?,?}> { - bits<3> Rt; - bits<8> addr; - let Inst{10-8} = Rt; - let Inst{7-0} = addr; -} - // Load tconstpool // FIXME: Use ldr.n to work around a Darwin assembler bug. let canFoldAsLoad = 1, isReMaterializable = 1 in @@ -754,19 +735,6 @@ def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i, let Inst{7-0} = addr; } -let mayStore = 1, neverHasSideEffects = 1 in -// Special instruction for spill. It cannot clobber condition register when it's -// expanded by eliminateCallFramePseudoInstr(). 
-// FIXME: Pseudo for tSTRspi -def tSpill : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStore_i, - "str", "\t$src, $addr", []>, - T1LdStSP<{0,?,?}> { - bits<3> Rt; - bits<8> addr; - let Inst{10-8} = Rt; - let Inst{7-0} = addr; -} - //===----------------------------------------------------------------------===// // Load / store multiple Instructions. // @@ -1072,7 +1040,7 @@ def tLSRrr : // A8.6.91 // Move register let isMoveImm = 1 in -def tMOVi8 : T1sI<(outs tGPR:$Rd), (ins i32imm:$imm8), IIC_iMOVi, +def tMOVi8 : T1sI<(outs tGPR:$Rd), (ins imm0_255:$imm8), IIC_iMOVi, "mov", "\t$Rd, $imm8", [(set tGPR:$Rd, imm0_255:$imm8)]>, T1General<{1,0,0,?,?}> { @@ -1086,15 +1054,15 @@ def tMOVi8 : T1sI<(outs tGPR:$Rd), (ins i32imm:$imm8), IIC_iMOVi, // TODO: A7-73: MOV(2) - mov setting flag. let neverHasSideEffects = 1 in { -// FIXME: Make this predicable. -def tMOVr : T1I<(outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr, - "mov\t$Rd, $Rm", []>, - T1Special<0b1000> { +def tMOVr : Thumb1pI<(outs GPR:$Rd), (ins GPR:$Rm), AddrModeNone, + Size2Bytes, IIC_iMOVr, + "mov", "\t$Rd, $Rm", "", []>, + T1Special<{1,0,?,?}> { // A8.6.97 bits<4> Rd; bits<4> Rm; - // Bits {7-6} are encoded by the T1Special value. - let Inst{5-3} = Rm{2-0}; + let Inst{7} = Rd{3}; + let Inst{6-3} = Rm; let Inst{2-0} = Rd{2-0}; } let Defs = [CPSR] in @@ -1107,39 +1075,6 @@ def tMOVSr : T1I<(outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr, let Inst{5-3} = Rm; let Inst{2-0} = Rd; } - -// FIXME: Make these predicable. -def tMOVgpr2tgpr : T1I<(outs tGPR:$Rd), (ins GPR:$Rm), IIC_iMOVr, - "mov\t$Rd, $Rm", []>, - T1Special<{1,0,0,?}> { - // A8.6.97 - bits<4> Rd; - bits<4> Rm; - // Bit {7} is encoded by the T1Special value. - let Inst{6-3} = Rm; - let Inst{2-0} = Rd{2-0}; -} -def tMOVtgpr2gpr : T1I<(outs GPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr, - "mov\t$Rd, $Rm", []>, - T1Special<{1,0,?,0}> { - // A8.6.97 - bits<4> Rd; - bits<4> Rm; - // Bit {6} is encoded by the T1Special value. - let Inst{7} = Rd{3}; - let Inst{5-3} = Rm{2-0}; - let Inst{2-0} = Rd{2-0}; -} -def tMOVgpr2gpr : T1I<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVr, - "mov\t$Rd, $Rm", []>, - T1Special<{1,0,?,?}> { - // A8.6.97 - bits<4> Rd; - bits<4> Rm; - let Inst{7} = Rd{3}; - let Inst{6-3} = Rm; - let Inst{2-0} = Rd{2-0}; -} } // neverHasSideEffects // Multiply register @@ -1424,13 +1359,11 @@ def tCDP : T1Cop<(outs), (ins p_imm:$cop, i32imm:$opc1, // // __aeabi_read_tp preserves the registers r1-r3. -let isCall = 1, Defs = [R0, LR], Uses = [SP] in -def tTPsoft : TIx2<0b11110, 0b11, 1, (outs), (ins), IIC_Br, - "bl\t__aeabi_read_tp", - [(set R0, ARMthread_pointer)]> { - // Encoding is 0xf7fffffe. - let Inst = 0xf7fffffe; -} +// This is a pseudo inst so that we can get the encoding right, +// complete with fixup for the aeabi_read_tp function. +let isCall = 1, Defs = [R0, R12, LR, CPSR], Uses = [SP] in +def tTPsoft : tPseudoInst<(outs), (ins), Size4Bytes, IIC_Br, + [(set R0, ARMthread_pointer)]>; //===----------------------------------------------------------------------===// // SJLJ Exception handling intrinsics diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 090670b..d49b282 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -44,9 +44,11 @@ def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{ // t2_so_imm - Match a 32-bit immediate operand, which is an // 8-bit immediate rotated by an arbitrary number of bits, or an 8-bit // immediate splatted into multiple bytes of the word. 
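The t2_so_imm predicate above defers to ARM_AM::getT2SOImmVal, which accepts the Thumb2 modified immediates: a plain 8-bit value, one of the three byte-splat patterns, or an 8-bit value with its top bit set rotated right by 8 to 31. A rough standalone model of the accept/reject decision follows; it is reconstructed from the architecture description, not from the patch, and omits the 12-bit encoding the real routine also returns:

    #include <cstdint>

    // Approximate model of the Thumb2 modified-immediate test.
    static bool isT2SOImm(uint32_t V) {
      uint32_t B = V & 0xff;
      if (V == B) return true;                                  // 0x000000XY
      if (V == ((B << 16) | B)) return true;                    // 0x00XY00XY
      if (V == ((B << 24) | (B << 8))) return true;             // 0xXY00XY00
      if (V == ((B << 24) | (B << 16) | (B << 8) | B))
        return true;                                            // 0xXYXYXYXY
      // An 8-bit value with bit 7 set, rotated right by 8..31.
      for (unsigned Rot = 8; Rot < 32; ++Rot) {
        uint32_t Unrotated = (V << Rot) | (V >> (32 - Rot));    // undo the ror
        if (Unrotated <= 0xffu && (Unrotated & 0x80u))
          return true;
      }
      return false;
    }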
+def t2_so_imm_asmoperand : AsmOperandClass { let Name = "T2SOImm"; } def t2_so_imm : Operand<i32>, ImmLeaf<i32, [{ return ARM_AM::getT2SOImmVal(Imm) != -1; }]> { + let ParserMatchClass = t2_so_imm_asmoperand; let EncoderMethod = "getT2SOImmOpValue"; } @@ -463,7 +465,8 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, /// changed to modify CPSR. multiclass T2I_bin_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, bit Commutable = 0, string wide = ""> { + PatFrag opnode, string baseOpc, bit Commutable = 0, + string wide = ""> { // shifted imm def ri : T2sTwoRegImm< (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), iii, @@ -495,14 +498,31 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc, let Inst{26-25} = 0b01; let Inst{24-21} = opcod; } + // Assembly aliases for optional destination operand when it's the same + // as the source operand. + def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn, + t2_so_imm:$imm, pred:$p, + cc_out:$s)>, + Requires<[IsThumb2]>; + def : InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $Rm"), + (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn, + rGPR:$Rm, pred:$p, + cc_out:$s)>, + Requires<[IsThumb2]>; + def : InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $shift"), + (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rdn, rGPR:$Rdn, + t2_so_reg:$shift, pred:$p, + cc_out:$s)>, + Requires<[IsThumb2]>; } /// T2I_bin_w_irs - Same as T2I_bin_irs except these operations need -// the ".w" prefix to indicate that they are wide. +// the ".w" suffix to indicate that they are wide. multiclass T2I_bin_w_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, bit Commutable = 0> : - T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, Commutable, ".w">; + PatFrag opnode, string baseOpc, bit Commutable = 0> : + T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, baseOpc, Commutable, ".w">; /// T2I_rbin_is - Same as T2I_bin_irs except the order of operands are /// reversed. The 'rr' form is only defined for the disassembler; for codegen @@ -1149,63 +1169,6 @@ def t2LEApcrelJT : t2PseudoInst<(outs rGPR:$Rd), []>; -// FIXME: None of these add/sub SP special instructions should be necessary -// at all for thumb2 since they use the same encodings as the generic -// add/sub instructions. In thumb1 we need them since they have dedicated -// encodings. At the least, they should be pseudo instructions. 
-// ADD r, sp, {so_imm|i12} -let isCodeGenOnly = 1 in { -def t2ADDrSPi : T2sTwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm), - IIC_iALUi, "add", ".w\t$Rd, $Rn, $imm", []> { - let Inst{31-27} = 0b11110; - let Inst{25} = 0; - let Inst{24-21} = 0b1000; - let Inst{15} = 0; -} -def t2ADDrSPi12 : T2TwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, imm0_4095:$imm), - IIC_iALUi, "addw", "\t$Rd, $Rn, $imm", []> { - let Inst{31-27} = 0b11110; - let Inst{25-20} = 0b100000; - let Inst{15} = 0; -} - -// ADD r, sp, so_reg -def t2ADDrSPs : T2sTwoRegShiftedReg< - (outs GPR:$Rd), (ins GPR:$Rn, t2_so_reg:$ShiftedRm), - IIC_iALUsi, "add", ".w\t$Rd, $Rn, $ShiftedRm", []> { - let Inst{31-27} = 0b11101; - let Inst{26-25} = 0b01; - let Inst{24-21} = 0b1000; - let Inst{15} = 0; -} - -// SUB r, sp, {so_imm|i12} -def t2SUBrSPi : T2sTwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm), - IIC_iALUi, "sub", ".w\t$Rd, $Rn, $imm", []> { - let Inst{31-27} = 0b11110; - let Inst{25} = 0; - let Inst{24-21} = 0b1101; - let Inst{15} = 0; -} -def t2SUBrSPi12 : T2TwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, imm0_4095:$imm), - IIC_iALUi, "subw", "\t$Rd, $Rn, $imm", []> { - let Inst{31-27} = 0b11110; - let Inst{25-20} = 0b101010; - let Inst{15} = 0; -} - -// SUB r, sp, so_reg -def t2SUBrSPs : T2sTwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, t2_so_reg:$imm), - IIC_iALUsi, - "sub", "\t$Rd, $Rn, $imm", []> { - let Inst{31-27} = 0b11101; - let Inst{26-25} = 0b01; - let Inst{24-21} = 0b1101; - let Inst{19-16} = 0b1101; // Rn = sp - let Inst{15} = 0; -} -} // end isCodeGenOnly = 1 - //===----------------------------------------------------------------------===// // Load / store Instructions. // @@ -1645,6 +1608,10 @@ def t2MOVi : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), IIC_iMOVi, let Inst{15} = 0; } +def : InstAlias<"mov${s}${p} $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm, + pred:$p, cc_out:$s)>, + Requires<[IsThumb2]>; + let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins i32imm_hilo16:$imm), IIC_iMOVi, "movw", "\t$Rd, $imm", @@ -2063,17 +2030,18 @@ def t2MOVsra_flag : T2TwoRegShiftImm< defm t2AND : T2I_bin_w_irs<0b0000, "and", IIC_iBITi, IIC_iBITr, IIC_iBITsi, - BinOpFrag<(and node:$LHS, node:$RHS)>, 1>; + BinOpFrag<(and node:$LHS, node:$RHS)>, "t2AND", 1>; defm t2ORR : T2I_bin_w_irs<0b0010, "orr", IIC_iBITi, IIC_iBITr, IIC_iBITsi, - BinOpFrag<(or node:$LHS, node:$RHS)>, 1>; + BinOpFrag<(or node:$LHS, node:$RHS)>, "t2ORR", 1>; defm t2EOR : T2I_bin_w_irs<0b0100, "eor", IIC_iBITi, IIC_iBITr, IIC_iBITsi, - BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>; + BinOpFrag<(xor node:$LHS, node:$RHS)>, "t2EOR", 1>; defm t2BIC : T2I_bin_w_irs<0b0001, "bic", IIC_iBITi, IIC_iBITr, IIC_iBITsi, - BinOpFrag<(and node:$LHS, (not node:$RHS))>>; + BinOpFrag<(and node:$LHS, (not node:$RHS))>, + "t2BIC">; class T2BitFI<dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> @@ -2173,7 +2141,8 @@ let Constraints = "$src = $Rd" in { defm t2ORN : T2I_bin_irs<0b0011, "orn", IIC_iBITi, IIC_iBITr, IIC_iBITsi, - BinOpFrag<(or node:$LHS, (not node:$RHS))>, 0, "">; + BinOpFrag<(or node:$LHS, (not node:$RHS))>, + "t2ORN", 0, "">; // Prefer over of t2EORri ra, rb, -1 because mvn has 16-bit version let AddedComplexity = 1 in @@ -2709,6 +2678,8 @@ def t2MOVCCr : T2TwoReg< let Inst{7-4} = 0b0000; } +// FIXME: Pseudo-ize these. For now, just mark codegen only. 
+let isCodeGenOnly = 1 in { let isMoveImm = 1 in def t2MOVCCi : T2OneRegImm<(outs rGPR:$Rd), (ins rGPR:$false, t2_so_imm:$imm), IIC_iCMOVi, "mov", ".w\t$Rd, $imm", @@ -2789,6 +2760,7 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd), IIC_iCMOVsi, "ror", ".w\t$Rd, $Rm, $imm", []>, RegConstraint<"$false = $Rd">; } // neverHasSideEffects +} // isCodeGenOnly = 1 //===----------------------------------------------------------------------===// // Atomic operations intrinsics @@ -2937,22 +2909,6 @@ def t2CLREX : T2XI<(outs), (ins), NoItinerary, "clrex", } //===----------------------------------------------------------------------===// -// TLS Instructions -// - -// __aeabi_read_tp preserves the registers r1-r3. -let isCall = 1, - Defs = [R0, R12, LR, CPSR], Uses = [SP] in { - def t2TPsoft : T2XI<(outs), (ins), IIC_Br, - "bl\t__aeabi_read_tp", - [(set R0, ARMthread_pointer)]> { - let Inst{31-27} = 0b11110; - let Inst{15-14} = 0b11; - let Inst{12} = 1; - } -} - -//===----------------------------------------------------------------------===// // SJLJ Exception handling intrinsics // eh_sjlj_setjmp() is an instruction sequence to store the return // address and save #0 in R0 for the non-longjmp case. @@ -2990,28 +2946,13 @@ let Defs = // // FIXME: remove when we have a way to marking a MI with these properties. -// FIXME: $dst1 should be a def. But the extra ops must be in the end of the -// operand list. // FIXME: Should pc be an implicit operand like PICADD, etc? let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1, hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in -def t2LDMIA_RET: T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, - reglist:$regs, variable_ops), - IIC_iLoad_mBr, - "ldmia${p}.w\t$Rn!, $regs", - "$Rn = $wb", []> { - bits<4> Rn; - bits<16> regs; - - let Inst{31-27} = 0b11101; - let Inst{26-25} = 0b00; - let Inst{24-23} = 0b01; // Increment After - let Inst{22} = 0; - let Inst{21} = 1; // Writeback - let Inst{20} = 1; - let Inst{19-16} = Rn; - let Inst{15-0} = regs; -} +def t2LDMIA_RET: t2PseudoInst<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, + reglist:$regs, variable_ops), + Size4Bytes, IIC_iLoad_mBr, []>, + RegConstraint<"$Rn = $wb">; let isBranch = 1, isTerminator = 1, isBarrier = 1 in { let isPredicable = 1 in diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index b4c3239..d2aaa97 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -166,6 +166,15 @@ defm VSTM : vfp_ldst_mult<"vstm", 0, IIC_fpLoad_m, IIC_fpLoad_mu>; def : MnemonicAlias<"vldm", "vldmia">; def : MnemonicAlias<"vstm", "vstmia">; +def : InstAlias<"vpush${p} $r", (VSTMDDB_UPD SP, pred:$p, dpr_reglist:$r)>, + Requires<[HasVFP2]>; +def : InstAlias<"vpush${p} $r", (VSTMSDB_UPD SP, pred:$p, spr_reglist:$r)>, + Requires<[HasVFP2]>; +def : InstAlias<"vpop${p} $r", (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r)>, + Requires<[HasVFP2]>; +def : InstAlias<"vpop${p} $r", (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r)>, + Requires<[HasVFP2]>; + // FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index f4645f1..c6efea1 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -329,13 +329,9 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, if (NewBase == 0) return false; } - int BaseOpc = !isThumb2 - ? ARM::ADDri - : ((Base == ARM::SP) ? 
ARM::t2ADDrSPi : ARM::t2ADDri); + int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri; if (Offset < 0) { - BaseOpc = !isThumb2 - ? ARM::SUBri - : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri); + BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri; Offset = - Offset; } int ImmedOffset = isThumb2 @@ -516,8 +512,6 @@ static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base, if (!MI) return false; if (MI->getOpcode() != ARM::t2SUBri && - MI->getOpcode() != ARM::t2SUBrSPi && - MI->getOpcode() != ARM::t2SUBrSPi12 && MI->getOpcode() != ARM::tSUBspi && MI->getOpcode() != ARM::SUBri) return false; @@ -541,8 +535,6 @@ static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base, if (!MI) return false; if (MI->getOpcode() != ARM::t2ADDri && - MI->getOpcode() != ARM::t2ADDrSPi && - MI->getOpcode() != ARM::t2ADDrSPi12 && MI->getOpcode() != ARM::tADDspi && MI->getOpcode() != ARM::ADDri) return false; @@ -1461,19 +1453,19 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, while (++I != E) { if (I->isDebugValue() || MemOps.count(&*I)) continue; - const TargetInstrDesc &TID = I->getDesc(); - if (TID.isCall() || TID.isTerminator() || I->hasUnmodeledSideEffects()) + const MCInstrDesc &MCID = I->getDesc(); + if (MCID.isCall() || MCID.isTerminator() || I->hasUnmodeledSideEffects()) return false; - if (isLd && TID.mayStore()) + if (isLd && MCID.mayStore()) return false; if (!isLd) { - if (TID.mayLoad()) + if (MCID.mayLoad()) return false; // It's not safe to move the first 'str' down. // str r1, [r0] // strh r5, [r0] // str r4, [r0, #+4] - if (TID.mayStore()) + if (MCID.mayStore()) return false; } for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) { @@ -1672,14 +1664,14 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, Ops.pop_back(); Ops.pop_back(); - const TargetInstrDesc &TID = TII->get(NewOpc); - const TargetRegisterClass *TRC = TID.OpInfo[0].getRegClass(TRI); + const MCInstrDesc &MCID = TII->get(NewOpc); + const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI); MRI->constrainRegClass(EvenReg, TRC); MRI->constrainRegClass(OddReg, TRC); // Form the pair instruction. if (isLd) { - MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, TID) + MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID) .addReg(EvenReg, RegState::Define) .addReg(OddReg, RegState::Define) .addReg(BaseReg); @@ -1691,7 +1683,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, MIB.addImm(Offset).addImm(Pred).addReg(PredReg); ++NumLDRDFormed; } else { - MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, TID) + MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID) .addReg(EvenReg) .addReg(OddReg) .addReg(BaseReg); @@ -1742,8 +1734,8 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { while (MBBI != E) { for (; MBBI != E; ++MBBI) { MachineInstr *MI = MBBI; - const TargetInstrDesc &TID = MI->getDesc(); - if (TID.isCall() || TID.isTerminator()) { + const MCInstrDesc &MCID = MI->getDesc(); + if (MCID.isCall() || MCID.isTerminator()) { // Stop at barriers. ++MBBI; break; diff --git a/lib/Target/ARM/ARMMCCodeEmitter.cpp b/lib/Target/ARM/ARMMCCodeEmitter.cpp index c5f727d..4fcba11 100644 --- a/lib/Target/ARM/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/ARMMCCodeEmitter.cpp @@ -1274,7 +1274,7 @@ void ARMMCCodeEmitter:: EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const { // Pseudo instructions don't get encoded. 
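The guard this comment introduces is the entire filter: the MC code emitter refuses anything whose form bits mark it as a pseudo, trusting expansion passes (like the TPsoft/tTPsoft one earlier in this diff) to have lowered them first. Distilled into a hypothetical standalone form, with ARMII::FormMask and ARMII::Pseudo passed in since they live in the ARM backend headers:

    #include "llvm/MC/MCInstrDesc.h"

    using namespace llvm;

    // Pseudos carry no machine encoding, so EncodeInstruction drops them.
    static bool isPseudoForm(const MCInstrDesc &Desc, uint64_t FormMask,
                             uint64_t PseudoForm) {
      return (Desc.TSFlags & FormMask) == PseudoForm;
    }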
- const TargetInstrDesc &Desc = TII.get(MI.getOpcode()); + const MCInstrDesc &Desc = TII.get(MI.getOpcode()); uint64_t TSFlags = Desc.TSFlags; if ((TSFlags & ARMII::FormMask) == ARMII::Pseudo) return; diff --git a/lib/Target/ARM/ARMMachObjectWriter.cpp b/lib/Target/ARM/ARMMachObjectWriter.cpp index 4c35d0b..a36e47d 100644 --- a/lib/Target/ARM/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/ARMMachObjectWriter.cpp @@ -8,19 +8,376 @@ //===----------------------------------------------------------------------===// #include "ARM.h" +#include "ARMFixupKinds.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCMachObjectWriter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Object/MachOFormat.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetAsmBackend.h" using namespace llvm; +using namespace llvm::object; namespace { class ARMMachObjectWriter : public MCMachObjectTargetWriter { + void RecordARMScatteredRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + unsigned Log2Size, + uint64_t &FixedValue); + void RecordARMMovwMovtRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, MCValue Target, + uint64_t &FixedValue); + public: ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype) : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype, /*UseAggressiveSymbolFolding=*/true) {} + + void RecordRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFragment *Fragment, const MCFixup &Fixup, + MCValue Target, uint64_t &FixedValue); }; } +static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType, + unsigned &Log2Size) { + RelocType = unsigned(macho::RIT_Vanilla); + Log2Size = ~0U; + + switch (Kind) { + default: + return false; + + case FK_Data_1: + Log2Size = llvm::Log2_32(1); + return true; + case FK_Data_2: + Log2Size = llvm::Log2_32(2); + return true; + case FK_Data_4: + Log2Size = llvm::Log2_32(4); + return true; + case FK_Data_8: + Log2Size = llvm::Log2_32(8); + return true; + + // Handle 24-bit branch kinds. + case ARM::fixup_arm_ldst_pcrel_12: + case ARM::fixup_arm_pcrel_10: + case ARM::fixup_arm_adr_pcrel_12: + case ARM::fixup_arm_condbranch: + case ARM::fixup_arm_uncondbranch: + RelocType = unsigned(macho::RIT_ARM_Branch24Bit); + // Report as 'long', even though that is not quite accurate. + Log2Size = llvm::Log2_32(4); + return true; + + // Handle Thumb branches. + case ARM::fixup_arm_thumb_br: + RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit); + Log2Size = llvm::Log2_32(2); + return true; + + case ARM::fixup_t2_uncondbranch: + case ARM::fixup_arm_thumb_bl: + case ARM::fixup_arm_thumb_blx: + RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit); + Log2Size = llvm::Log2_32(4); + return true; + + case ARM::fixup_arm_movt_hi16: + case ARM::fixup_arm_movt_hi16_pcrel: + case ARM::fixup_t2_movt_hi16: + case ARM::fixup_t2_movt_hi16_pcrel: + RelocType = unsigned(macho::RIT_ARM_HalfDifference); + // Report as 'long', even though that is not quite accurate. 
+ Log2Size = llvm::Log2_32(4); + return true; + + case ARM::fixup_arm_movw_lo16: + case ARM::fixup_arm_movw_lo16_pcrel: + case ARM::fixup_t2_movw_lo16: + case ARM::fixup_t2_movw_lo16_pcrel: + RelocType = unsigned(macho::RIT_ARM_Half); + // Report as 'long', even though that is not quite accurate. + Log2Size = llvm::Log2_32(4); + return true; + } +} + +void ARMMachObjectWriter:: +RecordARMMovwMovtRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + uint64_t &FixedValue) { + uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); + unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); + unsigned Type = macho::RIT_ARM_Half; + + // See <reloc.h>. + const MCSymbol *A = &Target.getSymA()->getSymbol(); + MCSymbolData *A_SD = &Asm.getSymbolData(*A); + + if (!A_SD->getFragment()) + report_fatal_error("symbol '" + A->getName() + + "' can not be undefined in a subtraction expression"); + + uint32_t Value = Writer->getSymbolAddress(A_SD, Layout); + uint32_t Value2 = 0; + uint64_t SecAddr = + Writer->getSectionAddress(A_SD->getFragment()->getParent()); + FixedValue += SecAddr; + + if (const MCSymbolRefExpr *B = Target.getSymB()) { + MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); + + if (!B_SD->getFragment()) + report_fatal_error("symbol '" + B->getSymbol().getName() + + "' can not be undefined in a subtraction expression"); + + // Select the appropriate difference relocation type. + Type = macho::RIT_ARM_HalfDifference; + Value2 = Writer->getSymbolAddress(B_SD, Layout); + FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent()); + } + + // Relocations are written out in reverse order, so the PAIR comes first. + // ARM_RELOC_HALF and ARM_RELOC_HALF_SECTDIFF abuse the r_length field: + // + // For these two r_type relocations they always have a pair following them and + // the r_length bits are used differently. The encoding of the r_length is as + // follows: + // low bit of r_length: + // 0 - :lower16: for movw instructions + // 1 - :upper16: for movt instructions + // high bit of r_length: + // 0 - arm instructions + // 1 - thumb instructions + // the other half of the relocated expression is in the following pair + // relocation entry in the the low 16 bits of r_address field. + unsigned ThumbBit = 0; + unsigned MovtBit = 0; + switch ((unsigned)Fixup.getKind()) { + default: break; + case ARM::fixup_arm_movt_hi16: + case ARM::fixup_arm_movt_hi16_pcrel: + MovtBit = 1; + break; + case ARM::fixup_t2_movt_hi16: + case ARM::fixup_t2_movt_hi16_pcrel: + MovtBit = 1; + // Fallthrough + case ARM::fixup_t2_movw_lo16: + case ARM::fixup_t2_movw_lo16_pcrel: + ThumbBit = 1; + break; + } + + + if (Type == macho::RIT_ARM_HalfDifference) { + uint32_t OtherHalf = MovtBit + ? 
(FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16); + + macho::RelocationEntry MRE; + MRE.Word0 = ((OtherHalf << 0) | + (macho::RIT_Pair << 24) | + (MovtBit << 28) | + (ThumbBit << 29) | + (IsPCRel << 30) | + macho::RF_Scattered); + MRE.Word1 = Value2; + Writer->addRelocation(Fragment->getParent(), MRE); + } + + macho::RelocationEntry MRE; + MRE.Word0 = ((FixupOffset << 0) | + (Type << 24) | + (MovtBit << 28) | + (ThumbBit << 29) | + (IsPCRel << 30) | + macho::RF_Scattered); + MRE.Word1 = Value; + Writer->addRelocation(Fragment->getParent(), MRE); +} + +void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + unsigned Log2Size, + uint64_t &FixedValue) { + uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); + unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); + unsigned Type = macho::RIT_Vanilla; + + // See <reloc.h>. + const MCSymbol *A = &Target.getSymA()->getSymbol(); + MCSymbolData *A_SD = &Asm.getSymbolData(*A); + + if (!A_SD->getFragment()) + report_fatal_error("symbol '" + A->getName() + + "' can not be undefined in a subtraction expression"); + + uint32_t Value = Writer->getSymbolAddress(A_SD, Layout); + uint64_t SecAddr = Writer->getSectionAddress(A_SD->getFragment()->getParent()); + FixedValue += SecAddr; + uint32_t Value2 = 0; + + if (const MCSymbolRefExpr *B = Target.getSymB()) { + MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); + + if (!B_SD->getFragment()) + report_fatal_error("symbol '" + B->getSymbol().getName() + + "' can not be undefined in a subtraction expression"); + + // Select the appropriate difference relocation type. + Type = macho::RIT_Difference; + Value2 = Writer->getSymbolAddress(B_SD, Layout); + FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent()); + } + + // Relocations are written out in reverse order, so the PAIR comes first. + if (Type == macho::RIT_Difference || + Type == macho::RIT_Generic_LocalDifference) { + macho::RelocationEntry MRE; + MRE.Word0 = ((0 << 0) | + (macho::RIT_Pair << 24) | + (Log2Size << 28) | + (IsPCRel << 30) | + macho::RF_Scattered); + MRE.Word1 = Value2; + Writer->addRelocation(Fragment->getParent(), MRE); + } + + macho::RelocationEntry MRE; + MRE.Word0 = ((FixupOffset << 0) | + (Type << 24) | + (Log2Size << 28) | + (IsPCRel << 30) | + macho::RF_Scattered); + MRE.Word1 = Value; + Writer->addRelocation(Fragment->getParent(), MRE); +} + +void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + uint64_t &FixedValue) { + unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); + unsigned Log2Size; + unsigned RelocType = macho::RIT_Vanilla; + if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) { + report_fatal_error("unknown ARM fixup kind!"); + return; + } + + // If this is a difference or a defined symbol plus an offset, then we need a + // scattered relocation entry. Differences always require scattered + // relocations. 
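All three Record* functions above finish by hand-packing macho::RelocationEntry words. The scattered Word0 layout they share puts the address in the low 24 bits and the flags above it; the movw/movt variant reuses the r_length bits for its movt and thumb flags, as the long r_length comment explains. A sketch of the packing (field layout per <mach-o/reloc.h>; the helper itself is hypothetical):

    #include <cstdint>

    // Scattered relocation Word0, as built above:
    //   [31] scattered flag  [30] pc-rel  [29:28] r_length (or the
    //   movt/thumb bits for ARM_RELOC_HALF*)  [27:24] r_type
    //   [23:0] r_address
    static uint32_t packScatteredWord0(uint32_t Address, unsigned Type,
                                       unsigned Log2Size, bool IsPCRel) {
      const uint32_t RF_Scattered = 0x80000000u;
      return (Address & 0xffffffu) | ((Type & 0xfu) << 24) |
             ((Log2Size & 0x3u) << 28) | ((IsPCRel ? 1u : 0u) << 30) |
             RF_Scattered;
    }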
+ if (Target.getSymB()) { + if (RelocType == macho::RIT_ARM_Half || + RelocType == macho::RIT_ARM_HalfDifference) + return RecordARMMovwMovtRelocation(Writer, Asm, Layout, Fragment, Fixup, + Target, FixedValue); + return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, + Target, Log2Size, FixedValue); + } + + // Get the symbol data, if any. + MCSymbolData *SD = 0; + if (Target.getSymA()) + SD = &Asm.getSymbolData(Target.getSymA()->getSymbol()); + + // FIXME: For other platforms, we need to use scattered relocations for + // internal relocations with offsets. If this is an internal relocation with + // an offset, it also needs a scattered relocation entry. + // + // Is this right for ARM? + uint32_t Offset = Target.getConstant(); + if (IsPCRel && RelocType == macho::RIT_Vanilla) + Offset += 1 << Log2Size; + if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD)) + return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, + Target, Log2Size, FixedValue); + + // See <reloc.h>. + uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); + unsigned Index = 0; + unsigned IsExtern = 0; + unsigned Type = 0; + + if (Target.isAbsolute()) { // constant + // FIXME! + report_fatal_error("FIXME: relocations to absolute targets " + "not yet implemented"); + } else { + // Resolve constant variables. + if (SD->getSymbol().isVariable()) { + int64_t Res; + if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute( + Res, Layout, Writer->getSectionAddressMap())) { + FixedValue = Res; + return; + } + } + + // Check whether we need an external or internal relocation. + if (Writer->doesSymbolRequireExternRelocation(SD)) { + IsExtern = 1; + Index = SD->getIndex(); + + // For external relocations, make sure to offset the fixup value to + // compensate for the addend of the symbol address, if it was + // undefined. This occurs with weak definitions, for example. + if (!SD->Symbol->isUndefined()) + FixedValue -= Layout.getSymbolOffset(SD); + } else { + // The index is the section ordinal (1-based). + const MCSectionData &SymSD = Asm.getSectionData( + SD->getSymbol().getSection()); + Index = SymSD.getOrdinal() + 1; + FixedValue += Writer->getSectionAddress(&SymSD); + } + if (IsPCRel) + FixedValue -= Writer->getSectionAddress(Fragment->getParent()); + + // The type is determined by the fixup kind. + Type = RelocType; + } + + // struct relocation_info (8 bytes) + macho::RelocationEntry MRE; + MRE.Word0 = FixupOffset; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); + Writer->addRelocation(Fragment->getParent(), MRE); +} + MCObjectWriter *llvm::createARMMachObjectWriter(raw_ostream &OS, bool Is64Bit, uint32_t CPUType, diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 7741410..76eb496 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -228,6 +228,9 @@ def rGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, SP, PC)> { // the general GPR register class above (MOV, e.g.) def tGPR : RegisterClass<"ARM", [i32], 32, (trunc GPR, 8)>; +// The high registers in thumb mode, R8-R15. +def hGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, tGPR)>; + // For tail calls, we can't use callee-saved registers, as they are restored // to the saved value before the tail call, which would clobber a call address. 
// Note, getMinimalPhysRegClass(R0) returns tGPR because of the names of diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index c6f266b..694b313 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -15,7 +15,6 @@ #include "ARMGenSubtarget.inc" #include "ARMBaseRegisterInfo.h" #include "llvm/GlobalValue.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" #include "llvm/ADT/SmallVector.h" using namespace llvm; @@ -31,8 +30,8 @@ static cl::opt<bool> StrictAlign("arm-strict-align", cl::Hidden, cl::desc("Disallow all unaligned memory accesses")); -ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, - bool isT) +ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS, bool isT) : ARMArchVersion(V4) , ARMProcFamily(Others) , ARMFPUType(None) @@ -57,21 +56,19 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, , FPOnlySP(false) , AllowsUnalignedMem(false) , stackAlignment(4) - , CPUString("generic") + , CPUString(CPU) , TargetTriple(TT) , TargetABI(ARM_ABI_APCS) { - // Default to soft float ABI - if (FloatABIType == FloatABI::Default) - FloatABIType = FloatABI::Soft; - // Determine default and user specified characteristics // When no arch is specified either by CPU or by attributes, make the default // ARMv4T. const char *ARMArchFeature = ""; + if (CPUString.empty()) + CPUString = "generic"; if (CPUString == "generic" && (FS.empty() || FS == "generic")) { ARMArchVersion = V4T; - ARMArchFeature = ",+v4t"; + ARMArchFeature = "+v4t"; } // Set the boolean corresponding to the current target triple, or the default @@ -90,29 +87,29 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, unsigned SubVer = TT[Idx]; if (SubVer >= '7' && SubVer <= '9') { ARMArchVersion = V7A; - ARMArchFeature = ",+v7a"; + ARMArchFeature = "+v7a"; if (Len >= Idx+2 && TT[Idx+1] == 'm') { ARMArchVersion = V7M; - ARMArchFeature = ",+v7m"; + ARMArchFeature = "+v7m"; } } else if (SubVer == '6') { ARMArchVersion = V6; - ARMArchFeature = ",+v6"; + ARMArchFeature = "+v6"; if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2') { ARMArchVersion = V6T2; - ARMArchFeature = ",+v6t2"; + ARMArchFeature = "+v6t2"; } } else if (SubVer == '5') { ARMArchVersion = V5T; - ARMArchFeature = ",+v5t"; + ARMArchFeature = "+v5t"; if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e') { ARMArchVersion = V5TE; - ARMArchFeature = ",+v5te"; + ARMArchFeature = "+v5te"; } } else if (SubVer == '4') { if (Len >= Idx+2 && TT[Idx+1] == 't') { ARMArchVersion = V4T; - ARMArchFeature = ",+v4t"; + ARMArchFeature = "+v4t"; } else { ARMArchVersion = V4; ARMArchFeature = ""; @@ -123,18 +120,15 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, if (TT.find("eabi") != std::string::npos) TargetABI = ARM_ABI_AAPCS; - // Parse features string. If the first entry in FS (the CPU) is missing, - // insert the architecture feature derived from the target triple. This is - // important for setting features that are implied based on the architecture - // version. - std::string FSWithArch; - if (FS.empty()) - FSWithArch = std::string(ARMArchFeature); - else if (FS.find(',') == 0) - FSWithArch = std::string(ARMArchFeature) + FS; - else + // Insert the architecture feature derived from the target triple into the + // feature string. This is important for setting features that are implied + // based on the architecture version. 
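The combination rule implemented just below is small enough to sketch in isolation; the feature strings in the comments are hypothetical inputs, not values taken from this change:

  #include <string>

  // Mirrors the FSWithArch assembly below: the triple-derived architecture
  // feature comes first, then the user feature string, comma-separated.
  std::string combineFeatures(const std::string &ArchFeature,
                              const std::string &FS) {
    if (ArchFeature.empty())
      return FS;                    // ""     and "+neon" -> "+neon"
    if (FS.empty())
      return ArchFeature;           // "+v7a" and ""      -> "+v7a"
    return ArchFeature + "," + FS;  // "+v7a" and "+neon" -> "+v7a,+neon"
  }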
+ std::string FSWithArch = std::string(ARMArchFeature); + if (FSWithArch.empty()) FSWithArch = FS; - CPUString = ParseSubtargetFeatures(FSWithArch, CPUString); + else if (!FS.empty()) + FSWithArch = FSWithArch + "," + FS; + ParseSubtargetFeatures(FSWithArch, CPUString); // After parsing Itineraries, set ItinData.IssueWidth. computeIssueWidth(); diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 0271c87..7c93173 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -14,9 +14,8 @@ #ifndef ARMSUBTARGET_H #define ARMSUBTARGET_H -#include "llvm/Target/TargetInstrItineraries.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetSubtarget.h" +#include "llvm/MC/MCInstrItineraries.h" #include "llvm/ADT/Triple.h" #include <string> @@ -154,7 +153,8 @@ protected: /// This constructor initializes the data members to match that /// of the specified triple. /// - ARMSubtarget(const std::string &TT, const std::string &FS, bool isThumb); + ARMSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS, bool isThumb); /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size /// that still makes it profitable to inline the call. @@ -165,8 +165,7 @@ protected: } /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. - std::string ParseSubtargetFeatures(const std::string &FS, - const std::string &CPU); + void ParseSubtargetFeatures(const std::string &FS, const std::string &CPU); void computeIssueWidth(); diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 29aa4f7..80e7d55 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -78,18 +78,24 @@ extern "C" void LLVMInitializeARMTarget() { /// ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS, bool isThumb) : LLVMTargetMachine(T, TT), - Subtarget(TT, FS, isThumb), + Subtarget(TT, CPU, FS, isThumb), JITInfo(), InstrItins(Subtarget.getInstrItineraryData()) { DefRelocModel = getRelocationModel(); + + // Default to soft float ABI + if (FloatABIType == FloatABI::Default) + FloatABIType = FloatABI::Soft; } ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS) - : ARMBaseTargetMachine(T, TT, FS, false), InstrInfo(Subtarget), + : ARMBaseTargetMachine(T, TT, CPU, FS, false), InstrInfo(Subtarget), DataLayout(Subtarget.isAPCS_ABI() ? std::string("e-p:32:32-f64:32:64-i64:32:64-" "v128:32:128-v64:32:64-n32") : @@ -105,8 +111,9 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT, } ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS) - : ARMBaseTargetMachine(T, TT, FS, true), + : ARMBaseTargetMachine(T, TT, CPU, FS, true), InstrInfo(Subtarget.hasThumb2() ? 
((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget)) : ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))), diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index e0aa149..a4a7927 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -41,7 +41,8 @@ private: public: ARMBaseTargetMachine(const Target &T, const std::string &TT, - const std::string &FS, bool isThumb); + const std::string &CPU, const std::string &FS, + bool isThumb); virtual ARMJITInfo *getJITInfo() { return &JITInfo; } virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; } @@ -70,7 +71,7 @@ class ARMTargetMachine : public ARMBaseTargetMachine { ARMFrameLowering FrameLowering; public: ARMTargetMachine(const Target &T, const std::string &TT, - const std::string &FS); + const std::string &CPU, const std::string &FS); virtual const ARMRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); @@ -109,7 +110,7 @@ class ThumbTargetMachine : public ARMBaseTargetMachine { OwningPtr<ARMFrameLowering> FrameLowering; public: ThumbTargetMachine(const Target &T, const std::string &TT, - const std::string &FS); + const std::string &CPU, const std::string &FS); /// returns either Thumb1RegisterInfo or Thumb2RegisterInfo virtual const ARMBaseRegisterInfo *getRegisterInfo() const { diff --git a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp index 2428ce1..d9a5fa2 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp @@ -87,8 +87,9 @@ public: : ARMBaseAsmLexer(T, MAI) { std::string tripleString("arm-unknown-unknown"); std::string featureString; + std::string CPU; OwningPtr<const TargetMachine> - targetMachine(T.createTargetMachine(tripleString, featureString)); + targetMachine(T.createTargetMachine(tripleString, CPU, featureString)); InitRegisterMap(targetMachine->getRegisterInfo()); } }; @@ -99,8 +100,9 @@ public: : ARMBaseAsmLexer(T, MAI) { std::string tripleString("thumb-unknown-unknown"); std::string featureString; + std::string CPU; OwningPtr<const TargetMachine> - targetMachine(T.createTargetMachine(tripleString, featureString)); + targetMachine(T.createTargetMachine(tripleString, CPU, featureString)); InitRegisterMap(targetMachine->getRegisterInfo()); } }; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 4bc12c9..6952c38 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -350,6 +350,22 @@ public: bool isCondCode() const { return Kind == CondCode; } bool isCCOut() const { return Kind == CCOut; } bool isImm() const { return Kind == Immediate; } + bool isImm0_255() const { + if (Kind != Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 0 && Value < 256; + } + bool isT2SOImm() const { + if (Kind != Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return ARM_AM::getT2SOImmVal(Value) != -1; + } bool isReg() const { return Kind == Register; } bool isRegList() const { return Kind == RegisterList; } bool isDPRRegList() const { return Kind == DPRRegisterList; } @@ -515,6 +531,16 @@ public: addExpr(Inst, getImm()); } + void addImm0_255Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + 
addExpr(Inst, getImm()); + } + + void addT2SOImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + addExpr(Inst, getImm()); + } + void addMemBarrierOptOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt()))); @@ -1761,7 +1787,7 @@ static StringRef SplitMnemonic(StringRef Mnemonic, Mnemonic == "vcle" || (Mnemonic == "smlal" || Mnemonic == "umaal" || Mnemonic == "umlal" || Mnemonic == "vabal" || Mnemonic == "vmlal" || Mnemonic == "vpadal" || - Mnemonic == "vqdmlal")) + Mnemonic == "vqdmlal" || Mnemonic == "bics")) return Mnemonic; // First, split out any predication code. @@ -1769,7 +1795,9 @@ static StringRef SplitMnemonic(StringRef Mnemonic, .Case("eq", ARMCC::EQ) .Case("ne", ARMCC::NE) .Case("hs", ARMCC::HS) + .Case("cs", ARMCC::HS) .Case("lo", ARMCC::LO) + .Case("cc", ARMCC::LO) .Case("mi", ARMCC::MI) .Case("pl", ARMCC::PL) .Case("vs", ARMCC::VS) @@ -1824,6 +1852,7 @@ static StringRef SplitMnemonic(StringRef Mnemonic, void ARMAsmParser:: GetMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet, bool &CanAcceptPredicationCode) { + bool isThumbOne = TM.getSubtarget<ARMSubtarget>().isThumb1Only(); bool isThumb = TM.getSubtarget<ARMSubtarget>().isThumb(); if (Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" || @@ -1834,7 +1863,7 @@ GetMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet, Mnemonic == "rsb" || Mnemonic == "rsc" || Mnemonic == "orn" || Mnemonic == "sbc" || Mnemonic == "mla" || Mnemonic == "umull" || Mnemonic == "eor" || Mnemonic == "smlal" || - (Mnemonic == "mov" && !isThumb)) { + (Mnemonic == "mov" && !isThumbOne)) { CanAcceptCarrySet = true; } else { CanAcceptCarrySet = false; @@ -1853,8 +1882,7 @@ GetMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet, if (isThumb) if (Mnemonic == "bkpt" || Mnemonic == "mcr" || Mnemonic == "mcrr" || - Mnemonic == "mrc" || Mnemonic == "mrrc" || Mnemonic == "cdp" || - Mnemonic == "mov") + Mnemonic == "mrc" || Mnemonic == "mrrc" || Mnemonic == "cdp") CanAcceptPredicationCode = false; } diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index edc0054..b1d4f54 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -1,10 +1,7 @@ set(LLVM_TARGET_DEFINITIONS ARM.td) -tablegen(ARMGenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(ARMGenRegisterNames.inc -gen-register-enums) -tablegen(ARMGenRegisterInfo.inc -gen-register-desc) -tablegen(ARMGenInstrNames.inc -gen-instr-enums) -tablegen(ARMGenInstrInfo.inc -gen-instr-desc) +tablegen(ARMGenRegisterInfo.inc -gen-register-info) +tablegen(ARMGenInstrInfo.inc -gen-instr-info) tablegen(ARMGenCodeEmitter.inc -gen-emitter) tablegen(ARMGenMCCodeEmitter.inc -gen-emitter -mc-emitter) tablegen(ARMGenAsmWriter.inc -gen-asm-writer) diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp index 271ca8c..fe165b0 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp @@ -24,8 +24,8 @@ //#define DEBUG(X) do { X; } while (0) /// ARMGenInstrInfo.inc - ARMGenInstrInfo.inc contains the static const -/// TargetInstrDesc ARMInsts[] definition and the TargetOperandInfo[]'s -/// describing the operand info for each ARMInsts[i]. +/// MCInstrDesc ARMInsts[] definition and the MCOperandInfo[]'s describing the +/// operand info for each ARMInsts[i]. 
 ///
 /// Together with an instruction's encoding format, we can take advantage of the
 /// NumOperands and the OpInfo fields of the target instruction description in
@@ -46,10 +46,10 @@
 /// dag DefaultOps = (ops (i32 14), (i32 zero_reg));
 /// }
 ///
-/// which is manifested by the TargetOperandInfo[] of:
+/// which is manifested by the MCOperandInfo[] of:
 ///
-/// { 0, 0|(1<<TOI::Predicate), 0 },
-/// { ARM::CCRRegClassID, 0|(1<<TOI::Predicate), 0 }
+/// { 0, 0|(1<<MCOI::Predicate), 0 },
+/// { ARM::CCRRegClassID, 0|(1<<MCOI::Predicate), 0 }
 ///
 /// So the first predicate MCOperand corresponds to the immediate part of the
 /// ARM condition field (Inst{31-28}), and the second predicate MCOperand
@@ -66,11 +66,12 @@
 /// dag DefaultOps = (ops (i32 zero_reg));
 /// }
 ///
-/// which is manifested by the one TargetOperandInfo of:
+/// which is manifested by the one MCOperandInfo of:
 ///
-/// { ARM::CCRRegClassID, 0|(1<<TOI::OptionalDef), 0 }
+/// { ARM::CCRRegClassID, 0|(1<<MCOI::OptionalDef), 0 }
 ///
 /// And this maps to one MCOperand with the register kind of ARM::CPSR.
+#define GET_INSTRINFO_MC_DESC
 #include "ARMGenInstrInfo.inc"
 
 using namespace llvm;
 
@@ -588,9 +589,9 @@ static bool BadRegsMulFrm(unsigned Opcode, uint32_t insn) {
 static bool DisassembleMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
                               unsigned short NumOps, unsigned &NumOpsAdded,
                               BO B) {
-  const TargetInstrDesc &TID = ARMInsts[Opcode];
-  unsigned short NumDefs = TID.getNumDefs();
-  const TargetOperandInfo *OpInfo = TID.OpInfo;
+  const MCInstrDesc &MCID = ARMInsts[Opcode];
+  unsigned short NumDefs = MCID.getNumDefs();
+  const MCOperandInfo *OpInfo = MCID.OpInfo;
   unsigned &OpIdx = NumOpsAdded;
   OpIdx = 0;
@@ -739,9 +740,9 @@ static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn,
   if (PW) {
     MI.addOperand(MCOperand::CreateReg(0));
     ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
-    const TargetInstrDesc &TID = ARMInsts[Opcode];
+    const MCInstrDesc &MCID = ARMInsts[Opcode];
     unsigned IndexMode =
-      (TID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
+      (MCID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
     unsigned Offset = ARM_AM::getAM2Opc(AddrOpcode, slice(insn, 7, 0) << 2,
                                         ARM_AM::no_shift, IndexMode);
     MI.addOperand(MCOperand::CreateImm(Offset));
@@ -802,7 +803,7 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
   if (CoprocessorOpcode(Opcode))
     return DisassembleCoprocessor(MI, Opcode, insn, NumOps, NumOpsAdded, B);
-  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
   if (!OpInfo) return false;
   // MRS and MRSsys take one GPR reg Rd.
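The operand-info encoding described in the header comment above is what the disassembler tests for at runtime; a sketch of that check, mirroring the predicate handling in DoPredicateOperands further down (Idx+1 is assumed to be in range):

  // An ARM predicate is a pair of MCOperandInfo entries: an immediate
  // condition code (RegClass < 0) followed by the CCR register operand.
  static bool isPredicatePair(const MCOperandInfo *OpInfo, unsigned Idx) {
    return OpInfo[Idx].isPredicate() && OpInfo[Idx + 1].isPredicate() &&
           OpInfo[Idx].RegClass < 0 &&
           OpInfo[Idx + 1].RegClass == ARM::CCRRegClassID;
  }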
@@ -901,7 +902,7 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; if (!OpInfo) return false; unsigned &OpIdx = NumOpsAdded; @@ -976,10 +977,10 @@ static bool BadRegsDPFrm(unsigned Opcode, uint32_t insn) { static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opcode]; - unsigned short NumDefs = TID.getNumDefs(); - bool isUnary = isUnaryDP(TID.TSFlags); - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + unsigned short NumDefs = MCID.getNumDefs(); + bool isUnary = isUnaryDP(MCID.TSFlags); + const MCOperandInfo *OpInfo = MCID.OpInfo; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -1041,7 +1042,7 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } // If this is a two-address operand, skip it, e.g., MOVCCr operand 1. - if (isUnary && (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)) { + if (isUnary && (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)) { MI.addOperand(MCOperand::CreateReg(0)); ++OpIdx; } @@ -1089,10 +1090,10 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opcode]; - unsigned short NumDefs = TID.getNumDefs(); - bool isUnary = isUnaryDP(TID.TSFlags); - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + unsigned short NumDefs = MCID.getNumDefs(); + bool isUnary = isUnaryDP(MCID.TSFlags); + const MCOperandInfo *OpInfo = MCID.OpInfo; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -1118,7 +1119,7 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } // If this is a two-address operand, skip it, e.g., MOVCCs operand 1. - if (isUnary && (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)) { + if (isUnary && (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)) { MI.addOperand(MCOperand::CreateReg(0)); ++OpIdx; } @@ -1244,17 +1245,17 @@ static bool BadRegsLdStFrm(unsigned Opcode, uint32_t insn, bool Store, bool WBac static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, bool isStore, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opcode]; - bool isPrePost = isPrePostLdSt(TID.TSFlags); - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + bool isPrePost = isPrePostLdSt(MCID.TSFlags); + const MCOperandInfo *OpInfo = MCID.OpInfo; if (!OpInfo) return false; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; - assert(((!isStore && TID.getNumDefs() > 0) || - (isStore && (TID.getNumDefs() == 0 || isPrePost))) + assert(((!isStore && MCID.getNumDefs() > 0) || + (isStore && (MCID.getNumDefs() == 0 || isPrePost))) && "Invalid arguments"); // Operand 0 of a pre- and post-indexed store is the address base writeback. 
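The tied_to asserts and checks recurring through these hunks all query the same descriptor API; a minimal sketch of the pattern, with names as in the surrounding code:

  // getOperandConstraint returns the index of the operand that operand
  // OpIdx must equal, or -1 if it carries no TIED_TO constraint.
  static void addTiedOperand(MCInst &MI, const MCInstrDesc &MCID,
                             unsigned &OpIdx) {
    int TiedIdx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO);
    if (TiedIdx != -1) {
      MI.addOperand(MI.getOperand(TiedIdx)); // duplicate the tied operand
      ++OpIdx;
    }
  }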
@@ -1291,7 +1292,7 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - assert((!isPrePost || (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)) + assert((!isPrePost || (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)) && "Index mode or tied_to operand expected"); MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); @@ -1308,7 +1309,7 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; unsigned IndexMode = - (TID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift; + (MCID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift; if (getIBit(insn) == 0) { // For pre- and post-indexed case, add a reg0 operand (Addressing Mode #2). // Otherwise, skip the reg operand since for addrmode_imm12, Rn has already @@ -1379,17 +1380,17 @@ static bool HasDualReg(unsigned Opcode) { static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, bool isStore, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opcode]; - bool isPrePost = isPrePostLdSt(TID.TSFlags); - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + bool isPrePost = isPrePostLdSt(MCID.TSFlags); + const MCOperandInfo *OpInfo = MCID.OpInfo; if (!OpInfo) return false; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; - assert(((!isStore && TID.getNumDefs() > 0) || - (isStore && (TID.getNumDefs() == 0 || isPrePost))) + assert(((!isStore && MCID.getNumDefs() > 0) || + (isStore && (MCID.getNumDefs() == 0 || isPrePost))) && "Invalid arguments"); // Operand 0 of a pre- and post-indexed store is the address base writeback. @@ -1433,7 +1434,7 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - assert((!isPrePost || (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)) + assert((!isPrePost || (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)) && "Offset mode or tied_to operand expected"); MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); @@ -1451,7 +1452,7 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? 
ARM_AM::add : ARM_AM::sub; unsigned IndexMode = - (TID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift; + (MCID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift; if (getAM3IBit(insn) == 1) { MI.addOperand(MCOperand::CreateReg(0)); @@ -1539,7 +1540,7 @@ static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleLdStExFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; if (!OpInfo) return false; unsigned &OpIdx = NumOpsAdded; @@ -1591,7 +1592,7 @@ static bool DisassembleLdStExFrm(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -1653,8 +1654,8 @@ static bool DisassembleSatFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (decodeRd(insn) == 15 || decodeRm(insn) == 15) return false; - const TargetInstrDesc &TID = ARMInsts[Opcode]; - NumOpsAdded = TID.getNumOperands() - 2; // ignore predicate operands + const MCInstrDesc &MCID = ARMInsts[Opcode]; + NumOpsAdded = MCID.getNumOperands() - 2; // ignore predicate operands // Disassemble register def. MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, @@ -1696,7 +1697,7 @@ static bool DisassembleExtFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (decodeRd(insn) == 15 || decodeRm(insn) == 15) return false; - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -1802,7 +1803,7 @@ static bool DisassembleVFPUnaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, assert(NumOps >= 1 && "VFPUnaryFrm expects NumOps >= 1"); - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -1842,8 +1843,8 @@ static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, assert(NumOps >= 3 && "VFPBinaryFrm expects NumOps >= 3"); - const TargetInstrDesc &TID = ARMInsts[Opcode]; - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + const MCOperandInfo *OpInfo = MCID.OpInfo; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -1858,7 +1859,7 @@ static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, ++OpIdx; // Skip tied_to operand constraint. 
- if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) { + if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) { assert(NumOps >= 4 && "Expect >=4 operands"); MI.addOperand(MCOperand::CreateReg(0)); ++OpIdx; @@ -1886,8 +1887,8 @@ static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn, assert(NumOps >= 2 && "VFPConv1Frm expects NumOps >= 2"); - const TargetInstrDesc &TID = ARMInsts[Opcode]; - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + const MCOperandInfo *OpInfo = MCID.OpInfo; if (!OpInfo) return false; bool SP = slice(insn, 8, 8) == 0; // A8.6.295 & A8.6.297 @@ -1903,7 +1904,7 @@ static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn, getRegisterEnum(B, RegClassID, decodeVFPRd(insn, SP)))); - assert(TID.getOperandConstraint(1, TOI::TIED_TO) != -1 && + assert(MCID.getOperandConstraint(1, MCOI::TIED_TO) != -1 && "Tied to operand expected"); MI.addOperand(MI.getOperand(0)); @@ -1961,7 +1962,7 @@ static bool DisassembleVFPConv3Frm(MCInst &MI, unsigned Opcode, uint32_t insn, assert(NumOps >= 3 && "VFPConv3Frm expects NumOps >= 3"); - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, @@ -2011,7 +2012,7 @@ static bool DisassembleVFPConv5Frm(MCInst &MI, unsigned Opcode, uint32_t insn, assert(NumOps >= 3 && "VFPConv5Frm expects NumOps >= 3"); - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -2136,7 +2137,7 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleVFPMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -2402,8 +2403,8 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, bool Store, bool DblSpaced, unsigned alignment, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opcode]; - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + const MCOperandInfo *OpInfo = MCID.OpInfo; // At least one DPR register plus addressing mode #6. 
assert(NumOps >= 3 && "Expect >= 3 operands"); @@ -2507,7 +2508,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, } while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) { - assert(TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1 && + assert(MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1 && "Tied to operand expected"); MI.addOperand(MCOperand::CreateReg(0)); ++OpIdx; @@ -2757,8 +2758,8 @@ static bool DisassembleNLdSt(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opcode]; - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + const MCOperandInfo *OpInfo = MCID.OpInfo; assert(NumOps >= 2 && (OpInfo[0].RegClass == ARM::DPRRegClassID || @@ -2848,8 +2849,8 @@ enum N2VFlag { static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, N2VFlag Flag, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opc]; - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opc]; + const MCOperandInfo *OpInfo = MCID.OpInfo; assert(NumOps >= 2 && (OpInfo[0].RegClass == ARM::DPRRegClassID || @@ -2878,7 +2879,7 @@ static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn, ++OpIdx; // VPADAL... - if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) { + if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) { // TIED_TO operand. MI.addOperand(MCOperand::CreateReg(0)); ++OpIdx; @@ -2892,7 +2893,7 @@ static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn, // VZIP and others have two TIED_TO reg operands. int Idx; while (OpIdx < NumOps && - (Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) { + (Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) { // Add TIED_TO operand. MI.addOperand(MI.getOperand(Idx)); ++OpIdx; @@ -2945,8 +2946,8 @@ static bool DisassembleNVecDupLnFrm(MCInst &MI, unsigned Opc, uint32_t insn, static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, bool LeftShift, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opcode]; - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + const MCOperandInfo *OpInfo = MCID.OpInfo; assert(NumOps >= 3 && (OpInfo[0].RegClass == ARM::DPRRegClassID || @@ -2964,7 +2965,7 @@ static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn, decodeNEONRd(insn)))); ++OpIdx; - if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) { + if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) { // TIED_TO operand. MI.addOperand(MCOperand::CreateReg(0)); ++OpIdx; @@ -3044,8 +3045,8 @@ enum N3VFlag { static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, N3VFlag Flag, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opcode]; - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + const MCOperandInfo *OpInfo = MCID.OpInfo; // No checking for OpInfo[2] because of MOVDneon/MOVQ with only two regs. assert(NumOps >= 3 && @@ -3076,7 +3077,7 @@ static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn, ++OpIdx; // VABA, VABAL, VBSLd, VBSLq, ... 
- if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) { + if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) { // TIED_TO operand. MI.addOperand(MCOperand::CreateReg(0)); ++OpIdx; @@ -3163,8 +3164,8 @@ static bool DisassembleNVecMulScalarFrm(MCInst &MI, unsigned Opcode, static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opcode]; - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + const MCOperandInfo *OpInfo = MCID.OpInfo; if (!OpInfo) return false; assert(NumOps >= 3 && @@ -3192,7 +3193,7 @@ static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Process tied_to operand constraint. int Idx; - if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) { + if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) { MI.addOperand(MI.getOperand(Idx)); ++OpIdx; } @@ -3221,11 +3222,11 @@ static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleNGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opcode]; - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + const MCOperandInfo *OpInfo = MCID.OpInfo; if (!OpInfo) return false; - assert(TID.getNumDefs() == 1 && NumOps >= 3 && + assert(MCID.getNumDefs() == 1 && NumOps >= 3 && OpInfo[0].RegClass == ARM::GPRRegClassID && OpInfo[1].RegClass == ARM::DPRRegClassID && OpInfo[2].RegClass < 0 && @@ -3255,14 +3256,14 @@ static bool DisassembleNGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleNSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opcode]; - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + const MCOperandInfo *OpInfo = MCID.OpInfo; if (!OpInfo) return false; - assert(TID.getNumDefs() == 1 && NumOps >= 3 && + assert(MCID.getNumDefs() == 1 && NumOps >= 3 && OpInfo[0].RegClass == ARM::DPRRegClassID && OpInfo[1].RegClass == ARM::DPRRegClassID && - TID.getOperandConstraint(1, TOI::TIED_TO) != -1 && + MCID.getOperandConstraint(1, MCOI::TIED_TO) != -1 && OpInfo[2].RegClass == ARM::GPRRegClassID && OpInfo[3].RegClass < 0 && "Expect >= 3 operands with one dst operand"); @@ -3294,7 +3295,7 @@ static bool DisassembleNSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleNDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; assert(NumOps >= 2 && (OpInfo[0].RegClass == ARM::DPRRegClassID || @@ -3604,11 +3605,11 @@ bool ARMBasicMCBuilder::DoPredicateOperands(MCInst& MI, unsigned Opcode, assert(NumOpsRemaining > 0 && "Invalid argument"); - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned Idx = MI.getNumOperands(); // First, we check whether this instr specifies the PredicateOperand through - // a pair of TargetOperandInfos with isPredicate() property. + // a pair of MCOperandInfos with isPredicate() property. 
if (NumOpsRemaining >= 2 && OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() && OpInfo[Idx].RegClass < 0 && @@ -3636,13 +3637,13 @@ bool ARMBasicMCBuilder::TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode, assert(NumOpsRemaining > 0 && "Invalid argument"); - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; const std::string &Name = ARMInsts[Opcode].Name; unsigned Idx = MI.getNumOperands(); uint64_t TSFlags = ARMInsts[Opcode].TSFlags; // First, we check whether this instr specifies the PredicateOperand through - // a pair of TargetOperandInfos with isPredicate() property. + // a pair of MCOperandInfos with isPredicate() property. if (NumOpsRemaining >= 2 && OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() && OpInfo[Idx].RegClass < 0 && diff --git a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h index 9639c8a..834c6f6 100644 --- a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h +++ b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h @@ -350,7 +350,7 @@ static inline unsigned decodeRotate(uint32_t insn) { static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -425,8 +425,8 @@ static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opcode]; - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + const MCOperandInfo *OpInfo = MCID.OpInfo; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -454,7 +454,7 @@ static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn, assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID && "Thumb reg operand expected"); int Idx; - if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) { + if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) { // The reg operand is tied to the first reg operand. MI.addOperand(MI.getOperand(Idx)); ++OpIdx; @@ -511,8 +511,8 @@ static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn, return true; } - const TargetInstrDesc &TID = ARMInsts[Opcode]; - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + const MCOperandInfo *OpInfo = MCID.OpInfo; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -530,7 +530,7 @@ static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn, assert(OpIdx < NumOps && "More operands expected"); int Idx; - if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) { + if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) { // The reg operand is tied to the first reg operand. 
MI.addOperand(MI.getOperand(Idx)); ++OpIdx; @@ -554,7 +554,7 @@ static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; if (!OpInfo) return false; assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID && @@ -602,7 +602,7 @@ static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; if (!OpInfo) return false; assert(NumOps >= 2 && @@ -630,8 +630,8 @@ static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode, static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opcode]; - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + const MCOperandInfo *OpInfo = MCID.OpInfo; unsigned &OpIdx = NumOpsAdded; assert(NumOps >= 2 @@ -680,7 +680,7 @@ static bool DisassembleThumb1LdStSP(MCInst &MI, unsigned Opcode, uint32_t insn, assert((Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) && "Unexpected opcode"); - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; if (!OpInfo) return false; assert(NumOps >= 3 && @@ -708,7 +708,7 @@ static bool DisassembleThumb1AddPCi(MCInst &MI, unsigned Opcode, uint32_t insn, assert(Opcode == ARM::tADDrPCi && "Unexpected opcode"); - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; if (!OpInfo) return false; assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID && @@ -733,7 +733,7 @@ static bool DisassembleThumb1AddSPi(MCInst &MI, unsigned Opcode, uint32_t insn, assert(Opcode == ARM::tADDrSPi && "Unexpected opcode"); - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; if (!OpInfo) return false; assert(NumOps >= 3 && @@ -810,7 +810,7 @@ static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn, if (Opcode == ARM::tPUSH || Opcode == ARM::tPOP) return DisassembleThumb1PushPop(MI, Opcode, insn, NumOps, NumOpsAdded, B); - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; // Predicate operands are handled elsewhere. 
if (NumOps == 2 && @@ -958,7 +958,7 @@ static bool DisassembleThumb1CondBr(MCInst &MI, unsigned Opcode, uint32_t insn, if (Opcode == ARM::tTRAP) return true; - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; if (!OpInfo) return false; assert(NumOps == 3 && OpInfo[0].RegClass < 0 && @@ -989,7 +989,7 @@ static bool DisassembleThumb1CondBr(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleThumb1Br(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO) { - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; if (!OpInfo) return false; assert(NumOps == 1 && OpInfo[0].RegClass < 0 && "1 imm operand expected"); @@ -1226,7 +1226,7 @@ static bool DisassembleThumb2LdStMul(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleThumb2LdStEx(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; if (!OpInfo) return false; unsigned &OpIdx = NumOpsAdded; @@ -1316,7 +1316,7 @@ static bool DisassembleThumb2LdStEx(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; if (!OpInfo) return false; assert(NumOps >= 4 @@ -1423,8 +1423,8 @@ static inline bool Thumb2ShiftOpcode(unsigned Opcode) { static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opcode]; - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + const MCOperandInfo *OpInfo = MCID.OpInfo; unsigned &OpIdx = NumOpsAdded; // Special case handling. @@ -1467,7 +1467,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, if (ThreeReg) { int Idx; - if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) { + if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) { // Process tied_to operand constraint. MI.addOperand(MI.getOperand(Idx)); ++OpIdx; @@ -1521,8 +1521,8 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opcode]; - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + const MCOperandInfo *OpInfo = MCID.OpInfo; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -1550,7 +1550,7 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode, return false; } int Idx; - if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) { + if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) { // The reg operand is tied to the first reg operand. 
MI.addOperand(MI.getOperand(Idx)); } else { @@ -1590,8 +1590,8 @@ static inline bool Thumb2SaturateOpcode(unsigned Opcode) { /// o t2SSAT16, t2USAT16: Rs sat_pos Rn static bool DisassembleThumb2Sat(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned &NumOpsAdded, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opcode]; - NumOpsAdded = TID.getNumOperands() - 2; // ignore predicate operands + const MCInstrDesc &MCID = ARMInsts[Opcode]; + NumOpsAdded = MCID.getNumOperands() - 2; // ignore predicate operands // Disassemble the register def. MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, @@ -1635,8 +1635,8 @@ static bool DisassembleThumb2Sat(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opcode]; - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + const MCOperandInfo *OpInfo = MCID.OpInfo; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -1659,7 +1659,7 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, if (TwoReg) { assert(NumOps >= 3 && "Expect >= 3 operands"); int Idx; - if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) { + if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) { // Process tied_to operand constraint. MI.addOperand(MI.getOperand(Idx)); } else { @@ -1907,8 +1907,8 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn, // t2PLDs: Rn Rm imm2=Inst{5-4} // Same pattern applies for t2PLDW* and t2PLI*. - const TargetInstrDesc &TID = ARMInsts[Opcode]; - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + const MCOperandInfo *OpInfo = MCID.OpInfo; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -2073,8 +2073,8 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode, // See, for example, A6.3.7 Load word: Table A6-18 Load word. if (Load && Rn == 15) return DisassembleThumb2Ldpci(MI, Opcode, insn, NumOps, NumOpsAdded, B); - const TargetInstrDesc &TID = ARMInsts[Opcode]; - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + const MCOperandInfo *OpInfo = MCID.OpInfo; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -2085,7 +2085,7 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode, "Expect >= 3 operands and first two as reg operands"); bool ThreeReg = (OpInfo[2].RegClass > 0); - bool TIED_TO = ThreeReg && TID.getOperandConstraint(2, TOI::TIED_TO) != -1; + bool TIED_TO = ThreeReg && MCID.getOperandConstraint(2, MCOI::TIED_TO) != -1; bool Imm12 = !ThreeReg && slice(insn, 23, 23) == 1; // ARMInstrThumb2.td // Build the register operands, followed by the immediate. 
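Every disassembler hunk above and below applies the same mechanical substitution from the retired Target-layer types to their MC-layer replacements:

  TargetInstrDesc    ->  MCInstrDesc
  TargetOperandInfo  ->  MCOperandInfo
  TOI::TIED_TO       ->  MCOI::TIED_TO
  TOI::Predicate     ->  MCOI::Predicate
  TOI::OptionalDef   ->  MCOI::OptionalDef

with local variables named TID renamed to MCID to match.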
@@ -2160,8 +2160,8 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode, static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opcode]; - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + const MCOperandInfo *OpInfo = MCID.OpInfo; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -2214,7 +2214,7 @@ static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleThumb2Mul(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; assert(NumOps >= 3 && OpInfo[0].RegClass == ARM::rGPRRegClassID && @@ -2259,7 +2259,7 @@ static bool DisassembleThumb2Mul(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleThumb2LongMul(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; assert(NumOps >= 3 && OpInfo[0].RegClass == ARM::rGPRRegClassID && diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp index f6d0242..2df0053 100644 --- a/lib/Target/ARM/MLxExpansionPass.cpp +++ b/lib/Target/ARM/MLxExpansionPass.cpp @@ -137,11 +137,11 @@ unsigned MLxExpansion::getDefReg(MachineInstr *MI) const { bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const { // FIXME: Detect integer instructions properly. - const TargetInstrDesc &TID = MI->getDesc(); - unsigned Domain = TID.TSFlags & ARMII::DomainMask; - if (TID.mayStore()) + const MCInstrDesc &MCID = MI->getDesc(); + unsigned Domain = MCID.TSFlags & ARMII::DomainMask; + if (MCID.mayStore()) return false; - unsigned Opcode = TID.getOpcode(); + unsigned Opcode = MCID.getOpcode(); if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) return false; if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON)) @@ -218,18 +218,18 @@ MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI, ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NextOp).getImm(); unsigned PredReg = MI->getOperand(++NextOp).getReg(); - const TargetInstrDesc &TID1 = TII->get(MulOpc); - const TargetInstrDesc &TID2 = TII->get(AddSubOpc); - unsigned TmpReg = MRI->createVirtualRegister(TID1.getRegClass(0, TRI)); + const MCInstrDesc &MCID1 = TII->get(MulOpc); + const MCInstrDesc &MCID2 = TII->get(AddSubOpc); + unsigned TmpReg = MRI->createVirtualRegister(TII->getRegClass(MCID1, 0, TRI)); - MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), TID1, TmpReg) + MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), MCID1, TmpReg) .addReg(Src1Reg, getKillRegState(Src1Kill)) .addReg(Src2Reg, getKillRegState(Src2Kill)); if (HasLane) MIB.addImm(LaneImm); MIB.addImm(Pred).addReg(PredReg); - MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), TID2) + MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), MCID2) .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead)); if (NegAcc) { @@ -273,15 +273,15 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) { continue; } - const TargetInstrDesc &TID = MI->getDesc(); - if (TID.isBarrier()) { + const MCInstrDesc &MCID = MI->getDesc(); + if (MCID.isBarrier()) { clearStack(); Skip = 0; ++MII; continue; } - unsigned Domain = 
TID.TSFlags & ARMII::DomainMask; + unsigned Domain = MCID.TSFlags & ARMII::DomainMask; if (Domain == ARMII::DomainGeneral) { if (++Skip == 2) // Assume dual issues of non-VFP / NEON instructions. @@ -291,7 +291,7 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) { unsigned MulOpc, AddSubOpc; bool NegAcc, HasLane; - if (!TII->isFpMLxInstruction(TID.getOpcode(), + if (!TII->isFpMLxInstruction(MCID.getOpcode(), MulOpc, AddSubOpc, NegAcc, HasLane) || !FindMLxHazard(MI)) pushStack(MI); diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile index 65a6494..6472c53 100644 --- a/lib/Target/ARM/Makefile +++ b/lib/Target/ARM/Makefile @@ -12,9 +12,8 @@ LIBRARYNAME = LLVMARMCodeGen TARGET = ARM # Make sure that tblgen is run, first thing. -BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \ - ARMGenRegisterInfo.inc ARMGenInstrNames.inc \ - ARMGenInstrInfo.inc ARMGenAsmWriter.inc ARMGenAsmMatcher.inc \ +BUILT_SOURCES = ARMGenRegisterInfo.inc ARMGenInstrInfo.inc \ + ARMGenAsmWriter.inc ARMGenAsmMatcher.inc \ ARMGenDAGISel.inc ARMGenSubtarget.inc \ ARMGenCodeEmitter.inc ARMGenCallingConv.inc \ ARMGenDecoderTables.inc ARMGenEDInfo.inc \ diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index e56d481..48211d8 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -160,7 +160,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { // will be allocated after this, so we can still use the base pointer // to reference locals. if (RegInfo->hasBasePointer(MF)) - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), BasePtr).addReg(ARM::SP); + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), BasePtr) + .addReg(ARM::SP)); // If the frame has variable sized objects then the epilogue must restore // the sp from fp. 
We can assume there's an FP here since hasFP already @@ -177,7 +178,7 @@ static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { } static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) { - if (MI->getOpcode() == ARM::tRestore && + if (MI->getOpcode() == ARM::tLDRspi && MI->getOperand(1).isFI() && isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs)) return true; @@ -239,11 +240,13 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, "No scratch register to restore SP from FP!"); emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, TII, *RegInfo); - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP) - .addReg(ARM::R4); + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), + ARM::SP) + .addReg(ARM::R4)); } else - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP) - .addReg(FramePtr); + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), + ARM::SP) + .addReg(FramePtr)); } else { if (MBBI->getOpcode() == ARM::tBX_RET && &MBB.front() != MBBI && diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index 3fbb433..218311d 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -13,7 +13,6 @@ #include "Thumb1InstrInfo.h" #include "ARM.h" -#include "ARMGenInstrInfo.inc" #include "ARMMachineFunctionInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -37,18 +36,8 @@ void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { - bool tDest = ARM::tGPRRegClass.contains(DestReg); - bool tSrc = ARM::tGPRRegClass.contains(SrcReg); - unsigned Opc = ARM::tMOVgpr2gpr; - if (tDest && tSrc) - Opc = ARM::tMOVr; - else if (tSrc) - Opc = ARM::tMOVtgpr2gpr; - else if (tDest) - Opc = ARM::tMOVgpr2tgpr; - - BuildMI(MBB, I, DL, get(Opc), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc))); assert(ARM::GPRRegClass.contains(DestReg, SrcReg) && "Thumb1 can only copy GPR registers"); } @@ -76,7 +65,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineMemOperand::MOStore, MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tSpill)) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tSTRspi)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } @@ -105,7 +94,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tRestore), DestReg) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tLDRspi), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } } diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index 6bf5650..4eb0b6c 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -239,13 +239,13 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, unsigned Chunk = (1 << 3) - 1; unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; Bytes -= ThisVal; - const TargetInstrDesc &TID = TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3); + const MCInstrDesc &MCID = TII.get(isSub ? 
ARM::tSUBi3 : ARM::tADDi3); const MachineInstrBuilder MIB = - AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg).setMIFlags(MIFlags)); + AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg).setMIFlags(MIFlags)); AddDefaultPred(MIB.addReg(BaseReg, RegState::Kill).addImm(ThisVal)); } else { - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg) - .addReg(BaseReg, RegState::Kill) + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg) + .addReg(BaseReg, RegState::Kill)) .setMIFlags(MIFlags); } BaseReg = DestReg; @@ -291,8 +291,8 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, } if (ExtraOpc) { - const TargetInstrDesc &TID = TII.get(ExtraOpc); - AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg)) + const MCInstrDesc &MCID = TII.get(ExtraOpc); + AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg)) .addReg(DestReg, RegState::Kill) .addImm(((unsigned)NumBytes) & 3) .setMIFlags(MIFlags)); @@ -360,8 +360,8 @@ static void emitThumbConstant(MachineBasicBlock &MBB, if (Imm > 0) emitThumbRegPlusImmediate(MBB, MBBI, dl, DestReg, DestReg, Imm, TII, MRI); if (isSub) { - const TargetInstrDesc &TID = TII.get(ARM::tRSB); - AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg)) + const MCInstrDesc &MCID = TII.get(ARM::tRSB); + AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg)) .addReg(DestReg, RegState::Kill)); } } @@ -377,11 +377,9 @@ static void removeOperands(MachineInstr &MI, unsigned i) { static unsigned convertToNonSPOpcode(unsigned Opcode) { switch (Opcode) { case ARM::tLDRspi: - case ARM::tRestore: // FIXME: Should this opcode be here? return ARM::tLDRi; case ARM::tSTRspi: - case ARM::tSpill: // FIXME: Should this opcode be here? return ARM::tSTRi; } @@ -396,7 +394,7 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, MachineBasicBlock &MBB = *MI.getParent(); DebugLoc dl = MI.getDebugLoc(); unsigned Opcode = MI.getOpcode(); - const TargetInstrDesc &Desc = MI.getDesc(); + const MCInstrDesc &Desc = MI.getDesc(); unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); if (Opcode == ARM::tADDrSPi) { @@ -419,13 +417,12 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, unsigned PredReg; if (Offset == 0 && getInstrPredicate(&MI, PredReg) == ARMCC::AL) { // Turn it into a move. - MI.setDesc(TII.get(ARM::tMOVgpr2tgpr)); + MI.setDesc(TII.get(ARM::tMOVr)); MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); - // Remove offset and remaining explicit predicate operands. - do MI.RemoveOperand(FrameRegIdx+1); - while (MI.getNumOperands() > FrameRegIdx+1 && - (!MI.getOperand(FrameRegIdx+1).isReg() || - !MI.getOperand(FrameRegIdx+1).isImm())); + // Remove offset and add predicate operands. + MI.RemoveOperand(FrameRegIdx+1); + MachineInstrBuilder MIB(&MI); + AddDefaultPred(MIB); return true; } @@ -524,7 +521,7 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, // If this is a thumb spill / restore, we will be using a constpool load to // materialize the offset. - if (Opcode == ARM::tRestore || Opcode == ARM::tSpill) { + if (Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) { ImmOp.ChangeToImmediate(0); } else { // Otherwise, it didn't fit. Pull in what we can to simplify the immed. @@ -567,8 +564,9 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB, // the function, the offset will be negative. Use R12 instead since that's // a call clobbered register that we know won't be used in Thumb1 mode. 
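The Thumb hunks around this point all apply one pattern: the register-class-specific copy opcodes (tMOVgpr2gpr, tMOVtgpr2gpr, tMOVgpr2tgpr) collapse into the single predicable tMOVr, so every emitted copy now carries explicit predicate operands via AddDefaultPred; the same sweep renames the SP-relative spill/reload pseudos tSpill/tRestore to tSTRspi/tLDRspi. A minimal before/after sketch of the copy emission, following the shape of these hunks (not a drop-in function):

    // Before: choose among four MOV opcodes by register class.
    bool tDest = ARM::tGPRRegClass.contains(DestReg);
    bool tSrc  = ARM::tGPRRegClass.contains(SrcReg);
    unsigned Opc = ARM::tMOVgpr2gpr;             // GPR  <- GPR
    if (tDest && tSrc) Opc = ARM::tMOVr;         // tGPR <- tGPR
    else if (tSrc)     Opc = ARM::tMOVtgpr2gpr;  // GPR  <- tGPR
    else if (tDest)    Opc = ARM::tMOVgpr2tgpr;  // tGPR <- GPR
    BuildMI(MBB, I, DL, get(Opc), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));

    // After: one opcode; AddDefaultPred appends the AL predicate
    // operands that the unified tMOVr now expects.
    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
                       .addReg(SrcReg, getKillRegState(KillSrc)));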
DebugLoc DL; - BuildMI(MBB, I, DL, TII.get(ARM::tMOVtgpr2gpr)). - addReg(ARM::R12, RegState::Define).addReg(Reg, RegState::Kill); + AddDefaultPred(BuildMI(MBB, I, DL, TII.get(ARM::tMOVr)) + .addReg(ARM::R12, RegState::Define) + .addReg(Reg, RegState::Kill)); // The UseMI is where we would like to restore the register. If there's // interference with R12 before then, however, we'll need to restore it @@ -591,8 +589,8 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB, } } // Restore the register from R12 - BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVgpr2tgpr)). - addReg(Reg, RegState::Define).addReg(ARM::R12, RegState::Kill); + AddDefaultPred(BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVr)). + addReg(Reg, RegState::Define).addReg(ARM::R12, RegState::Kill)); return true; } @@ -653,7 +651,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, assert(Offset && "This code isn't needed if offset already handled!"); unsigned Opcode = MI.getOpcode(); - const TargetInstrDesc &Desc = MI.getDesc(); + const MCInstrDesc &Desc = MI.getDesc(); // Remove predicate first. int PIdx = MI.findFirstPredOperandIdx(); @@ -664,7 +662,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Use the destination register to materialize sp + offset. unsigned TmpReg = MI.getOperand(0).getReg(); bool UseRR = false; - if (Opcode == ARM::tRestore) { + if (Opcode == ARM::tLDRspi) { if (FrameReg == ARM::SP) emitThumbRegPlusImmInReg(MBB, II, dl, TmpReg, FrameReg, Offset, false, TII, *this); @@ -687,7 +685,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass); bool UseRR = false; - if (Opcode == ARM::tSpill) { + if (Opcode == ARM::tSTRspi) { if (FrameReg == ARM::SP) emitThumbRegPlusImmInReg(MBB, II, dl, VReg, FrameReg, Offset, false, TII, *this); diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index 45e6937..360ec00 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -98,9 +98,6 @@ static bool isCopy(MachineInstr *MI) { case ARM::MOVr: case ARM::MOVr_TC: case ARM::tMOVr: - case ARM::tMOVgpr2tgpr: - case ARM::tMOVtgpr2gpr: - case ARM::tMOVgpr2gpr: case ARM::t2MOVr: return true; } diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index d169dbb..51b56aa 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -15,7 +15,6 @@ #include "ARM.h" #include "ARMConstantPoolValue.h" #include "ARMAddressingModes.h" -#include "ARMGenInstrInfo.inc" #include "ARMMachineFunctionInfo.h" #include "Thumb2InstrInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -113,18 +112,8 @@ void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (!ARM::GPRRegClass.contains(DestReg, SrcReg)) return ARMBaseInstrInfo::copyPhysReg(MBB, I, DL, DestReg, SrcReg, KillSrc); - bool tDest = ARM::tGPRRegClass.contains(DestReg); - bool tSrc = ARM::tGPRRegClass.contains(SrcReg); - unsigned Opc = ARM::tMOVgpr2gpr; - if (tDest && tSrc) - Opc = ARM::tMOVr; - else if (tSrc) - Opc = ARM::tMOVtgpr2gpr; - else if (tDest) - Opc = ARM::tMOVgpr2tgpr; - - BuildMI(MBB, I, DL, get(Opc), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc))); } void Thumb2InstrInfo:: @@ -232,8 +221,8 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, unsigned Opc = 0; if (DestReg == 
ARM::SP && BaseReg != ARM::SP) { // mov sp, rn. Note t2MOVr cannot be used. - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr),DestReg) - .addReg(BaseReg).setMIFlags(MIFlags); + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),DestReg) + .addReg(BaseReg).setMIFlags(MIFlags)); BaseReg = ARM::SP; continue; } @@ -252,7 +241,7 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, } // sub rd, sp, so_imm - Opc = isSub ? ARM::t2SUBrSPi : ARM::t2ADDrSPi; + Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri; if (ARM_AM::getT2SOImmVal(NumBytes) != -1) { NumBytes = 0; } else { @@ -396,7 +385,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, int &Offset, const ARMBaseInstrInfo &TII) { unsigned Opcode = MI.getOpcode(); - const TargetInstrDesc &Desc = MI.getDesc(); + const MCInstrDesc &Desc = MI.getDesc(); unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); bool isSub = false; @@ -410,25 +399,24 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned PredReg; if (Offset == 0 && getInstrPredicate(&MI, PredReg) == ARMCC::AL) { // Turn it into a move. - MI.setDesc(TII.get(ARM::tMOVgpr2gpr)); + MI.setDesc(TII.get(ARM::tMOVr)); MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); // Remove offset and remaining explicit predicate operands. do MI.RemoveOperand(FrameRegIdx+1); - while (MI.getNumOperands() > FrameRegIdx+1 && - (!MI.getOperand(FrameRegIdx+1).isReg() || - !MI.getOperand(FrameRegIdx+1).isImm())); + while (MI.getNumOperands() > FrameRegIdx+1); + MachineInstrBuilder MIB(&MI); + AddDefaultPred(MIB); return true; } - bool isSP = FrameReg == ARM::SP; bool HasCCOut = Opcode != ARM::t2ADDri12; if (Offset < 0) { Offset = -Offset; isSub = true; - MI.setDesc(TII.get(isSP ? ARM::t2SUBrSPi : ARM::t2SUBri)); + MI.setDesc(TII.get(ARM::t2SUBri)); } else { - MI.setDesc(TII.get(isSP ? ARM::t2ADDrSPi : ARM::t2ADDri)); + MI.setDesc(TII.get(ARM::t2ADDri)); } // Common case: small offset, fits into instruction. @@ -444,9 +432,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, // Another common case: imm12. if (Offset < 4096 && (!HasCCOut || MI.getOperand(MI.getNumOperands()-1).getReg() == 0)) { - unsigned NewOpc = isSP - ? (isSub ? ARM::t2SUBrSPi12 : ARM::t2ADDrSPi12) - : (isSub ? ARM::t2SUBri12 : ARM::t2ADDri12); + unsigned NewOpc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12; MI.setDesc(TII.get(NewOpc)); MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset); @@ -579,8 +565,7 @@ void Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI, MachineInstr *UseMI, const TargetRegisterInfo &TRI) const { - if (SrcMI->getOpcode() != ARM::tMOVgpr2gpr || - SrcMI->getOperand(1).isKill()) + if (SrcMI->getOpcode() != ARM::tMOVr || SrcMI->getOperand(1).isKill()) return; unsigned PredReg = 0; diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index ce2e966..24a037c 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -57,10 +57,8 @@ namespace { static const ReduceEntry ReduceTable[] = { // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C, PF, S { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0,0 }, - { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,0 }, + { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,1 }, { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0,0 }, - // Note: immediate scale is 4. 
- { ARM::t2ADDrSPi,ARM::tADDrSPi,0, 8, 0, 1, 0, 1,0, 0,1 }, { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 0,1 }, { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 0,1 }, { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 1,0 }, @@ -84,7 +82,7 @@ namespace { { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,0 }, { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,1 }, // FIXME: Do we need the 16-bit 'S' variant? - { ARM::t2MOVr,ARM::tMOVgpr2gpr,0, 0, 0, 0, 0, 1,0, 0,0 }, + { ARM::t2MOVr,ARM::tMOVr, 0, 0, 0, 0, 0, 1,0, 0,0 }, { ARM::t2MOVCCr,0, ARM::tMOVCCr, 0, 0, 0, 0, 0,1, 0,0 }, { ARM::t2MOVCCi,0, ARM::tMOVCCi, 0, 8, 0, 1, 0,1, 0,0 }, { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 1,0 }, @@ -189,8 +187,8 @@ Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) { } } -static bool HasImplicitCPSRDef(const TargetInstrDesc &TID) { - for (const unsigned *Regs = TID.ImplicitDefs; *Regs; ++Regs) +static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) { + for (const unsigned *Regs = MCID.ImplicitDefs; *Regs; ++Regs) if (*Regs == ARM::CPSR) return true; return false; @@ -291,7 +289,7 @@ static bool VerifyLowRegs(MachineInstr *MI) { Opc == ARM::t2LDMDB || Opc == ARM::t2LDMIA_UPD || Opc == ARM::t2LDMDB_UPD); bool isLROk = (Opc == ARM::t2STMIA_UPD || Opc == ARM::t2STMDB_UPD); - bool isSPOk = isPCOk || isLROk || (Opc == ARM::t2ADDrSPi); + bool isSPOk = isPCOk || isLROk; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || MO.isImplicit()) @@ -481,14 +479,54 @@ bool Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, bool LiveCPSR, MachineInstr *CPSRDef) { + unsigned Opc = MI->getOpcode(); + if (Opc == ARM::t2ADDri) { + // If the source register is SP, try to reduce to tADDrSPi, otherwise + // it's a normal reduce. + if (MI->getOperand(1).getReg() != ARM::SP) { + if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef)) + return true; + return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); + } + // Try to reduce to tADDrSPi. + unsigned Imm = MI->getOperand(2).getImm(); + // The immediate must be in range, the destination register must be a low + // reg, the predicate must be "always" and the condition flags must not + // be being set. + if (Imm & 3 || Imm > 1024) + return false; + if (!isARMLowRegister(MI->getOperand(0).getReg())) + return false; + if (MI->getOperand(3).getImm() != ARMCC::AL) + return false; + const MCInstrDesc &MCID = MI->getDesc(); + if (MCID.hasOptionalDef() && + MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR) + return false; + + MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), + TII->get(ARM::tADDrSPi)) + .addOperand(MI->getOperand(0)) + .addOperand(MI->getOperand(1)) + .addImm(Imm / 4); // The tADDrSPi has an implied scale by four. + + // Transfer MI flags. 
+ MIB.setMIFlags(MI->getFlags()); + + DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " <<*MIB); + + MBB.erase(MI); + ++NumNarrows; + return true; + } + if (Entry.LowRegs1 && !VerifyLowRegs(MI)) return false; - const TargetInstrDesc &TID = MI->getDesc(); - if (TID.mayLoad() || TID.mayStore()) + const MCInstrDesc &MCID = MI->getDesc(); + if (MCID.mayLoad() || MCID.mayStore()) return ReduceLoadStore(MBB, MI, Entry); - unsigned Opc = MI->getOpcode(); switch (Opc) { default: break; case ARM::t2ADDSri: @@ -531,13 +569,6 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, return true; return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); } - case ARM::t2ADDrSPi: { - static const ReduceEntry NarrowEntry = - { ARM::t2ADDrSPi,ARM::tADDspi, 0, 7, 0, 1, 0, 1, 0, 0,1 }; - if (MI->getOperand(0).getReg() == ARM::SP) - return ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef); - return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); - } } return false; } @@ -576,23 +607,23 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, } // Check if it's possible / necessary to transfer the predicate. - const TargetInstrDesc &NewTID = TII->get(Entry.NarrowOpc2); + const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2); unsigned PredReg = 0; ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); bool SkipPred = false; if (Pred != ARMCC::AL) { - if (!NewTID.isPredicable()) + if (!NewMCID.isPredicable()) // Can't transfer predicate, fail. return false; } else { - SkipPred = !NewTID.isPredicable(); + SkipPred = !NewMCID.isPredicable(); } bool HasCC = false; bool CCDead = false; - const TargetInstrDesc &TID = MI->getDesc(); - if (TID.hasOptionalDef()) { - unsigned NumOps = TID.getNumOperands(); + const MCInstrDesc &MCID = MI->getDesc(); + if (MCID.hasOptionalDef()) { + unsigned NumOps = MCID.getNumOperands(); HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR); if (HasCC && MI->getOperand(NumOps-1).isDead()) CCDead = true; @@ -602,15 +633,15 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, // Avoid adding a false dependency on partial flag update by some 16-bit // instructions which has the 's' bit set. - if (Entry.PartFlag && NewTID.hasOptionalDef() && HasCC && + if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC && canAddPseudoFlagDep(CPSRDef, MI)) return false; // Add the 16-bit instruction. DebugLoc dl = MI->getDebugLoc(); - MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID); + MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID); MIB.addOperand(MI->getOperand(0)); - if (NewTID.hasOptionalDef()) { + if (NewMCID.hasOptionalDef()) { if (HasCC) AddDefaultT1CC(MIB, CCDead); else @@ -618,11 +649,11 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, } // Transfer the rest of operands. - unsigned NumOps = TID.getNumOperands(); + unsigned NumOps = MCID.getNumOperands(); for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { - if (i < NumOps && TID.OpInfo[i].isOptionalDef()) + if (i < NumOps && MCID.OpInfo[i].isOptionalDef()) continue; - if (SkipPred && TID.OpInfo[i].isPredicate()) + if (SkipPred && MCID.OpInfo[i].isPredicate()) continue; MIB.addOperand(MI->getOperand(i)); } @@ -645,47 +676,44 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, return false; unsigned Limit = ~0U; - unsigned Scale = (Entry.WideOpc == ARM::t2ADDrSPi) ? 
4 : 1; if (Entry.Imm1Limit) - Limit = ((1 << Entry.Imm1Limit) - 1) * Scale; + Limit = (1 << Entry.Imm1Limit) - 1; - const TargetInstrDesc &TID = MI->getDesc(); - for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) { - if (TID.OpInfo[i].isPredicate()) + const MCInstrDesc &MCID = MI->getDesc(); + for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) { + if (MCID.OpInfo[i].isPredicate()) continue; const MachineOperand &MO = MI->getOperand(i); if (MO.isReg()) { unsigned Reg = MO.getReg(); if (!Reg || Reg == ARM::CPSR) continue; - if (Entry.WideOpc == ARM::t2ADDrSPi && Reg == ARM::SP) - continue; if (Entry.LowRegs1 && !isARMLowRegister(Reg)) return false; } else if (MO.isImm() && - !TID.OpInfo[i].isPredicate()) { - if (((unsigned)MO.getImm()) > Limit || (MO.getImm() & (Scale-1)) != 0) + !MCID.OpInfo[i].isPredicate()) { + if (((unsigned)MO.getImm()) > Limit) return false; } } // Check if it's possible / necessary to transfer the predicate. - const TargetInstrDesc &NewTID = TII->get(Entry.NarrowOpc1); + const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1); unsigned PredReg = 0; ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); bool SkipPred = false; if (Pred != ARMCC::AL) { - if (!NewTID.isPredicable()) + if (!NewMCID.isPredicable()) // Can't transfer predicate, fail. return false; } else { - SkipPred = !NewTID.isPredicable(); + SkipPred = !NewMCID.isPredicable(); } bool HasCC = false; bool CCDead = false; - if (TID.hasOptionalDef()) { - unsigned NumOps = TID.getNumOperands(); + if (MCID.hasOptionalDef()) { + unsigned NumOps = MCID.getNumOperands(); HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR); if (HasCC && MI->getOperand(NumOps-1).isDead()) CCDead = true; @@ -695,15 +723,15 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, // Avoid adding a false dependency on partial flag update by some 16-bit // instructions which has the 's' bit set. - if (Entry.PartFlag && NewTID.hasOptionalDef() && HasCC && + if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC && canAddPseudoFlagDep(CPSRDef, MI)) return false; // Add the 16-bit instruction. DebugLoc dl = MI->getDebugLoc(); - MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID); + MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID); MIB.addOperand(MI->getOperand(0)); - if (NewTID.hasOptionalDef()) { + if (NewMCID.hasOptionalDef()) { if (HasCC) AddDefaultT1CC(MIB, CCDead); else @@ -711,29 +739,25 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, } // Transfer the rest of operands. - unsigned NumOps = TID.getNumOperands(); + unsigned NumOps = MCID.getNumOperands(); for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { - if (i < NumOps && TID.OpInfo[i].isOptionalDef()) + if (i < NumOps && MCID.OpInfo[i].isOptionalDef()) continue; - if ((TID.getOpcode() == ARM::t2RSBSri || - TID.getOpcode() == ARM::t2RSBri) && i == 2) + if ((MCID.getOpcode() == ARM::t2RSBSri || + MCID.getOpcode() == ARM::t2RSBri) && i == 2) // Skip the zero immediate operand, it's now implicit. continue; - bool isPred = (i < NumOps && TID.OpInfo[i].isPredicate()); + bool isPred = (i < NumOps && MCID.OpInfo[i].isPredicate()); if (SkipPred && isPred) continue; const MachineOperand &MO = MI->getOperand(i); - if (Scale > 1 && !isPred && MO.isImm()) - MIB.addImm(MO.getImm() / Scale); - else { - if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR) - // Skip implicit def of CPSR. Either it's modeled as an optional - // def now or it's already an implicit def on the new instruction. 
- continue; - MIB.addOperand(MO); - } + if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR) + // Skip implicit def of CPSR. Either it's modeled as an optional + // def now or it's already an implicit def on the new instruction. + continue; + MIB.addOperand(MO); } - if (!TID.isPredicable() && NewTID.isPredicable()) + if (!MCID.isPredicable() && NewMCID.isPredicable()) AddDefaultPred(MIB); // Transfer MI flags. diff --git a/lib/Target/Alpha/Alpha.h b/lib/Target/Alpha/Alpha.h index 2c359da..435c95c 100644 --- a/lib/Target/Alpha/Alpha.h +++ b/lib/Target/Alpha/Alpha.h @@ -44,10 +44,13 @@ namespace llvm { // Defines symbolic names for Alpha registers. This defines a mapping from // register name to register number. // -#include "AlphaGenRegisterNames.inc" + +#define GET_REGINFO_ENUM +#include "AlphaGenRegisterInfo.inc" // Defines symbolic names for the Alpha instructions. // -#include "AlphaGenInstrNames.inc" +#define GET_INSTRINFO_ENUM +#include "AlphaGenInstrInfo.inc" #endif diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp index 0875cfd..daf9555 100644 --- a/lib/Target/Alpha/AlphaISelLowering.cpp +++ b/lib/Target/Alpha/AlphaISelLowering.cpp @@ -824,41 +824,24 @@ AlphaTargetLowering::getSingleConstraintMatchWeight( return weight; } -std::vector<unsigned> AlphaTargetLowering:: -getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const { +/// Given a register class constraint, like 'r', if this corresponds directly +/// to an LLVM register class, return a register of 0 and the register class +/// pointer. +std::pair<unsigned, const TargetRegisterClass*> AlphaTargetLowering:: +getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const +{ if (Constraint.size() == 1) { switch (Constraint[0]) { - default: break; // Unknown constriant letter - case 'f': - return make_vector<unsigned>(Alpha::F0 , Alpha::F1 , Alpha::F2 , - Alpha::F3 , Alpha::F4 , Alpha::F5 , - Alpha::F6 , Alpha::F7 , Alpha::F8 , - Alpha::F9 , Alpha::F10, Alpha::F11, - Alpha::F12, Alpha::F13, Alpha::F14, - Alpha::F15, Alpha::F16, Alpha::F17, - Alpha::F18, Alpha::F19, Alpha::F20, - Alpha::F21, Alpha::F22, Alpha::F23, - Alpha::F24, Alpha::F25, Alpha::F26, - Alpha::F27, Alpha::F28, Alpha::F29, - Alpha::F30, Alpha::F31, 0); case 'r': - return make_vector<unsigned>(Alpha::R0 , Alpha::R1 , Alpha::R2 , - Alpha::R3 , Alpha::R4 , Alpha::R5 , - Alpha::R6 , Alpha::R7 , Alpha::R8 , - Alpha::R9 , Alpha::R10, Alpha::R11, - Alpha::R12, Alpha::R13, Alpha::R14, - Alpha::R15, Alpha::R16, Alpha::R17, - Alpha::R18, Alpha::R19, Alpha::R20, - Alpha::R21, Alpha::R22, Alpha::R23, - Alpha::R24, Alpha::R25, Alpha::R26, - Alpha::R27, Alpha::R28, Alpha::R29, - Alpha::R30, Alpha::R31, 0); + return std::make_pair(0U, Alpha::GPRCRegisterClass); + case 'f': + return VT == MVT::f64 ? 
std::make_pair(0U, Alpha::F8RCRegisterClass) : + std::make_pair(0U, Alpha::F4RCRegisterClass); } } - - return std::vector<unsigned>(); + return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); } + //===----------------------------------------------------------------------===// // Other Lowering Code //===----------------------------------------------------------------------===// diff --git a/lib/Target/Alpha/AlphaISelLowering.h b/lib/Target/Alpha/AlphaISelLowering.h index d38c314..13383f4 100644 --- a/lib/Target/Alpha/AlphaISelLowering.h +++ b/lib/Target/Alpha/AlphaISelLowering.h @@ -94,9 +94,9 @@ namespace llvm { ConstraintWeight getSingleConstraintMatchWeight( AsmOperandInfo &info, const char *constraint) const; - std::vector<unsigned> - getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const; + std::pair<unsigned, const TargetRegisterClass*> + getRegForInlineAsmConstraint(const std::string &Constraint, + EVT VT) const; MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp index 5a2f561..220f167 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.cpp +++ b/lib/Target/Alpha/AlphaInstrInfo.cpp @@ -14,16 +14,19 @@ #include "Alpha.h" #include "AlphaInstrInfo.h" #include "AlphaMachineFunctionInfo.h" -#include "AlphaGenInstrInfo.inc" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/Support/ErrorHandling.h" + +#define GET_INSTRINFO_MC_DESC +#include "AlphaGenInstrInfo.inc" using namespace llvm; AlphaInstrInfo::AlphaInstrInfo() - : TargetInstrInfoImpl(AlphaInsts, array_lengthof(AlphaInsts)), + : TargetInstrInfoImpl(AlphaInsts, array_lengthof(AlphaInsts), + Alpha::ADJUSTSTACKDOWN, Alpha::ADJUSTSTACKUP), RI(*this) { } diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp index d6c3809..0289307 100644 --- a/lib/Target/Alpha/AlphaRegisterInfo.cpp +++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp @@ -33,10 +33,15 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include <cstdlib> + +#define GET_REGINFO_MC_DESC +#define GET_REGINFO_TARGET_DESC +#include "AlphaGenRegisterInfo.inc" + using namespace llvm; AlphaRegisterInfo::AlphaRegisterInfo(const TargetInstrInfo &tii) - : AlphaGenRegisterInfo(Alpha::ADJUSTSTACKDOWN, Alpha::ADJUSTSTACKUP), + : AlphaGenRegisterInfo(), TII(tii) { } @@ -204,10 +209,8 @@ int AlphaRegisterInfo::getLLVMRegNum(unsigned DwarfRegNum, bool isEH) const { return -1; } -#include "AlphaGenRegisterInfo.inc" - std::string AlphaRegisterInfo::getPrettyName(unsigned reg) { - std::string s(RegisterDescriptors[reg].Name); + std::string s(AlphaRegDesc[reg].Name); return s; } diff --git a/lib/Target/Alpha/AlphaRegisterInfo.h b/lib/Target/Alpha/AlphaRegisterInfo.h index ffe6cf1..1072bf7 100644 --- a/lib/Target/Alpha/AlphaRegisterInfo.h +++ b/lib/Target/Alpha/AlphaRegisterInfo.h @@ -15,7 +15,9 @@ #define ALPHAREGISTERINFO_H #include "llvm/Target/TargetRegisterInfo.h" -#include "AlphaGenRegisterInfo.h.inc" + +#define GET_REGINFO_HEADER +#include "AlphaGenRegisterInfo.inc" namespace llvm { diff --git a/lib/Target/Alpha/AlphaSubtarget.cpp b/lib/Target/Alpha/AlphaSubtarget.cpp index bda7104..7080327 100644 --- a/lib/Target/Alpha/AlphaSubtarget.cpp +++ b/lib/Target/Alpha/AlphaSubtarget.cpp @@ -16,10 +16,13 @@ #include "AlphaGenSubtarget.inc" using namespace llvm; 
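A second cross-target pattern, visible in the Alpha hunks above and repeated for Blackfin, CellSPU, and MBlaze below: the call-frame pseudo opcodes move out of the generated register-info constructor and into TargetInstrInfoImpl, leaving the generated register-info base default-constructed. A sketch with Foo as a stand-in target name (Alpha spells the pseudos ADJUSTSTACKDOWN/ADJUSTSTACKUP, the others ADJCALLSTACK*):

    // Before: frame pseudos were passed to the register-info base.
    FooRegisterInfo::FooRegisterInfo(const TargetInstrInfo &tii)
      : FooGenRegisterInfo(Foo::ADJCALLSTACKDOWN, Foo::ADJCALLSTACKUP),
        TII(tii) {}

    // After: they travel with the instruction-info tables instead.
    FooInstrInfo::FooInstrInfo()
      : TargetInstrInfoImpl(FooInsts, array_lengthof(FooInsts),
                            Foo::ADJCALLSTACKDOWN, Foo::ADJCALLSTACKUP),
        RI(*this) {}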
-AlphaSubtarget::AlphaSubtarget(const std::string &TT, const std::string &FS) +AlphaSubtarget::AlphaSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS) : HasCT(false) { - std::string CPU = "generic"; + std::string CPUName = CPU; + if (CPUName.empty()) + CPUName = "generic"; // Parse features string. - ParseSubtargetFeatures(FS, CPU); + ParseSubtargetFeatures(FS, CPUName); } diff --git a/lib/Target/Alpha/AlphaSubtarget.h b/lib/Target/Alpha/AlphaSubtarget.h index f0eb93c..b1ccf26 100644 --- a/lib/Target/Alpha/AlphaSubtarget.h +++ b/lib/Target/Alpha/AlphaSubtarget.h @@ -14,9 +14,8 @@ #ifndef ALPHASUBTARGET_H #define ALPHASUBTARGET_H -#include "llvm/Target/TargetInstrItineraries.h" #include "llvm/Target/TargetSubtarget.h" - +#include "llvm/MC/MCInstrItineraries.h" #include <string> namespace llvm { @@ -32,12 +31,12 @@ public: /// This constructor initializes the data members to match that /// of the specified triple. /// - AlphaSubtarget(const std::string &TT, const std::string &FS); + AlphaSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. - std::string ParseSubtargetFeatures(const std::string &FS, - const std::string &CPU); + void ParseSubtargetFeatures(const std::string &FS, const std::string &CPU); bool hasCT() const { return HasCT; } }; diff --git a/lib/Target/Alpha/AlphaTargetMachine.cpp b/lib/Target/Alpha/AlphaTargetMachine.cpp index b53533b..e854ccd 100644 --- a/lib/Target/Alpha/AlphaTargetMachine.cpp +++ b/lib/Target/Alpha/AlphaTargetMachine.cpp @@ -25,11 +25,12 @@ extern "C" void LLVMInitializeAlphaTarget() { } AlphaTargetMachine::AlphaTargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS) : LLVMTargetMachine(T, TT), DataLayout("e-f128:128:128-n64"), FrameLowering(Subtarget), - Subtarget(TT, FS), + Subtarget(TT, CPU, FS), TLInfo(*this), TSInfo(*this) { setRelocationModel(Reloc::PIC_); diff --git a/lib/Target/Alpha/AlphaTargetMachine.h b/lib/Target/Alpha/AlphaTargetMachine.h index 26238fb..cf00e58 100644 --- a/lib/Target/Alpha/AlphaTargetMachine.h +++ b/lib/Target/Alpha/AlphaTargetMachine.h @@ -37,7 +37,7 @@ class AlphaTargetMachine : public LLVMTargetMachine { public: AlphaTargetMachine(const Target &T, const std::string &TT, - const std::string &FS); + const std::string &CPU, const std::string &FS); virtual const AlphaInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const TargetFrameLowering *getFrameLowering() const { diff --git a/lib/Target/Alpha/CMakeLists.txt b/lib/Target/Alpha/CMakeLists.txt index 454262a..1f9edcf 100644 --- a/lib/Target/Alpha/CMakeLists.txt +++ b/lib/Target/Alpha/CMakeLists.txt @@ -1,10 +1,7 @@ set(LLVM_TARGET_DEFINITIONS Alpha.td) -tablegen(AlphaGenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(AlphaGenRegisterNames.inc -gen-register-enums) -tablegen(AlphaGenRegisterInfo.inc -gen-register-desc) -tablegen(AlphaGenInstrNames.inc -gen-instr-enums) -tablegen(AlphaGenInstrInfo.inc -gen-instr-desc) +tablegen(AlphaGenRegisterInfo.inc -gen-register-info) +tablegen(AlphaGenInstrInfo.inc -gen-instr-info) tablegen(AlphaGenAsmWriter.inc -gen-asm-writer) tablegen(AlphaGenDAGISel.inc -gen-dag-isel) tablegen(AlphaGenCallingConv.inc -gen-callingconv) diff --git a/lib/Target/Alpha/Makefile b/lib/Target/Alpha/Makefile index 9564be6..40c4f90 100644 --- a/lib/Target/Alpha/Makefile +++ 
b/lib/Target/Alpha/Makefile @@ -12,9 +12,7 @@ LIBRARYNAME = LLVMAlphaCodeGen TARGET = Alpha # Make sure that tblgen is run, first thing. -BUILT_SOURCES = AlphaGenRegisterInfo.h.inc AlphaGenRegisterNames.inc \ - AlphaGenRegisterInfo.inc AlphaGenInstrNames.inc \ - AlphaGenInstrInfo.inc \ +BUILT_SOURCES = AlphaGenRegisterInfo.inc AlphaGenInstrInfo.inc \ AlphaGenAsmWriter.inc AlphaGenDAGISel.inc \ AlphaGenCallingConv.inc AlphaGenSubtarget.inc diff --git a/lib/Target/Blackfin/Blackfin.h b/lib/Target/Blackfin/Blackfin.h index ec1fa86..c3ee7e7 100644 --- a/lib/Target/Blackfin/Blackfin.h +++ b/lib/Target/Blackfin/Blackfin.h @@ -30,9 +30,11 @@ namespace llvm { // Defines symbolic names for Blackfin registers. This defines a mapping from // register name to register number. -#include "BlackfinGenRegisterNames.inc" +#define GET_REGINFO_ENUM +#include "BlackfinGenRegisterInfo.inc" // Defines symbolic names for the Blackfin instructions. -#include "BlackfinGenInstrNames.inc" +#define GET_INSTRINFO_ENUM +#include "BlackfinGenInstrInfo.inc" #endif diff --git a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp index 42659ae..215ca43 100644 --- a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp +++ b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp @@ -146,21 +146,21 @@ void BlackfinDAGToDAGISel::FixRegisterClasses(SelectionDAG &DAG) { NI != DAG.allnodes_end(); ++NI) { if (NI->use_empty() || !NI->isMachineOpcode()) continue; - const TargetInstrDesc &DefTID = TII.get(NI->getMachineOpcode()); + const MCInstrDesc &DefMCID = TII.get(NI->getMachineOpcode()); for (SDNode::use_iterator UI = NI->use_begin(); !UI.atEnd(); ++UI) { if (!UI->isMachineOpcode()) continue; - if (UI.getUse().getResNo() >= DefTID.getNumDefs()) + if (UI.getUse().getResNo() >= DefMCID.getNumDefs()) continue; const TargetRegisterClass *DefRC = - DefTID.OpInfo[UI.getUse().getResNo()].getRegClass(TRI); + TII.getRegClass(DefMCID, UI.getUse().getResNo(), TRI); - const TargetInstrDesc &UseTID = TII.get(UI->getMachineOpcode()); - if (UseTID.getNumDefs()+UI.getOperandNo() >= UseTID.getNumOperands()) + const MCInstrDesc &UseMCID = TII.get(UI->getMachineOpcode()); + if (UseMCID.getNumDefs()+UI.getOperandNo() >= UseMCID.getNumOperands()) continue; const TargetRegisterClass *UseRC = - UseTID.OpInfo[UseTID.getNumDefs()+UI.getOperandNo()].getRegClass(TRI); + TII.getRegClass(UseMCID, UseMCID.getNumDefs()+UI.getOperandNo(), TRI); if (!DefRC || !UseRC) continue; // We cannot copy CC <-> !(CC/D) diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp index 588d9bd..d572832 100644 --- a/lib/Target/Blackfin/BlackfinISelLowering.cpp +++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp @@ -621,39 +621,21 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { case 'w': return Pair(0U, ALLRegisterClass); case 'Z': return Pair(P3, PRegisterClass); case 'Y': return Pair(P1, PRegisterClass); + case 'z': return Pair(0U, zConsRegisterClass); + case 'D': return Pair(0U, DConsRegisterClass); + case 'W': return Pair(0U, WConsRegisterClass); + case 'c': return Pair(0U, cConsRegisterClass); + case 't': return Pair(0U, tConsRegisterClass); + case 'u': return Pair(0U, uConsRegisterClass); + case 'k': return Pair(0U, kConsRegisterClass); + case 'y': return Pair(0U, yConsRegisterClass); } // Not implemented: q0-q7, qA. Use {R2} etc instead. 
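Constraint letters that previously had no real register class, and so fell back to an explicit register vector, are instead given dedicated TableGen classes in this patch (zCons, DCons, WCons, and friends, defined in BlackfinRegisterInfo.td further down), which lets the common hook return a (register, class) pair; the removed fallback follows immediately. A minimal sketch of the new-style hook, with SomeTargetLowering as a placeholder name:

    std::pair<unsigned, const TargetRegisterClass*>
    SomeTargetLowering::getRegForInlineAsmConstraint(
        const std::string &Constraint, EVT VT) const {
      if (Constraint.size() == 1) {
        switch (Constraint[0]) {
        case 'z':  // register 0 means "any register in the class"
          return std::make_pair(0U, BF::zConsRegisterClass);
        default:
          break;
        }
      }
      // Defer anything unhandled to the common implementation.
      return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
    }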
- // Constraints z, D, W, c, t, u, k, and y use non-existing classes, defer to - // getRegClassForInlineAsmConstraint() return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); } -std::vector<unsigned> BlackfinTargetLowering:: -getRegClassForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { - using namespace BF; - - if (Constraint.size() != 1) - return std::vector<unsigned>(); - - switch (Constraint[0]) { - case 'z': return make_vector<unsigned>(P0, P1, P2, 0); - case 'D': return make_vector<unsigned>(R0, R2, R4, R6, 0); - case 'W': return make_vector<unsigned>(R1, R3, R5, R7, 0); - case 'c': return make_vector<unsigned>(I0, I1, I2, I3, - B0, B1, B2, B3, - L0, L1, L2, L3, 0); - case 't': return make_vector<unsigned>(LT0, LT1, 0); - case 'u': return make_vector<unsigned>(LB0, LB1, 0); - case 'k': return make_vector<unsigned>(LC0, LC1, 0); - case 'y': return make_vector<unsigned>(RETS, RETN, RETI, RETX, RETE, - ASTAT, SEQSTAT, USP, 0); - } - - return std::vector<unsigned>(); -} - bool BlackfinTargetLowering:: isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The Blackfin target isn't yet aware of offsets. diff --git a/lib/Target/Blackfin/BlackfinISelLowering.h b/lib/Target/Blackfin/BlackfinISelLowering.h index 9a54557..b65775b 100644 --- a/lib/Target/Blackfin/BlackfinISelLowering.h +++ b/lib/Target/Blackfin/BlackfinISelLowering.h @@ -48,9 +48,6 @@ namespace llvm { std::pair<unsigned, const TargetRegisterClass*> getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; - std::vector<unsigned> - getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const; virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; const char *getTargetNodeName(unsigned Opcode) const; diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.cpp b/lib/Target/Blackfin/BlackfinInstrInfo.cpp index 598cf2a..60da4c4 100644 --- a/lib/Target/Blackfin/BlackfinInstrInfo.cpp +++ b/lib/Target/Blackfin/BlackfinInstrInfo.cpp @@ -19,12 +19,15 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/Support/ErrorHandling.h" + +#define GET_INSTRINFO_MC_DESC #include "BlackfinGenInstrInfo.inc" using namespace llvm; BlackfinInstrInfo::BlackfinInstrInfo(BlackfinSubtarget &ST) - : TargetInstrInfoImpl(BlackfinInsts, array_lengthof(BlackfinInsts)), + : TargetInstrInfoImpl(BlackfinInsts, array_lengthof(BlackfinInsts), + BF::ADJCALLSTACKDOWN, BF::ADJCALLSTACKUP), RI(ST, *this), Subtarget(ST) {} diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp index 6ca460e..2f4a453 100644 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp @@ -29,13 +29,16 @@ #include "llvm/Type.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" + +#define GET_REGINFO_MC_DESC +#define GET_REGINFO_TARGET_DESC +#include "BlackfinGenRegisterInfo.inc" + using namespace llvm; BlackfinRegisterInfo::BlackfinRegisterInfo(BlackfinSubtarget &st, const TargetInstrInfo &tii) - : BlackfinGenRegisterInfo(BF::ADJCALLSTACKDOWN, BF::ADJCALLSTACKUP), - Subtarget(st), - TII(tii) {} + : BlackfinGenRegisterInfo(), Subtarget(st), TII(tii) {} const unsigned* BlackfinRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { @@ -356,6 +359,3 @@ int BlackfinRegisterInfo::getLLVMRegNum(unsigned DwarfRegNum, llvm_unreachable("What is the dwarf register number"); return -1; } - -#include "BlackfinGenRegisterInfo.inc" - diff --git 
a/lib/Target/Blackfin/BlackfinRegisterInfo.h b/lib/Target/Blackfin/BlackfinRegisterInfo.h index 375d277..86f45c1 100644 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.h +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.h @@ -16,7 +16,9 @@ #define BLACKFINREGISTERINFO_H #include "llvm/Target/TargetRegisterInfo.h" -#include "BlackfinGenRegisterInfo.h.inc" + +#define GET_REGINFO_HEADER +#include "BlackfinGenRegisterInfo.inc" namespace llvm { diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.td b/lib/Target/Blackfin/BlackfinRegisterInfo.td index 0d502fd..1c42205 100644 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.td +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.td @@ -261,3 +261,17 @@ def StatBit : RegisterClass<"BF", [i1], 8, // Should be i40, but that isn't defined. It is not a legal type yet anyway. def Accu : RegisterClass<"BF", [i64], 64, (add A0, A1)>; + +// Register classes to match inline asm constraints. +def zCons : RegisterClass<"BF", [i32], 32, (add P0, P1, P2)>; +def DCons : RegisterClass<"BF", [i32], 32, (add R0, R2, R4, R6)>; +def WCons : RegisterClass<"BF", [i32], 32, (add R1, R3, R5, R7)>; +def cCons : RegisterClass<"BF", [i32], 32, (add I0, I1, I2, I3, + B0, B1, B2, B3, + L0, L1, L2, L3)>; +def tCons : RegisterClass<"BF", [i32], 32, (add LT0, LT1)>; +def uCons : RegisterClass<"BF", [i32], 32, (add LB0, LB1)>; +def kCons : RegisterClass<"BF", [i32], 32, (add LC0, LC1)>; +def yCons : RegisterClass<"BF", [i32], 32, (add RETS, RETN, RETI, RETX, + RETE, ASTAT, SEQSTAT, + USP)>; diff --git a/lib/Target/Blackfin/BlackfinSubtarget.cpp b/lib/Target/Blackfin/BlackfinSubtarget.cpp index e104c52..5092026 100644 --- a/lib/Target/Blackfin/BlackfinSubtarget.cpp +++ b/lib/Target/Blackfin/BlackfinSubtarget.cpp @@ -17,6 +17,7 @@ using namespace llvm; BlackfinSubtarget::BlackfinSubtarget(const std::string &TT, + const std::string &CPU, const std::string &FS) : sdram(false), icplb(false), @@ -30,7 +31,9 @@ BlackfinSubtarget::BlackfinSubtarget(const std::string &TT, wa_killed_mmr(false), wa_rets(false) { - std::string CPU = "generic"; + std::string CPUName = CPU; + if (CPUName.empty()) + CPUName = "generic"; // Parse features string. - ParseSubtargetFeatures(FS, CPU); + ParseSubtargetFeatures(FS, CPUName); } diff --git a/lib/Target/Blackfin/BlackfinSubtarget.h b/lib/Target/Blackfin/BlackfinSubtarget.h index d667fe2..a1a09ec 100644 --- a/lib/Target/Blackfin/BlackfinSubtarget.h +++ b/lib/Target/Blackfin/BlackfinSubtarget.h @@ -32,11 +32,12 @@ namespace llvm { bool wa_killed_mmr; bool wa_rets; public: - BlackfinSubtarget(const std::string &TT, const std::string &FS); + BlackfinSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. 
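The subtarget changes here are the same for every target in this merge: constructors gain an explicit CPU string alongside the triple and feature string, an empty CPU falls back to a per-target default before feature parsing, and (as the declaration just below is updated to show) ParseSubtargetFeatures stops returning the resolved CPU name. A sketch of the resulting constructor idiom, with Foo and "generic" as placeholders:

    FooSubtarget::FooSubtarget(const std::string &TT,
                               const std::string &CPU,
                               const std::string &FS) {
      std::string CPUName = CPU;
      if (CPUName.empty())
        CPUName = "generic";               // per-target default CPU
      // The tblgen-generated parser now only sets feature bits, so
      // its old std::string return value is gone.
      ParseSubtargetFeatures(FS, CPUName);
    }

Callers thread the extra argument through as well, e.g. Target::createTargetMachine(triple, CPU, features) in the MBlaze asm-lexer hunk further down.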
- std::string ParseSubtargetFeatures(const std::string &FS, + void ParseSubtargetFeatures(const std::string &FS, const std::string &CPU); }; diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.cpp b/lib/Target/Blackfin/BlackfinTargetMachine.cpp index e11920f..477c438 100644 --- a/lib/Target/Blackfin/BlackfinTargetMachine.cpp +++ b/lib/Target/Blackfin/BlackfinTargetMachine.cpp @@ -26,10 +26,11 @@ extern "C" void LLVMInitializeBlackfinTarget() { BlackfinTargetMachine::BlackfinTargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS) : LLVMTargetMachine(T, TT), DataLayout("e-p:32:32-i64:32-f64:32-n32"), - Subtarget(TT, FS), + Subtarget(TT, CPU, FS), TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget), diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.h b/lib/Target/Blackfin/BlackfinTargetMachine.h index 29b2b17..bd7dc84 100644 --- a/lib/Target/Blackfin/BlackfinTargetMachine.h +++ b/lib/Target/Blackfin/BlackfinTargetMachine.h @@ -36,7 +36,7 @@ namespace llvm { BlackfinIntrinsicInfo IntrinsicInfo; public: BlackfinTargetMachine(const Target &T, const std::string &TT, - const std::string &FS); + const std::string &CPU, const std::string &FS); virtual const BlackfinInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const TargetFrameLowering *getFrameLowering() const { diff --git a/lib/Target/Blackfin/CMakeLists.txt b/lib/Target/Blackfin/CMakeLists.txt index a47299f..8fc63aa 100644 --- a/lib/Target/Blackfin/CMakeLists.txt +++ b/lib/Target/Blackfin/CMakeLists.txt @@ -1,10 +1,7 @@ set(LLVM_TARGET_DEFINITIONS Blackfin.td) -tablegen(BlackfinGenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(BlackfinGenRegisterNames.inc -gen-register-enums) -tablegen(BlackfinGenRegisterInfo.inc -gen-register-desc) -tablegen(BlackfinGenInstrNames.inc -gen-instr-enums) -tablegen(BlackfinGenInstrInfo.inc -gen-instr-desc) +tablegen(BlackfinGenRegisterInfo.inc -gen-register-info) +tablegen(BlackfinGenInstrInfo.inc -gen-instr-info) tablegen(BlackfinGenAsmWriter.inc -gen-asm-writer) tablegen(BlackfinGenDAGISel.inc -gen-dag-isel) tablegen(BlackfinGenSubtarget.inc -gen-subtarget) diff --git a/lib/Target/Blackfin/Makefile b/lib/Target/Blackfin/Makefile index 5eb8e9a..a9edec7 100644 --- a/lib/Target/Blackfin/Makefile +++ b/lib/Target/Blackfin/Makefile @@ -12,9 +12,8 @@ LIBRARYNAME = LLVMBlackfinCodeGen TARGET = Blackfin # Make sure that tblgen is run, first thing. 
-BUILT_SOURCES = BlackfinGenRegisterInfo.h.inc BlackfinGenRegisterNames.inc \ - BlackfinGenRegisterInfo.inc BlackfinGenInstrNames.inc \ - BlackfinGenInstrInfo.inc BlackfinGenAsmWriter.inc \ +BUILT_SOURCES = BlackfinGenRegisterInfo.inc BlackfinGenInstrInfo.inc \ + BlackfinGenAsmWriter.inc \ BlackfinGenDAGISel.inc BlackfinGenSubtarget.inc \ BlackfinGenCallingConv.inc BlackfinGenIntrinsics.inc diff --git a/lib/Target/CBackend/CTargetMachine.h b/lib/Target/CBackend/CTargetMachine.h index 6fed195..88cc8eb 100644 --- a/lib/Target/CBackend/CTargetMachine.h +++ b/lib/Target/CBackend/CTargetMachine.h @@ -20,7 +20,8 @@ namespace llvm { struct CTargetMachine : public TargetMachine { - CTargetMachine(const Target &T, const std::string &TT, const std::string &FS) + CTargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS) : TargetMachine(T) {} virtual bool addPassesToEmitFile(PassManagerBase &PM, diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt index 09b48ce..7e2edd9 100644 --- a/lib/Target/CMakeLists.txt +++ b/lib/Target/CMakeLists.txt @@ -1,6 +1,5 @@ add_llvm_library(LLVMTarget Mangler.cpp - SubtargetFeature.cpp Target.cpp TargetAsmInfo.cpp TargetAsmLexer.cpp diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt index a2a2ef1..d769cb9 100644 --- a/lib/Target/CellSPU/CMakeLists.txt +++ b/lib/Target/CellSPU/CMakeLists.txt @@ -1,12 +1,9 @@ set(LLVM_TARGET_DEFINITIONS SPU.td) -tablegen(SPUGenInstrNames.inc -gen-instr-enums) -tablegen(SPUGenRegisterNames.inc -gen-register-enums) tablegen(SPUGenAsmWriter.inc -gen-asm-writer) tablegen(SPUGenCodeEmitter.inc -gen-emitter) -tablegen(SPUGenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(SPUGenRegisterInfo.inc -gen-register-desc) -tablegen(SPUGenInstrInfo.inc -gen-instr-desc) +tablegen(SPUGenRegisterInfo.inc -gen-register-info) +tablegen(SPUGenInstrInfo.inc -gen-instr-info) tablegen(SPUGenDAGISel.inc -gen-dag-isel) tablegen(SPUGenSubtarget.inc -gen-subtarget) tablegen(SPUGenCallingConv.inc -gen-callingconv) diff --git a/lib/Target/CellSPU/Makefile b/lib/Target/CellSPU/Makefile index 77c66be..5bb6f9c 100644 --- a/lib/Target/CellSPU/Makefile +++ b/lib/Target/CellSPU/Makefile @@ -10,10 +10,9 @@ LEVEL = ../../.. LIBRARYNAME = LLVMCellSPUCodeGen TARGET = SPU -BUILT_SOURCES = SPUGenInstrNames.inc SPUGenRegisterNames.inc \ +BUILT_SOURCES = SPUGenInstrInfo.inc SPUGenRegisterInfo.inc \ SPUGenAsmWriter.inc SPUGenCodeEmitter.inc \ - SPUGenRegisterInfo.h.inc SPUGenRegisterInfo.inc \ - SPUGenInstrInfo.inc SPUGenDAGISel.inc \ + SPUGenDAGISel.inc \ SPUGenSubtarget.inc SPUGenCallingConv.inc DIRS = TargetInfo diff --git a/lib/Target/CellSPU/SPU.h b/lib/Target/CellSPU/SPU.h index 72f8430..5c81c9a 100644 --- a/lib/Target/CellSPU/SPU.h +++ b/lib/Target/CellSPU/SPU.h @@ -30,6 +30,7 @@ namespace llvm { // Defines symbolic names for the SPU instructions. 
// -#include "SPUGenInstrNames.inc" +#define GET_INSTRINFO_ENUM +#include "SPUGenInstrInfo.inc" #endif /* LLVM_TARGET_IBMCELLSPU_H */ diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp index 080434d..5087b47 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.cpp +++ b/lib/Target/CellSPU/SPUInstrInfo.cpp @@ -15,7 +15,6 @@ #include "SPUInstrInfo.h" #include "SPUInstrBuilder.h" #include "SPUTargetMachine.h" -#include "SPUGenInstrInfo.inc" #include "SPUHazardRecognizers.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/Support/Debug.h" @@ -23,6 +22,9 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/MC/MCContext.h" +#define GET_INSTRINFO_MC_DESC +#include "SPUGenInstrInfo.inc" + using namespace llvm; namespace { @@ -51,7 +53,8 @@ namespace { } SPUInstrInfo::SPUInstrInfo(SPUTargetMachine &tm) - : TargetInstrInfoImpl(SPUInsts, sizeof(SPUInsts)/sizeof(SPUInsts[0])), + : TargetInstrInfoImpl(SPUInsts, sizeof(SPUInsts)/sizeof(SPUInsts[0]), + SPU::ADJCALLSTACKDOWN, SPU::ADJCALLSTACKUP), TM(tm), RI(*TM.getSubtargetImpl(), *this) { /* NOP */ } diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp index 623ae76..fefd141 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.cpp +++ b/lib/Target/CellSPU/SPURegisterInfo.cpp @@ -43,6 +43,10 @@ #include "llvm/ADT/STLExtras.h" #include <cstdlib> +#define GET_REGINFO_MC_DESC +#define GET_REGINFO_TARGET_DESC +#include "SPUGenRegisterInfo.inc" + using namespace llvm; /// getRegisterNumbering - Given the enum value for some register, e.g. @@ -185,9 +189,7 @@ unsigned SPURegisterInfo::getRegisterNumbering(unsigned RegEnum) { SPURegisterInfo::SPURegisterInfo(const SPUSubtarget &subtarget, const TargetInstrInfo &tii) : - SPUGenRegisterInfo(SPU::ADJCALLSTACKDOWN, SPU::ADJCALLSTACKUP), - Subtarget(subtarget), - TII(tii) + SPUGenRegisterInfo(), Subtarget(subtarget), TII(tii) { } @@ -371,5 +373,3 @@ SPURegisterInfo::findScratchRegister(MachineBasicBlock::iterator II, assert( Reg && "Register scavenger failed"); return Reg; } - -#include "SPUGenRegisterInfo.inc" diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h index 6ecf0f2..5e014f8 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.h +++ b/lib/Target/CellSPU/SPURegisterInfo.h @@ -16,7 +16,9 @@ #define SPU_REGISTERINFO_H #include "SPU.h" -#include "SPUGenRegisterInfo.h.inc" + +#define GET_REGINFO_HEADER +#include "SPUGenRegisterInfo.inc" namespace llvm { class SPUSubtarget; diff --git a/lib/Target/CellSPU/SPURegisterNames.h b/lib/Target/CellSPU/SPURegisterNames.h index 6c3afdf..e557ed3 100644 --- a/lib/Target/CellSPU/SPURegisterNames.h +++ b/lib/Target/CellSPU/SPURegisterNames.h @@ -13,6 +13,7 @@ // Define symbolic names for Cell registers. This defines a mapping from // register name to register number. 
// -#include "SPUGenRegisterNames.inc" +#define GET_REGINFO_ENUM +#include "SPUGenRegisterInfo.inc" #endif diff --git a/lib/Target/CellSPU/SPUSubtarget.cpp b/lib/Target/CellSPU/SPUSubtarget.cpp index 07c8352..a1a9f51 100644 --- a/lib/Target/CellSPU/SPUSubtarget.cpp +++ b/lib/Target/CellSPU/SPUSubtarget.cpp @@ -19,7 +19,8 @@ using namespace llvm; -SPUSubtarget::SPUSubtarget(const std::string &TT, const std::string &FS) : +SPUSubtarget::SPUSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS) : StackAlignment(16), ProcDirective(SPU::DEFAULT_PROC), UseLargeMem(false) diff --git a/lib/Target/CellSPU/SPUSubtarget.h b/lib/Target/CellSPU/SPUSubtarget.h index d792930..69a60db 100644 --- a/lib/Target/CellSPU/SPUSubtarget.h +++ b/lib/Target/CellSPU/SPUSubtarget.h @@ -14,9 +14,8 @@ #ifndef CELLSUBTARGET_H #define CELLSUBTARGET_H -#include "llvm/Target/TargetInstrItineraries.h" #include "llvm/Target/TargetSubtarget.h" - +#include "llvm/MC/MCInstrItineraries.h" #include <string> namespace llvm { @@ -50,12 +49,12 @@ namespace llvm { /// This constructor initializes the data members to match that /// of the specified triple. /// - SPUSubtarget(const std::string &TT, const std::string &FS); + SPUSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. - std::string ParseSubtargetFeatures(const std::string &FS, - const std::string &CPU); + void ParseSubtargetFeatures(const std::string &FS, const std::string &CPU); /// SetJITMode - This is called to inform the subtarget info that we are /// producing code for the JIT. diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp index 3ed7361..f04e982 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.cpp +++ b/lib/Target/CellSPU/SPUTargetMachine.cpp @@ -35,9 +35,9 @@ SPUFrameLowering::getCalleeSaveSpillSlots(unsigned &NumEntries) const { } SPUTargetMachine::SPUTargetMachine(const Target &T, const std::string &TT, - const std::string &FS) + const std::string &CPU,const std::string &FS) : LLVMTargetMachine(T, TT), - Subtarget(TT, FS), + Subtarget(TT, CPU, FS), DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this), FrameLowering(Subtarget), diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h index 75abd5e..d96f86d 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.h +++ b/lib/Target/CellSPU/SPUTargetMachine.h @@ -39,7 +39,7 @@ class SPUTargetMachine : public LLVMTargetMachine { InstrItineraryData InstrItins; public: SPUTargetMachine(const Target &T, const std::string &TT, - const std::string &FS); + const std::string &CPU, const std::string &FS); /// Return the subtarget implementation object virtual const SPUSubtarget *getSubtargetImpl() const { diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h index e42166e..8023e13 100644 --- a/lib/Target/CppBackend/CPPTargetMachine.h +++ b/lib/Target/CppBackend/CPPTargetMachine.h @@ -23,7 +23,7 @@ class formatted_raw_ostream; struct CPPTargetMachine : public TargetMachine { CPPTargetMachine(const Target &T, const std::string &TT, - const std::string &FS) + const std::string &CPU, const std::string &FS) : TargetMachine(T) {} virtual bool addPassesToEmitFile(PassManagerBase &PM, diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp index 1903796..1596596 
100644 --- a/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp +++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp @@ -86,8 +86,9 @@ namespace { : MBlazeBaseAsmLexer(T, MAI) { std::string tripleString("mblaze-unknown-unknown"); std::string featureString; + std::string CPU; OwningPtr<const TargetMachine> - targetMachine(T.createTargetMachine(tripleString, featureString)); + targetMachine(T.createTargetMachine(tripleString, CPU, featureString)); InitRegisterMap(targetMachine->getRegisterInfo()); } }; diff --git a/lib/Target/MBlaze/CMakeLists.txt b/lib/Target/MBlaze/CMakeLists.txt index 004057a..2aa9847 100644 --- a/lib/Target/MBlaze/CMakeLists.txt +++ b/lib/Target/MBlaze/CMakeLists.txt @@ -1,10 +1,7 @@ set(LLVM_TARGET_DEFINITIONS MBlaze.td) -tablegen(MBlazeGenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(MBlazeGenRegisterNames.inc -gen-register-enums) -tablegen(MBlazeGenRegisterInfo.inc -gen-register-desc) -tablegen(MBlazeGenInstrNames.inc -gen-instr-enums) -tablegen(MBlazeGenInstrInfo.inc -gen-instr-desc) +tablegen(MBlazeGenRegisterInfo.inc -gen-register-info) +tablegen(MBlazeGenInstrInfo.inc -gen-instr-info) tablegen(MBlazeGenCodeEmitter.inc -gen-emitter) tablegen(MBlazeGenAsmWriter.inc -gen-asm-writer) tablegen(MBlazeGenAsmMatcher.inc -gen-asm-matcher) diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp index 060a87b..1464274 100644 --- a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp +++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp @@ -27,6 +27,7 @@ // #include "MBlazeGenDecoderTables.inc" // #include "MBlazeGenRegisterNames.inc" +#define GET_INSTRINFO_MC_DESC #include "MBlazeGenInstrInfo.inc" #include "MBlazeGenEDInfo.inc" diff --git a/lib/Target/MBlaze/MBlaze.h b/lib/Target/MBlaze/MBlaze.h index 00c73f0..e9aff5b 100644 --- a/lib/Target/MBlaze/MBlaze.h +++ b/lib/Target/MBlaze/MBlaze.h @@ -39,9 +39,11 @@ namespace llvm { // Defines symbolic names for MBlaze registers. This defines a mapping from // register name to register number. -#include "MBlazeGenRegisterNames.inc" +#define GET_REGINFO_ENUM +#include "MBlazeGenRegisterInfo.inc" // Defines symbolic names for the MBlaze instructions. 
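The include-side counterpart of the tablegen consolidation appears next: instead of separate *GenRegisterNames.inc, *GenInstrNames.inc, and *.h.inc files, each target now has one multi-section GenRegisterInfo.inc and GenInstrInfo.inc, and a consumer selects the section it wants with a guard macro before including. The recurring idiom, for a hypothetical target Foo:

    #define GET_INSTRINFO_ENUM        // opcode enum (was FooGenInstrNames.inc)
    #include "FooGenInstrInfo.inc"

    #define GET_REGINFO_HEADER        // FooGenRegisterInfo declaration
    #include "FooGenRegisterInfo.inc" //   (was FooGenRegisterInfo.h.inc)

    #define GET_REGINFO_MC_DESC       // descriptor tables (was the old .inc)
    #define GET_REGINFO_TARGET_DESC
    #include "FooGenRegisterInfo.inc"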
-#include "MBlazeGenInstrNames.inc" +#define GET_INSTRINFO_ENUM +#include "MBlazeGenInstrInfo.inc" #endif diff --git a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp index 973e968..c07570a 100644 --- a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp +++ b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp @@ -109,7 +109,7 @@ static bool delayHasHazard(MachineBasicBlock::iterator &candidate, // Hazard check MachineBasicBlock::iterator a = candidate; MachineBasicBlock::iterator b = slot; - TargetInstrDesc desc = candidate->getDesc(); + MCInstrDesc desc = candidate->getDesc(); // MBB layout:- // candidate := a0 = operation(a1, a2) @@ -183,7 +183,7 @@ static bool isDelayFiller(MachineBasicBlock &MBB, if (candidate == MBB.begin()) return false; - TargetInstrDesc brdesc = (--candidate)->getDesc(); + MCInstrDesc brdesc = (--candidate)->getDesc(); return (brdesc.hasDelaySlot()); } @@ -211,7 +211,7 @@ findDelayInstr(MachineBasicBlock &MBB,MachineBasicBlock::iterator slot) { break; --I; - TargetInstrDesc desc = I->getDesc(); + MCInstrDesc desc = I->getDesc(); if (desc.hasDelaySlot() || desc.isBranch() || isDelayFiller(MBB,I) || desc.isCall() || desc.isReturn() || desc.isBarrier() || hasUnknownSideEffects(I)) diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp index c5e0a89..ba2de40 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.cpp +++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp @@ -1114,15 +1114,19 @@ MBlazeTargetLowering::getSingleConstraintMatchWeight( return weight; } -/// getRegClassForInlineAsmConstraint - Given a constraint letter (e.g. "r"), -/// return a list of registers that can be used to satisfy the constraint. -/// This should only be used for C_RegisterClass constraints. +/// Given a register class constraint, like 'r', if this corresponds directly +/// to an LLVM register class, return a register of 0 and the register class +/// pointer. std::pair<unsigned, const TargetRegisterClass*> MBlazeTargetLowering:: getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': return std::make_pair(0U, MBlaze::GPRRegisterClass); + // TODO: These can't possibly be right, but match what was in + // getRegClassForInlineAsmConstraint. + case 'd': + case 'y': case 'f': if (VT == MVT::f32) return std::make_pair(0U, MBlaze::GPRRegisterClass); @@ -1131,32 +1135,6 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); } -/// Given a register class constraint, like 'r', if this corresponds directly -/// to an LLVM register class, return a register of 0 and the register class -/// pointer. 
-std::vector<unsigned> MBlazeTargetLowering:: -getRegClassForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { - if (Constraint.size() != 1) - return std::vector<unsigned>(); - - switch (Constraint[0]) { - default : break; - case 'r': - // GCC MBlaze Constraint Letters - case 'd': - case 'y': - case 'f': - return make_vector<unsigned>( - MBlaze::R3, MBlaze::R4, MBlaze::R5, MBlaze::R6, - MBlaze::R7, MBlaze::R9, MBlaze::R10, MBlaze::R11, - MBlaze::R12, MBlaze::R19, MBlaze::R20, MBlaze::R21, - MBlaze::R22, MBlaze::R23, MBlaze::R24, MBlaze::R25, - MBlaze::R26, MBlaze::R27, MBlaze::R28, MBlaze::R29, - MBlaze::R30, MBlaze::R31, 0); - } - return std::vector<unsigned>(); -} - bool MBlazeTargetLowering:: isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The MBlaze target isn't yet aware of offsets. diff --git a/lib/Target/MBlaze/MBlazeISelLowering.h b/lib/Target/MBlaze/MBlazeISelLowering.h index 265c1a7..bb128da 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.h +++ b/lib/Target/MBlaze/MBlazeISelLowering.h @@ -173,10 +173,6 @@ namespace llvm { getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; - std::vector<unsigned> - getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const; - virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; /// isFPImmLegal - Returns true if the target can instruction select the diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp index 794ebed..a3af5d9 100644 --- a/lib/Target/MBlaze/MBlazeInstrInfo.cpp +++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp @@ -20,12 +20,15 @@ #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" + +#define GET_INSTRINFO_MC_DESC #include "MBlazeGenInstrInfo.inc" using namespace llvm; MBlazeInstrInfo::MBlazeInstrInfo(MBlazeTargetMachine &tm) - : TargetInstrInfoImpl(MBlazeInsts, array_lengthof(MBlazeInsts)), + : TargetInstrInfoImpl(MBlazeInsts, array_lengthof(MBlazeInsts), + MBlaze::ADJCALLSTACKDOWN, MBlaze::ADJCALLSTACKUP), TM(tm), RI(*TM.getSubtargetImpl(), *this) {} static bool isZeroImm(const MachineOperand &op) { diff --git a/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp b/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp index 3ece1a8..c573d4a 100644 --- a/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp +++ b/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp @@ -179,7 +179,7 @@ void MBlazeMCCodeEmitter:: EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const { unsigned Opcode = MI.getOpcode(); - const TargetInstrDesc &Desc = TII.get(Opcode); + const MCInstrDesc &Desc = TII.get(Opcode); uint64_t TSFlags = Desc.TSFlags; // Keep track of the current byte being emitted. 
unsigned CurByte = 0; diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp index 517279f..441ece1 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp @@ -37,12 +37,15 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" +#define GET_REGINFO_MC_DESC +#define GET_REGINFO_TARGET_DESC +#include "MBlazeGenRegisterInfo.inc" + using namespace llvm; MBlazeRegisterInfo:: MBlazeRegisterInfo(const MBlazeSubtarget &ST, const TargetInstrInfo &tii) - : MBlazeGenRegisterInfo(MBlaze::ADJCALLSTACKDOWN, MBlaze::ADJCALLSTACKUP), - Subtarget(ST), TII(tii) {} + : MBlazeGenRegisterInfo(), Subtarget(ST), TII(tii) {} /// getRegisterNumbering - Given the enum value for some register, e.g. /// MBlaze::R0, return the number that it corresponds to (e.g. 0). @@ -359,6 +362,3 @@ int MBlazeRegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const { int MBlazeRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const { return MBlazeGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0); } - -#include "MBlazeGenRegisterInfo.inc" - diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h index 3807839..7ebce21 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.h +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h @@ -17,7 +17,9 @@ #include "MBlaze.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "MBlazeGenRegisterInfo.h.inc" + +#define GET_REGINFO_HEADER +#include "MBlazeGenRegisterInfo.inc" namespace llvm { class MBlazeSubtarget; diff --git a/lib/Target/MBlaze/MBlazeSubtarget.cpp b/lib/Target/MBlaze/MBlazeSubtarget.cpp index a80744a..034b5ce 100644 --- a/lib/Target/MBlaze/MBlazeSubtarget.cpp +++ b/lib/Target/MBlaze/MBlazeSubtarget.cpp @@ -18,18 +18,22 @@ #include "llvm/Support/CommandLine.h" using namespace llvm; -MBlazeSubtarget::MBlazeSubtarget(const std::string &TT, const std::string &FS): +MBlazeSubtarget::MBlazeSubtarget(const std::string &TT, + const std::string &CPU, + const std::string &FS): HasBarrel(false), HasDiv(false), HasMul(false), HasPatCmp(false), HasFPU(false), HasMul64(false), HasSqrt(false) { // Parse features string. - std::string CPU = "mblaze"; - CPU = ParseSubtargetFeatures(FS, CPU); + std::string CPUName = CPU; + if (CPUName.empty()) + CPUName = "mblaze"; + ParseSubtargetFeatures(FS, CPUName); // Only use instruction scheduling if the selected CPU has an instruction // itinerary (the default CPU is the only one that doesn't). - HasItin = CPU != "mblaze"; - DEBUG(dbgs() << "CPU " << CPU << "(" << HasItin << ")\n"); + HasItin = CPUName != "mblaze"; + DEBUG(dbgs() << "CPU " << CPUName << "(" << HasItin << ")\n"); // Compute the issue width of the MBlaze itineraries computeIssueWidth(); diff --git a/lib/Target/MBlaze/MBlazeSubtarget.h b/lib/Target/MBlaze/MBlazeSubtarget.h index 2255b28..f5e0b4c 100644 --- a/lib/Target/MBlaze/MBlazeSubtarget.h +++ b/lib/Target/MBlaze/MBlazeSubtarget.h @@ -15,8 +15,7 @@ #define MBLAZESUBTARGET_H #include "llvm/Target/TargetSubtarget.h" -#include "llvm/Target/TargetMachine.h" - +#include "llvm/MC/MCInstrItineraries.h" #include <string> namespace llvm { @@ -39,12 +38,12 @@ public: /// This constructor initializes the data members to match that /// of the specified triple. - MBlazeSubtarget(const std::string &TT, const std::string &FS); + MBlazeSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. 
Definition of function is auto generated by tblgen. - std::string ParseSubtargetFeatures(const std::string &FS, - const std::string &CPU); + void ParseSubtargetFeatures(const std::string &FS, const std::string &CPU); /// Compute the number of maximum number of issues per cycle for the /// MBlaze scheduling itineraries. diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp index df34a83..1cbd2d4 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp +++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp @@ -80,9 +80,9 @@ extern "C" void LLVMInitializeMBlazeTarget() { // an easier handling. MBlazeTargetMachine:: MBlazeTargetMachine(const Target &T, const std::string &TT, - const std::string &FS): + const std::string &CPU, const std::string &FS): LLVMTargetMachine(T, TT), - Subtarget(TT, FS), + Subtarget(TT, CPU, FS), DataLayout("E-p:32:32:32-i8:8:8-i16:16:16"), InstrInfo(*this), FrameLowering(Subtarget), diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h index 48ce37a..cd6caaf 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.h +++ b/lib/Target/MBlaze/MBlazeTargetMachine.h @@ -42,7 +42,7 @@ namespace llvm { public: MBlazeTargetMachine(const Target &T, const std::string &TT, - const std::string &FS); + const std::string &CPU, const std::string &FS); virtual const MBlazeInstrInfo *getInstrInfo() const { return &InstrInfo; } diff --git a/lib/Target/MBlaze/Makefile b/lib/Target/MBlaze/Makefile index e01c60b..171548f 100644 --- a/lib/Target/MBlaze/Makefile +++ b/lib/Target/MBlaze/Makefile @@ -11,13 +11,12 @@ LIBRARYNAME = LLVMMBlazeCodeGen TARGET = MBlaze # Make sure that tblgen is run, first thing. -BUILT_SOURCES = MBlazeGenRegisterInfo.h.inc MBlazeGenRegisterNames.inc \ - MBlazeGenRegisterInfo.inc MBlazeGenInstrNames.inc \ - MBlazeGenInstrInfo.inc MBlazeGenAsmWriter.inc \ - MBlazeGenDAGISel.inc MBlazeGenAsmMatcher.inc \ - MBlazeGenCodeEmitter.inc MBlazeGenCallingConv.inc \ - MBlazeGenSubtarget.inc MBlazeGenIntrinsics.inc \ - MBlazeGenEDInfo.inc +BUILT_SOURCES = MBlazeGenRegisterInfo.inc MBlazeGenInstrInfo.inc \ + MBlazeGenAsmWriter.inc \ + MBlazeGenDAGISel.inc MBlazeGenAsmMatcher.inc \ + MBlazeGenCodeEmitter.inc MBlazeGenCallingConv.inc \ + MBlazeGenSubtarget.inc MBlazeGenIntrinsics.inc \ + MBlazeGenEDInfo.inc DIRS = InstPrinter AsmParser Disassembler TargetInfo diff --git a/lib/Target/MSP430/CMakeLists.txt b/lib/Target/MSP430/CMakeLists.txt index 2c7cbb6..613b259 100644 --- a/lib/Target/MSP430/CMakeLists.txt +++ b/lib/Target/MSP430/CMakeLists.txt @@ -1,10 +1,7 @@ set(LLVM_TARGET_DEFINITIONS MSP430.td) -tablegen(MSP430GenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(MSP430GenRegisterNames.inc -gen-register-enums) -tablegen(MSP430GenRegisterInfo.inc -gen-register-desc) -tablegen(MSP430GenInstrNames.inc -gen-instr-enums) -tablegen(MSP430GenInstrInfo.inc -gen-instr-desc) +tablegen(MSP430GenRegisterInfo.inc -gen-register-info) +tablegen(MSP430GenInstrInfo.inc -gen-instr-info) tablegen(MSP430GenAsmWriter.inc -gen-asm-writer) tablegen(MSP430GenDAGISel.inc -gen-dag-isel) tablegen(MSP430GenCallingConv.inc -gen-callingconv) diff --git a/lib/Target/MSP430/MSP430.h b/lib/Target/MSP430/MSP430.h index e742118..854d4e4 100644 --- a/lib/Target/MSP430/MSP430.h +++ b/lib/Target/MSP430/MSP430.h @@ -47,9 +47,11 @@ namespace llvm { // Defines symbolic names for MSP430 registers. // This defines a mapping from register name to register number. 
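The MBlazeSubtarget and MBlazeTargetMachine hunks above show the second recurring change of the merge, repeated for MSP430, Mips, and PTX below: an explicit CPU string is threaded from createTargetMachine through the target machine into the subtarget, ParseSubtargetFeatures now returns void, and each subtarget substitutes its historical default when the CPU string is empty. A minimal sketch of that fallback follows, using an invented MiniSubtarget (the "mblaze-v8" CPU name in the usage is likewise invented):

#include <iostream>
#include <string>

// Invented subtarget mirroring the MBlazeSubtarget change: the CPU name
// now arrives from the caller and may legitimately be empty.
struct MiniSubtarget {
  std::string CPUName;
  bool HasItin;

  MiniSubtarget(const std::string &TT, const std::string &CPU,
                const std::string &FS)
      : CPUName(CPU) {
    (void)TT;
    (void)FS; // triple and feature string are unused in this sketch
    if (CPUName.empty())
      CPUName = "mblaze"; // per-target default, as in the hunk above
    // In the MBlaze diff only the default CPU lacks an itinerary.
    HasItin = (CPUName != "mblaze");
  }
};

int main() {
  MiniSubtarget byDefault("mblaze-unknown-unknown", "", "");
  MiniSubtarget selected("mblaze-unknown-unknown", "mblaze-v8", "");
  std::cout << byDefault.CPUName << " itin=" << byDefault.HasItin << "\n"
            << selected.CPUName << " itin=" << selected.HasItin << "\n";
  return 0;
}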
-#include "MSP430GenRegisterNames.inc" +#define GET_REGINFO_ENUM +#include "MSP430GenRegisterInfo.inc" // Defines symbolic names for the MSP430 instructions. -#include "MSP430GenInstrNames.inc" +#define GET_INSTRINFO_ENUM +#include "MSP430GenInstrInfo.inc" #endif diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp index 424df13..bf201b0 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -15,7 +15,6 @@ #include "MSP430InstrInfo.h" #include "MSP430MachineFunctionInfo.h" #include "MSP430TargetMachine.h" -#include "MSP430GenInstrInfo.inc" #include "llvm/Function.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -23,10 +22,14 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Support/ErrorHandling.h" +#define GET_INSTRINFO_MC_DESC +#include "MSP430GenInstrInfo.inc" + using namespace llvm; MSP430InstrInfo::MSP430InstrInfo(MSP430TargetMachine &tm) - : TargetInstrInfoImpl(MSP430Insts, array_lengthof(MSP430Insts)), + : TargetInstrInfoImpl(MSP430Insts, array_lengthof(MSP430Insts), + MSP430::ADJCALLSTACKDOWN, MSP430::ADJCALLSTACKUP), RI(tm, *this), TM(tm) {} void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, @@ -158,13 +161,13 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { } bool MSP430InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { - const TargetInstrDesc &TID = MI->getDesc(); - if (!TID.isTerminator()) return false; + const MCInstrDesc &MCID = MI->getDesc(); + if (!MCID.isTerminator()) return false; // Conditional branch is a special case. - if (TID.isBranch() && !TID.isBarrier()) + if (MCID.isBranch() && !MCID.isBarrier()) return true; - if (!TID.isPredicable()) + if (!MCID.isPredicable()) return true; return !isPredicated(MI); } @@ -293,7 +296,7 @@ MSP430InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, /// instruction may be. This returns the maximum number of bytes. /// unsigned MSP430InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { - const TargetInstrDesc &Desc = MI->getDesc(); + const MCInstrDesc &Desc = MI->getDesc(); switch (Desc.TSFlags & MSP430II::SizeMask) { default: diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp index 53f4c2e..da0c3c6 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -26,13 +26,16 @@ #include "llvm/ADT/BitVector.h" #include "llvm/Support/ErrorHandling.h" +#define GET_REGINFO_MC_DESC +#define GET_REGINFO_TARGET_DESC +#include "MSP430GenRegisterInfo.inc" + using namespace llvm; // FIXME: Provide proper call frame setup / destroy opcodes. 
MSP430RegisterInfo::MSP430RegisterInfo(MSP430TargetMachine &tm, const TargetInstrInfo &tii) - : MSP430GenRegisterInfo(MSP430::ADJCALLSTACKDOWN, MSP430::ADJCALLSTACKUP), - TM(tm), TII(tii) { + : MSP430GenRegisterInfo(), TM(tm), TII(tii) { StackAlign = TM.getFrameLowering()->getStackAlignment(); } @@ -117,12 +120,12 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, Amount = (Amount+StackAlign-1)/StackAlign*StackAlign; MachineInstr *New = 0; - if (Old->getOpcode() == getCallFrameSetupOpcode()) { + if (Old->getOpcode() == TII.getCallFrameSetupOpcode()) { New = BuildMI(MF, Old->getDebugLoc(), TII.get(MSP430::SUB16ri), MSP430::SPW) .addReg(MSP430::SPW).addImm(Amount); } else { - assert(Old->getOpcode() == getCallFrameDestroyOpcode()); + assert(Old->getOpcode() == TII.getCallFrameDestroyOpcode()); // factor out the amount the callee already popped. uint64_t CalleeAmt = Old->getOperand(1).getImm(); Amount -= CalleeAmt; @@ -140,7 +143,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MBB.insert(I, New); } } - } else if (I->getOpcode() == getCallFrameDestroyOpcode()) { + } else if (I->getOpcode() == TII.getCallFrameDestroyOpcode()) { // If we are performing frame pointer elimination and if the callee pops // something off the stack pointer, add it back. if (uint64_t CalleeAmt = I->getOperand(1).getImm()) { @@ -250,5 +253,3 @@ int MSP430RegisterInfo::getLLVMRegNum(unsigned RegNum, bool isEH) const { llvm_unreachable("Not implemented yet!"); return 0; } - -#include "MSP430GenRegisterInfo.inc" diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h index e820558..fb70594 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.h +++ b/lib/Target/MSP430/MSP430RegisterInfo.h @@ -15,7 +15,9 @@ #define LLVM_TARGET_MSP430REGISTERINFO_H #include "llvm/Target/TargetRegisterInfo.h" -#include "MSP430GenRegisterInfo.h.inc" + +#define GET_REGINFO_HEADER +#include "MSP430GenRegisterInfo.inc" namespace llvm { diff --git a/lib/Target/MSP430/MSP430Subtarget.cpp b/lib/Target/MSP430/MSP430Subtarget.cpp index 1346cb9..a257abe 100644 --- a/lib/Target/MSP430/MSP430Subtarget.cpp +++ b/lib/Target/MSP430/MSP430Subtarget.cpp @@ -17,7 +17,9 @@ using namespace llvm; -MSP430Subtarget::MSP430Subtarget(const std::string &TT, const std::string &FS) { +MSP430Subtarget::MSP430Subtarget(const std::string &TT, + const std::string &CPUIgnored, + const std::string &FS) { std::string CPU = "generic"; // Parse features string. diff --git a/lib/Target/MSP430/MSP430Subtarget.h b/lib/Target/MSP430/MSP430Subtarget.h index 1070544..f36428a 100644 --- a/lib/Target/MSP430/MSP430Subtarget.h +++ b/lib/Target/MSP430/MSP430Subtarget.h @@ -26,12 +26,12 @@ public: /// This constructor initializes the data members to match that /// of the specified triple. /// - MSP430Subtarget(const std::string &TT, const std::string &FS); + MSP430Subtarget(const std::string &TT, const std::string &CPU, + const std::string &FS); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. 
- std::string ParseSubtargetFeatures(const std::string &FS, - const std::string &CPU); + void ParseSubtargetFeatures(const std::string &FS, const std::string &CPU); }; } // End llvm namespace diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp index fba9536..3ee5e6a 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -28,9 +28,10 @@ extern "C" void LLVMInitializeMSP430Target() { MSP430TargetMachine::MSP430TargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS) : LLVMTargetMachine(T, TT), - Subtarget(TT, FS), + Subtarget(TT, CPU, FS), // FIXME: Check TargetData string. DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"), InstrInfo(*this), TLInfo(*this), TSInfo(*this), diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h index cee3b04..2a9eea0 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.h +++ b/lib/Target/MSP430/MSP430TargetMachine.h @@ -39,7 +39,7 @@ class MSP430TargetMachine : public LLVMTargetMachine { public: MSP430TargetMachine(const Target &T, const std::string &TT, - const std::string &FS); + const std::string &CPU, const std::string &FS); virtual const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; diff --git a/lib/Target/MSP430/Makefile b/lib/Target/MSP430/Makefile index fa4e80b..266330a 100644 --- a/lib/Target/MSP430/Makefile +++ b/lib/Target/MSP430/Makefile @@ -12,9 +12,8 @@ LIBRARYNAME = LLVMMSP430CodeGen TARGET = MSP430 # Make sure that tblgen is run, first thing. -BUILT_SOURCES = MSP430GenRegisterInfo.h.inc MSP430GenRegisterNames.inc \ - MSP430GenRegisterInfo.inc MSP430GenInstrNames.inc \ - MSP430GenInstrInfo.inc MSP430GenAsmWriter.inc \ +BUILT_SOURCES = MSP430GenRegisterInfo.inc MSP430GenInstrInfo.inc \ + MSP430GenAsmWriter.inc \ MSP430GenDAGISel.inc MSP430GenCallingConv.inc \ MSP430GenSubtarget.inc diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index fd16516..71b13c8 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -1,10 +1,7 @@ set(LLVM_TARGET_DEFINITIONS Mips.td) -tablegen(MipsGenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(MipsGenRegisterNames.inc -gen-register-enums) -tablegen(MipsGenRegisterInfo.inc -gen-register-desc) -tablegen(MipsGenInstrNames.inc -gen-instr-enums) -tablegen(MipsGenInstrInfo.inc -gen-instr-desc) +tablegen(MipsGenRegisterInfo.inc -gen-register-info) +tablegen(MipsGenInstrInfo.inc -gen-instr-info) tablegen(MipsGenAsmWriter.inc -gen-asm-writer) tablegen(MipsGenDAGISel.inc -gen-dag-isel) tablegen(MipsGenCallingConv.inc -gen-callingconv) diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile index d16b066..0b6dd56 100644 --- a/lib/Target/Mips/Makefile +++ b/lib/Target/Mips/Makefile @@ -12,9 +12,8 @@ LIBRARYNAME = LLVMMipsCodeGen TARGET = Mips # Make sure that tblgen is run, first thing. -BUILT_SOURCES = MipsGenRegisterInfo.h.inc MipsGenRegisterNames.inc \ - MipsGenRegisterInfo.inc MipsGenInstrNames.inc \ - MipsGenInstrInfo.inc MipsGenAsmWriter.inc \ +BUILT_SOURCES = MipsGenRegisterInfo.inc MipsGenInstrInfo.inc \ + MipsGenAsmWriter.inc \ MipsGenDAGISel.inc MipsGenCallingConv.inc \ MipsGenSubtarget.inc diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h index 76a26a9..738b48c 100644 --- a/lib/Target/Mips/Mips.h +++ b/lib/Target/Mips/Mips.h @@ -35,9 +35,11 @@ namespace llvm { // Defines symbolic names for Mips registers. 
This defines a mapping from // register name to register number. -#include "MipsGenRegisterNames.inc" +#define GET_REGINFO_ENUM +#include "MipsGenRegisterInfo.inc" // Defines symbolic names for the Mips instructions. -#include "MipsGenInstrNames.inc" +#define GET_INSTRINFO_ENUM +#include "MipsGenInstrInfo.inc" #endif diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index 6f69ba3..78f69ea 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -38,6 +38,8 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Analysis/DebugInfo.h" + using namespace llvm; namespace { @@ -75,6 +77,10 @@ namespace { void EmitInstruction(const MachineInstr *MI) { SmallString<128> Str; raw_svector_ostream OS(Str); + + if (MI->isDebugValue()) + PrintDebugValueComment(MI, OS); + printInstruction(MI, OS); OutStreamer.EmitRawText(OS.str()); } @@ -86,6 +92,9 @@ namespace { virtual void EmitFunctionEntryLabel(); void EmitStartOfAsmFile(Module &M); + virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const; + + void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); }; } // end of anonymous namespace @@ -441,6 +450,21 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) { OutStreamer.EmitRawText(StringRef("\t.previous")); } +MachineLocation +MipsAsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { + // Handles frame addresses emitted in MipsInstrInfo::emitFrameIndexDebugValue. + assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!"); + assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm() && + "Unexpected MachineOperand types"); + return MachineLocation(MI->getOperand(0).getReg(), + MI->getOperand(1).getImm()); +} + +void MipsAsmPrinter::PrintDebugValueComment(const MachineInstr *MI, + raw_ostream &OS) { + // TODO: implement +} + // Force static initialization. 
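The MipsAsmPrinter hunks above wire up variable-location debug info: emitFrameIndexDebugValue (added to MipsInstrInfo further below) builds a DBG_VALUE whose leading operands are a frame index and an offset, eliminateFrameIndex later rewrites the frame index to a concrete register, and getDebugValueLocation decodes exactly that (register, immediate) pair. The sketch below models only that operand contract; MiniOperand and MiniInstr are invented stand-ins, not the LLVM classes.

#include <cassert>
#include <cstdio>
#include <vector>

// Invented operand/instruction miniatures; only the reg/imm pair that
// the Mips DBG_VALUE handling relies on is modeled.
struct MiniOperand {
  enum Kind { Reg, Imm } K;
  long Val;
};

struct MiniInstr {
  std::vector<MiniOperand> Ops;
};

// Producer side: emitFrameIndexDebugValue emits
//   DBG_VALUE <frame-index>, 0, <offset>, <metadata>
// and eliminateFrameIndex rewrites the first two operands to the chosen
// frame register and the final offset.
MiniInstr makeRewrittenDebugValue(long FrameReg, long Offset) {
  return MiniInstr{{{MiniOperand::Reg, FrameReg},
                    {MiniOperand::Imm, Offset}}};
}

// Consumer side: getDebugValueLocation checks this same (reg, imm) shape
// before building a MachineLocation for the DWARF emitter.
void printDebugValueLocation(const MiniInstr &MI) {
  assert(MI.Ops.size() >= 2 && MI.Ops[0].K == MiniOperand::Reg &&
         MI.Ops[1].K == MiniOperand::Imm && "Unexpected operand types");
  std::printf("variable lives at [reg %ld + %ld]\n",
              MI.Ops[0].Val, MI.Ops[1].Val);
}

int main() {
  printDebugValueLocation(makeRewrittenDebugValue(/*$sp=*/29, 16));
  return 0;
}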
extern "C" void LLVMInitializeMipsAsmPrinter() { RegisterAsmPrinter<MipsAsmPrinter> X(TheMipsTarget); diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp index b44a0af..c3a6211 100644 --- a/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -59,10 +59,10 @@ runOnMachineBasicBlock(MachineBasicBlock &MBB) { bool Changed = false; for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) { - const TargetInstrDesc& Tid = I->getDesc(); - if (Tid.hasDelaySlot() && + const MCInstrDesc& MCid = I->getDesc(); + if (MCid.hasDelaySlot() && (TM.getSubtarget<MipsSubtarget>().isMips1() || - Tid.isCall() || Tid.isBranch() || Tid.isReturn())) { + MCid.isCall() || MCid.isBranch() || MCid.isReturn())) { MachineBasicBlock::iterator J = I; ++J; BuildMI(MBB, J, I->getDebugLoc(), TII->get(Mips::NOP)); diff --git a/lib/Target/Mips/MipsExpandPseudo.cpp b/lib/Target/Mips/MipsExpandPseudo.cpp index 4423f51..a622258 100644 --- a/lib/Target/Mips/MipsExpandPseudo.cpp +++ b/lib/Target/Mips/MipsExpandPseudo.cpp @@ -61,9 +61,9 @@ bool MipsExpandPseudo::runOnMachineBasicBlock(MachineBasicBlock& MBB) { bool Changed = false; for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) { - const TargetInstrDesc& Tid = I->getDesc(); + const MCInstrDesc& MCid = I->getDesc(); - switch(Tid.getOpcode()) { + switch(MCid.getOpcode()) { default: ++I; continue; @@ -87,7 +87,7 @@ void MipsExpandPseudo::ExpandBuildPairF64(MachineBasicBlock& MBB, MachineBasicBlock::iterator I) { unsigned DstReg = I->getOperand(0).getReg(); unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg(); - const TargetInstrDesc& Mtc1Tdd = TII->get(Mips::MTC1); + const MCInstrDesc& Mtc1Tdd = TII->get(Mips::MTC1); DebugLoc dl = I->getDebugLoc(); const unsigned* SubReg = TM.getRegisterInfo()->getSubRegisters(DstReg); @@ -103,7 +103,7 @@ void MipsExpandPseudo::ExpandExtractElementF64(MachineBasicBlock& MBB, unsigned DstReg = I->getOperand(0).getReg(); unsigned SrcReg = I->getOperand(1).getReg(); unsigned N = I->getOperand(2).getImm(); - const TargetInstrDesc& Mfc1Tdd = TII->get(Mips::MFC1); + const MCInstrDesc& Mfc1Tdd = TII->get(Mips::MFC1); DebugLoc dl = I->getDebugLoc(); const unsigned* SubReg = TM.getRegisterInfo()->getSubRegisters(SrcReg); diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index c35c852..5f0c7e0 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -170,7 +170,8 @@ SelectAddr(SDValue Addr, SDValue &Offset, SDValue &Base) { Addr.getOperand(0).getOpcode() == ISD::LOAD) && Addr.getOperand(1).getOpcode() == MipsISD::Lo) { SDValue LoVal = Addr.getOperand(1); - if (dyn_cast<ConstantPoolSDNode>(LoVal.getOperand(0))) { + if (isa<ConstantPoolSDNode>(LoVal.getOperand(0)) || + isa<GlobalAddressSDNode>(LoVal.getOperand(0))) { Base = Addr.getOperand(0); Offset = LoVal.getOperand(0); return true; diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 01624c5..9e47a38 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -1911,7 +1911,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (LoadSymAddr) { // Load callee address Callee = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, Callee); - SDValue LoadValue = DAG.getLoad(MVT::i32, dl, Chain, Callee, + SDValue LoadValue = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), Callee, MachinePointerInfo::getGOT(), false, 
false, 0); @@ -1921,9 +1921,6 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, Callee = DAG.getNode(ISD::ADD, dl, MVT::i32, LoadValue, Lo); } else Callee = LoadValue; - - // Use chain output from LoadValue - Chain = LoadValue.getValue(1); } // copy to T9 @@ -2332,14 +2329,16 @@ MipsTargetLowering::getSingleConstraintMatchWeight( return weight; } -/// getRegClassForInlineAsmConstraint - Given a constraint letter (e.g. "r"), -/// return a list of registers that can be used to satisfy the constraint. -/// This should only be used for C_RegisterClass constraints. +/// Given a register class constraint, like 'r', if this corresponds directly +/// to an LLVM register class, return a register of 0 and the register class +/// pointer. std::pair<unsigned, const TargetRegisterClass*> MipsTargetLowering:: getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { + case 'd': // Address register. Same as 'r' unless generating MIPS16 code. + case 'y': // Same as 'r'. Exists for compatibility. case 'r': return std::make_pair(0U, Mips::CPURegsRegisterClass); case 'f': @@ -2348,55 +2347,12 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const if (VT == MVT::f64) if ((!Subtarget->isSingleFloat()) && (!Subtarget->isFP64bit())) return std::make_pair(0U, Mips::AFGR64RegisterClass); + break; } } return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); } -/// Given a register class constraint, like 'r', if this corresponds directly -/// to an LLVM register class, return a register of 0 and the register class -/// pointer. -std::vector<unsigned> MipsTargetLowering:: -getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const -{ - if (Constraint.size() != 1) - return std::vector<unsigned>(); - - switch (Constraint[0]) { - default : break; - case 'r': - // GCC Mips Constraint Letters - case 'd': - case 'y': - return make_vector<unsigned>(Mips::T0, Mips::T1, Mips::T2, Mips::T3, - Mips::T4, Mips::T5, Mips::T6, Mips::T7, Mips::S0, Mips::S1, - Mips::S2, Mips::S3, Mips::S4, Mips::S5, Mips::S6, Mips::S7, - Mips::T8, 0); - - case 'f': - if (VT == MVT::f32) { - if (Subtarget->isSingleFloat()) - return make_vector<unsigned>(Mips::F2, Mips::F3, Mips::F4, Mips::F5, - Mips::F6, Mips::F7, Mips::F8, Mips::F9, Mips::F10, Mips::F11, - Mips::F20, Mips::F21, Mips::F22, Mips::F23, Mips::F24, - Mips::F25, Mips::F26, Mips::F27, Mips::F28, Mips::F29, - Mips::F30, Mips::F31, 0); - else - return make_vector<unsigned>(Mips::F2, Mips::F4, Mips::F6, Mips::F8, - Mips::F10, Mips::F20, Mips::F22, Mips::F24, Mips::F26, - Mips::F28, Mips::F30, 0); - } - - if (VT == MVT::f64) - if ((!Subtarget->isSingleFloat()) && (!Subtarget->isFP64bit())) - return make_vector<unsigned>(Mips::D1, Mips::D2, Mips::D3, Mips::D4, - Mips::D5, Mips::D10, Mips::D11, Mips::D12, Mips::D13, - Mips::D14, Mips::D15, 0); - } - return std::vector<unsigned>(); -} - bool MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The Mips target isn't yet aware of offsets. 
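Both here for Mips and earlier for MBlaze, the merge deletes getRegClassForInlineAsmConstraint, which enumerated candidate registers by hand, and folds those GCC constraint letters into getRegForInlineAsmConstraint, which returns register 0 plus a register-class pointer and lets the allocator choose the concrete register. A sketch of the surviving shape follows; the RC enum stands in for TargetRegisterClass pointers such as Mips::CPURegsRegisterClass and is invented for the example.

#include <iostream>
#include <string>
#include <utility>

// Invented register-class tokens; LLVM returns TargetRegisterClass*.
enum class RC { None, GPR, FGR32 };

// Register 0 in the returned pair means "any register of this class",
// which is why the hand-written candidate lists could be deleted.
std::pair<unsigned, RC> getRegForConstraint(const std::string &Constraint,
                                            bool isF32) {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'd': // address register, same as 'r' in the Mips hunk above
    case 'y': // kept for GCC compatibility
    case 'r':
      return {0U, RC::GPR};
    case 'f':
      if (isF32)
        return {0U, RC::FGR32};
      break;
    }
  }
  return {0U, RC::None}; // defer to generic TargetLowering handling
}

int main() {
  std::pair<unsigned, RC> P = getRegForConstraint("y", false);
  std::cout << "constraint 'y' -> reg " << P.first << ", class "
            << static_cast<int>(P.second) << "\n";
  return 0;
}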
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index b7b85fd..bda26a2 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -169,10 +169,6 @@ namespace llvm { getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; - std::vector<unsigned> - getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const; - virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; /// isFPImmLegal - Returns true if the target can instruction select the diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index be044fa..deab5e5 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -18,12 +18,15 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" + +#define GET_INSTRINFO_MC_DESC #include "MipsGenInstrInfo.inc" using namespace llvm; MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm) - : TargetInstrInfoImpl(MipsInsts, array_lengthof(MipsInsts)), + : TargetInstrInfoImpl(MipsInsts, array_lengthof(MipsInsts), + Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP), TM(tm), RI(*TM.getSubtargetImpl(), *this) {} static bool isZeroImm(const MachineOperand &op) { @@ -214,6 +217,15 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, llvm_unreachable("Register class not handled!"); } +MachineInstr* +MipsInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, + uint64_t Offset, const MDNode *MDPtr, + DebugLoc DL) const { + MachineInstrBuilder MIB = BuildMI(MF, DL, get(Mips::DBG_VALUE)) + .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr); + return &*MIB; +} + //===----------------------------------------------------------------------===// // Branch Analysis //===----------------------------------------------------------------------===// @@ -341,8 +353,8 @@ void MipsInstrInfo::BuildCondBr(MachineBasicBlock &MBB, const SmallVectorImpl<MachineOperand>& Cond) const { unsigned Opc = Cond[0].getImm(); - const TargetInstrDesc &TID = get(Opc); - MachineInstrBuilder MIB = BuildMI(&MBB, DL, TID); + const MCInstrDesc &MCID = get(Opc); + MachineInstrBuilder MIB = BuildMI(&MBB, DL, MCID); for (unsigned i = 1; i < Cond.size(); ++i) MIB.addReg(Cond[i].getReg()); diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index abf6773..b7f8bec 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -224,6 +224,11 @@ public: const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; + virtual MachineInstr* emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, uint64_t Offset, + const MDNode *MDPtr, + DebugLoc DL) const; + virtual bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; diff --git a/lib/Target/Mips/MipsMCAsmInfo.cpp b/lib/Target/Mips/MipsMCAsmInfo.cpp index c86bf40..97ed878 100644 --- a/lib/Target/Mips/MipsMCAsmInfo.cpp +++ b/lib/Target/Mips/MipsMCAsmInfo.cpp @@ -16,7 +16,7 @@ using namespace llvm; MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, StringRef TT) { AlignmentIsInBytes = false; - Data16bitsDirective = "\t.half\t"; + Data16bitsDirective = "\t.2byte\t"; Data32bitsDirective = "\t.4byte\t"; Data64bitsDirective = 0; PrivateGlobalPrefix = "$"; diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index fa64f63..202a1d4 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ 
b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -35,13 +35,17 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/DebugInfo.h" + +#define GET_REGINFO_MC_DESC +#define GET_REGINFO_TARGET_DESC +#include "MipsGenRegisterInfo.inc" using namespace llvm; MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST, const TargetInstrInfo &tii) - : MipsGenRegisterInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP), - Subtarget(ST), TII(tii) {} + : MipsGenRegisterInfo(), Subtarget(ST), TII(tii) {} /// getRegisterNumbering - Given the enum value for some register, e.g. /// Mips::RA, return the number that it corresponds to (e.g. 31). @@ -176,8 +180,29 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, << "spOffset : " << spOffset << "\n" << "stackSize : " << stackSize << "\n"); - int Offset; + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + int MinCSFI = 0; + int MaxCSFI = -1; + + if (CSI.size()) { + MinCSFI = CSI[0].getFrameIdx(); + MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); + } + + // The following stack frame objects are always referenced relative to $sp: + // 1. Outgoing arguments. + // 2. Pointer to dynamically allocated stack space. + // 3. Locations for callee-saved registers. + // Everything else is referenced relative to whatever register + // getFrameRegister() returns. + unsigned FrameReg; + if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isDynAllocFI(FrameIndex) || + (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)) + FrameReg = Mips::SP; + else + FrameReg = getFrameRegister(MF); + // Calculate final offset. // - There is no need to change the offset if the frame object is one of the // following: an outgoing argument, pointer to a dynamically allocated @@ -185,12 +210,20 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, // - If the frame object is any of the following, its offset must be adjusted // by adding the size of the stack: // incoming argument, callee-saved register location or local variable. + int Offset; + if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isGPFI(FrameIndex) || MipsFI->isDynAllocFI(FrameIndex)) Offset = spOffset; else Offset = spOffset + stackSize; + if (MI.isDebugValue()) { + MI.getOperand(i).ChangeToRegister(FrameReg, false /*isDef*/); + MI.getOperand(i+1).ChangeToImmediate(Offset); + return; + } + Offset += MI.getOperand(i-1).getImm(); DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); @@ -199,28 +232,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, int NewImm = 0; MachineBasicBlock &MBB = *MI.getParent(); bool ATUsed; - unsigned FrameReg; - const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); - int MinCSFI = 0; - int MaxCSFI = -1; - if (CSI.size()) { - MinCSFI = CSI[0].getFrameIdx(); - MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); - } - - // The following stack frame objects are always referenced relative to $sp: - // 1. Outgoing arguments. - // 2. Pointer to dynamically allocated stack space. - // 3. Locations for callee-saved registers. - // Everything else is referenced relative to whatever register - // getFrameRegister() returns. 
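The eliminateFrameIndex rework above hoists the frame-register decision ahead of the offset arithmetic: outgoing-argument slots, dynamically allocated stack space, and callee-saved spill slots stay $sp-relative, everything else uses whatever getFrameRegister() returns; and a DBG_VALUE gets the (register, offset) pair written straight back into its operands with no 16-bit range check. A condensed sketch of that classification, with an invented FrameIndexInfo in place of the MipsFunctionInfo and callee-saved-range queries:

#include <cstdio>

// Invented summary of the frame-index classification that the Mips code
// derives from MipsFunctionInfo and the callee-saved frame-index range.
struct FrameIndexInfo {
  bool IsOutgoingArg;
  bool IsDynAlloc;
  bool IsCalleeSavedSlot;
};

enum { SP = 29, FP = 30 }; // MIPS numbers for $sp and $fp/$s8

// Only the three $sp-relative categories bypass the frame register,
// matching the hoisted block in eliminateFrameIndex above.
unsigned selectFrameReg(const FrameIndexInfo &FI, bool hasFramePointer) {
  if (FI.IsOutgoingArg || FI.IsDynAlloc || FI.IsCalleeSavedSlot)
    return SP;
  return hasFramePointer ? FP : SP; // stand-in for getFrameRegister(MF)
}

int main() {
  FrameIndexInfo outgoingArg = {true, false, false};
  FrameIndexInfo localVar = {false, false, false};
  std::printf("outgoing arg -> $%u, local -> $%u\n",
              selectFrameReg(outgoingArg, true),
              selectFrameReg(localVar, true));
  return 0;
}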
- if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isDynAllocFI(FrameIndex) || - (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)) - FrameReg = Mips::SP; - else - FrameReg = getFrameRegister(MF); - // Offset fits in the 16-bit field if (Offset < 0x8000 && Offset >= -0x8000) { NewReg = FrameReg; @@ -285,5 +297,3 @@ getDwarfRegNum(unsigned RegNum, bool isEH) const { int MipsRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const { return MipsGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0); } - -#include "MipsGenRegisterInfo.inc" diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index 76b0035..646369b 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -16,7 +16,9 @@ #include "Mips.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "MipsGenRegisterInfo.h.inc" + +#define GET_REGINFO_HEADER +#include "MipsGenRegisterInfo.inc" namespace llvm { class MipsSubtarget; diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index 70747f5..306ea11 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -16,18 +16,20 @@ #include "MipsGenSubtarget.inc" using namespace llvm; -MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &FS, - bool little) : +MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS, bool little) : MipsArchVersion(Mips1), MipsABI(O32), IsLittle(little), IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false), IsLinux(true), HasSEInReg(false), HasCondMov(false), HasMulDivAdd(false), HasMinMax(false), HasSwap(false), HasBitCount(false) { - std::string CPU = "mips1"; + std::string CPUName = CPU; + if (CPUName.empty()) + CPUName = "mips1"; MipsArchVersion = Mips1; // Parse features string. - ParseSubtargetFeatures(FS, CPU); + ParseSubtargetFeatures(FS, CPUName); // Is the target system Linux ? if (TT.find("linux") == std::string::npos) diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index 096bbed..8acbf5b 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -15,8 +15,7 @@ #define MIPSSUBTARGET_H #include "llvm/Target/TargetSubtarget.h" -#include "llvm/Target/TargetMachine.h" - +#include "llvm/MC/MCInstrItineraries.h" #include <string> namespace llvm { @@ -92,12 +91,12 @@ public: /// This constructor initializes the data members to match that /// of the specified triple. - MipsSubtarget(const std::string &TT, const std::string &FS, bool little); + MipsSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS, bool little); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. - std::string ParseSubtargetFeatures(const std::string &FS, - const std::string &CPU); + void ParseSubtargetFeatures(const std::string &FS, const std::string &CPU); bool isMips1() const { return MipsArchVersion == Mips1; } bool isMips32() const { return MipsArchVersion >= Mips32; } diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index cfbb92c..88ce3b8 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -34,10 +34,11 @@ extern "C" void LLVMInitializeMipsTarget() { // an easier handling. // Using CodeModel::Large enables different CALL behavior. 
MipsTargetMachine:: -MipsTargetMachine(const Target &T, const std::string &TT, const std::string &FS, +MipsTargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS, bool isLittle=false): LLVMTargetMachine(T, TT), - Subtarget(TT, FS, isLittle), + Subtarget(TT, CPU, FS, isLittle), DataLayout(isLittle ? std::string("e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32") : std::string("E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")), @@ -55,8 +56,8 @@ MipsTargetMachine(const Target &T, const std::string &TT, const std::string &FS, MipselTargetMachine:: MipselTargetMachine(const Target &T, const std::string &TT, - const std::string &FS) : - MipsTargetMachine(T, TT, FS, true) {} + const std::string &CPU, const std::string &FS) : + MipsTargetMachine(T, TT, CPU, FS, true) {} // Install an instruction selector pass using // the ISelDag to gen Mips code. diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index 102dd85..a021af2 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -35,7 +35,8 @@ namespace llvm { MipsSelectionDAGInfo TSInfo; public: MipsTargetMachine(const Target &T, const std::string &TT, - const std::string &FS, bool isLittle); + const std::string &CPU, const std::string &FS, + bool isLittle); virtual const MipsInstrInfo *getInstrInfo() const { return &InstrInfo; } @@ -73,7 +74,7 @@ namespace llvm { class MipselTargetMachine : public MipsTargetMachine { public: MipselTargetMachine(const Target &T, const std::string &TT, - const std::string &FS); + const std::string &CPU, const std::string &FS); }; } // End llvm namespace diff --git a/lib/Target/PTX/CMakeLists.txt b/lib/Target/PTX/CMakeLists.txt index c4448d6..33bae7c 100644 --- a/lib/Target/PTX/CMakeLists.txt +++ b/lib/Target/PTX/CMakeLists.txt @@ -3,11 +3,8 @@ set(LLVM_TARGET_DEFINITIONS PTX.td) tablegen(PTXGenAsmWriter.inc -gen-asm-writer) tablegen(PTXGenCallingConv.inc -gen-callingconv) tablegen(PTXGenDAGISel.inc -gen-dag-isel) -tablegen(PTXGenInstrInfo.inc -gen-instr-desc) -tablegen(PTXGenInstrNames.inc -gen-instr-enums) -tablegen(PTXGenRegisterInfo.inc -gen-register-desc) -tablegen(PTXGenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(PTXGenRegisterNames.inc -gen-register-enums) +tablegen(PTXGenInstrInfo.inc -gen-instr-info) +tablegen(PTXGenRegisterInfo.inc -gen-register-info) tablegen(PTXGenSubtarget.inc -gen-subtarget) add_llvm_target(PTXCodeGen diff --git a/lib/Target/PTX/Makefile b/lib/Target/PTX/Makefile index 844480f..9dccb4a 100644 --- a/lib/Target/PTX/Makefile +++ b/lib/Target/PTX/Makefile @@ -16,10 +16,7 @@ BUILT_SOURCES = PTXGenAsmWriter.inc \ PTXGenCallingConv.inc \ PTXGenDAGISel.inc \ PTXGenInstrInfo.inc \ - PTXGenInstrNames.inc \ PTXGenRegisterInfo.inc \ - PTXGenRegisterInfo.h.inc \ - PTXGenRegisterNames.inc \ PTXGenSubtarget.inc DIRS = TargetInfo diff --git a/lib/Target/PTX/PTX.h b/lib/Target/PTX/PTX.h index ec2be92..6aaf068 100644 --- a/lib/Target/PTX/PTX.h +++ b/lib/Target/PTX/PTX.h @@ -47,9 +47,11 @@ namespace llvm { } // namespace llvm; // Defines symbolic names for PTX registers. -#include "PTXGenRegisterNames.inc" +#define GET_REGINFO_ENUM +#include "PTXGenRegisterInfo.inc" // Defines symbolic names for the PTX instructions. 
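Stepping back to the Mips target machine above: the two DataLayout strings differ only in the leading byte-order letter, "e" for the little-endian mipsel variant and "E" for big-endian, followed by identical pointer and integer size/alignment records. A tiny sketch of that selection; the string contents are copied from the diff, only the helper function is invented.

#include <iostream>
#include <string>

// Returns the Mips data-layout string from the constructor above; only
// the leading 'e'/'E' (byte order) differs between the two variants.
std::string mipsDataLayout(bool isLittle) {
  return isLittle
             ? "e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32"
             : "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32";
}

int main() {
  // MipselTargetMachine simply forwards isLittle = true, as in the diff.
  std::cout << "mips:   " << mipsDataLayout(false) << "\n"
            << "mipsel: " << mipsDataLayout(true) << "\n";
  return 0;
}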
-#include "PTXGenInstrNames.inc" +#define GET_INSTRINFO_ENUM +#include "PTXGenInstrInfo.inc" #endif // PTX_H diff --git a/lib/Target/PTX/PTX.td b/lib/Target/PTX/PTX.td index 6a36b24..f6fbe9f 100644 --- a/lib/Target/PTX/PTX.td +++ b/lib/Target/PTX/PTX.td @@ -30,31 +30,51 @@ def FeatureNoFMA : SubtargetFeature<"no-fma","SupportsFMA", "false", //===- PTX Version --------------------------------------------------------===// def FeaturePTX20 : SubtargetFeature<"ptx20", "PTXVersion", "PTX_VERSION_2_0", - "Use PTX Language Version 2.0", - []>; + "Use PTX Language Version 2.0">; def FeaturePTX21 : SubtargetFeature<"ptx21", "PTXVersion", "PTX_VERSION_2_1", - "Use PTX Language Version 2.1", - [FeaturePTX20]>; + "Use PTX Language Version 2.1">; def FeaturePTX22 : SubtargetFeature<"ptx22", "PTXVersion", "PTX_VERSION_2_2", - "Use PTX Language Version 2.2", - [FeaturePTX21]>; + "Use PTX Language Version 2.2">; def FeaturePTX23 : SubtargetFeature<"ptx23", "PTXVersion", "PTX_VERSION_2_3", - "Use PTX Language Version 2.3", - [FeaturePTX22]>; - -//===- PTX Shader Model ---------------------------------------------------===// - -def FeatureSM10 : SubtargetFeature<"sm10", "PTXShaderModel", "PTX_SM_1_0", - "Enable Shader Model 1.0 compliance">; -def FeatureSM13 : SubtargetFeature<"sm13", "PTXShaderModel", "PTX_SM_1_3", - "Enable Shader Model 1.3 compliance", - [FeatureSM10, FeatureDouble]>; -def FeatureSM20 : SubtargetFeature<"sm20", "PTXShaderModel", "PTX_SM_2_0", - "Enable Shader Model 2.0 compliance", - [FeatureSM13]>; + "Use PTX Language Version 2.3">; + +//===- PTX Target ---------------------------------------------------------===// + +def FeatureSM10 : SubtargetFeature<"sm10", "PTXTarget", "PTX_SM_1_0", + "Use Shader Model 1.0">; +def FeatureSM11 : SubtargetFeature<"sm11", "PTXTarget", "PTX_SM_1_1", + "Use Shader Model 1.1">; +def FeatureSM12 : SubtargetFeature<"sm12", "PTXTarget", "PTX_SM_1_2", + "Use Shader Model 1.2">; +def FeatureSM13 : SubtargetFeature<"sm13", "PTXTarget", "PTX_SM_1_3", + "Use Shader Model 1.3">; +def FeatureSM20 : SubtargetFeature<"sm20", "PTXTarget", "PTX_SM_2_0", + "Use Shader Model 2.0">; +def FeatureSM21 : SubtargetFeature<"sm21", "PTXTarget", "PTX_SM_2_1", + "Use Shader Model 2.1">; +def FeatureSM22 : SubtargetFeature<"sm22", "PTXTarget", "PTX_SM_2_2", + "Use Shader Model 2.2">; +def FeatureSM23 : SubtargetFeature<"sm23", "PTXTarget", "PTX_SM_2_3", + "Use Shader Model 2.3">; + +def FeatureCOMPUTE10 : SubtargetFeature<"compute10", "PTXTarget", + "PTX_COMPUTE_1_0", + "Use Compute Compatibility 1.0">; +def FeatureCOMPUTE11 : SubtargetFeature<"compute11", "PTXTarget", + "PTX_COMPUTE_1_1", + "Use Compute Compatibility 1.1">; +def FeatureCOMPUTE12 : SubtargetFeature<"compute12", "PTXTarget", + "PTX_COMPUTE_1_2", + "Use Compute Compatibility 1.2">; +def FeatureCOMPUTE13 : SubtargetFeature<"compute13", "PTXTarget", + "PTX_COMPUTE_1_3", + "Use Compute Compatibility 1.3">; +def FeatureCOMPUTE20 : SubtargetFeature<"compute20", "PTXTarget", + "PTX_COMPUTE_2_0", + "Use Compute Compatibility 2.0">; //===----------------------------------------------------------------------===// // PTX supported processors @@ -65,6 +85,27 @@ class Proc<string Name, list<SubtargetFeature> Features> def : Proc<"generic", []>; +// Processor definitions for compute/shader models +def : Proc<"compute_10", [FeatureCOMPUTE10]>; +def : Proc<"compute_11", [FeatureCOMPUTE11]>; +def : Proc<"compute_12", [FeatureCOMPUTE12]>; +def : Proc<"compute_13", [FeatureCOMPUTE13]>; +def : Proc<"compute_20", [FeatureCOMPUTE20]>; 
+def : Proc<"sm_10", [FeatureSM10]>; +def : Proc<"sm_11", [FeatureSM11]>; +def : Proc<"sm_12", [FeatureSM12]>; +def : Proc<"sm_13", [FeatureSM13]>; +def : Proc<"sm_20", [FeatureSM20]>; +def : Proc<"sm_21", [FeatureSM21]>; +def : Proc<"sm_22", [FeatureSM22]>; +def : Proc<"sm_23", [FeatureSM23]>; + +// Processor definitions for common GPU architectures +def : Proc<"g80", [FeatureSM10]>; +def : Proc<"gt200", [FeatureSM13]>; +def : Proc<"gf100", [FeatureSM20, FeatureDouble]>; +def : Proc<"fermi", [FeatureSM20, FeatureDouble]>; + //===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp index b1f7c1e..2848d54 100644 --- a/lib/Target/PTX/PTXAsmPrinter.cpp +++ b/lib/Target/PTX/PTXAsmPrinter.cpp @@ -22,10 +22,12 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/DebugInfo.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" @@ -35,6 +37,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -63,8 +66,13 @@ public: const char *Modifier = 0); void printParamOperand(const MachineInstr *MI, int opNum, raw_ostream &OS, const char *Modifier = 0); + void printReturnOperand(const MachineInstr *MI, int opNum, raw_ostream &OS, + const char *Modifier = 0); void printPredicateOperand(const MachineInstr *MI, raw_ostream &O); + unsigned GetOrCreateSourceID(StringRef FileName, + StringRef DirName); + // autogen'd. void printInstruction(const MachineInstr *MI, raw_ostream &OS); static const char *getRegisterName(unsigned RegNo); @@ -72,10 +80,13 @@ public: private: void EmitVariableDeclaration(const GlobalVariable *gv); void EmitFunctionDeclaration(); + + StringMap<unsigned> SourceIdMap; }; // class PTXAsmPrinter } // namespace static const char PARAM_PREFIX[] = "__param_"; +static const char RETURN_PREFIX[] = "__ret_"; static const char *getRegisterTypeName(unsigned RegNo) { #define TEST_REGCLS(cls, clsstr) \ @@ -172,6 +183,20 @@ void PTXAsmPrinter::EmitStartOfAsmFile(Module &M) OutStreamer.AddBlankLine(); + // Define any .file directives + DebugInfoFinder DbgFinder; + DbgFinder.processModule(M); + + for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), + E = DbgFinder.compile_unit_end(); I != E; ++I) { + DICompileUnit DIUnit(*I); + StringRef FN = DIUnit.getFilename(); + StringRef Dir = DIUnit.getDirectory(); + GetOrCreateSourceID(FN, Dir); + } + + OutStreamer.AddBlankLine(); + // declare global variables for (Module::const_global_iterator i = M.global_begin(), e = M.global_end(); i != e; ++i) @@ -225,6 +250,54 @@ void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { raw_string_ostream OS(str); + DebugLoc DL = MI->getDebugLoc(); + if (!DL.isUnknown()) { + + const MDNode *S = DL.getScope(MF->getFunction()->getContext()); + + // This is taken from DwarfDebug.cpp, which is conveniently not a public + // LLVM class. 
+ StringRef Fn; + StringRef Dir; + unsigned Src = 1; + if (S) { + DIDescriptor Scope(S); + if (Scope.isCompileUnit()) { + DICompileUnit CU(S); + Fn = CU.getFilename(); + Dir = CU.getDirectory(); + } else if (Scope.isFile()) { + DIFile F(S); + Fn = F.getFilename(); + Dir = F.getDirectory(); + } else if (Scope.isSubprogram()) { + DISubprogram SP(S); + Fn = SP.getFilename(); + Dir = SP.getDirectory(); + } else if (Scope.isLexicalBlock()) { + DILexicalBlock DB(S); + Fn = DB.getFilename(); + Dir = DB.getDirectory(); + } else + assert(0 && "Unexpected scope info"); + + Src = GetOrCreateSourceID(Fn, Dir); + } + OutStreamer.EmitDwarfLocDirective(Src, DL.getLine(), DL.getCol(), + 0, 0, 0, Fn); + + const MCDwarfLoc& MDL = OutContext.getCurrentDwarfLoc(); + + OS << "\t.loc "; + OS << utostr(MDL.getFileNum()); + OS << " "; + OS << utostr(MDL.getLine()); + OS << " "; + OS << utostr(MDL.getColumn()); + OS << "\n"; + } + + // Emit predicate printPredicateOperand(MI, OS); @@ -298,6 +371,11 @@ void PTXAsmPrinter::printParamOperand(const MachineInstr *MI, int opNum, OS << PARAM_PREFIX << (int) MI->getOperand(opNum).getImm() + 1; } +void PTXAsmPrinter::printReturnOperand(const MachineInstr *MI, int opNum, + raw_ostream &OS, const char *Modifier) { + OS << RETURN_PREFIX << (int) MI->getOperand(opNum).getImm() + 1; +} + void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { // Check to see if this is a special global used by LLVM, if so, emit it. if (EmitSpecialLLVMGlobal(gv)) @@ -417,12 +495,14 @@ void PTXAsmPrinter::EmitFunctionDeclaration() { const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>(); const bool isKernel = MFI->isKernel(); + const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>(); std::string decl = isKernel ? ".entry" : ".func"; + unsigned cnt = 0; + if (!isKernel) { decl += " ("; - for (PTXMachineFunctionInfo::ret_iterator i = MFI->retRegBegin(), e = MFI->retRegEnd(), b = i; i != e; ++i) { @@ -443,7 +523,7 @@ void PTXAsmPrinter::EmitFunctionDeclaration() { decl += " ("; - unsigned cnt = 0; + cnt = 0; // Print parameters for (PTXMachineFunctionInfo::reg_iterator @@ -452,7 +532,7 @@ void PTXAsmPrinter::EmitFunctionDeclaration() { if (i != b) { decl += ", "; } - if (isKernel) { + if (isKernel || ST.useParamSpaceForDeviceArgs()) { decl += ".param .b"; decl += utostr(*i); decl += " "; @@ -467,41 +547,6 @@ void PTXAsmPrinter::EmitFunctionDeclaration() { } decl += ")"; - // // Print parameter list - // if (!MFI->argRegEmpty()) { - // decl += " ("; - // if (isKernel) { - // unsigned cnt = 0; - // for(PTXMachineFunctionInfo::reg_iterator - // i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i; - // i != e; ++i) { - // reg = *i; - // assert(reg != PTX::NoRegister && "Not a valid register!"); - // if (i != b) - // decl += ", "; - // decl += ".param ."; - // decl += getRegisterTypeName(reg); - // decl += " "; - // decl += PARAM_PREFIX; - // decl += utostr(++cnt); - // } - // } else { - // for (PTXMachineFunctionInfo::reg_iterator - // i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i; - // i != e; ++i) { - // reg = *i; - // assert(reg != PTX::NoRegister && "Not a valid register!"); - // if (i != b) - // decl += ", "; - // decl += ".reg ."; - // decl += getRegisterTypeName(reg); - // decl += " "; - // decl += getRegisterName(reg); - // } - // } - // decl += ")"; - // } - OutStreamer.EmitRawText(Twine(decl)); } @@ -524,6 +569,33 @@ printPredicateOperand(const MachineInstr *MI, raw_ostream &O) { } } +unsigned PTXAsmPrinter::GetOrCreateSourceID(StringRef FileName, + 
StringRef DirName) { + // If FE did not provide a file name, then assume stdin. + if (FileName.empty()) + return GetOrCreateSourceID("<stdin>", StringRef()); + + // MCStream expects full path name as filename. + if (!DirName.empty() && !sys::path::is_absolute(FileName)) { + SmallString<128> FullPathName = DirName; + sys::path::append(FullPathName, FileName); + // Here FullPathName will be copied into StringMap by GetOrCreateSourceID. + return GetOrCreateSourceID(StringRef(FullPathName), StringRef()); + } + + StringMapEntry<unsigned> &Entry = SourceIdMap.GetOrCreateValue(FileName); + if (Entry.getValue()) + return Entry.getValue(); + + unsigned SrcId = SourceIdMap.size(); + Entry.setValue(SrcId); + + // Print out a .file directive to specify files for .loc directives. + OutStreamer.EmitDwarfFileDirective(SrcId, Entry.getKey()); + + return SrcId; +} + #include "PTXGenAsmWriter.inc" // Force static initialization. diff --git a/lib/Target/PTX/PTXCallingConv.td b/lib/Target/PTX/PTXCallingConv.td index 4d7759b..3e3ff48 100644 --- a/lib/Target/PTX/PTXCallingConv.td +++ b/lib/Target/PTX/PTXCallingConv.td @@ -1,3 +1,4 @@ + //===--- PTXCallingConv.td - Calling Conventions -----------*- tablegen -*-===// // // The LLVM Compiler Infrastructure @@ -11,26 +12,18 @@ // //===----------------------------------------------------------------------===// -// Currently, we reserve one register of each type for return values and let -// the rest be used for parameters. This is a dirty hack, but I am not sure -// how to tell LLVM that registers used for parameter passing cannot be used -// for return values. - -// PTX Calling Conventions +// PTX Formal Parameter Calling Convention def CC_PTX : CallingConv<[ - CCIfType<[i1], CCAssignToReg<[P1, P2, P3, P4, P5, P6, P7]>>, - CCIfType<[i16], CCAssignToReg<[RH1, RH2, RH3, RH4, RH5, RH6, RH7]>>, - CCIfType<[i32, f32], CCAssignToReg<[R1, R2, R3, R4, R5, R6, R7]>>, - CCIfType<[i64, f64], CCAssignToReg<[RD1, RD2, RD3, RD4, RD5, RD6, RD7]>> + CCIfType<[i1], CCAssignToReg<[P12, P13, P14, P15, P16, P17, P18, P19, P20, P21, P22, P23, P24, P25, P26, P27, P28, P29, P30, P31, P32, P33, P34, P35, P36, P37, P38, P39, P40, P41, P42, P43, P44, P45, P46, P47, P48, P49, P50, P51, P52, P53, P54, P55, P56, P57, P58, P59, P60, P61, P62, P63, P64, P65, P66, P67, P68, P69, P70, P71, P72, P73, P74, P75, P76, P77, P78, P79, P80, P81, P82, P83, P84, P85, P86, P87, P88, P89, P90, P91, P92, P93, P94, P95, P96, P97, P98, P99, P100, P101, P102, P103, P104, P105, P106, P107, P108, P109, P110, P111, P112, P113, P114, P115, P116, P117, P118, P119, P120, P121, P122, P123, P124, P125, P126, P127]>>, + CCIfType<[i16], CCAssignToReg<[RH12, RH13, RH14, RH15, RH16, RH17, RH18, RH19, RH20, RH21, RH22, RH23, RH24, RH25, RH26, RH27, RH28, RH29, RH30, RH31, RH32, RH33, RH34, RH35, RH36, RH37, RH38, RH39, RH40, RH41, RH42, RH43, RH44, RH45, RH46, RH47, RH48, RH49, RH50, RH51, RH52, RH53, RH54, RH55, RH56, RH57, RH58, RH59, RH60, RH61, RH62, RH63, RH64, RH65, RH66, RH67, RH68, RH69, RH70, RH71, RH72, RH73, RH74, RH75, RH76, RH77, RH78, RH79, RH80, RH81, RH82, RH83, RH84, RH85, RH86, RH87, RH88, RH89, RH90, RH91, RH92, RH93, RH94, RH95, RH96, RH97, RH98, RH99, RH100, RH101, RH102, RH103, RH104, RH105, RH106, RH107, RH108, RH109, RH110, RH111, RH112, RH113, RH114, RH115, RH116, RH117, RH118, RH119, RH120, RH121, RH122, RH123, RH124, RH125, RH126, RH127]>>, + CCIfType<[i32,f32], CCAssignToReg<[R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32, R33, R34, R35, 
R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, R77, R78, R79, R80, R81, R82, R83, R84, R85, R86, R87, R88, R89, R90, R91, R92, R93, R94, R95, R96, R97, R98, R99, R100, R101, R102, R103, R104, R105, R106, R107, R108, R109, R110, R111, R112, R113, R114, R115, R116, R117, R118, R119, R120, R121, R122, R123, R124, R125, R126, R127]>>, + CCIfType<[i64,f64], CCAssignToReg<[RD12, RD13, RD14, RD15, RD16, RD17, RD18, RD19, RD20, RD21, RD22, RD23, RD24, RD25, RD26, RD27, RD28, RD29, RD30, RD31, RD32, RD33, RD34, RD35, RD36, RD37, RD38, RD39, RD40, RD41, RD42, RD43, RD44, RD45, RD46, RD47, RD48, RD49, RD50, RD51, RD52, RD53, RD54, RD55, RD56, RD57, RD58, RD59, RD60, RD61, RD62, RD63, RD64, RD65, RD66, RD67, RD68, RD69, RD70, RD71, RD72, RD73, RD74, RD75, RD76, RD77, RD78, RD79, RD80, RD81, RD82, RD83, RD84, RD85, RD86, RD87, RD88, RD89, RD90, RD91, RD92, RD93, RD94, RD95, RD96, RD97, RD98, RD99, RD100, RD101, RD102, RD103, RD104, RD105, RD106, RD107, RD108, RD109, RD110, RD111, RD112, RD113, RD114, RD115, RD116, RD117, RD118, RD119, RD120, RD121, RD122, RD123, RD124, RD125, RD126, RD127]>> ]>; -//===----------------------------------------------------------------------===// -// Return Value Calling Conventions -//===----------------------------------------------------------------------===// - +// PTX Return Value Calling Convention def RetCC_PTX : CallingConv<[ - CCIfType<[i1], CCAssignToReg<[P0]>>, - CCIfType<[i16], CCAssignToReg<[RH0]>>, - CCIfType<[i32, f32], CCAssignToReg<[R0]>>, - CCIfType<[i64, f64], CCAssignToReg<[RD0]>> + CCIfType<[i1], CCAssignToReg<[P0, P1, P2, P3, P4, P5, P6, P7, P8, P9, P10, P11]>>, + CCIfType<[i16], CCAssignToReg<[RH0, RH1, RH2, RH3, RH4, RH5, RH6, RH7, RH8, RH9, RH10, RH11]>>, + CCIfType<[i32,f32], CCAssignToReg<[R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11]>>, + CCIfType<[i64,f64], CCAssignToReg<[RD0, RD1, RD2, RD3, RD4, RD5, RD6, RD7, RD8, RD9, RD10, RD11]>> ]>; diff --git a/lib/Target/PTX/PTXISelDAGToDAG.cpp b/lib/Target/PTX/PTXISelDAGToDAG.cpp index b3c85da..9adfa62 100644 --- a/lib/Target/PTX/PTXISelDAGToDAG.cpp +++ b/lib/Target/PTX/PTXISelDAGToDAG.cpp @@ -15,6 +15,7 @@ #include "PTXTargetMachine.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/DerivedTypes.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -41,8 +42,6 @@ class PTXDAGToDAGISel : public SelectionDAGISel { #include "PTXGenDAGISel.inc" private: - SDNode *SelectREAD_PARAM(SDNode *Node); - // We need this only because we can't match intruction BRAdp // pattern (PTXbrcond bb:$d, ...) 
in PTXInstrInfo.td SDNode *SelectBRCOND(SDNode *Node); @@ -67,8 +66,6 @@ PTXDAGToDAGISel::PTXDAGToDAGISel(PTXTargetMachine &TM, SDNode *PTXDAGToDAGISel::Select(SDNode *Node) { switch (Node->getOpcode()) { - case PTXISD::READ_PARAM: - return SelectREAD_PARAM(Node); case ISD::BRCOND: return SelectBRCOND(Node); default: @@ -76,37 +73,6 @@ SDNode *PTXDAGToDAGISel::Select(SDNode *Node) { } } -SDNode *PTXDAGToDAGISel::SelectREAD_PARAM(SDNode *Node) { - SDValue index = Node->getOperand(1); - DebugLoc dl = Node->getDebugLoc(); - unsigned opcode; - - if (index.getOpcode() != ISD::TargetConstant) - llvm_unreachable("READ_PARAM: index is not ISD::TargetConstant"); - - if (Node->getValueType(0) == MVT::i16) { - opcode = PTX::LDpiU16; - } - else if (Node->getValueType(0) == MVT::i32) { - opcode = PTX::LDpiU32; - } - else if (Node->getValueType(0) == MVT::i64) { - opcode = PTX::LDpiU64; - } - else if (Node->getValueType(0) == MVT::f32) { - opcode = PTX::LDpiF32; - } - else if (Node->getValueType(0) == MVT::f64) { - opcode = PTX::LDpiF64; - } - else { - llvm_unreachable("Unknown parameter type for ld.param"); - } - - return PTXInstrInfo:: - GetPTXMachineNode(CurDAG, opcode, dl, Node->getValueType(0), index); -} - SDNode *PTXDAGToDAGISel::SelectBRCOND(SDNode *Node) { assert(Node->getNumOperands() >= 3); diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp index c3cdaba..7831aa0 100644 --- a/lib/Target/PTX/PTXISelLowering.cpp +++ b/lib/Target/PTX/PTXISelLowering.cpp @@ -15,6 +15,7 @@ #include "PTXISelLowering.h" #include "PTXMachineFunctionInfo.h" #include "PTXRegisterInfo.h" +#include "PTXSubtarget.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFunction.h" @@ -46,38 +47,59 @@ PTXTargetLowering::PTXTargetLowering(TargetMachine &TM) addRegisterClass(MVT::f64, PTX::RegF64RegisterClass); setBooleanContents(ZeroOrOneBooleanContent); - - setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); - - setOperationAction(ISD::ConstantFP, MVT::f32, Legal); - setOperationAction(ISD::ConstantFP, MVT::f64, Legal); - - // Turn i16 (z)extload into load + (z)extend + setMinFunctionAlignment(2); + + //////////////////////////////////// + /////////// Expansion ////////////// + //////////////////////////////////// + + // (any/zero/sign) extload => load + (any/zero/sign) extend + setLoadExtAction(ISD::EXTLOAD, MVT::i16, Expand); setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand); - - // Turn f32 extload into load + fextend - setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); - - // Turn f64 truncstore into trunc + store. 
- setTruncStoreAction(MVT::f64, MVT::f32, Expand); - - // Customize translation of memory addresses - setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); - setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); - - // Expand BR_CC into BRCOND + setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand); + + // f32 extload => load + fextend + + setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + + // f64 truncstore => trunc + store + + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + + // sign_extend_inreg => sign_extend + + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + + // br_cc => brcond + setOperationAction(ISD::BR_CC, MVT::Other, Expand); - // Expand SELECT_CC into SETCC + // select_cc => setcc + setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); - - // need to lower SETCC of RegPred into bitwise logic + + //////////////////////////////////// + //////////// Legal ///////////////// + //////////////////////////////////// + + setOperationAction(ISD::ConstantFP, MVT::f32, Legal); + setOperationAction(ISD::ConstantFP, MVT::f64, Legal); + + //////////////////////////////////// + //////////// Custom //////////////// + //////////////////////////////////// + + // customise setcc to use bitwise logic if possible + setOperationAction(ISD::SETCC, MVT::i1, Custom); - setMinFunctionAlignment(2); + // customize translation of memory addresses + + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); // Compute derived properties from the register classes computeRegisterProperties(); @@ -104,8 +126,10 @@ const char *PTXTargetLowering::getTargetNodeName(unsigned Opcode) const { llvm_unreachable("Unknown opcode"); case PTXISD::COPY_ADDRESS: return "PTXISD::COPY_ADDRESS"; - case PTXISD::READ_PARAM: - return "PTXISD::READ_PARAM"; + case PTXISD::LOAD_PARAM: + return "PTXISD::LOAD_PARAM"; + case PTXISD::STORE_PARAM: + return "PTXISD::STORE_PARAM"; case PTXISD::EXIT: return "PTXISD::EXIT"; case PTXISD::RET: @@ -192,6 +216,7 @@ SDValue PTXTargetLowering:: if (isVarArg) llvm_unreachable("PTX does not support varargs"); MachineFunction &MF = DAG.getMachineFunction(); + const PTXSubtarget& ST = getTargetMachine().getSubtarget<PTXSubtarget>(); PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>(); switch (CallConv) { @@ -206,13 +231,17 @@ SDValue PTXTargetLowering:: break; } - if (MFI->isKernel()) { - // For kernel functions, we just need to emit the proper READ_PARAM ISDs + // We do one of two things here: + // IsKernel || SM >= 2.0 -> Use param space for arguments + // SM < 2.0 -> Use registers for arguments + if (MFI->isKernel() || ST.useParamSpaceForDeviceArgs()) { + // We just need to emit the proper LOAD_PARAM ISDs for (unsigned i = 0, e = Ins.size(); i != e; ++i) { - assert(Ins[i].VT != MVT::i1 && "Kernels cannot take pred operands"); + assert((!MFI->isKernel() || Ins[i].VT != MVT::i1) && + "Kernels cannot take pred operands"); - SDValue ArgValue = DAG.getNode(PTXISD::READ_PARAM, dl, Ins[i].VT, Chain, + SDValue ArgValue = DAG.getNode(PTXISD::LOAD_PARAM, dl, Ins[i].VT, Chain, DAG.getTargetConstant(i, MVT::i32)); InVals.push_back(ArgValue); @@ -299,16 +328,20 @@ SDValue PTXTargetLowering:: MachineFunction& MF = DAG.getMachineFunction(); PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>(); - SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 
- getTargetMachine(), RVLocs, *DAG.getContext()); SDValue Flag; + // Even though we could use the .param space for return arguments for + // device functions if SM >= 2.0 and the number of return arguments is + // only 1, we just always use registers since this makes the codegen + // easier. + SmallVector<CCValAssign, 16> RVLocs; + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), RVLocs, *DAG.getContext()); + CCInfo.AnalyzeReturn(Outs, RetCC_PTX); for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { - CCValAssign& VA = RVLocs[i]; assert(VA.isRegLoc() && "CCValAssign must be RegLoc"); diff --git a/lib/Target/PTX/PTXISelLowering.h b/lib/Target/PTX/PTXISelLowering.h index ead17ed..4318541 100644 --- a/lib/Target/PTX/PTXISelLowering.h +++ b/lib/Target/PTX/PTXISelLowering.h @@ -24,12 +24,13 @@ class PTXTargetMachine; namespace PTXISD { enum NodeType { FIRST_NUMBER = ISD::BUILTIN_OP_END, - READ_PARAM, + LOAD_PARAM, + STORE_PARAM, EXIT, RET, COPY_ADDRESS }; -} // namespace PTXISD +} // namespace PTXISD class PTXTargetLowering : public TargetLowering { public: diff --git a/lib/Target/PTX/PTXInstrInfo.cpp b/lib/Target/PTX/PTXInstrInfo.cpp index 5bdac89..1bbd8d5 100644 --- a/lib/Target/PTX/PTXInstrInfo.cpp +++ b/lib/Target/PTX/PTXInstrInfo.cpp @@ -21,10 +21,11 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -using namespace llvm; - +#define GET_INSTRINFO_MC_DESC #include "PTXGenInstrInfo.inc" +using namespace llvm; + PTXInstrInfo::PTXInstrInfo(PTXTargetMachine &_TM) : TargetInstrInfoImpl(PTXInsts, array_lengthof(PTXInsts)), RI(_TM, *this), TM(_TM) {} @@ -47,8 +48,8 @@ void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB, bool KillSrc) const { for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i) { if (map[i].cls->contains(DstReg, SrcReg)) { - const TargetInstrDesc &TID = get(map[i].opcode); - MachineInstr *MI = BuildMI(MBB, I, DL, TID, DstReg). + const MCInstrDesc &MCID = get(map[i].opcode); + MachineInstr *MI = BuildMI(MBB, I, DL, MCID, DstReg). addReg(SrcReg, getKillRegState(KillSrc)); AddDefaultPredicate(MI); return; @@ -69,8 +70,8 @@ bool PTXInstrInfo::copyRegToReg(MachineBasicBlock &MBB, for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i) if (DstRC == map[i].cls) { - const TargetInstrDesc &TID = get(map[i].opcode); - MachineInstr *MI = BuildMI(MBB, I, DL, TID, DstReg).addReg(SrcReg); + const MCInstrDesc &MCID = get(map[i].opcode); + MachineInstr *MI = BuildMI(MBB, I, DL, MCID, DstReg).addReg(SrcReg); AddDefaultPredicate(MI); return true; } @@ -178,13 +179,13 @@ AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock::const_iterator iter = MBB.end(); const MachineInstr& instLast1 = *--iter; - const TargetInstrDesc &desc1 = instLast1.getDesc(); + const MCInstrDesc &desc1 = instLast1.getDesc(); // for the special case where MBB has only 1 instruction const bool IsSizeOne = MBB.size() == 1; // if IsSizeOne is true, *--iter and instLast2 are invalid, so // we put dummy values in instLast2 and desc2 since they are still used const MachineInstr& instLast2 = IsSizeOne ? instLast1 : *--iter; - const TargetInstrDesc &desc2 = IsSizeOne ?
desc1 : instLast2.getDesc(); DEBUG(dbgs() << "\n"); DEBUG(dbgs() << "AnalyzeBranch: opcode: " << instLast1.getOpcode() << "\n"); @@ -387,7 +388,7 @@ void PTXInstrInfo::AddDefaultPredicate(MachineInstr *MI) { } bool PTXInstrInfo::IsAnyKindOfBranch(const MachineInstr& inst) { - const TargetInstrDesc &desc = inst.getDesc(); + const MCInstrDesc &desc = inst.getDesc(); return desc.isTerminator() || desc.isBranch() || desc.isIndirectBranch(); } diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td index cc74944..6bfe906 100644 --- a/lib/Target/PTX/PTXInstrInfo.td +++ b/lib/Target/PTX/PTXInstrInfo.td @@ -26,10 +26,10 @@ def Use32BitAddresses : Predicate<"!getSubtarget().is64Bit()">; def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">; // Shader Model Support -def SupportsSM13 : Predicate<"getSubtarget().supportsSM13()">; -def DoesNotSupportSM13 : Predicate<"!getSubtarget().supportsSM13()">; -def SupportsSM20 : Predicate<"getSubtarget().supportsSM20()">; -def DoesNotSupportSM20 : Predicate<"!getSubtarget().supportsSM20()">; +def FDivNeedsRoundingMode : Predicate<"getSubtarget().fdivNeedsRoundingMode()">; +def FDivNoRoundingMode : Predicate<"!getSubtarget().fdivNeedsRoundingMode()">; +def FMadNeedsRoundingMode : Predicate<"getSubtarget().fmadNeedsRoundingMode()">; +def FMadNoRoundingMode : Predicate<"!getSubtarget().fmadNeedsRoundingMode()">; // PTX Version Support def SupportsPTX21 : Predicate<"getSubtarget().supportsPTX21()">; @@ -163,6 +163,10 @@ def MEMpi : Operand<i32> { let PrintMethod = "printParamOperand"; let MIOperandInfo = (ops i32imm); } +def MEMret : Operand<i32> { + let PrintMethod = "printReturnOperand"; + let MIOperandInfo = (ops i32imm); +} // Branch & call targets have OtherVT type. def brtarget : Operand<OtherVT>; @@ -180,10 +184,19 @@ def PTXsra : SDNode<"ISD::SRA", SDTIntBinOp>; def PTXexit : SDNode<"PTXISD::EXIT", SDTNone, [SDNPHasChain]>; def PTXret - : SDNode<"PTXISD::RET", SDTNone, [SDNPHasChain]>; + : SDNode<"PTXISD::RET", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def PTXcopyaddress : SDNode<"PTXISD::COPY_ADDRESS", SDTypeProfile<1, 1, []>, []>; +// Load/store .param space +def PTXloadparam + : SDNode<"PTXISD::LOAD_PARAM", SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>, + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>; +def PTXstoreparam + : SDNode<"PTXISD::STORE_PARAM", SDTypeProfile<0, 2, [SDTCisVT<0, i32>]>, + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>; + //===----------------------------------------------------------------------===// // Instruction Class Templates //===----------------------------------------------------------------------===// @@ -600,43 +613,43 @@ def FDIVrr32SM13 : InstPTX<(outs RegF32:$d), (ins RegF32:$a, RegF32:$b), "div.rn.f32\t$d, $a, $b", [(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>, - Requires<[SupportsSM13]>; + Requires<[FDivNeedsRoundingMode]>; def FDIVri32SM13 : InstPTX<(outs RegF32:$d), (ins RegF32:$a, f32imm:$b), "div.rn.f32\t$d, $a, $b", [(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>, - Requires<[SupportsSM13]>; + Requires<[FDivNeedsRoundingMode]>; def FDIVrr32SM10 : InstPTX<(outs RegF32:$d), (ins RegF32:$a, RegF32:$b), "div.f32\t$d, $a, $b", [(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>, - Requires<[DoesNotSupportSM13]>; + Requires<[FDivNoRoundingMode]>; def FDIVri32SM10 : InstPTX<(outs RegF32:$d), (ins RegF32:$a, f32imm:$b), "div.f32\t$d, $a, $b", [(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>, - Requires<[DoesNotSupportSM13]>; + Requires<[FDivNoRoundingMode]>; def FDIVrr64SM13 : InstPTX<(outs 
RegF64:$d), (ins RegF64:$a, RegF64:$b), "div.rn.f64\t$d, $a, $b", [(set RegF64:$d, (fdiv RegF64:$a, RegF64:$b))]>, - Requires<[SupportsSM13]>; + Requires<[FDivNeedsRoundingMode]>; def FDIVri64SM13 : InstPTX<(outs RegF64:$d), (ins RegF64:$a, f64imm:$b), "div.rn.f64\t$d, $a, $b", [(set RegF64:$d, (fdiv RegF64:$a, fpimm:$b))]>, - Requires<[SupportsSM13]>; + Requires<[FDivNeedsRoundingMode]>; def FDIVrr64SM10 : InstPTX<(outs RegF64:$d), (ins RegF64:$a, RegF64:$b), "div.f64\t$d, $a, $b", [(set RegF64:$d, (fdiv RegF64:$a, RegF64:$b))]>, - Requires<[DoesNotSupportSM13]>; + Requires<[FDivNoRoundingMode]>; def FDIVri64SM10 : InstPTX<(outs RegF64:$d), (ins RegF64:$a, f64imm:$b), "div.f64\t$d, $a, $b", [(set RegF64:$d, (fdiv RegF64:$a, fpimm:$b))]>, - Requires<[DoesNotSupportSM13]>; + Requires<[FDivNoRoundingMode]>; @@ -648,8 +661,10 @@ def FDIVri64SM10 : InstPTX<(outs RegF64:$d), // In the short term, mad is supported on all PTX versions and we use a // default rounding mode no matter what shader model or PTX version. // TODO: Allow the rounding mode to be selectable through llc. -defm FMADSM13 : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>, Requires<[SupportsSM13, SupportsFMA]>; -defm FMAD : PTX_FLOAT_4OP<"mad", fmul, fadd>, Requires<[DoesNotSupportSM13, SupportsFMA]>; +defm FMADSM13 : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>, + Requires<[FMadNeedsRoundingMode, SupportsFMA]>; +defm FMAD : PTX_FLOAT_4OP<"mad", fmul, fadd>, + Requires<[FMadNoRoundingMode, SupportsFMA]>; ///===- Floating-Point Intrinsic Instructions -----------------------------===// @@ -816,17 +831,48 @@ defm LDc : PTX_LD_ALL<"ld.const", load_constant>; defm LDl : PTX_LD_ALL<"ld.local", load_local>; defm LDs : PTX_LD_ALL<"ld.shared", load_shared>; -// This is a special instruction that is manually inserted for kernel parameters -def LDpiU16 : InstPTX<(outs RegI16:$d), (ins MEMpi:$a), - "ld.param.u16\t$d, [$a]", []>; -def LDpiU32 : InstPTX<(outs RegI32:$d), (ins MEMpi:$a), - "ld.param.u32\t$d, [$a]", []>; -def LDpiU64 : InstPTX<(outs RegI64:$d), (ins MEMpi:$a), - "ld.param.u64\t$d, [$a]", []>; -def LDpiF32 : InstPTX<(outs RegF32:$d), (ins MEMpi:$a), - "ld.param.f32\t$d, [$a]", []>; -def LDpiF64 : InstPTX<(outs RegF64:$d), (ins MEMpi:$a), - "ld.param.f64\t$d, [$a]", []>; +// These instructions are used to load/store from the .param space for +// device and kernel parameters + +let hasSideEffects = 1 in { + def LDpiPred : InstPTX<(outs RegPred:$d), (ins MEMpi:$a), + "ld.param.pred\t$d, [$a]", + [(set RegPred:$d, (PTXloadparam timm:$a))]>; + def LDpiU16 : InstPTX<(outs RegI16:$d), (ins MEMpi:$a), + "ld.param.u16\t$d, [$a]", + [(set RegI16:$d, (PTXloadparam timm:$a))]>; + def LDpiU32 : InstPTX<(outs RegI32:$d), (ins MEMpi:$a), + "ld.param.u32\t$d, [$a]", + [(set RegI32:$d, (PTXloadparam timm:$a))]>; + def LDpiU64 : InstPTX<(outs RegI64:$d), (ins MEMpi:$a), + "ld.param.u64\t$d, [$a]", + [(set RegI64:$d, (PTXloadparam timm:$a))]>; + def LDpiF32 : InstPTX<(outs RegF32:$d), (ins MEMpi:$a), + "ld.param.f32\t$d, [$a]", + [(set RegF32:$d, (PTXloadparam timm:$a))]>; + def LDpiF64 : InstPTX<(outs RegF64:$d), (ins MEMpi:$a), + "ld.param.f64\t$d, [$a]", + [(set RegF64:$d, (PTXloadparam timm:$a))]>; + + def STpiPred : InstPTX<(outs), (ins MEMret:$d, RegPred:$a), + "st.param.pred\t[$d], $a", + [(PTXstoreparam timm:$d, RegPred:$a)]>; + def STpiU16 : InstPTX<(outs), (ins MEMret:$d, RegI16:$a), + "st.param.u16\t[$d], $a", + [(PTXstoreparam timm:$d, RegI16:$a)]>; + def STpiU32 : InstPTX<(outs), (ins MEMret:$d, RegI32:$a), + "st.param.u32\t[$d], $a", + [(PTXstoreparam 
timm:$d, RegI32:$a)]>; + def STpiU64 : InstPTX<(outs), (ins MEMret:$d, RegI64:$a), + "st.param.u64\t[$d], $a", + [(PTXstoreparam timm:$d, RegI64:$a)]>; + def STpiF32 : InstPTX<(outs), (ins MEMret:$d, RegF32:$a), + "st.param.f32\t[$d], $a", + [(PTXstoreparam timm:$d, RegF32:$a)]>; + def STpiF64 : InstPTX<(outs), (ins MEMret:$d, RegF64:$a), + "st.param.f64\t[$d], $a", + [(PTXstoreparam timm:$d, RegF64:$a)]>; +} // Stores defm STg : PTX_ST_ALL<"st.global", store_global>; @@ -842,33 +888,41 @@ defm STs : PTX_ST_ALL<"st.shared", store_shared>; // by performing a greater-than test between the value and zero. This follows // the C convention that any non-zero value is equivalent to 'true'. def CVT_pred_u16 - : InstPTX<(outs RegPred:$d), (ins RegI16:$a), "setp.gt.b16\t$d, $a, 0", + : InstPTX<(outs RegPred:$d), (ins RegI16:$a), "setp.gt.u16\t$d, $a, 0", [(set RegPred:$d, (trunc RegI16:$a))]>; def CVT_pred_u32 - : InstPTX<(outs RegPred:$d), (ins RegI32:$a), "setp.gt.b32\t$d, $a, 0", + : InstPTX<(outs RegPred:$d), (ins RegI32:$a), "setp.gt.u32\t$d, $a, 0", [(set RegPred:$d, (trunc RegI32:$a))]>; def CVT_pred_u64 - : InstPTX<(outs RegPred:$d), (ins RegI64:$a), "setp.gt.b64\t$d, $a, 0", + : InstPTX<(outs RegPred:$d), (ins RegI64:$a), "setp.gt.u64\t$d, $a, 0", [(set RegPred:$d, (trunc RegI64:$a))]>; def CVT_pred_f32 - : InstPTX<(outs RegPred:$d), (ins RegF32:$a), "setp.gt.b32\t$d, $a, 0", + : InstPTX<(outs RegPred:$d), (ins RegF32:$a), "setp.gt.f32\t$d, $a, 0", [(set RegPred:$d, (fp_to_uint RegF32:$a))]>; def CVT_pred_f64 - : InstPTX<(outs RegPred:$d), (ins RegF64:$a), "setp.gt.b64\t$d, $a, 0", + : InstPTX<(outs RegPred:$d), (ins RegF64:$a), "setp.gt.f64\t$d, $a, 0", [(set RegPred:$d, (fp_to_uint RegF64:$a))]>; // Conversion to u16 // PTX does not directly support converting a predicate to a value, so we // use a select instruction to select either 0 or 1 (integer or fp) based // on the truth value of the predicate. 
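As a rough scalar model of what these conversion patterns compute (illustrative C++ only, not part of the patch; function names are invented): truncation to a predicate is emitted as an unsigned greater-than-zero test, i.e. C-style truthiness rather than taking the low bit, and predicate-to-integer is a select of 1 or 0.

#include <cstdint>
// What CVT_pred_u32 computes: setp.gt.u32 p, r, 0
bool truncToPred(uint32_t r) { return r > 0; }
// What CVT_u32_pred computes: selp.u32 r, 1, 0, p
uint32_t predToU32(bool p) { return p ? 1u : 0u; }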
+def CVT_u16_preda + : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a", + [(set RegI16:$d, (anyext RegPred:$a))]>; + def CVT_u16_pred : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a", [(set RegI16:$d, (zext RegPred:$a))]>; +def CVT_u16_preds + : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a", + [(set RegI16:$d, (sext RegPred:$a))]>; + def CVT_u16_u32 : InstPTX<(outs RegI16:$d), (ins RegI32:$a), "cvt.u16.u32\t$d, $a", [(set RegI16:$d, (trunc RegI32:$a))]>; @@ -891,10 +945,22 @@ def CVT_u32_pred : InstPTX<(outs RegI32:$d), (ins RegPred:$a), "selp.u32\t$d, 1, 0, $a", [(set RegI32:$d, (zext RegPred:$a))]>; +def CVT_u32_b16 + : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a", + [(set RegI32:$d, (anyext RegI16:$a))]>; + def CVT_u32_u16 : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a", [(set RegI32:$d, (zext RegI16:$a))]>; +def CVT_u32_preds + : InstPTX<(outs RegI32:$d), (ins RegPred:$a), "selp.u32\t$d, 1, 0, $a", + [(set RegI32:$d, (sext RegPred:$a))]>; + +def CVT_u32_s16 + : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.s16\t$d, $a", + [(set RegI32:$d, (sext RegI16:$a))]>; + def CVT_u32_u64 : InstPTX<(outs RegI32:$d), (ins RegI64:$a), "cvt.u32.u64\t$d, $a", [(set RegI32:$d, (trunc RegI64:$a))]>; @@ -913,14 +979,26 @@ def CVT_u64_pred : InstPTX<(outs RegI64:$d), (ins RegPred:$a), "selp.u64\t$d, 1, 0, $a", [(set RegI64:$d, (zext RegPred:$a))]>; +def CVT_u64_preds + : InstPTX<(outs RegI64:$d), (ins RegPred:$a), "selp.u64\t$d, 1, 0, $a", + [(set RegI64:$d, (sext RegPred:$a))]>; + def CVT_u64_u16 : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.u16\t$d, $a", [(set RegI64:$d, (zext RegI16:$a))]>; +def CVT_u64_s16 + : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.s16\t$d, $a", + [(set RegI64:$d, (sext RegI16:$a))]>; + def CVT_u64_u32 : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.u32\t$d, $a", [(set RegI64:$d, (zext RegI32:$a))]>; +def CVT_u64_s32 + : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.s32\t$d, $a", + [(set RegI64:$d, (sext RegI32:$a))]>; + def CVT_u64_f32 : InstPTX<(outs RegI64:$d), (ins RegF32:$a), "cvt.rzi.u64.f32\t$d, $a", [(set RegI64:$d, (fp_to_uint RegF32:$a))]>; diff --git a/lib/Target/PTX/PTXMachineFunctionInfo.h b/lib/Target/PTX/PTXMachineFunctionInfo.h index 1da4b5d..9d65f5b 100644 --- a/lib/Target/PTX/PTXMachineFunctionInfo.h +++ b/lib/Target/PTX/PTXMachineFunctionInfo.h @@ -26,7 +26,7 @@ class PTXMachineFunctionInfo : public MachineFunctionInfo { private: bool is_kernel; std::vector<unsigned> reg_arg, reg_local_var; - DenseSet<unsigned> reg_ret; + std::vector<unsigned> reg_ret; bool _isDoneAddArg; public: @@ -40,7 +40,11 @@ public: void addArgReg(unsigned reg) { reg_arg.push_back(reg); } void addLocalVarReg(unsigned reg) { reg_local_var.push_back(reg); } - void addRetReg(unsigned reg) { reg_ret.insert(reg); } + void addRetReg(unsigned reg) { + if (!isRetReg(reg)) { + reg_ret.push_back(reg); + } + } void doneAddArg(void) { _isDoneAddArg = true; @@ -51,7 +55,7 @@ public: typedef std::vector<unsigned>::const_iterator reg_iterator; typedef std::vector<unsigned>::const_reverse_iterator reg_reverse_iterator; - typedef DenseSet<unsigned>::const_iterator ret_iterator; + typedef std::vector<unsigned>::const_iterator ret_iterator; bool argRegEmpty() const { return reg_arg.empty(); } int getNumArg() const { return reg_arg.size(); } diff --git a/lib/Target/PTX/PTXRegisterInfo.cpp b/lib/Target/PTX/PTXRegisterInfo.cpp index b7c7ee5..f32c2b7 100644 --- 
a/lib/Target/PTX/PTXRegisterInfo.cpp +++ b/lib/Target/PTX/PTXRegisterInfo.cpp @@ -17,10 +17,16 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -using namespace llvm; - +#define GET_REGINFO_MC_DESC +#define GET_REGINFO_TARGET_DESC #include "PTXGenRegisterInfo.inc" +using namespace llvm; + +PTXRegisterInfo::PTXRegisterInfo(PTXTargetMachine &TM, + const TargetInstrInfo &TII) + : PTXGenRegisterInfo() { +} void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, diff --git a/lib/Target/PTX/PTXRegisterInfo.h b/lib/Target/PTX/PTXRegisterInfo.h index 223e965..0b63cb6 100644 --- a/lib/Target/PTX/PTXRegisterInfo.h +++ b/lib/Target/PTX/PTXRegisterInfo.h @@ -17,7 +17,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/ADT/BitVector.h" -#include "PTXGenRegisterInfo.h.inc" +#define GET_REGINFO_HEADER +#include "PTXGenRegisterInfo.inc" namespace llvm { class PTXTargetMachine; @@ -25,7 +26,7 @@ class MachineFunction; struct PTXRegisterInfo : public PTXGenRegisterInfo { PTXRegisterInfo(PTXTargetMachine &TM, - const TargetInstrInfo &TII) {} + const TargetInstrInfo &TII); virtual const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const { diff --git a/lib/Target/PTX/PTXRegisterInfo.td b/lib/Target/PTX/PTXRegisterInfo.td index 08a39a8..1313d24 100644 --- a/lib/Target/PTX/PTXRegisterInfo.td +++ b/lib/Target/PTX/PTXRegisterInfo.td @@ -1,3 +1,4 @@ + //===- PTXRegisterInfo.td - PTX Register defs ----------------*- tblgen -*-===// // // The LLVM Compiler Infrastructure @@ -21,55 +22,534 @@ class PTXReg<string n> : Register<n> { ///===- Predicate Registers -----------------------------------------------===// -def P0 : PTXReg<"p0">; -def P1 : PTXReg<"p1">; -def P2 : PTXReg<"p2">; -def P3 : PTXReg<"p3">; -def P4 : PTXReg<"p4">; -def P5 : PTXReg<"p5">; -def P6 : PTXReg<"p6">; -def P7 : PTXReg<"p7">; +def P0 : PTXReg<"p0">; +def P1 : PTXReg<"p1">; +def P2 : PTXReg<"p2">; +def P3 : PTXReg<"p3">; +def P4 : PTXReg<"p4">; +def P5 : PTXReg<"p5">; +def P6 : PTXReg<"p6">; +def P7 : PTXReg<"p7">; +def P8 : PTXReg<"p8">; +def P9 : PTXReg<"p9">; +def P10 : PTXReg<"p10">; +def P11 : PTXReg<"p11">; +def P12 : PTXReg<"p12">; +def P13 : PTXReg<"p13">; +def P14 : PTXReg<"p14">; +def P15 : PTXReg<"p15">; +def P16 : PTXReg<"p16">; +def P17 : PTXReg<"p17">; +def P18 : PTXReg<"p18">; +def P19 : PTXReg<"p19">; +def P20 : PTXReg<"p20">; +def P21 : PTXReg<"p21">; +def P22 : PTXReg<"p22">; +def P23 : PTXReg<"p23">; +def P24 : PTXReg<"p24">; +def P25 : PTXReg<"p25">; +def P26 : PTXReg<"p26">; +def P27 : PTXReg<"p27">; +def P28 : PTXReg<"p28">; +def P29 : PTXReg<"p29">; +def P30 : PTXReg<"p30">; +def P31 : PTXReg<"p31">; +def P32 : PTXReg<"p32">; +def P33 : PTXReg<"p33">; +def P34 : PTXReg<"p34">; +def P35 : PTXReg<"p35">; +def P36 : PTXReg<"p36">; +def P37 : PTXReg<"p37">; +def P38 : PTXReg<"p38">; +def P39 : PTXReg<"p39">; +def P40 : PTXReg<"p40">; +def P41 : PTXReg<"p41">; +def P42 : PTXReg<"p42">; +def P43 : PTXReg<"p43">; +def P44 : PTXReg<"p44">; +def P45 : PTXReg<"p45">; +def P46 : PTXReg<"p46">; +def P47 : PTXReg<"p47">; +def P48 : PTXReg<"p48">; +def P49 : PTXReg<"p49">; +def P50 : PTXReg<"p50">; +def P51 : PTXReg<"p51">; +def P52 : PTXReg<"p52">; +def P53 : PTXReg<"p53">; +def P54 : PTXReg<"p54">; +def P55 : PTXReg<"p55">; +def P56 : PTXReg<"p56">; +def P57 : PTXReg<"p57">; +def P58 : PTXReg<"p58">; +def P59 : PTXReg<"p59">; +def P60 : PTXReg<"p60">; +def P61 : PTXReg<"p61">; +def P62 : PTXReg<"p62">; +def P63 : PTXReg<"p63">; +def P64 : PTXReg<"p64">; +def 
P65 : PTXReg<"p65">; +def P66 : PTXReg<"p66">; +def P67 : PTXReg<"p67">; +def P68 : PTXReg<"p68">; +def P69 : PTXReg<"p69">; +def P70 : PTXReg<"p70">; +def P71 : PTXReg<"p71">; +def P72 : PTXReg<"p72">; +def P73 : PTXReg<"p73">; +def P74 : PTXReg<"p74">; +def P75 : PTXReg<"p75">; +def P76 : PTXReg<"p76">; +def P77 : PTXReg<"p77">; +def P78 : PTXReg<"p78">; +def P79 : PTXReg<"p79">; +def P80 : PTXReg<"p80">; +def P81 : PTXReg<"p81">; +def P82 : PTXReg<"p82">; +def P83 : PTXReg<"p83">; +def P84 : PTXReg<"p84">; +def P85 : PTXReg<"p85">; +def P86 : PTXReg<"p86">; +def P87 : PTXReg<"p87">; +def P88 : PTXReg<"p88">; +def P89 : PTXReg<"p89">; +def P90 : PTXReg<"p90">; +def P91 : PTXReg<"p91">; +def P92 : PTXReg<"p92">; +def P93 : PTXReg<"p93">; +def P94 : PTXReg<"p94">; +def P95 : PTXReg<"p95">; +def P96 : PTXReg<"p96">; +def P97 : PTXReg<"p97">; +def P98 : PTXReg<"p98">; +def P99 : PTXReg<"p99">; +def P100 : PTXReg<"p100">; +def P101 : PTXReg<"p101">; +def P102 : PTXReg<"p102">; +def P103 : PTXReg<"p103">; +def P104 : PTXReg<"p104">; +def P105 : PTXReg<"p105">; +def P106 : PTXReg<"p106">; +def P107 : PTXReg<"p107">; +def P108 : PTXReg<"p108">; +def P109 : PTXReg<"p109">; +def P110 : PTXReg<"p110">; +def P111 : PTXReg<"p111">; +def P112 : PTXReg<"p112">; +def P113 : PTXReg<"p113">; +def P114 : PTXReg<"p114">; +def P115 : PTXReg<"p115">; +def P116 : PTXReg<"p116">; +def P117 : PTXReg<"p117">; +def P118 : PTXReg<"p118">; +def P119 : PTXReg<"p119">; +def P120 : PTXReg<"p120">; +def P121 : PTXReg<"p121">; +def P122 : PTXReg<"p122">; +def P123 : PTXReg<"p123">; +def P124 : PTXReg<"p124">; +def P125 : PTXReg<"p125">; +def P126 : PTXReg<"p126">; +def P127 : PTXReg<"p127">; -///===- 16-bit Integer Registers ------------------------------------------===// +///===- 16-Bit Registers --------------------------------------------------===// -def RH0 : PTXReg<"rh0">; -def RH1 : PTXReg<"rh1">; -def RH2 : PTXReg<"rh2">; -def RH3 : PTXReg<"rh3">; -def RH4 : PTXReg<"rh4">; -def RH5 : PTXReg<"rh5">; -def RH6 : PTXReg<"rh6">; -def RH7 : PTXReg<"rh7">; +def RH0 : PTXReg<"rh0">; +def RH1 : PTXReg<"rh1">; +def RH2 : PTXReg<"rh2">; +def RH3 : PTXReg<"rh3">; +def RH4 : PTXReg<"rh4">; +def RH5 : PTXReg<"rh5">; +def RH6 : PTXReg<"rh6">; +def RH7 : PTXReg<"rh7">; +def RH8 : PTXReg<"rh8">; +def RH9 : PTXReg<"rh9">; +def RH10 : PTXReg<"rh10">; +def RH11 : PTXReg<"rh11">; +def RH12 : PTXReg<"rh12">; +def RH13 : PTXReg<"rh13">; +def RH14 : PTXReg<"rh14">; +def RH15 : PTXReg<"rh15">; +def RH16 : PTXReg<"rh16">; +def RH17 : PTXReg<"rh17">; +def RH18 : PTXReg<"rh18">; +def RH19 : PTXReg<"rh19">; +def RH20 : PTXReg<"rh20">; +def RH21 : PTXReg<"rh21">; +def RH22 : PTXReg<"rh22">; +def RH23 : PTXReg<"rh23">; +def RH24 : PTXReg<"rh24">; +def RH25 : PTXReg<"rh25">; +def RH26 : PTXReg<"rh26">; +def RH27 : PTXReg<"rh27">; +def RH28 : PTXReg<"rh28">; +def RH29 : PTXReg<"rh29">; +def RH30 : PTXReg<"rh30">; +def RH31 : PTXReg<"rh31">; +def RH32 : PTXReg<"rh32">; +def RH33 : PTXReg<"rh33">; +def RH34 : PTXReg<"rh34">; +def RH35 : PTXReg<"rh35">; +def RH36 : PTXReg<"rh36">; +def RH37 : PTXReg<"rh37">; +def RH38 : PTXReg<"rh38">; +def RH39 : PTXReg<"rh39">; +def RH40 : PTXReg<"rh40">; +def RH41 : PTXReg<"rh41">; +def RH42 : PTXReg<"rh42">; +def RH43 : PTXReg<"rh43">; +def RH44 : PTXReg<"rh44">; +def RH45 : PTXReg<"rh45">; +def RH46 : PTXReg<"rh46">; +def RH47 : PTXReg<"rh47">; +def RH48 : PTXReg<"rh48">; +def RH49 : PTXReg<"rh49">; +def RH50 : PTXReg<"rh50">; +def RH51 : PTXReg<"rh51">; +def RH52 : PTXReg<"rh52">; +def RH53 : PTXReg<"rh53">; 
+def RH54 : PTXReg<"rh54">; +def RH55 : PTXReg<"rh55">; +def RH56 : PTXReg<"rh56">; +def RH57 : PTXReg<"rh57">; +def RH58 : PTXReg<"rh58">; +def RH59 : PTXReg<"rh59">; +def RH60 : PTXReg<"rh60">; +def RH61 : PTXReg<"rh61">; +def RH62 : PTXReg<"rh62">; +def RH63 : PTXReg<"rh63">; +def RH64 : PTXReg<"rh64">; +def RH65 : PTXReg<"rh65">; +def RH66 : PTXReg<"rh66">; +def RH67 : PTXReg<"rh67">; +def RH68 : PTXReg<"rh68">; +def RH69 : PTXReg<"rh69">; +def RH70 : PTXReg<"rh70">; +def RH71 : PTXReg<"rh71">; +def RH72 : PTXReg<"rh72">; +def RH73 : PTXReg<"rh73">; +def RH74 : PTXReg<"rh74">; +def RH75 : PTXReg<"rh75">; +def RH76 : PTXReg<"rh76">; +def RH77 : PTXReg<"rh77">; +def RH78 : PTXReg<"rh78">; +def RH79 : PTXReg<"rh79">; +def RH80 : PTXReg<"rh80">; +def RH81 : PTXReg<"rh81">; +def RH82 : PTXReg<"rh82">; +def RH83 : PTXReg<"rh83">; +def RH84 : PTXReg<"rh84">; +def RH85 : PTXReg<"rh85">; +def RH86 : PTXReg<"rh86">; +def RH87 : PTXReg<"rh87">; +def RH88 : PTXReg<"rh88">; +def RH89 : PTXReg<"rh89">; +def RH90 : PTXReg<"rh90">; +def RH91 : PTXReg<"rh91">; +def RH92 : PTXReg<"rh92">; +def RH93 : PTXReg<"rh93">; +def RH94 : PTXReg<"rh94">; +def RH95 : PTXReg<"rh95">; +def RH96 : PTXReg<"rh96">; +def RH97 : PTXReg<"rh97">; +def RH98 : PTXReg<"rh98">; +def RH99 : PTXReg<"rh99">; +def RH100 : PTXReg<"rh100">; +def RH101 : PTXReg<"rh101">; +def RH102 : PTXReg<"rh102">; +def RH103 : PTXReg<"rh103">; +def RH104 : PTXReg<"rh104">; +def RH105 : PTXReg<"rh105">; +def RH106 : PTXReg<"rh106">; +def RH107 : PTXReg<"rh107">; +def RH108 : PTXReg<"rh108">; +def RH109 : PTXReg<"rh109">; +def RH110 : PTXReg<"rh110">; +def RH111 : PTXReg<"rh111">; +def RH112 : PTXReg<"rh112">; +def RH113 : PTXReg<"rh113">; +def RH114 : PTXReg<"rh114">; +def RH115 : PTXReg<"rh115">; +def RH116 : PTXReg<"rh116">; +def RH117 : PTXReg<"rh117">; +def RH118 : PTXReg<"rh118">; +def RH119 : PTXReg<"rh119">; +def RH120 : PTXReg<"rh120">; +def RH121 : PTXReg<"rh121">; +def RH122 : PTXReg<"rh122">; +def RH123 : PTXReg<"rh123">; +def RH124 : PTXReg<"rh124">; +def RH125 : PTXReg<"rh125">; +def RH126 : PTXReg<"rh126">; +def RH127 : PTXReg<"rh127">; -///===- 32-bit Integer Registers ------------------------------------------===// +///===- 32-Bit Registers --------------------------------------------------===// -def R0 : PTXReg<"r0">; -def R1 : PTXReg<"r1">; -def R2 : PTXReg<"r2">; -def R3 : PTXReg<"r3">; -def R4 : PTXReg<"r4">; -def R5 : PTXReg<"r5">; -def R6 : PTXReg<"r6">; -def R7 : PTXReg<"r7">; +def R0 : PTXReg<"r0">; +def R1 : PTXReg<"r1">; +def R2 : PTXReg<"r2">; +def R3 : PTXReg<"r3">; +def R4 : PTXReg<"r4">; +def R5 : PTXReg<"r5">; +def R6 : PTXReg<"r6">; +def R7 : PTXReg<"r7">; +def R8 : PTXReg<"r8">; +def R9 : PTXReg<"r9">; +def R10 : PTXReg<"r10">; +def R11 : PTXReg<"r11">; +def R12 : PTXReg<"r12">; +def R13 : PTXReg<"r13">; +def R14 : PTXReg<"r14">; +def R15 : PTXReg<"r15">; +def R16 : PTXReg<"r16">; +def R17 : PTXReg<"r17">; +def R18 : PTXReg<"r18">; +def R19 : PTXReg<"r19">; +def R20 : PTXReg<"r20">; +def R21 : PTXReg<"r21">; +def R22 : PTXReg<"r22">; +def R23 : PTXReg<"r23">; +def R24 : PTXReg<"r24">; +def R25 : PTXReg<"r25">; +def R26 : PTXReg<"r26">; +def R27 : PTXReg<"r27">; +def R28 : PTXReg<"r28">; +def R29 : PTXReg<"r29">; +def R30 : PTXReg<"r30">; +def R31 : PTXReg<"r31">; +def R32 : PTXReg<"r32">; +def R33 : PTXReg<"r33">; +def R34 : PTXReg<"r34">; +def R35 : PTXReg<"r35">; +def R36 : PTXReg<"r36">; +def R37 : PTXReg<"r37">; +def R38 : PTXReg<"r38">; +def R39 : PTXReg<"r39">; +def R40 : PTXReg<"r40">; +def R41 : 
PTXReg<"r41">; +def R42 : PTXReg<"r42">; +def R43 : PTXReg<"r43">; +def R44 : PTXReg<"r44">; +def R45 : PTXReg<"r45">; +def R46 : PTXReg<"r46">; +def R47 : PTXReg<"r47">; +def R48 : PTXReg<"r48">; +def R49 : PTXReg<"r49">; +def R50 : PTXReg<"r50">; +def R51 : PTXReg<"r51">; +def R52 : PTXReg<"r52">; +def R53 : PTXReg<"r53">; +def R54 : PTXReg<"r54">; +def R55 : PTXReg<"r55">; +def R56 : PTXReg<"r56">; +def R57 : PTXReg<"r57">; +def R58 : PTXReg<"r58">; +def R59 : PTXReg<"r59">; +def R60 : PTXReg<"r60">; +def R61 : PTXReg<"r61">; +def R62 : PTXReg<"r62">; +def R63 : PTXReg<"r63">; +def R64 : PTXReg<"r64">; +def R65 : PTXReg<"r65">; +def R66 : PTXReg<"r66">; +def R67 : PTXReg<"r67">; +def R68 : PTXReg<"r68">; +def R69 : PTXReg<"r69">; +def R70 : PTXReg<"r70">; +def R71 : PTXReg<"r71">; +def R72 : PTXReg<"r72">; +def R73 : PTXReg<"r73">; +def R74 : PTXReg<"r74">; +def R75 : PTXReg<"r75">; +def R76 : PTXReg<"r76">; +def R77 : PTXReg<"r77">; +def R78 : PTXReg<"r78">; +def R79 : PTXReg<"r79">; +def R80 : PTXReg<"r80">; +def R81 : PTXReg<"r81">; +def R82 : PTXReg<"r82">; +def R83 : PTXReg<"r83">; +def R84 : PTXReg<"r84">; +def R85 : PTXReg<"r85">; +def R86 : PTXReg<"r86">; +def R87 : PTXReg<"r87">; +def R88 : PTXReg<"r88">; +def R89 : PTXReg<"r89">; +def R90 : PTXReg<"r90">; +def R91 : PTXReg<"r91">; +def R92 : PTXReg<"r92">; +def R93 : PTXReg<"r93">; +def R94 : PTXReg<"r94">; +def R95 : PTXReg<"r95">; +def R96 : PTXReg<"r96">; +def R97 : PTXReg<"r97">; +def R98 : PTXReg<"r98">; +def R99 : PTXReg<"r99">; +def R100 : PTXReg<"r100">; +def R101 : PTXReg<"r101">; +def R102 : PTXReg<"r102">; +def R103 : PTXReg<"r103">; +def R104 : PTXReg<"r104">; +def R105 : PTXReg<"r105">; +def R106 : PTXReg<"r106">; +def R107 : PTXReg<"r107">; +def R108 : PTXReg<"r108">; +def R109 : PTXReg<"r109">; +def R110 : PTXReg<"r110">; +def R111 : PTXReg<"r111">; +def R112 : PTXReg<"r112">; +def R113 : PTXReg<"r113">; +def R114 : PTXReg<"r114">; +def R115 : PTXReg<"r115">; +def R116 : PTXReg<"r116">; +def R117 : PTXReg<"r117">; +def R118 : PTXReg<"r118">; +def R119 : PTXReg<"r119">; +def R120 : PTXReg<"r120">; +def R121 : PTXReg<"r121">; +def R122 : PTXReg<"r122">; +def R123 : PTXReg<"r123">; +def R124 : PTXReg<"r124">; +def R125 : PTXReg<"r125">; +def R126 : PTXReg<"r126">; +def R127 : PTXReg<"r127">; -///===- 64-bit Integer Registers ------------------------------------------===// +///===- 64-Bit Registers --------------------------------------------------===// -def RD0 : PTXReg<"rd0">; -def RD1 : PTXReg<"rd1">; -def RD2 : PTXReg<"rd2">; -def RD3 : PTXReg<"rd3">; -def RD4 : PTXReg<"rd4">; -def RD5 : PTXReg<"rd5">; -def RD6 : PTXReg<"rd6">; -def RD7 : PTXReg<"rd7">; +def RD0 : PTXReg<"rd0">; +def RD1 : PTXReg<"rd1">; +def RD2 : PTXReg<"rd2">; +def RD3 : PTXReg<"rd3">; +def RD4 : PTXReg<"rd4">; +def RD5 : PTXReg<"rd5">; +def RD6 : PTXReg<"rd6">; +def RD7 : PTXReg<"rd7">; +def RD8 : PTXReg<"rd8">; +def RD9 : PTXReg<"rd9">; +def RD10 : PTXReg<"rd10">; +def RD11 : PTXReg<"rd11">; +def RD12 : PTXReg<"rd12">; +def RD13 : PTXReg<"rd13">; +def RD14 : PTXReg<"rd14">; +def RD15 : PTXReg<"rd15">; +def RD16 : PTXReg<"rd16">; +def RD17 : PTXReg<"rd17">; +def RD18 : PTXReg<"rd18">; +def RD19 : PTXReg<"rd19">; +def RD20 : PTXReg<"rd20">; +def RD21 : PTXReg<"rd21">; +def RD22 : PTXReg<"rd22">; +def RD23 : PTXReg<"rd23">; +def RD24 : PTXReg<"rd24">; +def RD25 : PTXReg<"rd25">; +def RD26 : PTXReg<"rd26">; +def RD27 : PTXReg<"rd27">; +def RD28 : PTXReg<"rd28">; +def RD29 : PTXReg<"rd29">; +def RD30 : PTXReg<"rd30">; +def RD31 : PTXReg<"rd31">; 
+def RD32 : PTXReg<"rd32">; +def RD33 : PTXReg<"rd33">; +def RD34 : PTXReg<"rd34">; +def RD35 : PTXReg<"rd35">; +def RD36 : PTXReg<"rd36">; +def RD37 : PTXReg<"rd37">; +def RD38 : PTXReg<"rd38">; +def RD39 : PTXReg<"rd39">; +def RD40 : PTXReg<"rd40">; +def RD41 : PTXReg<"rd41">; +def RD42 : PTXReg<"rd42">; +def RD43 : PTXReg<"rd43">; +def RD44 : PTXReg<"rd44">; +def RD45 : PTXReg<"rd45">; +def RD46 : PTXReg<"rd46">; +def RD47 : PTXReg<"rd47">; +def RD48 : PTXReg<"rd48">; +def RD49 : PTXReg<"rd49">; +def RD50 : PTXReg<"rd50">; +def RD51 : PTXReg<"rd51">; +def RD52 : PTXReg<"rd52">; +def RD53 : PTXReg<"rd53">; +def RD54 : PTXReg<"rd54">; +def RD55 : PTXReg<"rd55">; +def RD56 : PTXReg<"rd56">; +def RD57 : PTXReg<"rd57">; +def RD58 : PTXReg<"rd58">; +def RD59 : PTXReg<"rd59">; +def RD60 : PTXReg<"rd60">; +def RD61 : PTXReg<"rd61">; +def RD62 : PTXReg<"rd62">; +def RD63 : PTXReg<"rd63">; +def RD64 : PTXReg<"rd64">; +def RD65 : PTXReg<"rd65">; +def RD66 : PTXReg<"rd66">; +def RD67 : PTXReg<"rd67">; +def RD68 : PTXReg<"rd68">; +def RD69 : PTXReg<"rd69">; +def RD70 : PTXReg<"rd70">; +def RD71 : PTXReg<"rd71">; +def RD72 : PTXReg<"rd72">; +def RD73 : PTXReg<"rd73">; +def RD74 : PTXReg<"rd74">; +def RD75 : PTXReg<"rd75">; +def RD76 : PTXReg<"rd76">; +def RD77 : PTXReg<"rd77">; +def RD78 : PTXReg<"rd78">; +def RD79 : PTXReg<"rd79">; +def RD80 : PTXReg<"rd80">; +def RD81 : PTXReg<"rd81">; +def RD82 : PTXReg<"rd82">; +def RD83 : PTXReg<"rd83">; +def RD84 : PTXReg<"rd84">; +def RD85 : PTXReg<"rd85">; +def RD86 : PTXReg<"rd86">; +def RD87 : PTXReg<"rd87">; +def RD88 : PTXReg<"rd88">; +def RD89 : PTXReg<"rd89">; +def RD90 : PTXReg<"rd90">; +def RD91 : PTXReg<"rd91">; +def RD92 : PTXReg<"rd92">; +def RD93 : PTXReg<"rd93">; +def RD94 : PTXReg<"rd94">; +def RD95 : PTXReg<"rd95">; +def RD96 : PTXReg<"rd96">; +def RD97 : PTXReg<"rd97">; +def RD98 : PTXReg<"rd98">; +def RD99 : PTXReg<"rd99">; +def RD100 : PTXReg<"rd100">; +def RD101 : PTXReg<"rd101">; +def RD102 : PTXReg<"rd102">; +def RD103 : PTXReg<"rd103">; +def RD104 : PTXReg<"rd104">; +def RD105 : PTXReg<"rd105">; +def RD106 : PTXReg<"rd106">; +def RD107 : PTXReg<"rd107">; +def RD108 : PTXReg<"rd108">; +def RD109 : PTXReg<"rd109">; +def RD110 : PTXReg<"rd110">; +def RD111 : PTXReg<"rd111">; +def RD112 : PTXReg<"rd112">; +def RD113 : PTXReg<"rd113">; +def RD114 : PTXReg<"rd114">; +def RD115 : PTXReg<"rd115">; +def RD116 : PTXReg<"rd116">; +def RD117 : PTXReg<"rd117">; +def RD118 : PTXReg<"rd118">; +def RD119 : PTXReg<"rd119">; +def RD120 : PTXReg<"rd120">; +def RD121 : PTXReg<"rd121">; +def RD122 : PTXReg<"rd122">; +def RD123 : PTXReg<"rd123">; +def RD124 : PTXReg<"rd124">; +def RD125 : PTXReg<"rd125">; +def RD126 : PTXReg<"rd126">; +def RD127 : PTXReg<"rd127">; //===----------------------------------------------------------------------===// // Register classes //===----------------------------------------------------------------------===// - -def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%u", 0, 7)>; -def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%u", 0, 7)>; -def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%u", 0, 7)>; -def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%u", 0, 7)>; -def RegF32 : RegisterClass<"PTX", [f32], 32, (sequence "R%u", 0, 7)>; -def RegF64 : RegisterClass<"PTX", [f64], 64, (sequence "RD%u", 0, 7)>; +def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%u", 0, 127)>; +def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%u", 0, 127)>; +def RegI32 : RegisterClass<"PTX", [i32], 32, 
(sequence "R%u", 0, 127)>; +def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%u", 0, 127)>; +def RegF32 : RegisterClass<"PTX", [f32], 32, (sequence "R%u", 0, 127)>; +def RegF64 : RegisterClass<"PTX", [f64], 64, (sequence "RD%u", 0, 127)>; diff --git a/lib/Target/PTX/PTXSubtarget.cpp b/lib/Target/PTX/PTXSubtarget.cpp index e8a1dfe..f8941b6 100644 --- a/lib/Target/PTX/PTXSubtarget.cpp +++ b/lib/Target/PTX/PTXSubtarget.cpp @@ -16,23 +16,35 @@ using namespace llvm; -PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &FS, - bool is64Bit) - : PTXShaderModel(PTX_SM_1_0), +PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS, bool is64Bit) + : PTXTarget(PTX_COMPUTE_1_0), PTXVersion(PTX_VERSION_2_0), SupportsDouble(false), SupportsFMA(true), - Is64Bit(is64Bit) { - std::string TARGET = "generic"; + Is64Bit(is64Bit) { + std::string TARGET = CPU; + if (TARGET.empty()) + TARGET = "generic"; ParseSubtargetFeatures(FS, TARGET); } std::string PTXSubtarget::getTargetString() const { - switch(PTXShaderModel) { - default: llvm_unreachable("Unknown shader model"); + switch(PTXTarget) { + default: llvm_unreachable("Unknown PTX target"); case PTX_SM_1_0: return "sm_10"; + case PTX_SM_1_1: return "sm_11"; + case PTX_SM_1_2: return "sm_12"; case PTX_SM_1_3: return "sm_13"; case PTX_SM_2_0: return "sm_20"; + case PTX_SM_2_1: return "sm_21"; + case PTX_SM_2_2: return "sm_22"; + case PTX_SM_2_3: return "sm_23"; + case PTX_COMPUTE_1_0: return "compute_10"; + case PTX_COMPUTE_1_1: return "compute_11"; + case PTX_COMPUTE_1_2: return "compute_12"; + case PTX_COMPUTE_1_3: return "compute_13"; + case PTX_COMPUTE_2_0: return "compute_20"; } } diff --git a/lib/Target/PTX/PTXSubtarget.h b/lib/Target/PTX/PTXSubtarget.h index c8f8c3b..6d03377 100644 --- a/lib/Target/PTX/PTXSubtarget.h +++ b/lib/Target/PTX/PTXSubtarget.h @@ -18,15 +18,28 @@ namespace llvm { class PTXSubtarget : public TargetSubtarget { - private: + public: /** * Enumeration of Shader Models supported by the back-end. */ - enum PTXShaderModelEnum { + enum PTXTargetEnum { + PTX_COMPUTE_1_0, /*< Compute Compatibility 1.0 */ + PTX_COMPUTE_1_1, /*< Compute Compatibility 1.1 */ + PTX_COMPUTE_1_2, /*< Compute Compatibility 1.2 */ + PTX_COMPUTE_1_3, /*< Compute Compatibility 1.3 */ + PTX_COMPUTE_2_0, /*< Compute Compatibility 2.0 */ + PTX_LAST_COMPUTE, + PTX_SM_1_0, /*< Shader Model 1.0 */ + PTX_SM_1_1, /*< Shader Model 1.1 */ + PTX_SM_1_2, /*< Shader Model 1.2 */ PTX_SM_1_3, /*< Shader Model 1.3 */ - PTX_SM_2_0 /*< Shader Model 2.0 */ + PTX_SM_2_0, /*< Shader Model 2.0 */ + PTX_SM_2_1, /*< Shader Model 2.1 */ + PTX_SM_2_2, /*< Shader Model 2.2 */ + PTX_SM_2_3, /*< Shader Model 2.3 */ + PTX_LAST_SM }; /** @@ -41,8 +54,10 @@ namespace llvm { PTX_VERSION_2_3 /*< PTX Version 2.3 */ }; + private: + /// Shader Model supported on the target GPU. - PTXShaderModelEnum PTXShaderModel; + PTXTargetEnum PTXTarget; /// PTX Language Version. 
PTXVersionEnum PTXVersion; @@ -58,8 +73,11 @@ namespace llvm { bool Is64Bit; public: - PTXSubtarget(const std::string &TT, const std::string &FS, bool is64Bit); + PTXSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS, bool is64Bit); + + // Target architecture accessors std::string getTargetString() const; std::string getPTXVersionString() const; @@ -70,18 +88,29 @@ namespace llvm { bool supportsFMA() const { return SupportsFMA; } - bool supportsSM13() const { return PTXShaderModel >= PTX_SM_1_3; } - - bool supportsSM20() const { return PTXShaderModel >= PTX_SM_2_0; } - bool supportsPTX21() const { return PTXVersion >= PTX_VERSION_2_1; } bool supportsPTX22() const { return PTXVersion >= PTX_VERSION_2_2; } bool supportsPTX23() const { return PTXVersion >= PTX_VERSION_2_3; } - std::string ParseSubtargetFeatures(const std::string &FS, - const std::string &CPU); + bool fdivNeedsRoundingMode() const { + return (PTXTarget >= PTX_SM_1_3 && PTXTarget < PTX_LAST_SM) || + (PTXTarget >= PTX_COMPUTE_1_3 && PTXTarget < PTX_LAST_COMPUTE); + } + + bool fmadNeedsRoundingMode() const { + return (PTXTarget >= PTX_SM_1_3 && PTXTarget < PTX_LAST_SM) || + (PTXTarget >= PTX_COMPUTE_1_3 && PTXTarget < PTX_LAST_COMPUTE); + } + + bool useParamSpaceForDeviceArgs() const { + return (PTXTarget >= PTX_SM_2_0 && PTXTarget < PTX_LAST_SM) || + (PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE); + } + + void ParseSubtargetFeatures(const std::string &FS, + const std::string &CPU); }; // class PTXSubtarget } // namespace llvm diff --git a/lib/Target/PTX/PTXTargetMachine.cpp b/lib/Target/PTX/PTXTargetMachine.cpp index 1b737c9..ef648c6 100644 --- a/lib/Target/PTX/PTXTargetMachine.cpp +++ b/lib/Target/PTX/PTXTargetMachine.cpp @@ -52,11 +52,12 @@ namespace { // DataLayout and FrameLowering are filled with dummy data PTXTargetMachine::PTXTargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS, bool is64Bit) : LLVMTargetMachine(T, TT), DataLayout(is64Bit ? 
DataLayout64 : DataLayout32), - Subtarget(TT, FS, is64Bit), + Subtarget(TT, CPU, FS, is64Bit), FrameLowering(Subtarget), InstrInfo(*this), TLInfo(*this) { @@ -64,14 +65,16 @@ PTXTargetMachine::PTXTargetMachine(const Target &T, PTX32TargetMachine::PTX32TargetMachine(const Target &T, const std::string& TT, + const std::string& CPU, const std::string& FS) - : PTXTargetMachine(T, TT, FS, false) { + : PTXTargetMachine(T, TT, CPU, FS, false) { } PTX64TargetMachine::PTX64TargetMachine(const Target &T, const std::string& TT, + const std::string& CPU, const std::string& FS) - : PTXTargetMachine(T, TT, FS, true) { + : PTXTargetMachine(T, TT, CPU, FS, true) { } bool PTXTargetMachine::addInstSelector(PassManagerBase &PM, diff --git a/lib/Target/PTX/PTXTargetMachine.h b/lib/Target/PTX/PTXTargetMachine.h index 149be8e..ae42153 100644 --- a/lib/Target/PTX/PTXTargetMachine.h +++ b/lib/Target/PTX/PTXTargetMachine.h @@ -33,7 +33,8 @@ class PTXTargetMachine : public LLVMTargetMachine { public: PTXTargetMachine(const Target &T, const std::string &TT, - const std::string &FS, bool is64Bit); + const std::string &CPU, const std::string &FS, + bool is64Bit); virtual const TargetData *getTargetData() const { return &DataLayout; } @@ -61,14 +62,14 @@ class PTX32TargetMachine : public PTXTargetMachine { public: PTX32TargetMachine(const Target &T, const std::string &TT, - const std::string& FS); + const std::string& CPU, const std::string& FS); }; // class PTX32TargetMachine class PTX64TargetMachine : public PTXTargetMachine { public: PTX64TargetMachine(const Target &T, const std::string &TT, - const std::string& FS); + const std::string& CPU, const std::string& FS); }; // class PTX32TargetMachine } // namespace llvm diff --git a/lib/Target/PTX/generate-register-td.py b/lib/Target/PTX/generate-register-td.py new file mode 100755 index 0000000..1528690 --- /dev/null +++ b/lib/Target/PTX/generate-register-td.py @@ -0,0 +1,163 @@ +#!/usr/bin/env python +##===- generate-register-td.py --------------------------------*-python-*--===## +## +## The LLVM Compiler Infrastructure +## +## This file is distributed under the University of Illinois Open Source +## License. See LICENSE.TXT for details. +## +##===----------------------------------------------------------------------===## +## +## This file describes the PTX register file generator. +## +##===----------------------------------------------------------------------===## + +from sys import argv, exit, stdout + + +if len(argv) != 5: + print('Usage: generate-register-td.py <num_preds> <num_16> <num_32> <num_64>') + exit(1) + +try: + num_pred = int(argv[1]) + num_16bit = int(argv[2]) + num_32bit = int(argv[3]) + num_64bit = int(argv[4]) +except: + print('ERROR: Invalid integer parameter') + exit(1) + +## Print the register definition file +td_file = open('PTXRegisterInfo.td', 'w') + +td_file.write(''' +//===- PTXRegisterInfo.td - PTX Register defs ----------------*- tblgen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Declarations that describe the PTX register file +//===----------------------------------------------------------------------===// + +class PTXReg<string n> : Register<n> { + let Namespace = "PTX"; +} + +//===----------------------------------------------------------------------===// +// Registers +//===----------------------------------------------------------------------===// +''') + + +# Print predicate registers +td_file.write('\n///===- Predicate Registers -----------------------------------------------===//\n\n') +for r in range(0, num_pred): + td_file.write('def P%d : PTXReg<"p%d">;\n' % (r, r)) + +# Print 16-bit registers +td_file.write('\n///===- 16-Bit Registers --------------------------------------------------===//\n\n') +for r in range(0, num_16bit): + td_file.write('def RH%d : PTXReg<"rh%d">;\n' % (r, r)) + +# Print 32-bit registers +td_file.write('\n///===- 32-Bit Registers --------------------------------------------------===//\n\n') +for r in range(0, num_32bit): + td_file.write('def R%d : PTXReg<"r%d">;\n' % (r, r)) + +# Print 64-bit registers +td_file.write('\n///===- 64-Bit Registers --------------------------------------------------===//\n\n') +for r in range(0, num_64bit): + td_file.write('def RD%d : PTXReg<"rd%d">;\n' % (r, r)) + + +td_file.write(''' +//===----------------------------------------------------------------------===// +// Register classes +//===----------------------------------------------------------------------===// +''') + + +# Print register classes + +td_file.write('def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%%u", 0, %d)>;\n' % (num_pred-1)) +td_file.write('def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%%u", 0, %d)>;\n' % (num_16bit-1)) +td_file.write('def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%%u", 0, %d)>;\n' % (num_32bit-1)) +td_file.write('def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%%u", 0, %d)>;\n' % (num_64bit-1)) +td_file.write('def RegF32 : RegisterClass<"PTX", [f32], 32, (sequence "R%%u", 0, %d)>;\n' % (num_32bit-1)) +td_file.write('def RegF64 : RegisterClass<"PTX", [f64], 64, (sequence "RD%%u", 0, %d)>;\n' % (num_64bit-1)) + + +td_file.close() + +## Now write the PTXCallingConv.td file +td_file = open('PTXCallingConv.td', 'w') + +# Reserve 10% of the available registers for return values, and the other 90% +# for parameters +num_ret_pred = int(0.1 * num_pred) +num_ret_16bit = int(0.1 * num_16bit) +num_ret_32bit = int(0.1 * num_32bit) +num_ret_64bit = int(0.1 * num_64bit) +num_param_pred = num_pred - num_ret_pred +num_param_16bit = num_16bit - num_ret_16bit +num_param_32bit = num_32bit - num_ret_32bit +num_param_64bit = num_64bit - num_ret_64bit + +param_regs_pred = [('P%d' % (i+num_ret_pred)) for i in range(0, num_param_pred)] +ret_regs_pred = ['P%d' % i for i in range(0, num_ret_pred)] +param_regs_16bit = [('RH%d' % (i+num_ret_16bit)) for i in range(0, num_param_16bit)] +ret_regs_16bit = ['RH%d' % i for i in range(0, num_ret_16bit)] +param_regs_32bit = [('R%d' % (i+num_ret_32bit)) for i in range(0, num_param_32bit)] +ret_regs_32bit = ['R%d' % i for i in range(0, num_ret_32bit)] +param_regs_64bit = [('RD%d' % (i+num_ret_64bit)) for i in range(0, num_param_64bit)] +ret_regs_64bit = ['RD%d' % i for i in range(0, num_ret_64bit)] + +param_list_pred = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_pred) 
+ret_list_pred = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_pred) +param_list_16bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_16bit) +ret_list_16bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_16bit) +param_list_32bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_32bit) +ret_list_32bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_32bit) +param_list_64bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_64bit) +ret_list_64bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_64bit) + +td_file.write(''' +//===--- PTXCallingConv.td - Calling Conventions -----------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This describes the calling conventions for the PTX architecture. +// +//===----------------------------------------------------------------------===// + +// PTX Formal Parameter Calling Convention +def CC_PTX : CallingConv<[ + CCIfType<[i1], CCAssignToReg<[%s]>>, + CCIfType<[i16], CCAssignToReg<[%s]>>, + CCIfType<[i32,f32], CCAssignToReg<[%s]>>, + CCIfType<[i64,f64], CCAssignToReg<[%s]>> +]>; + +// PTX Return Value Calling Convention +def RetCC_PTX : CallingConv<[ + CCIfType<[i1], CCAssignToReg<[%s]>>, + CCIfType<[i16], CCAssignToReg<[%s]>>, + CCIfType<[i32,f32], CCAssignToReg<[%s]>>, + CCIfType<[i64,f64], CCAssignToReg<[%s]>> +]>; +''' % (param_list_pred, param_list_16bit, param_list_32bit, param_list_64bit, + ret_list_pred, ret_list_16bit, ret_list_32bit, ret_list_64bit)) + + +td_file.close() diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt index f282579..ea11f4c 100644 --- a/lib/Target/PowerPC/CMakeLists.txt +++ b/lib/Target/PowerPC/CMakeLists.txt @@ -1,13 +1,10 @@ set(LLVM_TARGET_DEFINITIONS PPC.td) -tablegen(PPCGenInstrNames.inc -gen-instr-enums) -tablegen(PPCGenRegisterNames.inc -gen-register-enums) tablegen(PPCGenAsmWriter.inc -gen-asm-writer) tablegen(PPCGenCodeEmitter.inc -gen-emitter) tablegen(PPCGenMCCodeEmitter.inc -gen-emitter -mc-emitter) -tablegen(PPCGenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(PPCGenRegisterInfo.inc -gen-register-desc) -tablegen(PPCGenInstrInfo.inc -gen-instr-desc) +tablegen(PPCGenRegisterInfo.inc -gen-register-info) +tablegen(PPCGenInstrInfo.inc -gen-instr-info) tablegen(PPCGenDAGISel.inc -gen-dag-isel) tablegen(PPCGenCallingConv.inc -gen-callingconv) tablegen(PPCGenSubtarget.inc -gen-subtarget) diff --git a/lib/Target/PowerPC/Makefile b/lib/Target/PowerPC/Makefile index 030defe..2a18db7 100644 --- a/lib/Target/PowerPC/Makefile +++ b/lib/Target/PowerPC/Makefile @@ -12,9 +12,8 @@ LIBRARYNAME = LLVMPowerPCCodeGen TARGET = PPC # Make sure that tblgen is run, first thing. -BUILT_SOURCES = PPCGenInstrNames.inc PPCGenRegisterNames.inc \ +BUILT_SOURCES = PPCGenRegisterInfo.inc \ PPCGenAsmWriter.inc PPCGenCodeEmitter.inc \ - PPCGenRegisterInfo.h.inc PPCGenRegisterInfo.inc \ PPCGenInstrInfo.inc PPCGenDAGISel.inc \ PPCGenSubtarget.inc PPCGenCallingConv.inc \ PPCGenMCCodeEmitter.inc diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index 92672b5..55852e6 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -84,10 +84,12 @@ namespace llvm { // Defines symbolic names for PowerPC registers. This defines a mapping from // register name to register number. 
// -#include "PPCGenRegisterNames.inc" +#define GET_REGINFO_ENUM +#include "PPCGenRegisterInfo.inc" // Defines symbolic names for the PowerPC instructions. // -#include "PPCGenInstrNames.inc" +#define GET_INSTRINFO_ENUM +#include "PPCGenInstrInfo.inc" #endif diff --git a/lib/Target/PowerPC/PPCAsmBackend.cpp b/lib/Target/PowerPC/PPCAsmBackend.cpp index f562a3f..4b8cbb7 100644 --- a/lib/Target/PowerPC/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/PPCAsmBackend.cpp @@ -13,6 +13,7 @@ #include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCValue.h" #include "llvm/Object/MachOFormat.h" #include "llvm/Target/TargetRegistry.h" using namespace llvm; @@ -23,6 +24,11 @@ public: PPCMachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype) : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype) {} + + void RecordRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFragment *Fragment, const MCFixup &Fixup, + MCValue Target, uint64_t &FixedValue) {} }; class PPCAsmBackend : public TargetAsmBackend { diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp index 74ecff5..cddc9d8 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -73,12 +73,12 @@ PPCHazardRecognizer970::GetInstrType(unsigned Opcode, } Opcode = ~Opcode; - const TargetInstrDesc &TID = TII.get(Opcode); + const MCInstrDesc &MCID = TII.get(Opcode); - isLoad = TID.mayLoad(); - isStore = TID.mayStore(); + isLoad = MCID.mayLoad(); + isStore = MCID.mayStore(); - uint64_t TSFlags = TID.TSFlags; + uint64_t TSFlags = MCID.TSFlags; isFirst = TSFlags & PPCII::PPC970_First; isSingle = TSFlags & PPCII::PPC970_Single; diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index c9b490b..b44b6c3 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -215,10 +215,11 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::VASTART , MVT::Other, Custom); // VAARG is custom lowered with the 32-bit SVR4 ABI. - if ( TM.getSubtarget<PPCSubtarget>().isSVR4ABI() - && !TM.getSubtarget<PPCSubtarget>().isPPC64()) + if (TM.getSubtarget<PPCSubtarget>().isSVR4ABI() + && !TM.getSubtarget<PPCSubtarget>().isPPC64()) { setOperationAction(ISD::VAARG, MVT::Other, Custom); - else + setOperationAction(ISD::VAARG, MVT::i64, Custom); + } else setOperationAction(ISD::VAARG, MVT::Other, Expand); // Use the default implementation. 
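The LowerVAARG implementation in the next hunk walks the 32-bit SVR4 va_list structure by hand. For reference, the layout its byte offsets assume looks like the following sketch (field names are assumed here, not taken from the patch):

// PPC32 SVR4 ABI va_list, matching the offsets used below:
struct VAList32 {
  unsigned char GprIndex;   // offset 0: next GPR to use (r3..r10 -> 0..7)
  unsigned char FprIndex;   // offset 1: next FPR to use (f1..f8 -> 0..7)
  unsigned short Reserved;  // offsets 2-3: padding
  char *OverflowArea;       // offset 4: next stack (overflow) argument
  char *RegSaveArea;        // offset 8: saved GPRs; saved FPRs start at +32
};

Once an index reaches 8 the corresponding register file is exhausted, and the argument must be fetched from OverflowArea instead.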
@@ -1262,9 +1263,110 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) const { + SDNode *Node = Op.getNode(); + EVT VT = Node->getValueType(0); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + SDValue InChain = Node->getOperand(0); + SDValue VAListPtr = Node->getOperand(1); + const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); + DebugLoc dl = Node->getDebugLoc(); + + assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only"); + + // gpr_index + SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, + VAListPtr, MachinePointerInfo(SV), MVT::i8, + false, false, 0); + InChain = GprIndex.getValue(1); + + if (VT == MVT::i64) { + // Check if GprIndex is even + SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex, + DAG.getConstant(1, MVT::i32)); + SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd, + DAG.getConstant(0, MVT::i32), ISD::SETNE); + SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex, + DAG.getConstant(1, MVT::i32)); + // Align GprIndex to be even if it isn't + GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne, + GprIndex); + } + + // fpr index is 1 byte after gpr + SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, + DAG.getConstant(1, MVT::i32)); + + // fpr + SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, + FprPtr, MachinePointerInfo(SV), MVT::i8, + false, false, 0); + InChain = FprIndex.getValue(1); + + SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, + DAG.getConstant(8, MVT::i32)); + + SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, + DAG.getConstant(4, MVT::i32)); - llvm_unreachable("VAARG not yet implemented for the SVR4 ABI!"); - return SDValue(); // Not reached + // areas + SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, + MachinePointerInfo(), false, false, 0); + InChain = OverflowArea.getValue(1); + + SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, + MachinePointerInfo(), false, false, 0); + InChain = RegSaveArea.getValue(1); + + // select overflow_area if index > 8 + SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex, + DAG.getConstant(8, MVT::i32), ISD::SETLT); + + SDValue Area = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, RegSaveArea, + OverflowArea); + + // adjustment constant gpr_index * 4/8 + SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32, + VT.isInteger() ? GprIndex : FprIndex, + DAG.getConstant(VT.isInteger() ? 4 : 8, + MVT::i32)); + + // OurReg = RegSaveArea + RegConstant + SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea, + RegConstant); + + // Floating types are 32 bytes into RegSaveArea + if (VT.isFloatingPoint()) + OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg, + DAG.getConstant(32, MVT::i32)); + + // increase {f,g}pr_index by 1 (or 2 if VT is i64) + SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32, + VT.isInteger() ? GprIndex : FprIndex, + DAG.getConstant(VT == MVT::i64 ? 2 : 1, + MVT::i32)); + + InChain = DAG.getTruncStore(InChain, dl, IndexPlus1, + VT.isInteger() ? 
VAListPtr : FprPtr, + MachinePointerInfo(SV), + MVT::i8, false, false, 0); + + // determine if we should load from reg_save_area or overflow_area + SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea); + + // increase overflow_area by 4/8 if gpr/fpr > 8 + SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea, + DAG.getConstant(VT.isInteger() ? 4 : 8, + MVT::i32)); + + OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea, + OverflowAreaPlusN); + + InChain = DAG.getTruncStore(InChain, dl, OverflowArea, + OverflowAreaPtr, + MachinePointerInfo(), + MVT::i32, false, false, 0); + + return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), false, false, 0); } SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, @@ -4429,11 +4531,27 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { void PPCTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, SelectionDAG &DAG) const { + const TargetMachine &TM = getTargetMachine(); DebugLoc dl = N->getDebugLoc(); switch (N->getOpcode()) { default: assert(false && "Do not know how to custom type legalize this operation!"); return; + case ISD::VAARG: { + if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI() + || TM.getSubtarget<PPCSubtarget>().isPPC64()) + return; + + EVT VT = N->getValueType(0); + + if (VT == MVT::i64) { + SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, PPCSubTarget); + + Results.push_back(NewNode); + Results.push_back(NewNode.getValue(1)); + } + return; + } case ISD::FP_ROUND_INREG: { assert(N->getValueType(0) == MVT::ppcf128); assert(N->getOperand(0).getValueType() == MVT::ppcf128); diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 53b0491..1ddc0f0 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -15,7 +15,6 @@ #include "PPCInstrBuilder.h" #include "PPCMachineFunctionInfo.h" #include "PPCPredicates.h" -#include "PPCGenInstrInfo.inc" #include "PPCTargetMachine.h" #include "PPCHazardRecognizers.h" #include "llvm/ADT/STLExtras.h" @@ -29,6 +28,9 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/MC/MCAsmInfo.h" +#define GET_INSTRINFO_MC_DESC +#include "PPCGenInstrInfo.inc" + namespace llvm { extern cl::opt<bool> EnablePPC32RS; // FIXME (64-bit): See PPCRegisterInfo.cpp. extern cl::opt<bool> EnablePPC64RS; // FIXME (64-bit): See PPCRegisterInfo.cpp. @@ -37,8 +39,9 @@ extern cl::opt<bool> EnablePPC64RS; // FIXME (64-bit): See PPCRegisterInfo.cpp. using namespace llvm; PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm) - : TargetInstrInfoImpl(PPCInsts, array_lengthof(PPCInsts)), TM(tm), - RI(*TM.getSubtargetImpl(), *this) {} + : TargetInstrInfoImpl(PPCInsts, array_lengthof(PPCInsts), + PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP), + TM(tm), RI(*TM.getSubtargetImpl(), *this) {} /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for /// this target when scheduling the DAG. @@ -120,7 +123,7 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { // destination register as well. if (Reg0 == Reg1) { // Must be two address instruction! 
- assert(MI->getDesc().getOperandConstraint(0, TOI::TIED_TO) && + assert(MI->getDesc().getOperandConstraint(0, MCOI::TIED_TO) && "Expecting a two-address instruction!"); Reg2IsKill = false; ChangeReg0 = true; @@ -315,12 +318,12 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, else llvm_unreachable("Impossible reg-to-reg copy"); - const TargetInstrDesc &TID = get(Opc); - if (TID.getNumOperands() == 3) - BuildMI(MBB, I, DL, TID, DestReg) + const MCInstrDesc &MCID = get(Opc); + if (MCID.getNumOperands() == 3) + BuildMI(MBB, I, DL, MCID, DestReg) .addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc)); else - BuildMI(MBB, I, DL, TID, DestReg).addReg(SrcReg, getKillRegState(KillSrc)); + BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc)); } bool diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index fd62a88..db139da 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -44,6 +44,10 @@ #include "llvm/ADT/STLExtras.h" #include <cstdlib> +#define GET_REGINFO_MC_DESC +#define GET_REGINFO_TARGET_DESC +#include "PPCGenRegisterInfo.inc" + // FIXME (64-bit): Eventually enable by default. namespace llvm { cl::opt<bool> EnablePPC32RS("enable-ppc32-regscavenger", @@ -110,8 +114,7 @@ unsigned PPCRegisterInfo::getRegisterNumbering(unsigned RegEnum) { PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST, const TargetInstrInfo &tii) - : PPCGenRegisterInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP), - Subtarget(ST), TII(tii) { + : PPCGenRegisterInfo(), Subtarget(ST), TII(tii) { ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX; ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX; ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX; @@ -710,5 +713,3 @@ int PPCRegisterInfo::getLLVMRegNum(unsigned RegNum, bool isEH) const { return PPCGenRegisterInfo::getLLVMRegNumFull(RegNum, Flavour); } - -#include "PPCGenRegisterInfo.inc" diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index 48c2562..33fe5eb 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -16,9 +16,11 @@ #define POWERPC32_REGISTERINFO_H #include "PPC.h" -#include "PPCGenRegisterInfo.h.inc" #include <map> +#define GET_REGINFO_HEADER +#include "PPCGenRegisterInfo.inc" + namespace llvm { class PPCSubtarget; class TargetInstrInfo; diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index 5f3aa23..bcc4c21 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -57,8 +57,8 @@ static const char *GetCurrentPowerPCCPU() { #endif -PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &FS, - bool is64Bit) +PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS, bool is64Bit) : StackAlignment(16) , DarwinDirective(PPC::DIR_NONE) , IsGigaProcessor(false) @@ -73,13 +73,16 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &FS, , TargetTriple(TT) { // Determine default and user specified characteristics - std::string CPU = "generic"; + std::string CPUName = CPU; + if (CPUName.empty()) + CPUName = "generic"; #if defined(__APPLE__) - CPU = GetCurrentPowerPCCPU(); + if (CPUName == "generic") + CPUName = GetCurrentPowerPCCPU(); #endif // Parse features string. 
- ParseSubtargetFeatures(FS, CPU); + ParseSubtargetFeatures(FS, CPUName); // If we are generating code for ppc64, verify that options make sense. if (is64Bit) { diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 8fd1a44..55c3fef 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -14,10 +14,9 @@ #ifndef POWERPCSUBTARGET_H #define POWERPCSUBTARGET_H -#include "llvm/ADT/Triple.h" -#include "llvm/Target/TargetInstrItineraries.h" #include "llvm/Target/TargetSubtarget.h" - +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/ADT/Triple.h" #include <string> // GCC #defines PPC on Linux but we use it as our namespace name @@ -73,12 +72,12 @@ public: /// This constructor initializes the data members to match that /// of the specified triple. /// - PPCSubtarget(const std::string &TT, const std::string &FS, bool is64Bit); + PPCSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS, bool is64Bit); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. - std::string ParseSubtargetFeatures(const std::string &FS, - const std::string &CPU); + void ParseSubtargetFeatures(const std::string &FS, const std::string &CPU); /// SetJITMode - This is called to inform the subtarget info that we are diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index d27e54e..09fc1e3 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -67,9 +67,10 @@ extern "C" void LLVMInitializePowerPCTarget() { PPCTargetMachine::PPCTargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS, bool is64Bit) : LLVMTargetMachine(T, TT), - Subtarget(TT, FS, is64Bit), + Subtarget(TT, CPU, FS, is64Bit), DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this), FrameLowering(Subtarget), JITInfo(*this, is64Bit), TLInfo(*this), TSInfo(*this), @@ -88,14 +89,16 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const std::string &TT, bool PPCTargetMachine::getEnableTailMergeDefault() const { return false; } PPC32TargetMachine::PPC32TargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS) - : PPCTargetMachine(T, TT, FS, false) { + : PPCTargetMachine(T, TT, CPU, FS, false) { } PPC64TargetMachine::PPC64TargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS) - : PPCTargetMachine(T, TT, FS, true) { + : PPCTargetMachine(T, TT, CPU, FS, true) { } diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index 2d24989..baf07e3 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -41,7 +41,8 @@ class PPCTargetMachine : public LLVMTargetMachine { public: PPCTargetMachine(const Target &T, const std::string &TT, - const std::string &FS, bool is64Bit); + const std::string &CPU, const std::string &FS, + bool is64Bit); virtual const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const PPCFrameLowering *getFrameLowering() const { @@ -77,7 +78,7 @@ public: class PPC32TargetMachine : public PPCTargetMachine { public: PPC32TargetMachine(const Target &T, const std::string &TT, - const std::string &FS); + const std::string &CPU, const std::string &FS); }; /// PPC64TargetMachine - PowerPC 64-bit target machine. 
@@ -85,7 +86,7 @@ public: class PPC64TargetMachine : public PPCTargetMachine { public: PPC64TargetMachine(const Target &T, const std::string &TT, - const std::string &FS); + const std::string &CPU, const std::string &FS); }; } // end namespace llvm diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt index 6839234..f3c691f 100644 --- a/lib/Target/Sparc/CMakeLists.txt +++ b/lib/Target/Sparc/CMakeLists.txt @@ -1,10 +1,7 @@ set(LLVM_TARGET_DEFINITIONS Sparc.td) -tablegen(SparcGenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(SparcGenRegisterNames.inc -gen-register-enums) -tablegen(SparcGenRegisterInfo.inc -gen-register-desc) -tablegen(SparcGenInstrNames.inc -gen-instr-enums) -tablegen(SparcGenInstrInfo.inc -gen-instr-desc) +tablegen(SparcGenRegisterInfo.inc -gen-register-info) +tablegen(SparcGenInstrInfo.inc -gen-instr-info) tablegen(SparcGenAsmWriter.inc -gen-asm-writer) tablegen(SparcGenDAGISel.inc -gen-dag-isel) tablegen(SparcGenSubtarget.inc -gen-subtarget) diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp index 4b12852..dab35e5 100644 --- a/lib/Target/Sparc/DelaySlotFiller.cpp +++ b/lib/Target/Sparc/DelaySlotFiller.cpp @@ -298,7 +298,7 @@ bool Filler::isDelayFiller(MachineBasicBlock &MBB, return false; if (candidate->getOpcode() == SP::UNIMP) return true; - const TargetInstrDesc &prevdesc = (--candidate)->getDesc(); + const MCInstrDesc &prevdesc = (--candidate)->getDesc(); return prevdesc.hasDelaySlot(); } diff --git a/lib/Target/Sparc/Makefile b/lib/Target/Sparc/Makefile index 27942c5..c8741b5 100644 --- a/lib/Target/Sparc/Makefile +++ b/lib/Target/Sparc/Makefile @@ -12,9 +12,8 @@ LIBRARYNAME = LLVMSparcCodeGen TARGET = Sparc # Make sure that tblgen is run, first thing. -BUILT_SOURCES = SparcGenRegisterInfo.h.inc SparcGenRegisterNames.inc \ - SparcGenRegisterInfo.inc SparcGenInstrNames.inc \ - SparcGenInstrInfo.inc SparcGenAsmWriter.inc \ +BUILT_SOURCES = SparcGenRegisterInfo.inc SparcGenInstrInfo.inc \ + SparcGenAsmWriter.inc \ SparcGenDAGISel.inc SparcGenSubtarget.inc SparcGenCallingConv.inc DIRS = TargetInfo diff --git a/lib/Target/Sparc/Sparc.h b/lib/Target/Sparc/Sparc.h index a37920d..d68535b 100644 --- a/lib/Target/Sparc/Sparc.h +++ b/lib/Target/Sparc/Sparc.h @@ -36,11 +36,13 @@ namespace llvm { // Defines symbolic names for Sparc registers. This defines a mapping from // register name to register number. // -#include "SparcGenRegisterNames.inc" +#define GET_REGINFO_ENUM +#include "SparcGenRegisterInfo.inc" // Defines symbolic names for the Sparc instructions. 
// -#include "SparcGenInstrNames.inc" +#define GET_INSTRINFO_ENUM +#include "SparcGenInstrInfo.inc" namespace llvm { diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 0b4612d..a2bda6c 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -1,4 +1,3 @@ - //===-- SparcISelLowering.cpp - Sparc DAG Lowering Implementation ---------===// // // The LLVM Compiler Infrastructure @@ -1265,26 +1264,6 @@ SparcTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); } -std::vector<unsigned> SparcTargetLowering:: -getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const { - if (Constraint.size() != 1) - return std::vector<unsigned>(); - - switch (Constraint[0]) { - default: break; - case 'r': - return make_vector<unsigned>(SP::L0, SP::L1, SP::L2, SP::L3, - SP::L4, SP::L5, SP::L6, SP::L7, - SP::I0, SP::I1, SP::I2, SP::I3, - SP::I4, SP::I5, - SP::O0, SP::O1, SP::O2, SP::O3, - SP::O4, SP::O5, SP::O7, 0); - } - - return std::vector<unsigned>(); -} - bool SparcTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The Sparc target isn't yet aware of offsets. diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h index 9ea6e16..8a1886a 100644 --- a/lib/Target/Sparc/SparcISelLowering.h +++ b/lib/Target/Sparc/SparcISelLowering.h @@ -65,9 +65,6 @@ namespace llvm { ConstraintType getConstraintType(const std::string &Constraint) const; std::pair<unsigned, const TargetRegisterClass*> getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; - std::vector<unsigned> - getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const; virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp index afa3c1f..e555b79 100644 --- a/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/lib/Target/Sparc/SparcInstrInfo.cpp @@ -19,12 +19,16 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" -#include "SparcGenInstrInfo.inc" #include "SparcMachineFunctionInfo.h" + +#define GET_INSTRINFO_MC_DESC +#include "SparcGenInstrInfo.inc" + using namespace llvm; SparcInstrInfo::SparcInstrInfo(SparcSubtarget &ST) - : TargetInstrInfoImpl(SparcInsts, array_lengthof(SparcInsts)), + : TargetInstrInfoImpl(SparcInsts, array_lengthof(SparcInsts), + SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP), RI(ST, *this), Subtarget(ST) { } diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp index 9fcf028..3b0b5fa 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -23,12 +23,16 @@ #include "llvm/Type.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" + +#define GET_REGINFO_MC_DESC +#define GET_REGINFO_TARGET_DESC +#include "SparcGenRegisterInfo.inc" + using namespace llvm; SparcRegisterInfo::SparcRegisterInfo(SparcSubtarget &st, const TargetInstrInfo &tii) - : SparcGenRegisterInfo(SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP), - Subtarget(st), TII(tii) { + : SparcGenRegisterInfo(), Subtarget(st), TII(tii) { } const unsigned* SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) @@ -135,6 +139,3 @@ int SparcRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { int 
SparcRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const { return SparcGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0); } - -#include "SparcGenRegisterInfo.inc" - diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h index 56c8068..ec9e63a 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.h +++ b/lib/Target/Sparc/SparcRegisterInfo.h @@ -15,7 +15,9 @@ #define SPARCREGISTERINFO_H #include "llvm/Target/TargetRegisterInfo.h" -#include "SparcGenRegisterInfo.h.inc" + +#define GET_REGINFO_HEADER +#include "SparcGenRegisterInfo.inc" namespace llvm { diff --git a/lib/Target/Sparc/SparcSubtarget.cpp b/lib/Target/Sparc/SparcSubtarget.cpp index ce11af1..06bfc64 100644 --- a/lib/Target/Sparc/SparcSubtarget.cpp +++ b/lib/Target/Sparc/SparcSubtarget.cpp @@ -15,20 +15,23 @@ #include "SparcGenSubtarget.inc" using namespace llvm; -SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &FS, - bool is64Bit) : +SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS, bool is64Bit) : IsV9(false), V8DeprecatedInsts(false), IsVIS(false), Is64Bit(is64Bit) { // Determine default and user specified characteristics - const char *CPU = "v8"; - if (is64Bit) { - CPU = "v9"; - IsV9 = true; + std::string CPUName = CPU; + if (CPUName.empty()) { + if (is64Bit) + CPUName = "v9"; + else + CPUName = "v8"; } + IsV9 = CPUName == "v9"; // Parse features string. - ParseSubtargetFeatures(FS, CPU); + ParseSubtargetFeatures(FS, CPUName); } diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h index cec0ab4..eabf390 100644 --- a/lib/Target/Sparc/SparcSubtarget.h +++ b/lib/Target/Sparc/SparcSubtarget.h @@ -26,7 +26,8 @@ class SparcSubtarget : public TargetSubtarget { bool Is64Bit; public: - SparcSubtarget(const std::string &TT, const std::string &FS, bool is64bit); + SparcSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS, bool is64bit); bool isV9() const { return IsV9; } bool isVIS() const { return IsVIS; } @@ -34,8 +35,7 @@ public: /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. 
- std::string ParseSubtargetFeatures(const std::string &FS, - const std::string &CPU); + void ParseSubtargetFeatures(const std::string &FS, const std::string &CPU); bool is64Bit() const { return Is64Bit; } std::string getDataLayout() const { diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index b84eab5..792dd94 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -30,9 +30,10 @@ extern "C" void LLVMInitializeSparcTarget() { /// SparcTargetMachine ctor - Create an ILP32 architecture model /// SparcTargetMachine::SparcTargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS, bool is64bit) : LLVMTargetMachine(T, TT), - Subtarget(TT, FS, is64bit), + Subtarget(TT, CPU, FS, is64bit), DataLayout(Subtarget.getDataLayout()), TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget), FrameLowering(Subtarget) { @@ -56,12 +57,14 @@ bool SparcTargetMachine::addPreEmitPass(PassManagerBase &PM, SparcV8TargetMachine::SparcV8TargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS) - : SparcTargetMachine(T, TT, FS, false) { + : SparcTargetMachine(T, TT, CPU, FS, false) { } SparcV9TargetMachine::SparcV9TargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS) - : SparcTargetMachine(T, TT, FS, true) { + : SparcTargetMachine(T, TT, CPU, FS, true) { } diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h index c4bb6bd..799fc49 100644 --- a/lib/Target/Sparc/SparcTargetMachine.h +++ b/lib/Target/Sparc/SparcTargetMachine.h @@ -34,7 +34,8 @@ class SparcTargetMachine : public LLVMTargetMachine { SparcFrameLowering FrameLowering; public: SparcTargetMachine(const Target &T, const std::string &TT, - const std::string &FS, bool is64bit); + const std::string &CPU, const std::string &FS, + bool is64bit); virtual const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const TargetFrameLowering *getFrameLowering() const { @@ -62,7 +63,7 @@ public: class SparcV8TargetMachine : public SparcTargetMachine { public: SparcV8TargetMachine(const Target &T, const std::string &TT, - const std::string &FS); + const std::string &CPU, const std::string &FS); }; /// SparcV9TargetMachine - Sparc 64-bit target machine @@ -70,7 +71,7 @@ public: class SparcV9TargetMachine : public SparcTargetMachine { public: SparcV9TargetMachine(const Target &T, const std::string &TT, - const std::string &FS); + const std::string &CPU, const std::string &FS); }; } // end namespace llvm diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt index 1f5d355..47c7a9f 100644 --- a/lib/Target/SystemZ/CMakeLists.txt +++ b/lib/Target/SystemZ/CMakeLists.txt @@ -1,10 +1,7 @@ set(LLVM_TARGET_DEFINITIONS SystemZ.td) -tablegen(SystemZGenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(SystemZGenRegisterNames.inc -gen-register-enums) -tablegen(SystemZGenRegisterInfo.inc -gen-register-desc) -tablegen(SystemZGenInstrNames.inc -gen-instr-enums) -tablegen(SystemZGenInstrInfo.inc -gen-instr-desc) +tablegen(SystemZGenRegisterInfo.inc -gen-register-info) +tablegen(SystemZGenInstrInfo.inc -gen-instr-info) tablegen(SystemZGenAsmWriter.inc -gen-asm-writer) tablegen(SystemZGenDAGISel.inc -gen-dag-isel) tablegen(SystemZGenCallingConv.inc -gen-callingconv) diff --git a/lib/Target/SystemZ/Makefile b/lib/Target/SystemZ/Makefile index 6930e14..682f343 100644 --- 
a/lib/Target/SystemZ/Makefile +++ b/lib/Target/SystemZ/Makefile @@ -12,9 +12,8 @@ LIBRARYNAME = LLVMSystemZCodeGen TARGET = SystemZ # Make sure that tblgen is run, first thing. -BUILT_SOURCES = SystemZGenRegisterInfo.h.inc SystemZGenRegisterNames.inc \ - SystemZGenRegisterInfo.inc SystemZGenInstrNames.inc \ - SystemZGenInstrInfo.inc SystemZGenAsmWriter.inc \ +BUILT_SOURCES = SystemZGenRegisterInfo.inc SystemZGenInstrInfo.inc \ + SystemZGenAsmWriter.inc \ SystemZGenDAGISel.inc SystemZGenSubtarget.inc SystemZGenCallingConv.inc DIRS = TargetInfo diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h index ea5240a..84d83c0 100644 --- a/lib/Target/SystemZ/SystemZ.h +++ b/lib/Target/SystemZ/SystemZ.h @@ -53,9 +53,11 @@ namespace llvm { // Defines symbolic names for SystemZ registers. // This defines a mapping from register name to register number. -#include "SystemZGenRegisterNames.inc" +#define GET_REGINFO_ENUM +#include "SystemZGenRegisterInfo.inc" // Defines symbolic names for the SystemZ instructions. -#include "SystemZGenInstrNames.inc" +#define GET_INSTRINFO_ENUM +#include "SystemZGenInstrInfo.inc" #endif diff --git a/lib/Target/SystemZ/SystemZInstrBuilder.h b/lib/Target/SystemZ/SystemZInstrBuilder.h index 2f2ef08..ab45ec5 100644 --- a/lib/Target/SystemZ/SystemZInstrBuilder.h +++ b/lib/Target/SystemZ/SystemZInstrBuilder.h @@ -108,11 +108,11 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) { MachineInstr *MI = MIB; MachineFunction &MF = *MI->getParent()->getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); - const TargetInstrDesc &TID = MI->getDesc(); + const MCInstrDesc &MCID = MI->getDesc(); unsigned Flags = 0; - if (TID.mayLoad()) + if (MCID.mayLoad()) Flags |= MachineMemOperand::MOLoad; - if (TID.mayStore()) + if (MCID.mayStore()) Flags |= MachineMemOperand::MOStore; MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo( diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index be52803..71ba9f9 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -16,17 +16,21 @@ #include "SystemZInstrInfo.h" #include "SystemZMachineFunctionInfo.h" #include "SystemZTargetMachine.h" -#include "SystemZGenInstrInfo.inc" #include "llvm/Function.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Support/ErrorHandling.h" + +#define GET_INSTRINFO_MC_DESC +#include "SystemZGenInstrInfo.inc" + using namespace llvm; SystemZInstrInfo::SystemZInstrInfo(SystemZTargetMachine &tm) - : TargetInstrInfoImpl(SystemZInsts, array_lengthof(SystemZInsts)), + : TargetInstrInfoImpl(SystemZInsts, array_lengthof(SystemZInsts), + SystemZ::ADJCALLSTACKUP, SystemZ::ADJCALLSTACKDOWN), RI(tm, *this), TM(tm) { } @@ -199,13 +203,13 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { } bool SystemZInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { - const TargetInstrDesc &TID = MI->getDesc(); - if (!TID.isTerminator()) return false; + const MCInstrDesc &MCID = MI->getDesc(); + if (!MCID.isTerminator()) return false; // Conditional branch is a special case. 
- if (TID.isBranch() && !TID.isBarrier()) + if (MCID.isBranch() && !MCID.isBarrier()) return true; - if (!TID.isPredicable()) + if (!MCID.isPredicable()) return true; return !isPredicated(MI); } @@ -343,7 +347,7 @@ SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, return Count; } -const TargetInstrDesc& +const MCInstrDesc& SystemZInstrInfo::getBrCond(SystemZCC::CondCodes CC) const { switch (CC) { default: @@ -408,7 +412,7 @@ SystemZInstrInfo::getOppositeCondition(SystemZCC::CondCodes CC) const { } } -const TargetInstrDesc& +const MCInstrDesc& SystemZInstrInfo::getLongDispOpc(unsigned Opc) const { switch (Opc) { default: diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index 6cb7200..a39c21e 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -94,10 +94,10 @@ public: SystemZCC::CondCodes getOppositeCondition(SystemZCC::CondCodes CC) const; SystemZCC::CondCodes getCondFromBranchOpc(unsigned Opc) const; - const TargetInstrDesc& getBrCond(SystemZCC::CondCodes CC) const; - const TargetInstrDesc& getLongDispOpc(unsigned Opc) const; + const MCInstrDesc& getBrCond(SystemZCC::CondCodes CC) const; + const MCInstrDesc& getLongDispOpc(unsigned Opc) const; - const TargetInstrDesc& getMemoryInstr(unsigned Opc, int64_t Offset = 0) const { + const MCInstrDesc& getMemoryInstr(unsigned Opc, int64_t Offset = 0) const { if (Offset < 0 || Offset >= 4096) return getLongDispOpc(Opc); else diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index d5c165f..21421a9 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -25,12 +25,16 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/BitVector.h" + +#define GET_REGINFO_MC_DESC +#define GET_REGINFO_TARGET_DESC +#include "SystemZGenRegisterInfo.inc" + using namespace llvm; SystemZRegisterInfo::SystemZRegisterInfo(SystemZTargetMachine &tm, const SystemZInstrInfo &tii) - : SystemZGenRegisterInfo(SystemZ::ADJCALLSTACKUP, SystemZ::ADJCALLSTACKDOWN), - TM(tm), TII(tii) { + : SystemZGenRegisterInfo(), TM(tm), TII(tii) { } const unsigned* @@ -153,6 +157,3 @@ int SystemZRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const { assert(0 && "What is the dwarf register number"); return -1; } - - -#include "SystemZGenRegisterInfo.inc" diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h index cd8f20f..2e262e1 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -15,7 +15,9 @@ #define SystemZREGISTERINFO_H #include "llvm/Target/TargetRegisterInfo.h" -#include "SystemZGenRegisterInfo.h.inc" + +#define GET_REGINFO_HEADER +#include "SystemZGenRegisterInfo.inc" namespace llvm { diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp index a8b5e1f..95521b2 100644 --- a/lib/Target/SystemZ/SystemZSubtarget.cpp +++ b/lib/Target/SystemZ/SystemZSubtarget.cpp @@ -20,12 +20,15 @@ using namespace llvm; SystemZSubtarget::SystemZSubtarget(const std::string &TT, + const std::string &CPU, const std::string &FS): HasZ10Insts(false) { - std::string CPU = "z9"; + std::string CPUName = CPU; + if (CPUName.empty()) + CPUName = "z9"; // Parse features string. - ParseSubtargetFeatures(FS, CPU); + ParseSubtargetFeatures(FS, CPUName); } /// True if accessing the GV requires an extra load. 
diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h index 405d6e9..453471c 100644 --- a/lib/Target/SystemZ/SystemZSubtarget.h +++ b/lib/Target/SystemZ/SystemZSubtarget.h @@ -28,12 +28,12 @@ public: /// This constructor initializes the data members to match that /// of the specified triple. /// - SystemZSubtarget(const std::string &TT, const std::string &FS); + SystemZSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. - std::string ParseSubtargetFeatures(const std::string &FS, - const std::string &CPU); + void ParseSubtargetFeatures(const std::string &FS, const std::string &CPU); bool isZ10() const { return HasZ10Insts; } diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp index 1603899..3329ce6 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -24,9 +24,10 @@ extern "C" void LLVMInitializeSystemZTarget() { /// SystemZTargetMachine::SystemZTargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS) : LLVMTargetMachine(T, TT), - Subtarget(TT, FS), + Subtarget(TT, CPU, FS), DataLayout("E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32" "-f64:64:64-f128:128:128-a0:16:16-n32:64"), InstrInfo(*this), TLInfo(*this), TSInfo(*this), diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h index 524f83d..e40b556 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.h +++ b/lib/Target/SystemZ/SystemZTargetMachine.h @@ -38,7 +38,7 @@ class SystemZTargetMachine : public LLVMTargetMachine { SystemZFrameLowering FrameLowering; public: SystemZTargetMachine(const Target &T, const std::string &TT, - const std::string &FS); + const std::string &CPU, const std::string &FS); virtual const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp index d4b7697..2931416 100644 --- a/lib/Target/TargetInstrInfo.cpp +++ b/lib/Target/TargetInstrInfo.cpp @@ -12,44 +12,46 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetInstrItineraries.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/ErrorHandling.h" #include <cctype> using namespace llvm; //===----------------------------------------------------------------------===// -// TargetOperandInfo +// TargetInstrInfo //===----------------------------------------------------------------------===// -/// getRegClass - Get the register class for the operand, handling resolution -/// of "symbolic" pointer register classes etc. If this is not a register -/// operand, this returns null. 
-const TargetRegisterClass * -TargetOperandInfo::getRegClass(const TargetRegisterInfo *TRI) const { - if (isLookupPtrRegClass()) +TargetInstrInfo::TargetInstrInfo(const MCInstrDesc* Desc, unsigned numOpcodes, + int CFSetupOpcode, int CFDestroyOpcode) + : CallFrameSetupOpcode(CFSetupOpcode), + CallFrameDestroyOpcode(CFDestroyOpcode) { + InitMCInstrInfo(Desc, numOpcodes); +} + +TargetInstrInfo::~TargetInstrInfo() { +} + +const TargetRegisterClass* +TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum, + const TargetRegisterInfo *TRI) const { + if (OpNum >= MCID.getNumOperands()) + return 0; + + short RegClass = MCID.OpInfo[OpNum].RegClass; + if (MCID.OpInfo[OpNum].isLookupPtrRegClass()) return TRI->getPointerRegClass(RegClass); + // Instructions like INSERT_SUBREG do not have fixed register classes. if (RegClass < 0) return 0; + // Otherwise just look it up normally. return TRI->getRegClass(RegClass); } -//===----------------------------------------------------------------------===// -// TargetInstrInfo -//===----------------------------------------------------------------------===// - -TargetInstrInfo::TargetInstrInfo(const TargetInstrDesc* Desc, - unsigned numOpcodes) - : Descriptors(Desc), NumOpcodes(numOpcodes) { -} - -TargetInstrInfo::~TargetInstrInfo() { -} - unsigned TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, const MachineInstr *MI) const { @@ -135,13 +137,13 @@ void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB, bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { - const TargetInstrDesc &TID = MI->getDesc(); - if (!TID.isTerminator()) return false; + const MCInstrDesc &MCID = MI->getDesc(); + if (!MCID.isTerminator()) return false; // Conditional branch is a special case. 
- if (TID.isBranch() && !TID.isBarrier()) + if (MCID.isBranch() && !MCID.isBarrier()) return true; - if (!TID.isPredicable()) + if (!MCID.isPredicable()) return true; return !isPredicated(MI); } diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index 863b811..14044f2 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -43,7 +43,7 @@ namespace llvm { Reloc::Model RelocationModel; CodeModel::Model CMModel; bool GuaranteedTailCallOpt; - unsigned StackAlignment; + unsigned StackAlignmentOverride; bool RealignStack; bool DisableJumpTables; bool StrongPHIElim; @@ -183,7 +183,7 @@ EnableGuaranteedTailCallOpt("tailcallopt", static cl::opt<unsigned, true> OverrideStackAlignment("stack-alignment", cl::desc("Override default stack alignment"), - cl::location(StackAlignment), + cl::location(StackAlignmentOverride), cl::init(0)); static cl::opt<bool, true> EnableRealignStack("realign-stack", diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp index bae3343..90a8f8d 100644 --- a/lib/Target/TargetRegisterInfo.cpp +++ b/lib/Target/TargetRegisterInfo.cpp @@ -20,17 +20,11 @@ using namespace llvm; -TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterDesc *D, unsigned NR, +TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID, regclass_iterator RCB, regclass_iterator RCE, - const char *const *subregindexnames, - int CFSO, int CFDO) - : Desc(D), SubRegIndexNames(subregindexnames), NumRegs(NR), + const char *const *subregindexnames) + : InfoDesc(ID), SubRegIndexNames(subregindexnames), RegClassBegin(RCB), RegClassEnd(RCE) { - assert(isPhysicalRegister(NumRegs) && - "Target has too many physical registers!"); - - CallFrameSetupOpcode = CFSO; - CallFrameDestroyOpcode = CFDO; } TargetRegisterInfo::~TargetRegisterInfo() {} @@ -86,7 +80,7 @@ static void getAllocatableSetForRC(const MachineFunction &MF, BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF, const TargetRegisterClass *RC) const { - BitVector Allocatable(NumRegs); + BitVector Allocatable(getNumRegs()); if (RC) { getAllocatableSetForRC(MF, RC, Allocatable); } else { diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index b5fa94f..50464e8 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -1,11 +1,8 @@ set(LLVM_TARGET_DEFINITIONS X86.td) -tablegen(X86GenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(X86GenRegisterNames.inc -gen-register-enums) -tablegen(X86GenRegisterInfo.inc -gen-register-desc) +tablegen(X86GenRegisterInfo.inc -gen-register-info) tablegen(X86GenDisassemblerTables.inc -gen-disassembler) -tablegen(X86GenInstrNames.inc -gen-instr-enums) -tablegen(X86GenInstrInfo.inc -gen-instr-desc) +tablegen(X86GenInstrInfo.inc -gen-instr-info) tablegen(X86GenAsmWriter.inc -gen-asm-writer) tablegen(X86GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1) tablegen(X86GenAsmMatcher.inc -gen-asm-matcher) @@ -60,5 +57,6 @@ add_llvm_target(X86CodeGen ${sources}) add_subdirectory(AsmParser) add_subdirectory(Disassembler) add_subdirectory(InstPrinter) +add_subdirectory(MCTargetDesc) add_subdirectory(TargetInfo) add_subdirectory(Utils) diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index d8a105e..4a0d2ec 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -26,7 +26,8 @@ #include "llvm/Support/MemoryObject.h" #include "llvm/Support/raw_ostream.h" 
-#include "X86GenRegisterNames.inc" +#define GET_REGINFO_ENUM +#include "X86GenRegisterInfo.inc" #include "X86GenEDInfo.inc" using namespace llvm; diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index 68247d2..53738b1 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -16,23 +16,20 @@ #include "X86ATTInstPrinter.h" #include "X86InstComments.h" #include "X86Subtarget.h" +#include "MCTargetDesc/X86TargetDesc.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" -#include "X86GenInstrNames.inc" #include <map> using namespace llvm; // Include the auto-generated portion of the assembly writer. #define GET_INSTRUCTION_NAME #define PRINT_ALIAS_INSTR -#include "X86GenRegisterNames.inc" #include "X86GenAsmWriter.inc" -#undef PRINT_ALIAS_INSTR -#undef GET_INSTRUCTION_NAME X86ATTInstPrinter::X86ATTInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI) : MCInstPrinter(MAI) { diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index c642acc..5461c83 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -13,7 +13,7 @@ //===----------------------------------------------------------------------===// #include "X86InstComments.h" -#include "X86GenInstrNames.inc" +#include "MCTargetDesc/X86TargetDesc.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/raw_ostream.h" #include "../Utils/X86ShuffleDecode.h" diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp index 5f581ba..411d832 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp @@ -16,12 +16,12 @@ #include "X86IntelInstPrinter.h" #include "X86InstComments.h" #include "X86Subtarget.h" +#include "MCTargetDesc/X86TargetDesc.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "X86GenInstrNames.inc" #include <cctype> using namespace llvm; diff --git a/lib/Target/X86/MCTargetDesc/CMakeLists.txt b/lib/Target/X86/MCTargetDesc/CMakeLists.txt new file mode 100644 index 0000000..50be61c --- /dev/null +++ b/lib/Target/X86/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,2 @@ +add_llvm_library(LLVMX86Desc X86TargetDesc.cpp) + diff --git a/lib/Target/X86/MCTargetDesc/Makefile b/lib/Target/X86/MCTargetDesc/Makefile new file mode 100644 index 0000000..b19774e --- /dev/null +++ b/lib/Target/X86/MCTargetDesc/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/X86/TargetDesc/Makefile ------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMX86Desc + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
+ +include $(LEVEL)/Makefile.common diff --git a/lib/Target/X86/MCTargetDesc/X86TargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86TargetDesc.cpp new file mode 100644 index 0000000..44d1097 --- /dev/null +++ b/lib/Target/X86/MCTargetDesc/X86TargetDesc.cpp @@ -0,0 +1,46 @@ +//===-- X86TargetDesc.cpp - X86 Target Descriptions -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides X86 specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#include "X86TargetDesc.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Target/TargetRegistry.h" + +#define GET_REGINFO_MC_DESC +#include "X86GenRegisterInfo.inc" + +#define GET_INSTRINFO_MC_DESC +#include "X86GenInstrInfo.inc" + +using namespace llvm; + +MCInstrInfo *createX86MCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitX86MCInstrInfo(X); + return X; +} + +MCRegisterInfo *createX86MCRegisterInfo() { + MCRegisterInfo *X = new MCRegisterInfo(); + InitX86MCRegisterInfo(X); + return X; +} + +// Force static initialization. +extern "C" void LLVMInitializeX86MCRegInfo() { + RegisterMCRegInfo<MCRegisterInfo> X(TheX86_32Target); + RegisterMCRegInfo<MCRegisterInfo> Y(TheX86_64Target); + + TargetRegistry::RegisterMCRegInfo(TheX86_32Target, createX86MCRegisterInfo); + TargetRegistry::RegisterMCRegInfo(TheX86_64Target, createX86MCRegisterInfo); +} diff --git a/lib/Target/X86/MCTargetDesc/X86TargetDesc.h b/lib/Target/X86/MCTargetDesc/X86TargetDesc.h new file mode 100644 index 0000000..9ab622d --- /dev/null +++ b/lib/Target/X86/MCTargetDesc/X86TargetDesc.h @@ -0,0 +1,34 @@ +//===-- X86TargetDesc.h - X86 Target Descriptions ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides X86 specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef X86TARGETDESC_H +#define X86TARGETDESC_H + +namespace llvm { +class Target; + +extern Target TheX86_32Target, TheX86_64Target; +} // End llvm namespace + +// Defines symbolic names for X86 registers. This defines a mapping from +// register name to register number. +// +#define GET_REGINFO_ENUM +#include "X86GenRegisterInfo.inc" + +// Defines symbolic names for the X86 instructions. +// +#define GET_INSTRINFO_ENUM +#include "X86GenInstrInfo.inc" + +#endif diff --git a/lib/Target/X86/Makefile b/lib/Target/X86/Makefile index 12fb090..25da367 100644 --- a/lib/Target/X86/Makefile +++ b/lib/Target/X86/Makefile @@ -12,14 +12,13 @@ LIBRARYNAME = LLVMX86CodeGen TARGET = X86 # Make sure that tblgen is run, first thing. 
-BUILT_SOURCES = X86GenRegisterInfo.h.inc X86GenRegisterNames.inc \ - X86GenRegisterInfo.inc X86GenInstrNames.inc \ - X86GenInstrInfo.inc X86GenAsmWriter.inc X86GenAsmMatcher.inc \ +BUILT_SOURCES = X86GenRegisterInfo.inc X86GenInstrInfo.inc \ + X86GenAsmWriter.inc X86GenAsmMatcher.inc \ X86GenAsmWriter1.inc X86GenDAGISel.inc \ X86GenDisassemblerTables.inc X86GenFastISel.inc \ X86GenCallingConv.inc X86GenSubtarget.inc \ X86GenEDInfo.inc -DIRS = InstPrinter AsmParser Disassembler TargetInfo Utils +DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc Utils include $(LEVEL)/Makefile.common diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 0ca4366..9d66c2f 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -15,6 +15,7 @@ #ifndef TARGET_X86_H #define TARGET_X86_H +#include "MCTargetDesc/X86TargetDesc.h" #include "llvm/Support/DataTypes.h" #include "llvm/Target/TargetMachine.h" @@ -84,17 +85,6 @@ MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS, uint32_t CPUType, uint32_t CPUSubtype); -extern Target TheX86_32Target, TheX86_64Target; - } // End llvm namespace -// Defines symbolic names for X86 registers. This defines a mapping from -// register name to register number. -// -#include "X86GenRegisterNames.inc" - -// Defines symbolic names for the X86 instructions. -// -#include "X86GenInstrNames.inc" - #endif diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index 421e221..4b11db7 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -68,7 +68,7 @@ namespace { return "X86 Machine Code Emitter"; } - void emitInstruction(MachineInstr &MI, const TargetInstrDesc *Desc); + void emitInstruction(MachineInstr &MI, const MCInstrDesc *Desc); void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); @@ -132,7 +132,7 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { MCE.StartMachineBasicBlock(MBB); for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) { - const TargetInstrDesc &Desc = I->getDesc(); + const MCInstrDesc &Desc = I->getDesc(); emitInstruction(*I, &Desc); // MOVPC32r is basically a call plus a pop instruction. if (Desc.getOpcode() == X86::MOVPC32r) @@ -150,7 +150,7 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { /// size, and 3) use of X86-64 extended registers. static unsigned determineREX(const MachineInstr &MI) { unsigned REX = 0; - const TargetInstrDesc &Desc = MI.getDesc(); + const MCInstrDesc &Desc = MI.getDesc(); // Pseudo instructions do not need REX prefix byte. if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo) @@ -161,7 +161,7 @@ static unsigned determineREX(const MachineInstr &MI) { unsigned NumOps = Desc.getNumOperands(); if (NumOps) { bool isTwoAddr = NumOps > 1 && - Desc.getOperandConstraint(1, TOI::TIED_TO) != -1; + Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1; // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix. unsigned i = isTwoAddr ? 1 : 0; @@ -598,7 +598,7 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI, template<class CodeEmitter> void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, - const TargetInstrDesc *Desc) { + const MCInstrDesc *Desc) { DEBUG(dbgs() << MI); // If this is a pseudo instruction, lower it. @@ -708,9 +708,9 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, // If this is a two-address instruction, skip one of the register operands. 
unsigned NumOps = Desc->getNumOperands(); unsigned CurOp = 0; - if (NumOps > 1 && Desc->getOperandConstraint(1, TOI::TIED_TO) != -1) + if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) != -1) ++CurOp; - else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0) + else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1,MCOI::TIED_TO)== 0) // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32 --NumOps; diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index f1b9972..21e163a 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -15,6 +15,7 @@ #include "X86.h" #include "X86InstrBuilder.h" +#include "X86ISelLowering.h" #include "X86RegisterInfo.h" #include "X86Subtarget.h" #include "X86TargetMachine.h" @@ -1392,7 +1393,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { assert(DI->getAddress() && "Null address should be checked earlier!"); if (!X86SelectAddress(DI->getAddress(), AM)) return false; - const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); + const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); // FIXME may need to add RegState::Debug to any registers produced, // although ESP/EBP should be the only ones at the moment. addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II), AM). @@ -1493,7 +1494,8 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { return false; // Fast-isel doesn't know about callee-pop yet. - if (Subtarget->IsCalleePop(isVarArg, CC)) + if (X86::isCalleePop(CC, Subtarget->is64Bit(), isVarArg, + GuaranteedTailCallOpt)) return false; // Check whether the function can return without sret-demotion. @@ -1628,7 +1630,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { unsigned NumBytes = CCInfo.getNextStackOffset(); // Issue CALLSEQ_START - unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode(); + unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackDown)) .addImm(NumBytes); @@ -1801,7 +1803,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { MIB.addReg(RegArgs[i]); // Issue CALLSEQ_END - unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode(); + unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); unsigned NumBytesCallee = 0; if (!Subtarget->is64Bit() && CS.paramHasAttr(1, Attribute::StructRet)) NumBytesCallee = 4; @@ -1846,16 +1848,19 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { // stack, but where we prefer to use the value in xmm registers, copy it // out as F80 and use a truncate to move it from fp stack reg to xmm reg. 
if ((RVLocs[i].getLocReg() == X86::ST0 || - RVLocs[i].getLocReg() == X86::ST1) && - isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) { - CopyVT = MVT::f80; - CopyReg = createResultReg(X86::RFP80RegisterClass); + RVLocs[i].getLocReg() == X86::ST1)) { + if (isScalarFPTypeInSSEReg(RVLocs[i].getValVT())) { + CopyVT = MVT::f80; + CopyReg = createResultReg(X86::RFP80RegisterClass); + } + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::FpPOP_RETVAL), + CopyReg); + } else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + CopyReg).addReg(RVLocs[i].getLocReg()); + UsedRegs.push_back(RVLocs[i].getLocReg()); } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - CopyReg).addReg(RVLocs[i].getLocReg()); - UsedRegs.push_back(RVLocs[i].getLocReg()); - if (CopyVT != RVLocs[i].getValVT()) { // Round the F80 the right size, which also moves to the appropriate xmm // register. This is accomplished by storing the F80 value in memory and diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 325d061..463cde0 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -37,6 +37,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/InlineAsm.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -126,10 +127,45 @@ namespace { void bundleCFG(MachineFunction &MF); MachineBasicBlock *MBB; // Current basic block + + // The hardware keeps track of how many FP registers are live, so we have + // to model that exactly. Usually, each live register corresponds to an + // FP<n> register, but when dealing with calls, returns, and inline + // assembly, it is sometimes necessary to have live scratch registers. unsigned Stack[8]; // FP<n> Registers in each stack slot... - unsigned RegMap[8]; // Track which stack slot contains each register unsigned StackTop; // The current top of the FP stack. + enum { + NumFPRegs = 16 // Including scratch pseudo-registers. + }; + + // For each live FP<n> register, point to its Stack[] entry. + // The first entries correspond to FP0-FP6, the rest are scratch registers + // used when we need slightly different live registers than what the + // register allocator thinks. + unsigned RegMap[NumFPRegs]; + + // Pending fixed registers - Inline assembly needs FP registers to appear + // in fixed stack slot positions. This is handled by copying FP registers + // to ST registers before the instruction, and copying back after the + // instruction. + // + // This is modeled with pending ST registers. NumPendingSTs is the number + // of ST registers (ST0-STn) we are tracking. PendingST[n] points to an FP + // register that holds the ST value. The ST registers are not moved into + // place until immediately before the instruction that needs them. + // + // It can happen that we need an ST register to be live when no FP register + // holds the value: + // + // %ST0 = COPY %FP4<kill> + // + // When that happens, we allocate a scratch FP register to hold the ST + // value. That means every register in PendingST must be live. + + unsigned NumPendingSTs; + unsigned char PendingST[8]; + // Set up our stack model to match the incoming registers to MBB.
void setupBlockStack(); @@ -142,13 +178,15 @@ namespace { dbgs() << " FP" << Stack[i]; assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!"); } + for (unsigned i = 0; i != NumPendingSTs; ++i) + dbgs() << ", ST" << i << " in FP" << unsigned(PendingST[i]); dbgs() << "\n"; } /// getSlot - Return the stack slot number a particular register number is /// in. unsigned getSlot(unsigned RegNo) const { - assert(RegNo < 8 && "Regno out of range!"); + assert(RegNo < NumFPRegs && "Regno out of range!"); return RegMap[RegNo]; } @@ -160,12 +198,17 @@ namespace { /// getScratchReg - Return an FP register that is not currently in use. unsigned getScratchReg() { - for (int i = 7; i >= 0; --i) + for (int i = NumFPRegs - 1; i >= 8; --i) if (!isLive(i)) return i; llvm_unreachable("Ran out of scratch FP registers"); } + /// isScratchReg - Returns true if RegNo is a scratch FP register. + bool isScratchReg(unsigned RegNo) { + return RegNo >= 8 && RegNo < NumFPRegs; + } + /// getStackEntry - Return the X86::FP<n> register in register ST(i). unsigned getStackEntry(unsigned STi) const { if (STi >= StackTop) @@ -181,7 +224,7 @@ namespace { // pushReg - Push the specified FP<n> register onto the stack. void pushReg(unsigned Reg) { - assert(Reg < 8 && "Register number out of range!"); + assert(Reg < NumFPRegs && "Register number out of range!"); if (StackTop >= 8) report_fatal_error("Stack overflow!"); Stack[StackTop] = Reg; @@ -236,7 +279,7 @@ namespace { /// Adjust the live registers to be the set in Mask. void adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I); - /// Shuffle the top FixCount stack entries susch that FP reg FixStack[0] is + /// Shuffle the top FixCount stack entries such that FP reg FixStack[0] is /// st(0), FP reg FixStack[1] is st(1) etc. void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount, MachineBasicBlock::iterator I); @@ -251,7 +294,14 @@ namespace { void handleCondMovFP(MachineBasicBlock::iterator &I); void handleSpecialFP(MachineBasicBlock::iterator &I); - bool translateCopy(MachineInstr*); + // Check if a COPY instruction is using FP registers. + bool isFPCopy(MachineInstr *MI) { + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned SrcReg = MI->getOperand(1).getReg(); + + return X86::RFP80RegClass.contains(DstReg) || + X86::RFP80RegClass.contains(SrcReg); + } }; char FPS::ID = 0; } @@ -341,6 +391,7 @@ void FPS::bundleCFG(MachineFunction &MF) { bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { bool Changed = false; MBB = &BB; + NumPendingSTs = 0; setupBlockStack(); @@ -352,7 +403,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { if (MI->isInlineAsm()) FPInstClass = X86II::SpecialFP; - if (MI->isCopy() && translateCopy(MI)) + if (MI->isCopy() && isFPCopy(MI)) FPInstClass = X86II::SpecialFP; if (FPInstClass == X86II::NotFP) @@ -881,7 +932,8 @@ void FPS::shuffleStackTop(const unsigned char *FixStack, continue; // (Reg st0) (OldReg st0) = (Reg OldReg st0) moveToTop(Reg, I); - moveToTop(OldReg, I); + if (FixCount > 0) + moveToTop(OldReg, I); } DEBUG(dumpStack()); } @@ -1239,142 +1291,309 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { MachineInstr *MI = I; switch (MI->getOpcode()) { default: llvm_unreachable("Unknown SpecialFP instruction!"); - case X86::FpGET_ST0_32:// Appears immediately after a call returning FP type! - case X86::FpGET_ST0_64:// Appears immediately after a call returning FP type! - case X86::FpGET_ST0_80:// Appears immediately after a call returning FP type!
- assert(StackTop == 0 && "Stack should be empty after a call!"); - pushReg(getFPReg(MI->getOperand(0))); - break; - case X86::FpGET_ST1_32:// Appears immediately after a call returning FP type! - case X86::FpGET_ST1_64:// Appears immediately after a call returning FP type! - case X86::FpGET_ST1_80:{// Appears immediately after a call returning FP type! - // FpGET_ST1 should occur right after a FpGET_ST0 for a call or inline asm. - // The pattern we expect is: - // CALL - // FP1 = FpGET_ST0 - // FP4 = FpGET_ST1 - // - // At this point, we've pushed FP1 on the top of stack, so it should be - // present if it isn't dead. If it was dead, we already emitted a pop to - // remove it from the stack and StackTop = 0. - - // Push FP4 as top of stack next. - pushReg(getFPReg(MI->getOperand(0))); + case TargetOpcode::COPY: { + // We handle three kinds of copies: FP <- FP, FP <- ST, and ST <- FP. + const MachineOperand &MO1 = MI->getOperand(1); + const MachineOperand &MO0 = MI->getOperand(0); + unsigned DstST = MO0.getReg() - X86::ST0; + unsigned SrcST = MO1.getReg() - X86::ST0; + bool KillsSrc = MI->killsRegister(MO1.getReg()); + + // ST = COPY FP. Set up a pending ST register. + if (DstST < 8) { + unsigned SrcFP = getFPReg(MO1); + assert(isLive(SrcFP) && "Cannot copy dead register"); + assert(!MO0.isDead() && "Cannot copy to dead ST register"); + + // Unallocated STs are marked as the nonexistent FP255. + while (NumPendingSTs <= DstST) + PendingST[NumPendingSTs++] = NumFPRegs; + + // STi could still be live from a previous inline asm. + if (isScratchReg(PendingST[DstST])) { + DEBUG(dbgs() << "Clobbering old ST in FP" << unsigned(PendingST[DstST]) + << '\n'); + freeStackSlotBefore(MI, PendingST[DstST]); + } - // If StackTop was 0 before we pushed our operand, then ST(0) must have been - // dead. In this case, the ST(1) value is the only thing that is live, so - // it should be on the TOS (after the pop that was emitted) and is. Just - // continue in this case. - if (StackTop == 1) + // When the source is killed, allocate a scratch FP register. + if (KillsSrc) { + unsigned Slot = getSlot(SrcFP); + unsigned SR = getScratchReg(); + PendingST[DstST] = SR; + Stack[Slot] = SR; + RegMap[SR] = Slot; + } else + PendingST[DstST] = SrcFP; break; - - // Because pushReg just pushed ST(1) as TOS, we now have to swap the two top - // elements so that our accounting is correct. - unsigned RegOnTop = getStackEntry(0); - unsigned RegNo = getStackEntry(1); - - // Swap the slots the regs are in. - std::swap(RegMap[RegNo], RegMap[RegOnTop]); - - // Swap stack slot contents. - if (RegMap[RegOnTop] >= StackTop) - report_fatal_error("Access past stack top!"); - std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]); - break; - } - case X86::FpSET_ST0_32: - case X86::FpSET_ST0_64: - case X86::FpSET_ST0_80: { - // FpSET_ST0_80 is generated by copyRegToReg for setting up inline asm - // arguments that use an st constraint. We expect a sequence of - // instructions: Fp_SET_ST0 Fp_SET_ST1? INLINEASM - unsigned Op0 = getFPReg(MI->getOperand(0)); - - if (!MI->killsRegister(X86::FP0 + Op0)) { - // Duplicate Op0 into a temporary on the stack top. - duplicateToTop(Op0, getScratchReg(), I); - } else { - // Op0 is killed, so just swap it into position. - moveToTop(Op0, I); } - --StackTop; // "Forget" we have something on the top of stack! - break; - } - case X86::FpSET_ST1_32: - case X86::FpSET_ST1_64: - case X86::FpSET_ST1_80: { - // Set up st(1) for inline asm. 
We are assuming that st(0) has already been - // set up by FpSET_ST0, and our StackTop is off by one because of it. - unsigned Op0 = getFPReg(MI->getOperand(0)); - // Restore the actual StackTop from before Fp_SET_ST0. - // Note we can't handle Fp_SET_ST1 without a preceding Fp_SET_ST0, and we - // are not enforcing the constraint. - ++StackTop; - unsigned RegOnTop = getStackEntry(0); // This reg must remain in st(0). - if (!MI->killsRegister(X86::FP0 + Op0)) { - duplicateToTop(Op0, getScratchReg(), I); - moveToTop(RegOnTop, I); - } else if (getSTReg(Op0) != X86::ST1) { - // We have the wrong value at st(1). Shuffle! Untested! - moveToTop(getStackEntry(1), I); - moveToTop(Op0, I); - moveToTop(RegOnTop, I); + + // FP = COPY ST. Extract fixed stack value. + // Any instruction defining ST registers must have assigned them to a + // scratch register. + if (SrcST < 8) { + unsigned DstFP = getFPReg(MO0); + assert(!isLive(DstFP) && "Cannot copy ST to live FP register"); + assert(NumPendingSTs > SrcST && "Cannot copy from dead ST register"); + unsigned SrcFP = PendingST[SrcST]; + assert(isScratchReg(SrcFP) && "Expected ST in a scratch register"); + assert(isLive(SrcFP) && "Scratch holding ST is dead"); + + // DstFP steals the stack slot from SrcFP. + unsigned Slot = getSlot(SrcFP); + Stack[Slot] = DstFP; + RegMap[DstFP] = Slot; + + // Always treat the ST as killed. + PendingST[SrcST] = NumFPRegs; + while (NumPendingSTs && PendingST[NumPendingSTs - 1] == NumFPRegs) + --NumPendingSTs; + break; } - assert(StackTop >= 2 && "Too few live registers"); - StackTop -= 2; // "Forget" both st(0) and st(1). - break; - } - case X86::MOV_Fp3232: - case X86::MOV_Fp3264: - case X86::MOV_Fp6432: - case X86::MOV_Fp6464: - case X86::MOV_Fp3280: - case X86::MOV_Fp6480: - case X86::MOV_Fp8032: - case X86::MOV_Fp8064: - case X86::MOV_Fp8080: { - const MachineOperand &MO1 = MI->getOperand(1); - unsigned SrcReg = getFPReg(MO1); - const MachineOperand &MO0 = MI->getOperand(0); - unsigned DestReg = getFPReg(MO0); - if (MI->killsRegister(X86::FP0+SrcReg)) { + // FP <- FP copy. + unsigned DstFP = getFPReg(MO0); + unsigned SrcFP = getFPReg(MO1); + assert(isLive(SrcFP) && "Cannot copy dead register"); + if (KillsSrc) { // If the input operand is killed, we can just change the owner of the // incoming stack slot into the result. - unsigned Slot = getSlot(SrcReg); - assert(Slot < 7 && DestReg < 7 && "FpMOV operands invalid!"); - Stack[Slot] = DestReg; - RegMap[DestReg] = Slot; - + unsigned Slot = getSlot(SrcFP); + Stack[Slot] = DstFP; + RegMap[DstFP] = Slot; } else { - // For FMOV we just duplicate the specified value to a new stack slot. + // For COPY we just duplicate the specified value to a new stack slot. // This could be made better, but would require substantial changes. - duplicateToTop(SrcReg, DestReg, I); + duplicateToTop(SrcFP, DstFP, I); } + break; + } + + case X86::FpPOP_RETVAL: { + // The FpPOP_RETVAL instruction is used after calls that return a value on + // the floating point stack. We cannot model this with ST defs since CALL + // instructions have fixed clobber lists. This instruction is interpreted + // to mean that there is one more live register on the stack than we + // thought. + // + // This means that StackTop does not match the hardware stack between a + // call and the FpPOP_RETVAL instructions. We do tolerate FP instructions + // between CALL and FpPOP_RETVAL as long as they don't overflow the + // hardware stack. 
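The bookkeeping this FpPOP_RETVAL case performs is compact enough to show on its own. The following is a minimal sketch, not the pass itself: the parameters stand in for the FPS members of the same names introduced earlier, and the helper name is illustrative.

    #include <algorithm>
    #include <cassert>

    // Model FpPOP_RETVAL: the call left one extra value on the hardware stack,
    // so every tracked entry moves up one slot and the returned value becomes
    // the new bottom of the simulated stack.
    static void modelFpPopRetval(unsigned Stack[8], unsigned RegMap[],
                                 unsigned NumFPRegs, unsigned &StackTop,
                                 unsigned DstFP) {
      assert(StackTop < 8 && "simulated x87 stack would overflow");
      std::copy_backward(Stack, Stack + StackTop, Stack + StackTop + 1);
      for (unsigned i = 0; i != NumFPRegs; ++i)
        ++RegMap[i];      // every register's slot index grew by one
      ++StackTop;
      Stack[0] = DstFP;   // the returned value sits at the bottom
      RegMap[DstFP] = 0;
    }

The hunk continues below with the same steps applied to the real pass state.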
+ unsigned DstFP = getFPReg(MI->getOperand(0)); + + // Move existing stack elements up to reflect reality. + assert(StackTop < 8 && "Stack overflowed before FpPOP_RETVAL"); + if (StackTop) { + std::copy_backward(Stack, Stack + StackTop, Stack + StackTop + 1); + for (unsigned i = 0; i != NumFPRegs; ++i) + ++RegMap[i]; } + ++StackTop; + + // DstFP is the new bottom of the stack. + Stack[0] = DstFP; + RegMap[DstFP] = 0; + + // DstFP will be killed by processBasicBlock if this was a dead def. break; + } + case TargetOpcode::INLINEASM: { // The inline asm MachineInstr currently only *uses* FP registers for the // 'f' constraint. These should be turned into the current ST(x) register - // in the machine instr. Also, any kills should be explicitly popped after - // the inline asm. - unsigned Kills = 0; + // in the machine instr. + // + // There are special rules for x87 inline assembly. The compiler must know + // exactly how many registers are popped and pushed implicitly by the asm. + // Otherwise it is not possible to restore the stack state after the inline + // asm. + // + // There are 3 kinds of input operands: + // + // 1. Popped inputs. These must appear at the stack top in ST0-STn. A + // popped input operand must be in a fixed stack slot, and it is either + // tied to an output operand, or in the clobber list. The MI has ST use + // and def operands for these inputs. + // + // 2. Fixed inputs. These inputs appear in fixed stack slots, but are + // preserved by the inline asm. The fixed stack slots must be STn-STm + // following the popped inputs. A fixed input operand cannot be tied to + // an output or appear in the clobber list. The MI has ST use operands + // and no defs for these inputs. + // + // 3. Preserved inputs. These inputs use the "f" constraint which is + // represented as an FP register. The inline asm won't change these + // stack slots. + // + // Outputs must be in ST registers, FP outputs are not allowed. Clobbered + // registers do not count as output operands. The inline asm changes the + // stack as if it popped all the popped inputs and then pushed all the + // output operands. + + // Scan the assembly for ST registers used, defined and clobbered. We can + // only tell clobbers from defs by looking at the asm descriptor. + unsigned STUses = 0, STDefs = 0, STClobbers = 0, STDeadDefs = 0; + unsigned NumOps = 0; + for (unsigned i = InlineAsm::MIOp_FirstOperand, e = MI->getNumOperands(); + i != e && MI->getOperand(i).isImm(); i += 1 + NumOps) { + unsigned Flags = MI->getOperand(i).getImm(); + NumOps = InlineAsm::getNumOperandRegisters(Flags); + if (NumOps != 1) + continue; + const MachineOperand &MO = MI->getOperand(i + 1); + if (!MO.isReg()) + continue; + unsigned STReg = MO.getReg() - X86::ST0; + if (STReg >= 8) + continue; + + switch (InlineAsm::getKind(Flags)) { + case InlineAsm::Kind_RegUse: + STUses |= (1u << STReg); + break; + case InlineAsm::Kind_RegDef: + case InlineAsm::Kind_RegDefEarlyClobber: + STDefs |= (1u << STReg); + if (MO.isDead()) + STDeadDefs |= (1u << STReg); + break; + case InlineAsm::Kind_Clobber: + STClobbers |= (1u << STReg); + break; + default: + break; + } + } + + if (STUses && !isMask_32(STUses)) + report_fatal_error("Inline asm fixed input regs" + " must be last on the x87 stack"); + unsigned NumSTUses = CountTrailingOnes_32(STUses); + + // Defs must be contiguous from the stack top. ST0-STn. 
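All of these mask checks (STUses just above, STDefs and STClobbers just below) share one shape test: a legal set of fixed ST registers must be a contiguous run starting at ST0, so its bitmask must have the form 2^n - 1, which is exactly what isMask_32 tests. A self-contained sketch of the rule; the helper name here is hypothetical, the pass itself uses LLVM's isMask_32 and CountTrailingOnes_32:

    #include <cstdint>

    // "Must be last on the x87 stack" as a bit test: a mask over ST0-ST7 is
    // legal only when it has the form 0b0...0111, i.e. 2^n - 1 for some n.
    static bool isContiguousFromST0(uint32_t Mask) {
      return (Mask & (Mask + 1)) == 0;  // true for 0, 1, 3, 7, 15, ...
    }
    // {ST0,ST1,ST2} gives 0b111 and is accepted; {ST0,ST2} gives 0b101 and
    // is rejected with one of the report_fatal_error calls in this hunk.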
+ if (STDefs && !isMask_32(STDefs)) + report_fatal_error("Inline asm output regs" + " must be last on the x87 stack"); + unsigned NumSTDefs = CountTrailingOnes_32(STDefs); + + // So must the clobbered stack slots. ST0-STm, m >= n. + if (STClobbers && !isMask_32(STDefs | STClobbers)) + report_fatal_error("Inline asm clobbers must be last on the x87 stack"); + + // Popped inputs are the ones that are also clobbered or defined. + unsigned STPopped = STUses & (STDefs | STClobbers); + if (STPopped && !isMask_32(STPopped)) + report_fatal_error("Inline asm implicitly popped regs" + " must be last on the x87 stack"); + unsigned NumSTPopped = CountTrailingOnes_32(STPopped); + + DEBUG(dbgs() << "Asm uses " << NumSTUses << " fixed regs, pops " + << NumSTPopped << ", and defines " << NumSTDefs << " regs.\n"); + + // Scan the instruction for FP uses corresponding to "f" constraints. + // Collect FP registers to kill after the instruction. + // Always kill all the scratch regs. + unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff; + unsigned FPUsed = 0; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &Op = MI->getOperand(i); if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6) continue; - assert(Op.isUse() && "Only handle inline asm uses right now"); - + if (!Op.isUse()) + report_fatal_error("Illegal \"f\" output constraint in inline asm"); unsigned FPReg = getFPReg(Op); - Op.setReg(getSTReg(FPReg)); - + FPUsed |= 1U << FPReg; + // If we kill this operand, make sure to pop it from the stack after the // asm. We just remember it for now, and pop them all off at the end in // a batch. if (Op.isKill()) - Kills |= 1U << FPReg; + FPKills |= 1U << FPReg; + } + + // The popped inputs will be killed by the instruction, so duplicate them + // if the FP register needs to be live after the instruction, or if it is + // used in the instruction itself. We effectively treat the popped inputs + // as early clobbers. + for (unsigned i = 0; i < NumSTPopped; ++i) { + if ((FPKills & ~FPUsed) & (1u << PendingST[i])) + continue; + unsigned SR = getScratchReg(); + duplicateToTop(PendingST[i], SR, I); + DEBUG(dbgs() << "Duplicating ST" << i << " in FP" + << unsigned(PendingST[i]) << " to avoid clobbering it.\n"); + PendingST[i] = SR; } + // Make sure we have a unique live register for every fixed use. Some of + // them could be undef uses, and we need to emit LD_F0 instructions. + for (unsigned i = 0; i < NumSTUses; ++i) { + if (i < NumPendingSTs && PendingST[i] < NumFPRegs) { + // Check for shared assignments. + for (unsigned j = 0; j < i; ++j) { + if (PendingST[j] != PendingST[i]) + continue; + // STi and STj are in the same register; create a copy. + unsigned SR = getScratchReg(); + duplicateToTop(PendingST[i], SR, I); + DEBUG(dbgs() << "Duplicating ST" << i << " in FP" + << unsigned(PendingST[i]) + << " to avoid collision with ST" << j << '\n'); + PendingST[i] = SR; + } + continue; + } + unsigned SR = getScratchReg(); + DEBUG(dbgs() << "Emitting LD_F0 for ST" << i << " in FP" << SR << '\n'); + BuildMI(*MBB, I, MI->getDebugLoc(), TII->get(X86::LD_F0)); + pushReg(SR); + PendingST[i] = SR; + if (NumPendingSTs == i) + ++NumPendingSTs; + } + assert(NumPendingSTs >= NumSTUses && "Fixed registers should be assigned"); + + // Now we can rearrange the live registers to match what was requested. + shuffleStackTop(PendingST, NumPendingSTs, I); + DEBUG({dbgs() << "Before asm: "; dumpStack();}); + + // With the stack layout fixed, rewrite the FP registers.
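Before that rewrite, one detail from the scan above is worth pulling out: the FPKills mask starts life with every scratch register pre-marked, because scratch registers never survive past the asm. A sketch of that arithmetic, under the NumFPRegs == 16 layout described earlier:

    #include <cstdint>

    // With 16 tracked registers, pre-kill exactly the scratch registers
    // FP8-FP15: (1u << 16) - 1 == 0xFFFF, and 0xFFFF & ~0xFFu == 0xFF00.
    static uint32_t initialScratchKillMask(unsigned NumFPRegs) {
      return ((1u << NumFPRegs) - 1) & ~0xFFu;
    }

With the stack layout matching the constraints, the "f" operands can then be rewritten to ST registers, as the loop below does.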
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &Op = MI->getOperand(i); + if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6) + continue; + unsigned FPReg = getFPReg(Op); + Op.setReg(getSTReg(FPReg)); + } + + // Simulate the inline asm popping its inputs and pushing its outputs. + StackTop -= NumSTPopped; + + // Hold the fixed output registers in scratch FP registers. They will be + // transferred to real FP registers by copies. + NumPendingSTs = 0; + for (unsigned i = 0; i < NumSTDefs; ++i) { + unsigned SR = getScratchReg(); + pushReg(SR); + FPKills &= ~(1u << SR); + } + for (unsigned i = 0; i < NumSTDefs; ++i) + PendingST[NumPendingSTs++] = getStackEntry(i); + DEBUG({dbgs() << "After asm: "; dumpStack();}); + + // If any of the ST defs were dead, pop them immediately. Our caller only + // handles dead FP defs. + MachineBasicBlock::iterator InsertPt = MI; + for (unsigned i = 0; STDefs & (1u << i); ++i) { + if (!(STDeadDefs & (1u << i))) + continue; + freeStackSlotAfter(InsertPt, PendingST[i]); + PendingST[i] = NumFPRegs; + } + while (NumPendingSTs && PendingST[NumPendingSTs - 1] == NumFPRegs) + --NumPendingSTs; + // If this asm kills any FP registers (is the last use of them) we must // explicitly emit pop instructions for them. Do this now after the asm has // executed so that the ST(x) numbers are not off (which would happen if we @@ -1382,16 +1601,16 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { // // Note: this might be a non-optimal pop sequence. We might be able to do // better by trying to pop in stack order or something. - MachineBasicBlock::iterator InsertPt = MI; - while (Kills) { - unsigned FPReg = CountTrailingZeros_32(Kills); - freeStackSlotAfter(InsertPt, FPReg); - Kills &= ~(1U << FPReg); + while (FPKills) { + unsigned FPReg = CountTrailingZeros_32(FPKills); + if (isLive(FPReg)) + freeStackSlotAfter(InsertPt, FPReg); + FPKills &= ~(1U << FPReg); } // Don't delete the inline asm! return; } - + case X86::RET: case X86::RETI: // If RET has an FP register use operand, pass the first one in ST(0) and @@ -1489,33 +1708,3 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { } else --I; } - -// Translate a COPY instruction to a pseudo-op that handleSpecialFP understands. 
-bool FPS::translateCopy(MachineInstr *MI) { - unsigned DstReg = MI->getOperand(0).getReg(); - unsigned SrcReg = MI->getOperand(1).getReg(); - - if (DstReg == X86::ST0) { - MI->setDesc(TII->get(X86::FpSET_ST0_80)); - MI->RemoveOperand(0); - return true; - } - if (DstReg == X86::ST1) { - MI->setDesc(TII->get(X86::FpSET_ST1_80)); - MI->RemoveOperand(0); - return true; - } - if (SrcReg == X86::ST0) { - MI->setDesc(TII->get(X86::FpGET_ST0_80)); - return true; - } - if (SrcReg == X86::ST1) { - MI->setDesc(TII->get(X86::FpGET_ST1_80)); - return true; - } - if (X86::RFP80RegClass.contains(DstReg, SrcReg)) { - MI->setDesc(TII->get(X86::MOV_Fp8080)); - return true; - } - return false; -} diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 1fcc274..15c1917 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1612,16 +1612,18 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { Opc = AtomicOpcTbl[Op][I32]; break; case MVT::i64: + Opc = AtomicOpcTbl[Op][I64]; if (isCN) { if (immSext8(Val.getNode())) Opc = AtomicOpcTbl[Op][SextConstantI64]; else if (i64immSExt32(Val.getNode())) Opc = AtomicOpcTbl[Op][ConstantI64]; - } else - Opc = AtomicOpcTbl[Op][I64]; + } break; } + assert(Opc != 0 && "Invalid arith lock transform!"); + DebugLoc dl = Node->getDebugLoc(); SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, NVT), 0); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 6cd03d0..4f8b90f 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1511,20 +1511,15 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, // If this is a call to a function that returns an fp value on the floating // point stack, we must guarantee that the value is popped from the stack, so // a CopyFromReg is not good enough - the copy instruction may be eliminated - // if the return value is not used. We use the FpGET_ST0 instructions + // if the return value is not used. We use the FpPOP_RETVAL instruction // instead. if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1) { // If we prefer to use the value in xmm registers, copy it out as f80 and // use a truncate to move it from fp stack reg to xmm reg. if (isScalarFPTypeInSSEReg(VA.getValVT())) CopyVT = MVT::f80; - bool isST0 = VA.getLocReg() == X86::ST0; - unsigned Opc = 0; - if (CopyVT == MVT::f32) Opc = isST0 ? X86::FpGET_ST0_32:X86::FpGET_ST1_32; - if (CopyVT == MVT::f64) Opc = isST0 ? X86::FpGET_ST0_64:X86::FpGET_ST1_64; - if (CopyVT == MVT::f80) Opc = isST0 ? X86::FpGET_ST0_80:X86::FpGET_ST1_80; SDValue Ops[] = { Chain, InFlag }; - Chain = SDValue(DAG.getMachineNode(Opc, dl, CopyVT, MVT::Other, MVT::Glue, - Ops, 2), 1); + Chain = SDValue(DAG.getMachineNode(X86::FpPOP_RETVAL, dl, CopyVT, + MVT::Other, MVT::Glue, Ops, 2), 1); Val = Chain.getValue(0); // Round the f80 to the right size, which also moves it to the appropriate @@ -1898,7 +1893,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, } // Some CCs need callee pop. - if (Subtarget->IsCalleePop(isVarArg, CallConv)) { + if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, GuaranteedTailCallOpt)) { FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything. } else { FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing. @@ -2383,7 +2378,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Create the CALLSEQ_END node.
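The byte count recorded on the CALLSEQ_END node depends on whether the callee pops its own arguments, and both lowering sites in this file now defer that decision to a single predicate, used just below and defined further down in this hunk. A standalone sketch of the rule it implements; the enum and helper here are illustrative stand-ins, not LLVM's types:

    // Callee-pop rule: vararg calls are always caller-cleaned; the Microsoft
    // conventions callee-pop only in 32-bit mode; fastcc and GHC callee-pop
    // only when guaranteed tail call optimization is enabled.
    enum class Conv { C, StdCall, FastCall, ThisCall, Fast, GHC };

    static bool calleePopsArguments(Conv CC, bool Is64Bit, bool IsVarArg,
                                    bool GuaranteedTailCallOpt) {
      if (IsVarArg)
        return false;
      switch (CC) {
      case Conv::StdCall:
      case Conv::FastCall:
      case Conv::ThisCall:
        return !Is64Bit;
      case Conv::Fast:
      case Conv::GHC:
        return GuaranteedTailCallOpt;
      default:
        return false;
      }
    }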
unsigned NumBytesForCalleeToPush; - if (Subtarget->IsCalleePop(isVarArg, CallConv)) + if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, GuaranteedTailCallOpt)) NumBytesForCalleeToPush = NumBytes; // Callee pops everything else if (!Is64Bit && !IsTailCallConvention(CallConv) && IsStructRet) // If this is a call to a struct-return function, the callee @@ -2505,6 +2500,10 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, if (!FINode) return false; FI = FINode->getIndex(); + } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) { + FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg); + FI = FINode->getIndex(); + Bytes = Flags.getByValSize(); } else return false; @@ -2556,6 +2555,11 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, if (isCalleeStructRet || isCallerStructRet) return false; + // An stdcall caller is expected to clean up its arguments; the callee + // isn't going to do that. + if (!CCMatch && CallerCC==CallingConv::X86_StdCall) + return false; + // Do not sibcall optimize vararg calls unless all arguments are passed via // registers. if (isVarArg && !Outs.empty()) { @@ -2692,11 +2696,6 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, } } - // An stdcall caller is expected to clean up its arguments; the callee - // isn't going to do that. - if (!CCMatch && CallerCC==CallingConv::X86_StdCall) - return false; - return true; } @@ -2876,6 +2875,29 @@ bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, return false; } +/// isCalleePop - Determines whether the callee is required to pop its +/// own arguments. Callee pop is necessary to support tail calls. +bool X86::isCalleePop(CallingConv::ID CallingConv, + bool is64Bit, bool IsVarArg, bool TailCallOpt) { + if (IsVarArg) + return false; + + switch (CallingConv) { + default: + return false; + case CallingConv::X86_StdCall: + return !is64Bit; + case CallingConv::X86_FastCall: + return !is64Bit; + case CallingConv::X86_ThisCall: + return !is64Bit; + case CallingConv::Fast: + return TailCallOpt; + case CallingConv::GHC: + return TailCallOpt; + } +} + /// TranslateX86CC - do a one to one translation of a ISD::CondCode to the X86 /// specific condition code, returning the condition code and the LHS/RHS of the /// comparison to make. @@ -12853,69 +12875,41 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } -std::vector<unsigned> X86TargetLowering:: -getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const { +std::pair<unsigned, const TargetRegisterClass*> +X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, + EVT VT) const { + // First, see if this is a constraint that directly corresponds to an LLVM + // register class. if (Constraint.size() == 1) { - // FIXME: not handling fp-stack yet! - switch (Constraint[0]) { // GCC X86 Constraint Letters - default: break; // Unknown constraint letter + // GCC Constraint Letters + switch (Constraint[0]) { + default: break; + // TODO: Slight differences here in allocation order and leaving + // RIP in the class. Do they matter any more here than they do + // in the normal allocation? case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode. 
if (Subtarget->is64Bit()) { - if (VT == MVT::i32) - return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, - X86::ESI, X86::EDI, X86::R8D, X86::R9D, - X86::R10D,X86::R11D,X86::R12D, - X86::R13D,X86::R14D,X86::R15D, - X86::EBP, X86::ESP, 0); - else if (VT == MVT::i16) - return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, - X86::SI, X86::DI, X86::R8W,X86::R9W, - X86::R10W,X86::R11W,X86::R12W, - X86::R13W,X86::R14W,X86::R15W, - X86::BP, X86::SP, 0); - else if (VT == MVT::i8) - return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, - X86::SIL, X86::DIL, X86::R8B,X86::R9B, - X86::R10B,X86::R11B,X86::R12B, - X86::R13B,X86::R14B,X86::R15B, - X86::BPL, X86::SPL, 0); - - else if (VT == MVT::i64) - return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX, - X86::RSI, X86::RDI, X86::R8, X86::R9, - X86::R10, X86::R11, X86::R12, - X86::R13, X86::R14, X86::R15, - X86::RBP, X86::RSP, 0); - - break; + if (VT == MVT::i32) + return std::make_pair(0U, X86::GR32RegisterClass); + else if (VT == MVT::i16) + return std::make_pair(0U, X86::GR16RegisterClass); + else if (VT == MVT::i8) + return std::make_pair(0U, X86::GR8RegisterClass); + else if (VT == MVT::i64) + return std::make_pair(0U, X86::GR64RegisterClass); + break; } // 32-bit fallthrough case 'Q': // Q_REGS if (VT == MVT::i32) - return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0); + return std::make_pair(0U, X86::GR32_ABCDRegisterClass); else if (VT == MVT::i16) - return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0); + return std::make_pair(0U, X86::GR16_ABCDRegisterClass); else if (VT == MVT::i8) - return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0); + return std::make_pair(0U, X86::GR8_ABCD_LRegisterClass); else if (VT == MVT::i64) - return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX, 0); + return std::make_pair(0U, X86::GR64_ABCDRegisterClass); break; - } - } - - return std::vector<unsigned>(); -} - -std::pair<unsigned, const TargetRegisterClass*> -X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const { - // First, see if this is a constraint that directly corresponds to an LLVM - // register class. - if (Constraint.size() == 1) { - // GCC Constraint Letters - switch (Constraint[0]) { - default: break; case 'r': // GENERAL_REGS case 'l': // INDEX_REGS if (VT == MVT::i8) diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index d61a125..d9c883f 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -466,6 +466,12 @@ namespace llvm { /// fit into displacement field of the instruction. bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, bool hasSymbolicDisplacement = true); + + + /// isCalleePop - Determines whether the callee is required to pop its + /// own arguments. Callee pop is necessary to support tail calls. 
+ bool isCalleePop(CallingConv::ID CallingConv, + bool is64Bit, bool IsVarArg, bool TailCallOpt); } //===--------------------------------------------------------------------===// @@ -590,10 +596,6 @@ namespace llvm { virtual ConstraintWeight getSingleConstraintMatchWeight( AsmOperandInfo &info, const char *constraint) const; - std::vector<unsigned> - getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const; - virtual const char *LowerXConstraint(EVT ConstraintVT) const; /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h index 1ea8071..0245e5c 100644 --- a/lib/Target/X86/X86InstrBuilder.h +++ b/lib/Target/X86/X86InstrBuilder.h @@ -150,11 +150,11 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) { MachineInstr *MI = MIB; MachineFunction &MF = *MI->getParent()->getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); - const TargetInstrDesc &TID = MI->getDesc(); + const MCInstrDesc &MCID = MI->getDesc(); unsigned Flags = 0; - if (TID.mayLoad()) + if (MCID.mayLoad()) Flags |= MachineMemOperand::MOLoad; - if (TID.mayStore()) + if (MCID.mayStore()) Flags |= MachineMemOperand::MOStore; MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI, Offset), diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index b506f5e..7cb870f 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -112,31 +112,8 @@ let usesCustomInserter = 1 in { // Expanded after instruction selection. // a pattern) and the FPI instruction should have emission info (e.g. opcode // encoding and asm printing info). -// Pseudo Instructions for FP stack return values. -def FpGET_ST0_32 : FpI_<(outs RFP32:$dst), (ins), SpecialFP, []>; // FPR = ST(0) -def FpGET_ST0_64 : FpI_<(outs RFP64:$dst), (ins), SpecialFP, []>; // FPR = ST(0) -def FpGET_ST0_80 : FpI_<(outs RFP80:$dst), (ins), SpecialFP, []>; // FPR = ST(0) - -// FpGET_ST1* should only be issued *after* an FpGET_ST0* has been issued when -// there are two values live out on the stack from a call or inlineasm. This -// magic is handled by the stackifier. It is not valid to emit FpGET_ST1* and -// then FpGET_ST0*. In addition, it is invalid for any FP-using operations to -// occur between them. -def FpGET_ST1_32 : FpI_<(outs RFP32:$dst), (ins), SpecialFP, []>; // FPR = ST(1) -def FpGET_ST1_64 : FpI_<(outs RFP64:$dst), (ins), SpecialFP, []>; // FPR = ST(1) -def FpGET_ST1_80 : FpI_<(outs RFP80:$dst), (ins), SpecialFP, []>; // FPR = ST(1) - -let Defs = [ST0] in { -def FpSET_ST0_32 : FpI_<(outs), (ins RFP32:$src), SpecialFP, []>; // ST(0) = FPR -def FpSET_ST0_64 : FpI_<(outs), (ins RFP64:$src), SpecialFP, []>; // ST(0) = FPR -def FpSET_ST0_80 : FpI_<(outs), (ins RFP80:$src), SpecialFP, []>; // ST(0) = FPR -} - -let Defs = [ST1] in { -def FpSET_ST1_32 : FpI_<(outs), (ins RFP32:$src), SpecialFP, []>; // ST(1) = FPR -def FpSET_ST1_64 : FpI_<(outs), (ins RFP64:$src), SpecialFP, []>; // ST(1) = FPR -def FpSET_ST1_80 : FpI_<(outs), (ins RFP80:$src), SpecialFP, []>; // ST(1) = FPR -} +// Pseudo Instruction for FP stack return values. +def FpPOP_RETVAL : FpI_<(outs RFP80:$dst), (ins), SpecialFP, []>; // FpIf32, FpIf64 - Floating Point Pseudo Instruction template. // f32 instructions can use SSE1 and are predicated on FPStackf32 == !SSE1. 
@@ -147,19 +124,6 @@ class FpIf32<dag outs, dag ins, FPFormat fp, list<dag> pattern> : class FpIf64<dag outs, dag ins, FPFormat fp, list<dag> pattern> : FpI_<outs, ins, fp, pattern>, Requires<[FPStackf64]>; -// Register copies. Just copies, the shortening ones do not truncate. -let neverHasSideEffects = 1 in { - def MOV_Fp3232 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src), SpecialFP, []>; - def MOV_Fp3264 : FpIf32<(outs RFP64:$dst), (ins RFP32:$src), SpecialFP, []>; - def MOV_Fp6432 : FpIf32<(outs RFP32:$dst), (ins RFP64:$src), SpecialFP, []>; - def MOV_Fp6464 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src), SpecialFP, []>; - def MOV_Fp8032 : FpIf32<(outs RFP32:$dst), (ins RFP80:$src), SpecialFP, []>; - def MOV_Fp3280 : FpIf32<(outs RFP80:$dst), (ins RFP32:$src), SpecialFP, []>; - def MOV_Fp8064 : FpIf64<(outs RFP64:$dst), (ins RFP80:$src), SpecialFP, []>; - def MOV_Fp6480 : FpIf64<(outs RFP80:$dst), (ins RFP64:$src), SpecialFP, []>; - def MOV_Fp8080 : FpI_ <(outs RFP80:$dst), (ins RFP80:$src), SpecialFP, []>; -} - // Factoring for arithmetic. multiclass FPBinary_rr<SDNode OpNode> { // Register op register -> register diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index aebf8dc..d44bd35 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -13,7 +13,6 @@ #include "X86InstrInfo.h" #include "X86.h" -#include "X86GenInstrInfo.inc" #include "X86InstrBuilder.h" #include "X86MachineFunctionInfo.h" #include "X86Subtarget.h" @@ -36,6 +35,9 @@ #include "llvm/MC/MCAsmInfo.h" #include <limits> +#define GET_INSTRINFO_MC_DESC +#include "X86GenInstrInfo.inc" + using namespace llvm; static cl::opt<bool> @@ -52,7 +54,13 @@ ReMatPICStubLoad("remat-pic-stub-load", cl::init(false), cl::Hidden); X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) - : TargetInstrInfoImpl(X86Insts, array_lengthof(X86Insts)), + : TargetInstrInfoImpl(X86Insts, array_lengthof(X86Insts), + (tm.getSubtarget<X86Subtarget>().is64Bit() + ? X86::ADJCALLSTACKDOWN64 + : X86::ADJCALLSTACKDOWN32), + (tm.getSubtarget<X86Subtarget>().is64Bit() + ? X86::ADJCALLSTACKUP64 + : X86::ADJCALLSTACKUP32)), TM(tm), RI(tm, *this) { enum { TB_NOT_REVERSABLE = 1U << 31, @@ -1689,13 +1697,13 @@ X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) { } bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { - const TargetInstrDesc &TID = MI->getDesc(); - if (!TID.isTerminator()) return false; + const MCInstrDesc &MCID = MI->getDesc(); + if (!MCID.isTerminator()) return false; // Conditional branch is a special case. - if (TID.isBranch() && !TID.isBarrier()) + if (MCID.isBranch() && !MCID.isBarrier()) return true; - if (!TID.isPredicable()) + if (!MCID.isPredicable()) return true; return !isPredicated(MI); } @@ -2225,7 +2233,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, bool isTwoAddrFold = false; unsigned NumOps = MI->getDesc().getNumOperands(); bool isTwoAddr = NumOps > 1 && - MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1; + MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1; // FIXME: AsmPrinter doesn't know how to handle // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding. @@ -2274,7 +2282,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, return NULL; bool NarrowToMOV32rm = false; if (Size) { - unsigned RCSize = MI->getDesc().OpInfo[i].getRegClass(&RI)->getSize(); + unsigned RCSize = getRegClass(MI->getDesc(), i, &RI)->getSize(); if (Size < RCSize) { // Check if it's safe to fold the load. 
If the size of the object is // narrower than the load width, then it's not. @@ -2543,7 +2551,7 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, unsigned Opc = MI->getOpcode(); unsigned NumOps = MI->getDesc().getNumOperands(); bool isTwoAddr = NumOps > 1 && - MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1; + MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1; // Folding a memory location into the two-address part of a two-address // instruction is different than folding it other places. It requires @@ -2589,9 +2597,8 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, return false; UnfoldStore &= FoldedStore; - const TargetInstrDesc &TID = get(Opc); - const TargetOperandInfo &TOI = TID.OpInfo[Index]; - const TargetRegisterClass *RC = TOI.getRegClass(&RI); + const MCInstrDesc &MCID = get(Opc); + const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI); if (!MI->hasOneMemOperand() && RC == &X86::VR128RegClass && !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast()) @@ -2633,7 +2640,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, } // Emit the data processing instruction. - MachineInstr *DataMI = MF.CreateMachineInstr(TID, MI->getDebugLoc(), true); + MachineInstr *DataMI = MF.CreateMachineInstr(MCID, MI->getDebugLoc(), true); MachineInstrBuilder MIB(DataMI); if (FoldedStore) @@ -2686,7 +2693,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, // Emit the store instruction. if (UnfoldStore) { - const TargetRegisterClass *DstRC = TID.OpInfo[0].getRegClass(&RI); + const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI); std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator> MMOs = MF.extractStoreMemRefs(MI->memoperands_begin(), @@ -2711,9 +2718,9 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, unsigned Index = I->second.second & 0xf; bool FoldedLoad = I->second.second & (1 << 4); bool FoldedStore = I->second.second & (1 << 5); - const TargetInstrDesc &TID = get(Opc); - const TargetRegisterClass *RC = TID.OpInfo[Index].getRegClass(&RI); - unsigned NumDefs = TID.NumDefs; + const MCInstrDesc &MCID = get(Opc); + const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI); + unsigned NumDefs = MCID.NumDefs; std::vector<SDValue> AddrOps; std::vector<SDValue> BeforeOps; std::vector<SDValue> AfterOps; @@ -2757,13 +2764,13 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, // Emit the data processing instruction. 
std::vector<EVT> VTs; const TargetRegisterClass *DstRC = 0; - if (TID.getNumDefs() > 0) { - DstRC = TID.OpInfo[0].getRegClass(&RI); + if (MCID.getNumDefs() > 0) { + DstRC = getRegClass(MCID, 0, &RI); VTs.push_back(*DstRC->vt_begin()); } for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { EVT VT = N->getValueType(i); - if (VT != MVT::Other && i >= (unsigned)TID.getNumDefs()) + if (VT != MVT::Other && i >= (unsigned)MCID.getNumDefs()) VTs.push_back(VT); } if (Load) diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 8377c3a..0bfc5e7 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -1991,11 +1991,11 @@ def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), // There is no AVX form for instructions below this point def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), - "movnti\t{$src, $dst|$dst, $src}", + "movnti{l}\t{$src, $dst|$dst, $src}", [(nontemporalstore (i32 GR32:$src), addr:$dst)]>, TB, Requires<[HasSSE2]>; def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), - "movnti\t{$src, $dst|$dst, $src}", + "movnti{q}\t{$src, $dst|$dst, $src}", [(nontemporalstore (i64 GR64:$src), addr:$dst)]>, TB, Requires<[HasSSE2]>; } diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td index f73cff3..31de878 100644 --- a/lib/Target/X86/X86InstrSystem.td +++ b/lib/Target/X86/X86InstrSystem.td @@ -411,6 +411,8 @@ let Uses = [RDX, RAX, RCX] in let Defs = [RAX, RDI], Uses = [RDX, RDI] in def XSTORE : I<0xc0, RawFrm, (outs), (ins), "xstore", []>, A7; +def : InstAlias<"xstorerng", (XSTORE)>; + let Defs = [RSI, RDI], Uses = [RBX, RDX, RSI, RDI] in { def XCRYPTECB : I<0xc8, RawFrm, (outs), (ins), "xcryptecb", []>, A7; def XCRYPTCBC : I<0xd0, RawFrm, (outs), (ins), "xcryptcbc", []>, A7; diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp index 55aceba..04149e7 100644 --- a/lib/Target/X86/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/X86MCCodeEmitter.cpp @@ -111,7 +111,7 @@ public: SmallVectorImpl<MCFixup> &Fixups) const; void EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand, - const MCInst &MI, const TargetInstrDesc &Desc, + const MCInst &MI, const MCInstrDesc &Desc, raw_ostream &OS) const; void EmitSegmentOverridePrefix(uint64_t TSFlags, unsigned &CurByte, @@ -119,7 +119,7 @@ public: raw_ostream &OS) const; void EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand, - const MCInst &MI, const TargetInstrDesc &Desc, + const MCInst &MI, const MCInstrDesc &Desc, raw_ostream &OS) const; }; @@ -379,7 +379,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, /// called VEX. void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand, const MCInst &MI, - const TargetInstrDesc &Desc, + const MCInstrDesc &Desc, raw_ostream &OS) const { bool HasVEX_4V = false; if ((TSFlags >> X86II::VEXShift) & X86II::VEX_4V) @@ -586,7 +586,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, /// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand /// size, and 3) use of X86-64 extended registers. 
static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags, - const TargetInstrDesc &Desc) { + const MCInstrDesc &Desc) { unsigned REX = 0; if (TSFlags & X86II::REX_W) REX |= 1 << 3; // set REX.W @@ -596,7 +596,7 @@ static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags, unsigned NumOps = MI.getNumOperands(); // FIXME: MCInst should explicitize the two-addrness. bool isTwoAddr = NumOps > 1 && - Desc.getOperandConstraint(1, TOI::TIED_TO) != -1; + Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1; // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix. unsigned i = isTwoAddr ? 1 : 0; @@ -713,7 +713,7 @@ void X86MCCodeEmitter::EmitSegmentOverridePrefix(uint64_t TSFlags, /// Not present, it is -1. void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand, const MCInst &MI, - const TargetInstrDesc &Desc, + const MCInstrDesc &Desc, raw_ostream &OS) const { // Emit the lock opcode prefix as needed. @@ -803,7 +803,7 @@ void X86MCCodeEmitter:: EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const { unsigned Opcode = MI.getOpcode(); - const TargetInstrDesc &Desc = TII.get(Opcode); + const MCInstrDesc &Desc = TII.get(Opcode); uint64_t TSFlags = Desc.TSFlags; // Pseudo instructions don't get encoded. @@ -814,9 +814,9 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, // FIXME: This should be handled during MCInst lowering. unsigned NumOps = Desc.getNumOperands(); unsigned CurOp = 0; - if (NumOps > 1 && Desc.getOperandConstraint(1, TOI::TIED_TO) != -1) + if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1) ++CurOp; - else if (NumOps > 2 && Desc.getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0) + else if (NumOps > 2 && Desc.getOperandConstraint(NumOps-1, MCOI::TIED_TO)== 0) // Skip the last source operand that is tied_to the dest reg. e.g. 
LXADD32 --NumOps; diff --git a/lib/Target/X86/X86MachObjectWriter.cpp b/lib/Target/X86/X86MachObjectWriter.cpp index 8f3dd32..3711038 100644 --- a/lib/Target/X86/X86MachObjectWriter.cpp +++ b/lib/Target/X86/X86MachObjectWriter.cpp @@ -8,19 +8,541 @@ //===----------------------------------------------------------------------===// #include "X86.h" +#include "X86FixupKinds.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCMachObjectWriter.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Object/MachOFormat.h" + using namespace llvm; +using namespace llvm::object; namespace { class X86MachObjectWriter : public MCMachObjectTargetWriter { + void RecordScatteredRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + unsigned Log2Size, + uint64_t &FixedValue); + void RecordTLVPRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + uint64_t &FixedValue); + + void RecordX86Relocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + uint64_t &FixedValue); + void RecordX86_64Relocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + uint64_t &FixedValue); public: X86MachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype) : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype, /*UseAggressiveSymbolFolding=*/Is64Bit) {} + + void RecordRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFragment *Fragment, const MCFixup &Fixup, + MCValue Target, uint64_t &FixedValue) { + if (Writer->is64Bit()) + RecordX86_64Relocation(Writer, Asm, Layout, Fragment, Fixup, Target, + FixedValue); + else + RecordX86Relocation(Writer, Asm, Layout, Fragment, Fixup, Target, + FixedValue); + } }; } +static bool isFixupKindRIPRel(unsigned Kind) { + return Kind == X86::reloc_riprel_4byte || + Kind == X86::reloc_riprel_4byte_movq_load; +} + +static unsigned getFixupKindLog2Size(unsigned Kind) { + switch (Kind) { + default: + llvm_unreachable("invalid fixup kind!"); + case FK_PCRel_1: + case FK_Data_1: return 0; + case FK_PCRel_2: + case FK_Data_2: return 1; + case FK_PCRel_4: + // FIXME: Remove these!!! + case X86::reloc_riprel_4byte: + case X86::reloc_riprel_4byte_movq_load: + case X86::reloc_signed_4byte: + case FK_Data_4: return 2; + case FK_Data_8: return 3; + } +} + +void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + uint64_t &FixedValue) { + unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); + unsigned IsRIPRel = isFixupKindRIPRel(Fixup.getKind()); + unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); + + // See <reloc.h>. 
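The reference to <reloc.h> applies to everything that follows: every relocation entry built in this file packs the classic relocation_info fields into Word1 with the same shifts. Distilled into a sketch, with field widths as in <mach-o/reloc.h>; the helper name is illustrative:

    #include <cstdint>

    // Non-scattered relocation_info layout: r_symbolnum (24 bits), r_pcrel
    // (1 bit), r_length (2 bits, the log2 of the fixup size), r_extern
    // (1 bit) and r_type (4 bits), packed from low bits to high.
    static uint32_t packRelocationWord1(uint32_t Index, bool IsPCRel,
                                        uint32_t Log2Size, bool IsExtern,
                                        uint32_t Type) {
      return (Index << 0) |
             ((IsPCRel ? 1u : 0u) << 24) |
             (Log2Size << 25) |
             ((IsExtern ? 1u : 0u) << 27) |
             (Type << 28);
    }

Every MRE.Word1 expression below is an inlined copy of this packing.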
+ uint32_t FixupOffset = + Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); + uint32_t FixupAddress = + Writer->getFragmentAddress(Fragment, Layout) + Fixup.getOffset(); + int64_t Value = 0; + unsigned Index = 0; + unsigned IsExtern = 0; + unsigned Type = 0; + + Value = Target.getConstant(); + + if (IsPCRel) { + // Compensate for the relocation offset, Darwin x86_64 relocations only have + // the addend and appear to have attempted to define it to be the actual + // expression addend without the PCrel bias. However, instructions with data + // following the relocation are not accommodated for (see comment below + // regarding SIGNED{1,2,4}), so it isn't exactly that either. + Value += 1LL << Log2Size; + } + + if (Target.isAbsolute()) { // constant + // SymbolNum of 0 indicates the absolute section. + Type = macho::RIT_X86_64_Unsigned; + Index = 0; + + // FIXME: I believe this is broken, I don't think the linker can understand + // it. I think it would require a local relocation, but I'm not sure if that + // would work either. The official way to get an absolute PCrel relocation + // is to use an absolute symbol (which we don't support yet). + if (IsPCRel) { + IsExtern = 1; + Type = macho::RIT_X86_64_Branch; + } + } else if (Target.getSymB()) { // A - B + constant + const MCSymbol *A = &Target.getSymA()->getSymbol(); + MCSymbolData &A_SD = Asm.getSymbolData(*A); + const MCSymbolData *A_Base = Asm.getAtom(&A_SD); + + const MCSymbol *B = &Target.getSymB()->getSymbol(); + MCSymbolData &B_SD = Asm.getSymbolData(*B); + const MCSymbolData *B_Base = Asm.getAtom(&B_SD); + + // Neither symbol can be modified. + if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None || + Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) + report_fatal_error("unsupported relocation of modified symbol"); + + // We don't support PCrel relocations of differences. Darwin 'as' doesn't + // implement most of these correctly. + if (IsPCRel) + report_fatal_error("unsupported pc-relative relocation of difference"); + + // The support for the situation where one or both of the symbols would + // require a local relocation is handled just like if the symbols were + // external. This is certainly used in the case of debug sections where the + // section has only temporary symbols and thus the symbols don't have base + // symbols. This is encoded using the section ordinal and non-extern + // relocation entries. + + // Darwin 'as' doesn't emit correct relocations for this (it ends up with a + // single SIGNED relocation); reject it for now. Except the case where both + // symbols don't have a base, equal but both NULL. + if (A_Base == B_Base && A_Base) + report_fatal_error("unsupported relocation with identical base"); + + Value += Writer->getSymbolAddress(&A_SD, Layout) - + (A_Base == NULL ? 0 : Writer->getSymbolAddress(A_Base, Layout)); + Value -= Writer->getSymbolAddress(&B_SD, Layout) - + (B_Base == NULL ? 
0 : Writer->getSymbolAddress(B_Base, Layout)); + + if (A_Base) { + Index = A_Base->getIndex(); + IsExtern = 1; + } + else { + Index = A_SD.getFragment()->getParent()->getOrdinal() + 1; + IsExtern = 0; + } + Type = macho::RIT_X86_64_Unsigned; + + macho::RelocationEntry MRE; + MRE.Word0 = FixupOffset; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); + Writer->addRelocation(Fragment->getParent(), MRE); + + if (B_Base) { + Index = B_Base->getIndex(); + IsExtern = 1; + } + else { + Index = B_SD.getFragment()->getParent()->getOrdinal() + 1; + IsExtern = 0; + } + Type = macho::RIT_X86_64_Subtractor; + } else { + const MCSymbol *Symbol = &Target.getSymA()->getSymbol(); + MCSymbolData &SD = Asm.getSymbolData(*Symbol); + const MCSymbolData *Base = Asm.getAtom(&SD); + + // Relocations inside debug sections always use local relocations when + // possible. This seems to be done because the debugger doesn't fully + // understand x86_64 relocation entries, and expects to find values that + // have already been fixed up. + if (Symbol->isInSection()) { + const MCSectionMachO &Section = static_cast<const MCSectionMachO&>( + Fragment->getParent()->getSection()); + if (Section.hasAttribute(MCSectionMachO::S_ATTR_DEBUG)) + Base = 0; + } + + // x86_64 almost always uses external relocations, except when there is no + // symbol to use as a base address (a local symbol with no preceding + // non-local symbol). + if (Base) { + Index = Base->getIndex(); + IsExtern = 1; + + // Add the local offset, if needed. + if (Base != &SD) + Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base); + } else if (Symbol->isInSection() && !Symbol->isVariable()) { + // The index is the section ordinal (1-based). + Index = SD.getFragment()->getParent()->getOrdinal() + 1; + IsExtern = 0; + Value += Writer->getSymbolAddress(&SD, Layout); + + if (IsPCRel) + Value -= FixupAddress + (1 << Log2Size); + } else if (Symbol->isVariable()) { + const MCExpr *Value = Symbol->getVariableValue(); + int64_t Res; + bool isAbs = Value->EvaluateAsAbsolute(Res, Layout, + Writer->getSectionAddressMap()); + if (isAbs) { + FixedValue = Res; + return; + } else { + report_fatal_error("unsupported relocation of variable '" + + Symbol->getName() + "'"); + } + } else { + report_fatal_error("unsupported relocation of undefined symbol '" + + Symbol->getName() + "'"); + } + + MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind(); + if (IsPCRel) { + if (IsRIPRel) { + if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) { + // x86_64 distinguishes movq foo@GOTPCREL so that the linker can + // rewrite the movq to an leaq at link time if the symbol ends up in + // the same linkage unit. + if (unsigned(Fixup.getKind()) == X86::reloc_riprel_4byte_movq_load) + Type = macho::RIT_X86_64_GOTLoad; + else + Type = macho::RIT_X86_64_GOT; + } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { + Type = macho::RIT_X86_64_TLV; + } else if (Modifier != MCSymbolRefExpr::VK_None) { + report_fatal_error("unsupported symbol modifier in relocation"); + } else { + Type = macho::RIT_X86_64_Signed; + + // The Darwin x86_64 relocation format has a problem where it cannot + // encode an address (L<foo> + <constant>) which is outside the atom + // containing L<foo>. Generally, this shouldn't occur but it does + // happen when we have a RIPrel instruction with data following the + // relocation entry (e.g., movb $012, L0(%rip)). 
Even with the PCrel + adjustment Darwin x86_64 uses, the offset is still negative and the + // linker has no way to recognize this. + // + // To work around this, Darwin uses several special relocation types + // to indicate the offsets. However, the specification or + // implementation of these seems to also be incomplete; they should + // adjust the addend as well based on the actual encoded instruction + // (the additional bias), but instead appear to just look at the final + // offset. + switch (-(Target.getConstant() + (1LL << Log2Size))) { + case 1: Type = macho::RIT_X86_64_Signed1; break; + case 2: Type = macho::RIT_X86_64_Signed2; break; + case 4: Type = macho::RIT_X86_64_Signed4; break; + } + } + } else { + if (Modifier != MCSymbolRefExpr::VK_None) + report_fatal_error("unsupported symbol modifier in branch " + "relocation"); + + Type = macho::RIT_X86_64_Branch; + } + } else { + if (Modifier == MCSymbolRefExpr::VK_GOT) { + Type = macho::RIT_X86_64_GOT; + } else if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) { + // GOTPCREL is allowed as a modifier on non-PCrel instructions, in which + // case all we do is set the PCrel bit in the relocation entry; this is + // used with exception handling, for example. The source is required to + // include any necessary offset directly. + Type = macho::RIT_X86_64_GOT; + IsPCRel = 1; + } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { + report_fatal_error("TLVP symbol modifier should have been rip-rel"); + } else if (Modifier != MCSymbolRefExpr::VK_None) + report_fatal_error("unsupported symbol modifier in relocation"); + else + Type = macho::RIT_X86_64_Unsigned; + } + } + + // x86_64 always writes custom values into the fixups. + FixedValue = Value; + + // struct relocation_info (8 bytes) + macho::RelocationEntry MRE; + MRE.Word0 = FixupOffset; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); + Writer->addRelocation(Fragment->getParent(), MRE); +} + +void X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + unsigned Log2Size, + uint64_t &FixedValue) { + uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); + unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); + unsigned Type = macho::RIT_Vanilla; + + // See <reloc.h>. + const MCSymbol *A = &Target.getSymA()->getSymbol(); + MCSymbolData *A_SD = &Asm.getSymbolData(*A); + + if (!A_SD->getFragment()) + report_fatal_error("symbol '" + A->getName() + + "' can not be undefined in a subtraction expression"); + + uint32_t Value = Writer->getSymbolAddress(A_SD, Layout); + uint64_t SecAddr = Writer->getSectionAddress(A_SD->getFragment()->getParent()); + FixedValue += SecAddr; + uint32_t Value2 = 0; + + if (const MCSymbolRefExpr *B = Target.getSymB()) { + MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); + + if (!B_SD->getFragment()) + report_fatal_error("symbol '" + B->getSymbol().getName() + + "' can not be undefined in a subtraction expression"); + + // Select the appropriate difference relocation type. + // + // Note that there is no longer any semantic difference between these two + // relocation types from the linker's point of view; this is done solely for + // pedantic compatibility with 'as'. + Type = A_SD->isExternal() ?
(unsigned)macho::RIT_Difference : + (unsigned)macho::RIT_Generic_LocalDifference; + Value2 = Writer->getSymbolAddress(B_SD, Layout); + FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent()); + } + + // Relocations are written out in reverse order, so the PAIR comes first. + if (Type == macho::RIT_Difference || + Type == macho::RIT_Generic_LocalDifference) { + macho::RelocationEntry MRE; + MRE.Word0 = ((0 << 0) | + (macho::RIT_Pair << 24) | + (Log2Size << 28) | + (IsPCRel << 30) | + macho::RF_Scattered); + MRE.Word1 = Value2; + Writer->addRelocation(Fragment->getParent(), MRE); + } + + macho::RelocationEntry MRE; + MRE.Word0 = ((FixupOffset << 0) | + (Type << 24) | + (Log2Size << 28) | + (IsPCRel << 30) | + macho::RF_Scattered); + MRE.Word1 = Value; + Writer->addRelocation(Fragment->getParent(), MRE); +} + +void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + uint64_t &FixedValue) { + assert(Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP && + !is64Bit() && + "Should only be called with a 32-bit TLVP relocation!"); + + unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); + uint32_t Value = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); + unsigned IsPCRel = 0; + + // Get the symbol data. + MCSymbolData *SD_A = &Asm.getSymbolData(Target.getSymA()->getSymbol()); + unsigned Index = SD_A->getIndex(); + + // We're only going to have a second symbol in pic mode and it'll be a + // subtraction from the picbase. For 32-bit pic the addend is the difference + // between the picbase and the next address. For 32-bit static the addend is + // zero. + if (Target.getSymB()) { + // If this is a subtraction then we're pcrel. + uint32_t FixupAddress = + Writer->getFragmentAddress(Fragment, Layout) + Fixup.getOffset(); + MCSymbolData *SD_B = &Asm.getSymbolData(Target.getSymB()->getSymbol()); + IsPCRel = 1; + FixedValue = (FixupAddress - Writer->getSymbolAddress(SD_B, Layout) + + Target.getConstant()); + FixedValue += 1ULL << Log2Size; + } else { + FixedValue = 0; + } + + // struct relocation_info (8 bytes) + macho::RelocationEntry MRE; + MRE.Word0 = Value; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (1 << 27) | // Extern + (macho::RIT_Generic_TLV << 28)); // Type + Writer->addRelocation(Fragment->getParent(), MRE); +} + +void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + uint64_t &FixedValue) { + unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); + unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); + + // If this is a 32-bit TLVP reloc it's handled a bit differently. + if (Target.getSymA() && + Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP) { + RecordTLVPRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, + FixedValue); + return; + } + + // If this is a difference or a defined symbol plus an offset, then we need a + // scattered relocation entry. Differences always require scattered + // relocations. + if (Target.getSymB()) + return RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, + Target, Log2Size, FixedValue); + + // Get the symbol data, if any. 
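At this point the 32-bit writer has handled the TLVP and difference cases, and one more scattered case follows once the symbol data is in hand. The dispatch, summarized as a sketch; the enum and helper are illustrative only:

    #include <cstdint>

    enum class X86RelocPath { TLVP, Scattered, Vanilla };

    // Dispatch order of RecordX86Relocation: TLVP fixups get their own entry
    // kind; differences (A - B) always go scattered; internal relocations
    // with a nonzero offset also go scattered; the rest are vanilla.
    static X86RelocPath classifyX86Reloc(bool IsTLVP, bool HasSymB,
                                         bool IsInternalSym, int64_t Offset) {
      if (IsTLVP) return X86RelocPath::TLVP;
      if (HasSymB) return X86RelocPath::Scattered;
      if (IsInternalSym && Offset != 0) return X86RelocPath::Scattered;
      return X86RelocPath::Vanilla;
    }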
+ MCSymbolData *SD = 0; + if (Target.getSymA()) + SD = &Asm.getSymbolData(Target.getSymA()->getSymbol()); + + // If this is an internal relocation with an offset, it also needs a scattered + // relocation entry. + uint32_t Offset = Target.getConstant(); + if (IsPCRel) + Offset += 1 << Log2Size; + if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD)) + return RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, + Target, Log2Size, FixedValue); + + // See <reloc.h>. + uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); + unsigned Index = 0; + unsigned IsExtern = 0; + unsigned Type = 0; + + if (Target.isAbsolute()) { // constant + // SymbolNum of 0 indicates the absolute section. + // + // FIXME: Currently, these are never generated (see code below). I cannot + // find a case where they are actually emitted. + Type = macho::RIT_Vanilla; + } else { + // Resolve constant variables. + if (SD->getSymbol().isVariable()) { + int64_t Res; + if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute( + Res, Layout, Writer->getSectionAddressMap())) { + FixedValue = Res; + return; + } + } + + // Check whether we need an external or internal relocation. + if (Writer->doesSymbolRequireExternRelocation(SD)) { + IsExtern = 1; + Index = SD->getIndex(); + // For external relocations, make sure to offset the fixup value to + // compensate for the addend of the symbol address, if it was + // undefined. This occurs with weak definitions, for example. + if (!SD->Symbol->isUndefined()) + FixedValue -= Layout.getSymbolOffset(SD); + } else { + // The index is the section ordinal (1-based). + const MCSectionData &SymSD = Asm.getSectionData( + SD->getSymbol().getSection()); + Index = SymSD.getOrdinal() + 1; + FixedValue += Writer->getSectionAddress(&SymSD); + } + if (IsPCRel) + FixedValue -= Writer->getSectionAddress(Fragment->getParent()); + + Type = macho::RIT_Vanilla; + } + + // struct relocation_info (8 bytes) + macho::RelocationEntry MRE; + MRE.Word0 = FixupOffset; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); + Writer->addRelocation(Fragment->getParent(), MRE); +} + MCObjectWriter *llvm::createX86MachObjectWriter(raw_ostream &OS, bool Is64Bit, uint32_t CPUType, diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index fa3e3f8..d32b822 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -39,6 +39,11 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/CommandLine.h" + +#define GET_REGINFO_MC_DESC +#define GET_REGINFO_TARGET_DESC +#include "X86GenRegisterInfo.inc" + using namespace llvm; cl::opt<bool> @@ -49,13 +54,7 @@ ForceStackAlign("force-align-stack", X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii) - : X86GenRegisterInfo(tm.getSubtarget<X86Subtarget>().is64Bit() ? - X86::ADJCALLSTACKDOWN64 : - X86::ADJCALLSTACKDOWN32, - tm.getSubtarget<X86Subtarget>().is64Bit() ? - X86::ADJCALLSTACKUP64 : - X86::ADJCALLSTACKUP32), - TM(tm), TII(tii) { + : X86GenRegisterInfo(), TM(tm), TII(tii) { // Cache some information. 
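// Aside on the TableGen include scheme introduced in these hunks: one
// generated file, with the expanded section chosen by a guard macro. A
// sketch for a hypothetical target "Foo" (the guard names are the ones
// this patch uses):
//
//   // Foo.h: enums only
//   #define GET_REGINFO_ENUM
//   #include "FooGenRegisterInfo.inc"
//
//   // FooRegisterInfo.h: the generated class declaration
//   #define GET_REGINFO_HEADER
//   #include "FooGenRegisterInfo.inc"
//
//   // FooRegisterInfo.cpp: descriptor tables and method bodies
//   #define GET_REGINFO_MC_DESC
//   #define GET_REGINFO_TARGET_DESC
//   #include "FooGenRegisterInfo.inc"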
const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>(); Is64Bit = Subtarget->is64Bit(); @@ -106,6 +105,21 @@ int X86RegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const { return X86GenRegisterInfo::getLLVMRegNumFull(DwarfRegNo, Flavour); } +/// getCompactUnwindRegNum - This function maps the register to the number for +/// compact unwind encoding. Return -1 if the register isn't valid. +int X86RegisterInfo::getCompactUnwindRegNum(unsigned RegNum) const { + switch (RegNum) { + case X86::EBX: case X86::RBX: return 1; + case X86::ECX: case X86::R12: return 2; + case X86::EDX: case X86::R13: return 3; + case X86::EDI: case X86::R14: return 4; + case X86::ESI: case X86::R15: return 5; + case X86::EBP: case X86::RBP: return 6; + } + + return -1; +} + int X86RegisterInfo::getSEHRegNum(unsigned i) const { int reg = getX86RegNum(i); @@ -494,18 +508,6 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(X86::BPL); } - // Mark the x87 stack registers as reserved, since they don't behave normally - // with respect to liveness. We don't fully model the effects of x87 stack - // pushes and pops after stackification. - Reserved.set(X86::ST0); - Reserved.set(X86::ST1); - Reserved.set(X86::ST2); - Reserved.set(X86::ST3); - Reserved.set(X86::ST4); - Reserved.set(X86::ST5); - Reserved.set(X86::ST6); - Reserved.set(X86::ST7); - // Mark the segment registers as reserved. Reserved.set(X86::CS); Reserved.set(X86::SS); @@ -615,7 +617,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); bool reseveCallFrame = TFI->hasReservedCallFrame(MF); int Opcode = I->getOpcode(); - bool isDestroy = Opcode == getCallFrameDestroyOpcode(); + bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode(); DebugLoc DL = I->getDebugLoc(); uint64_t Amount = !reseveCallFrame ? I->getOperand(0).getImm() : 0; uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0; @@ -636,13 +638,13 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign; MachineInstr *New = 0; - if (Opcode == getCallFrameSetupOpcode()) { + if (Opcode == TII.getCallFrameSetupOpcode()) { New = BuildMI(MF, DL, TII.get(getSUBriOpcode(Is64Bit, Amount)), StackPtr) .addReg(StackPtr) .addImm(Amount); } else { - assert(Opcode == getCallFrameDestroyOpcode()); + assert(Opcode == TII.getCallFrameDestroyOpcode()); // Factor out the amount the callee already popped. Amount -= CalleeAmt; @@ -665,7 +667,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, return; } - if (Opcode == getCallFrameDestroyOpcode() && CalleeAmt) { + if (Opcode == TII.getCallFrameDestroyOpcode() && CalleeAmt) { // If we are performing frame pointer elimination and if the callee pops // something off the stack pointer, add it back. We do this until we have // more advanced stack pointer tracking ability. @@ -675,6 +677,13 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // The EFLAGS implicit def is dead. New->getOperand(3).setIsDead(); + + // We are not tracking the stack pointer adjustment by the callee, so make + // sure we restore the stack pointer immediately after the call, there may + // be spill code inserted between the CALL and ADJCALLSTACKUP instructions. 
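// Aside: a rough sketch of how the 1..6 values from getCompactUnwindRegNum
// above get consumed. Darwin's compact unwind encoding packs each saved
// register number into a 3-bit field of a 32-bit word; the helper below is
// illustrative only (field count and positions assumed, not this commit's
// code):
#include <cstdint>
static uint32_t packSavedRegs(const int RegNums[], unsigned NumRegs) {
  uint32_t Encoding = 0;
  for (unsigned i = 0; i != NumRegs; ++i)
    if (RegNums[i] != -1)                 // -1 means "not encodable"
      Encoding |= uint32_t(RegNums[i] & 0x7) << (3 * i);  // 3 bits each
  return Encoding;
}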
+ MachineBasicBlock::iterator B = MBB.begin(); + while (I != B && !llvm::prior(I)->getDesc().isCall()) + --I; MBB.insert(I, New); } } @@ -918,8 +927,6 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) { } } -#include "X86GenRegisterInfo.inc" - namespace { struct MSAH : public MachineFunctionPass { static char ID; diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index 9fd6ed5..a09c7ee 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -15,7 +15,9 @@ #define X86REGISTERINFO_H #include "llvm/Target/TargetRegisterInfo.h" -#include "X86GenRegisterInfo.h.inc" + +#define GET_REGINFO_HEADER +#include "X86GenRegisterInfo.inc" namespace llvm { class Type; @@ -79,6 +81,10 @@ public: // FIXME: This should be tablegen'd like getDwarfRegNum is int getSEHRegNum(unsigned i) const; + /// getCompactUnwindRegNum - This function maps the register to the number for + /// compact unwind encoding. Return -1 if the register isn't valid. + int getCompactUnwindRegNum(unsigned RegNum) const; + /// Code Generation virtual methods... /// diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index 14d6d64..203722a 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -206,15 +206,22 @@ let Namespace = "X86" in { def YMM15: RegisterWithSubRegs<"ymm15", [XMM15]>, DwarfRegAlias<XMM15>; } - // Floating point stack registers - def ST0 : Register<"st(0)">, DwarfRegNum<[33, 12, 11]>; - def ST1 : Register<"st(1)">, DwarfRegNum<[34, 13, 12]>; - def ST2 : Register<"st(2)">, DwarfRegNum<[35, 14, 13]>; - def ST3 : Register<"st(3)">, DwarfRegNum<[36, 15, 14]>; - def ST4 : Register<"st(4)">, DwarfRegNum<[37, 16, 15]>; - def ST5 : Register<"st(5)">, DwarfRegNum<[38, 17, 16]>; - def ST6 : Register<"st(6)">, DwarfRegNum<[39, 18, 17]>; - def ST7 : Register<"st(7)">, DwarfRegNum<[40, 19, 18]>; + class STRegister<string Name, list<Register> A> : Register<Name> { + let Aliases = A; + } + + // Floating point stack registers. These don't map one-to-one to the FP + // pseudo registers, but we still mark them as aliasing FP registers. That + // way both kinds can be live without exceeding the stack depth. ST registers + // are only live around inline assembly. 
+ def ST0 : STRegister<"st(0)", []>, DwarfRegNum<[33, 12, 11]>; + def ST1 : STRegister<"st(1)", [FP6]>, DwarfRegNum<[34, 13, 12]>; + def ST2 : STRegister<"st(2)", [FP5]>, DwarfRegNum<[35, 14, 13]>; + def ST3 : STRegister<"st(3)", [FP4]>, DwarfRegNum<[36, 15, 14]>; + def ST4 : STRegister<"st(4)", [FP3]>, DwarfRegNum<[37, 16, 15]>; + def ST5 : STRegister<"st(5)", [FP2]>, DwarfRegNum<[38, 17, 16]>; + def ST6 : STRegister<"st(6)", [FP1]>, DwarfRegNum<[39, 18, 17]>; + def ST7 : STRegister<"st(7)", [FP0]>, DwarfRegNum<[40, 19, 18]>; // Status flags register def EFLAGS : Register<"flags">; diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 481e821..d7f630c 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -20,7 +20,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Host.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/ADT/SmallVector.h" using namespace llvm; @@ -285,8 +284,9 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { } } -X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, - bool is64Bit) +X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, + const std::string &FS, + bool is64Bit, unsigned StackAlignOverride) : PICStyle(PICStyles::None) , X86SSELevel(NoMMXSSE) , X863DNowLevel(NoThreeDNow) @@ -308,15 +308,13 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, , TargetTriple(TT) , Is64Bit(is64Bit) { - // default to hard float ABI - if (FloatABIType == FloatABI::Default) - FloatABIType = FloatABI::Hard; - // Determine default and user specified characteristics - if (!FS.empty()) { + if (!CPU.empty() || !FS.empty()) { // If feature string is not empty, parse features string. - std::string CPU = sys::getHostCPUName(); - ParseSubtargetFeatures(FS, CPU); + std::string CPUName = CPU; + if (CPUName.empty()) + CPUName = sys::getHostCPUName(); + ParseSubtargetFeatures(FS, CPUName); // All X86-64 CPUs also have SSE2, however user might request no SSE via // -mattr, so don't force SSELevel here. if (HasAVX) @@ -346,33 +344,9 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, // Stack alignment is 16 bytes on Darwin, FreeBSD, Linux and Solaris (both // 32 and 64 bit) and for all 64-bit targets. - if (isTargetDarwin() || isTargetFreeBSD() || isTargetLinux() || - isTargetSolaris() || Is64Bit) + if (StackAlignOverride) + stackAlignment = StackAlignOverride; + else if (isTargetDarwin() || isTargetFreeBSD() || isTargetLinux() || + isTargetSolaris() || Is64Bit) stackAlignment = 16; - - if (StackAlignment) - stackAlignment = StackAlignment; -} - -/// IsCalleePop - Determines whether the callee is required to pop its -/// own arguments. Callee pop is necessary to support tail calls. 
-bool X86Subtarget::IsCalleePop(bool IsVarArg, - CallingConv::ID CallingConv) const { - if (IsVarArg) - return false; - - switch (CallingConv) { - default: - return false; - case CallingConv::X86_StdCall: - return !is64Bit(); - case CallingConv::X86_FastCall: - return !is64Bit(); - case CallingConv::X86_ThisCall: - return !is64Bit(); - case CallingConv::Fast: - return GuaranteedTailCallOpt; - case CallingConv::GHC: - return GuaranteedTailCallOpt; - } } diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 286a798..80a4103 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -117,7 +117,9 @@ public: /// This constructor initializes the data members to match that /// of the specified triple. /// - X86Subtarget(const std::string &TT, const std::string &FS, bool is64Bit); + X86Subtarget(const std::string &TT, const std::string &CPU, + const std::string &FS, bool is64Bit, + unsigned StackAlignOverride); /// getStackAlignment - Returns the minimum alignment known to hold of the /// stack frame on entry to the function and which must be maintained by every @@ -130,8 +132,7 @@ public: /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. - std::string ParseSubtargetFeatures(const std::string &FS, - const std::string &CPU); + void ParseSubtargetFeatures(const std::string &FS, const std::string &CPU); /// AutoDetectSubtargetFeatures - Auto-detect CPU features using CPUID /// instruction. @@ -248,9 +249,6 @@ public: /// indicating the number of scheduling cycles of backscheduling that /// should be attempted. unsigned getSpecialAddressLatency() const; - - /// IsCalleePop - Test whether a function should pop its own arguments. - bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv) const; }; } // End llvm namespace diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 7483329..1b6fa30 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -87,8 +87,9 @@ extern "C" void LLVMInitializeX86Target() { X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS) - : X86TargetMachine(T, TT, FS, false), + : X86TargetMachine(T, TT, CPU, FS, false), DataLayout(getSubtargetImpl()->isTargetDarwin() ? "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-f128:128:128-n8:16:32" : (getSubtargetImpl()->isTargetCygMing() || @@ -103,8 +104,9 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT, X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS) - : X86TargetMachine(T, TT, FS, true), + : X86TargetMachine(T, TT, CPU, FS, true), DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-n8:16:32:64"), InstrInfo(*this), TSInfo(*this), @@ -115,9 +117,10 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT, /// X86TargetMachine ctor - Create an X86 target. 
/// X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS, bool is64Bit) : LLVMTargetMachine(T, TT), - Subtarget(TT, FS, is64Bit), + Subtarget(TT, CPU, FS, is64Bit, StackAlignmentOverride), FrameLowering(*this, Subtarget), ELFWriterInfo(is64Bit, true) { DefRelocModel = getRelocationModel(); @@ -182,6 +185,10 @@ X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT, // Finally, if we have "none" as our PIC style, force to static mode. if (Subtarget.getPICStyle() == PICStyles::None) setRelocationModel(Reloc::Static); + + // default to hard float ABI + if (FloatABIType == FloatABI::Default) + FloatABIType = FloatABI::Hard; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index 5973922..885334a 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -43,7 +43,8 @@ private: public: X86TargetMachine(const Target &T, const std::string &TT, - const std::string &FS, bool is64Bit); + const std::string &CPU, const std::string &FS, + bool is64Bit); virtual const X86InstrInfo *getInstrInfo() const { llvm_unreachable("getInstrInfo not implemented"); @@ -87,7 +88,7 @@ class X86_32TargetMachine : public X86TargetMachine { X86JITInfo JITInfo; public: X86_32TargetMachine(const Target &T, const std::string &M, - const std::string &FS); + const std::string &CPU, const std::string &FS); virtual const TargetData *getTargetData() const { return &DataLayout; } virtual const X86TargetLowering *getTargetLowering() const { return &TLInfo; @@ -113,7 +114,7 @@ class X86_64TargetMachine : public X86TargetMachine { X86JITInfo JITInfo; public: X86_64TargetMachine(const Target &T, const std::string &TT, - const std::string &FS); + const std::string &CPU, const std::string &FS); virtual const TargetData *getTargetData() const { return &DataLayout; } virtual const X86TargetLowering *getTargetLowering() const { return &TLInfo; diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt index 9093de6..358141c 100644 --- a/lib/Target/XCore/CMakeLists.txt +++ b/lib/Target/XCore/CMakeLists.txt @@ -1,10 +1,7 @@ set(LLVM_TARGET_DEFINITIONS XCore.td) -tablegen(XCoreGenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(XCoreGenRegisterNames.inc -gen-register-enums) -tablegen(XCoreGenRegisterInfo.inc -gen-register-desc) -tablegen(XCoreGenInstrNames.inc -gen-instr-enums) -tablegen(XCoreGenInstrInfo.inc -gen-instr-desc) +tablegen(XCoreGenRegisterInfo.inc -gen-register-info) +tablegen(XCoreGenInstrInfo.inc -gen-instr-info) tablegen(XCoreGenAsmWriter.inc -gen-asm-writer) tablegen(XCoreGenDAGISel.inc -gen-dag-isel) tablegen(XCoreGenCallingConv.inc -gen-callingconv) diff --git a/lib/Target/XCore/Makefile b/lib/Target/XCore/Makefile index 6c1ef88..ec6fb4c 100644 --- a/lib/Target/XCore/Makefile +++ b/lib/Target/XCore/Makefile @@ -12,9 +12,8 @@ LIBRARYNAME = LLVMXCoreCodeGen TARGET = XCore # Make sure that tblgen is run, first thing. 
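# (Aside: BUILT_SOURCES is how the LLVM Makefile system forces these
#  TableGen outputs to exist before any object file is compiled. After the
#  consolidation a target lists only the unified .inc files, e.g. for a
#  hypothetical target Foo:
#    BUILT_SOURCES = FooGenRegisterInfo.inc FooGenInstrInfo.inc )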
-BUILT_SOURCES = XCoreGenRegisterInfo.h.inc XCoreGenRegisterNames.inc \ - XCoreGenRegisterInfo.inc XCoreGenInstrNames.inc \ - XCoreGenInstrInfo.inc XCoreGenAsmWriter.inc \ +BUILT_SOURCES = XCoreGenRegisterInfo.inc XCoreGenInstrInfo.inc \ + XCoreGenAsmWriter.inc \ XCoreGenDAGISel.inc XCoreGenCallingConv.inc \ XCoreGenSubtarget.inc diff --git a/lib/Target/XCore/XCore.h b/lib/Target/XCore/XCore.h index 8937fbe..ec4ab91 100644 --- a/lib/Target/XCore/XCore.h +++ b/lib/Target/XCore/XCore.h @@ -32,10 +32,12 @@ namespace llvm { // Defines symbolic names for XCore registers. This defines a mapping from // register name to register number. // -#include "XCoreGenRegisterNames.inc" +#define GET_REGINFO_ENUM +#include "XCoreGenRegisterInfo.inc" // Defines symbolic names for the XCore instructions. // -#include "XCoreGenInstrNames.inc" +#define GET_INSTRINFO_ENUM +#include "XCoreGenInstrInfo.inc" #endif diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 8cabbbf..6d040e0 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -1591,21 +1591,18 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, // XCore Inline Assembly Support //===----------------------------------------------------------------------===// -std::vector<unsigned> XCoreTargetLowering:: -getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const -{ - if (Constraint.size() != 1) - return std::vector<unsigned>(); - - switch (Constraint[0]) { +std::pair<unsigned, const TargetRegisterClass*> +XCoreTargetLowering:: +getRegForInlineAsmConstraint(const std::string &Constraint, + EVT VT) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { default : break; case 'r': - return make_vector<unsigned>(XCore::R0, XCore::R1, XCore::R2, - XCore::R3, XCore::R4, XCore::R5, - XCore::R6, XCore::R7, XCore::R8, - XCore::R9, XCore::R10, XCore::R11, 0); - break; + return std::make_pair(0U, XCore::GRRegsRegisterClass); + } } - return std::vector<unsigned>(); + // Use the default implementation in TargetLowering to convert the register + // constraint into a member of a register class. 
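// Aside: IR that exercises the 'r' constraint handled above would look
// like this (illustrative only):
//
//   %sum = call i32 asm "add $0, $1, $2", "=r,r,r"(i32 %a, i32 %b)
//
// Returning a register *class* lets the allocator pick any GRRegs member,
// instead of iterating the old hard-coded R0..R11 vector.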
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); } diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index a8d67d4..9c803be 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -148,9 +148,9 @@ namespace llvm { SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; // Inline asm support - std::vector<unsigned> - getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const; + std::pair<unsigned, const TargetRegisterClass*> + getRegForInlineAsmConstraint(const std::string &Constraint, + EVT VT) const; // Expand specifics SDValue TryExpandADDWithMul(SDNode *Op, SelectionDAG &DAG) const; diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp index 9cb6a7d..cb54520 100644 --- a/lib/Target/XCore/XCoreInstrInfo.cpp +++ b/lib/Target/XCore/XCoreInstrInfo.cpp @@ -18,11 +18,13 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineLocation.h" -#include "XCoreGenInstrInfo.inc" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#define GET_INSTRINFO_MC_DESC +#include "XCoreGenInstrInfo.inc" + namespace llvm { namespace XCore { @@ -38,7 +40,8 @@ namespace XCore { using namespace llvm; XCoreInstrInfo::XCoreInstrInfo() - : TargetInstrInfoImpl(XCoreInsts, array_lengthof(XCoreInsts)), + : TargetInstrInfoImpl(XCoreInsts, array_lengthof(XCoreInsts), + XCore::ADJCALLSTACKDOWN, XCore::ADJCALLSTACKUP), RI(*this) { } diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp index 46c9e57..2bf43b4 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -33,11 +33,14 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#define GET_REGINFO_MC_DESC +#define GET_REGINFO_TARGET_DESC +#include "XCoreGenRegisterInfo.inc" + using namespace llvm; XCoreRegisterInfo::XCoreRegisterInfo(const TargetInstrInfo &tii) - : XCoreGenRegisterInfo(XCore::ADJCALLSTACKDOWN, XCore::ADJCALLSTACKUP), - TII(tii) { + : XCoreGenRegisterInfo(), TII(tii) { } // helper functions @@ -328,6 +331,3 @@ unsigned XCoreRegisterInfo::getFrameRegister(const MachineFunction &MF) const { unsigned XCoreRegisterInfo::getRARegister() const { return XCore::LR; } - -#include "XCoreGenRegisterInfo.inc" - diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h index 7a9bc9f..801d9eb 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.h +++ b/lib/Target/XCore/XCoreRegisterInfo.h @@ -15,7 +15,9 @@ #define XCOREREGISTERINFO_H #include "llvm/Target/TargetRegisterInfo.h" -#include "XCoreGenRegisterInfo.h.inc" + +#define GET_REGINFO_HEADER +#include "XCoreGenRegisterInfo.inc" namespace llvm { diff --git a/lib/Target/XCore/XCoreSubtarget.cpp b/lib/Target/XCore/XCoreSubtarget.cpp index 78a6fa5..0447d2e 100644 --- a/lib/Target/XCore/XCoreSubtarget.cpp +++ b/lib/Target/XCore/XCoreSubtarget.cpp @@ -15,6 +15,7 @@ #include "XCore.h" using namespace llvm; -XCoreSubtarget::XCoreSubtarget(const std::string &TT, const std::string &FS) +XCoreSubtarget::XCoreSubtarget(const std::string &TT, + const std::string &CPU, const std::string &FS) { } diff --git a/lib/Target/XCore/XCoreSubtarget.h b/lib/Target/XCore/XCoreSubtarget.h index f8be3ec..ee40d36 100644 --- a/lib/Target/XCore/XCoreSubtarget.h +++ b/lib/Target/XCore/XCoreSubtarget.h @@ -27,12 +27,12 @@ public: /// This 
constructor initializes the data members to match that /// of the specified triple. /// - XCoreSubtarget(const std::string &TT, const std::string &FS); + XCoreSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. - std::string ParseSubtargetFeatures(const std::string &FS, - const std::string &CPU); + void ParseSubtargetFeatures(const std::string &FS, const std::string &CPU); }; } // End llvm namespace diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index 30da2c8..542038b 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -21,9 +21,10 @@ using namespace llvm; /// XCoreTargetMachine ctor - Create an ILP32 architecture model /// XCoreTargetMachine::XCoreTargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS) : LLVMTargetMachine(T, TT), - Subtarget(TT, FS), + Subtarget(TT, CPU, FS), DataLayout("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-" "i16:16:32-i32:32:32-i64:32:32-n32"), InstrInfo(), diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h index 24daadc..6235ac3 100644 --- a/lib/Target/XCore/XCoreTargetMachine.h +++ b/lib/Target/XCore/XCoreTargetMachine.h @@ -33,7 +33,7 @@ class XCoreTargetMachine : public LLVMTargetMachine { XCoreSelectionDAGInfo TSInfo; public: XCoreTargetMachine(const Target &T, const std::string &TT, - const std::string &FS); + const std::string &CPU, const std::string &FS); virtual const XCoreInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const XCoreFrameLowering *getFrameLowering() const { diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index aeb3c3e..5733c20 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -796,7 +796,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // So at this point we know we have (Y -> OtherAddOp): // select C, (add X, Y), (sub X, Z) Value *NegVal; // Compute -Z - if (SI.getType()->isFloatingPointTy()) { + if (SI.getType()->isFPOrFPVectorTy()) { NegVal = Builder->CreateFNeg(SubOp->getOperand(1)); } else { NegVal = Builder->CreateNeg(SubOp->getOperand(1)); @@ -810,7 +810,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { Builder->CreateSelect(CondVal, NewTrueOp, NewFalseOp, SI.getName() + ".p"); - if (SI.getType()->isFloatingPointTy()) + if (SI.getType()->isFPOrFPVectorTy()) return BinaryOperator::CreateFAdd(SubOp->getOperand(0), NewSel); else return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel); diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 1d79339..77642e5 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -52,6 +52,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" @@ -72,11 +73,9 @@ STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated"); STATISTIC(NumElimRem , "Number of IV remainder operations eliminated"); STATISTIC(NumElimCmp , "Number of IV comparisons 
eliminated"); -// DisableIVRewrite mode currently affects IVUsers, so is defined in libAnalysis -// and referenced here. -namespace llvm { - extern bool DisableIVRewrite; -} +static cl::opt<bool> DisableIVRewrite( + "disable-iv-rewrite", cl::Hidden, + cl::desc("Disable canonical induction variable rewriting")); namespace { class IndVarSimplify : public LoopPass { @@ -86,21 +85,13 @@ namespace { DominatorTree *DT; TargetData *TD; - PHINode *CurrIV; // Current IV being simplified. - - // Instructions processed by SimplifyIVUsers for CurrIV. - SmallPtrSet<Instruction*,16> Simplified; - - // Use-def pairs if IVUsers waiting to be processed for CurrIV. - SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers; - SmallVector<WeakVH, 16> DeadInsts; bool Changed; public: static char ID; // Pass identification, replacement for typeid IndVarSimplify() : LoopPass(ID), IU(0), LI(0), SE(0), DT(0), TD(0), - CurrIV(0), Changed(false) { + Changed(false) { initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry()); } @@ -112,7 +103,8 @@ namespace { AU.addRequired<ScalarEvolution>(); AU.addRequiredID(LoopSimplifyID); AU.addRequiredID(LCSSAID); - AU.addRequired<IVUsers>(); + if (!DisableIVRewrite) + AU.addRequired<IVUsers>(); AU.addPreserved<ScalarEvolution>(); AU.addPreservedID(LoopSimplifyID); AU.addPreservedID(LCSSAID); @@ -132,7 +124,6 @@ namespace { void EliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand, bool IsSigned); - void pushIVUsers(Instruction *Def); bool isSimpleIVUser(Instruction *I, const Loop *L); void RewriteNonIntegerIVs(Loop *L); @@ -618,8 +609,7 @@ protected: const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse); - Instruction *WidenIVUse(Instruction *NarrowUse, - Instruction *NarrowDef, + Instruction *WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef, Instruction *WideDef); }; } // anonymous namespace @@ -669,9 +659,11 @@ Instruction *WidenIV::CloneIVUser(Instruction *NarrowUse, LHS, RHS, NarrowBO->getName()); Builder.Insert(WideBO); - if (NarrowBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap(); - if (NarrowBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap(); - + if (const OverflowingBinaryOperator *OBO = + dyn_cast<OverflowingBinaryOperator>(NarrowBO)) { + if (OBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap(); + if (OBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap(); + } return WideBO; } llvm_unreachable(0); @@ -733,9 +725,10 @@ static bool HoistStep(Instruction *IncV, Instruction *InsertPos, /// WidenIVUse - Determine whether an individual user of the narrow IV can be /// widened. If so, return the wide clone of the user. -Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse, - Instruction *NarrowDef, +Instruction *WidenIV::WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef, Instruction *WideDef) { + Instruction *NarrowUse = cast<Instruction>(NarrowDefUse.getUser()); + // To be consistent with IVUsers, stop traversing the def-use chain at // inner-loop phis or post-loop phis. if (isa<PHINode>(NarrowUse) && LI->getLoopFor(NarrowUse->getParent()) != L) @@ -753,7 +746,7 @@ Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse, unsigned IVWidth = SE->getTypeSizeInBits(WideType); if (CastWidth < IVWidth) { // The cast isn't as wide as the IV, so insert a Trunc. 
- IRBuilder<> Builder(NarrowUse); + IRBuilder<> Builder(NarrowDefUse); NewDef = Builder.CreateTrunc(WideDef, NarrowUse->getType()); } else { @@ -787,11 +780,15 @@ Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse, // This user does not evaluate to a recurence after widening, so don't // follow it. Instead insert a Trunc to kill off the original use, // eventually isolating the original narrow IV so it can be removed. - IRBuilder<> Builder(NarrowUse); + IRBuilder<> Builder(NarrowDefUse); Value *Trunc = Builder.CreateTrunc(WideDef, NarrowDef->getType()); NarrowUse->replaceUsesOfWith(NarrowDef, Trunc); return 0; } + // We assume that block terminators are not SCEVable. + assert(NarrowUse != NarrowUse->getParent()->getTerminator() && + "can't split terminators"); + // Reuse the IV increment that SCEVExpander created as long as it dominates // NarrowUse. Instruction *WideUse = 0; @@ -885,20 +882,20 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) { NarrowIVUsers.push_back(std::make_pair(&UI.getUse(), WidePhi)); } while (!NarrowIVUsers.empty()) { - Use *NarrowDefUse; + Use *UsePtr; Instruction *WideDef; - tie(NarrowDefUse, WideDef) = NarrowIVUsers.pop_back_val(); + tie(UsePtr, WideDef) = NarrowIVUsers.pop_back_val(); + Use &NarrowDefUse = *UsePtr; // Process a def-use edge. This may replace the use, so don't hold a // use_iterator across it. - Instruction *NarrowDef = cast<Instruction>(NarrowDefUse->get()); - Instruction *NarrowUse = cast<Instruction>(NarrowDefUse->getUser()); - Instruction *WideUse = WidenIVUse(NarrowUse, NarrowDef, WideDef); + Instruction *NarrowDef = cast<Instruction>(NarrowDefUse.get()); + Instruction *WideUse = WidenIVUse(NarrowDefUse, NarrowDef, WideDef); // Follow all def-use edges from the previous narrow use. if (WideUse) { - for (Value::use_iterator UI = NarrowUse->use_begin(), - UE = NarrowUse->use_end(); UI != UE; ++UI) { + for (Value::use_iterator UI = NarrowDefUse.getUser()->use_begin(), + UE = NarrowDefUse.getUser()->use_end(); UI != UE; ++UI) { NarrowIVUsers.push_back(std::make_pair(&UI.getUse(), WideUse)); } } @@ -1016,12 +1013,13 @@ bool IndVarSimplify::EliminateIVUser(Instruction *UseInst, // Eliminate any operation that SCEV can prove is an identity function. if (!SE->isSCEVable(UseInst->getType()) || + (UseInst->getType() != IVOperand->getType()) || (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand))) return false; - UseInst->replaceAllUsesWith(IVOperand); - DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n'); + + UseInst->replaceAllUsesWith(IVOperand); ++NumElimIdentity; Changed = true; DeadInsts.push_back(UseInst); @@ -1030,7 +1028,10 @@ bool IndVarSimplify::EliminateIVUser(Instruction *UseInst, /// pushIVUsers - Add all uses of Def to the current IV's worklist. /// -void IndVarSimplify::pushIVUsers(Instruction *Def) { +static void pushIVUsers( + Instruction *Def, + SmallPtrSet<Instruction*,16> &Simplified, + SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) { for (Value::use_iterator UI = Def->use_begin(), E = Def->use_end(); UI != E; ++UI) { @@ -1038,7 +1039,9 @@ void IndVarSimplify::pushIVUsers(Instruction *Def) { // Avoid infinite or exponential worklist processing. // Also ensure unique worklist users. - if (Simplified.insert(User)) + // If Def is a LoopPhi, it may not be in the Simplified set, so check for + // self edges first. 
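// Aside: an example of the self-edge situation handled below. Loop header
// phis may use each other, e.g.
//
//   %iv  = phi i32 [ 0, %ph ], [ %iv2, %latch ]
//   %iv2 = phi i32 [ 1, %ph ], [ %iv,  %latch ]
//
// so pushIVUsers can run more than once for the same phi, and Def can
// reappear among the users being pushed.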
+ if (User != Def && Simplified.insert(User)) SimpleIVUsers.push_back(std::make_pair(User, Def)); } } @@ -1056,6 +1059,10 @@ bool IndVarSimplify::isSimpleIVUser(Instruction *I, const Loop *L) { // Get the symbolic expression for this instruction. const SCEV *S = SE->getSCEV(I); + // We assume that terminators are not SCEVable. + assert((!S || I != I->getParent()->getTerminator()) && + "can't fold terminators"); + // Only consider affine recurrences. const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S); if (AR && AR->getLoop() == L) @@ -1079,50 +1086,75 @@ bool IndVarSimplify::isSimpleIVUser(Instruction *I, const Loop *L) { /// Once DisableIVRewrite is default, LSR will be the only client of IVUsers. /// void IndVarSimplify::SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter) { - // Simplification is performed independently for each IV, as represented by a - // loop header phi. Each round of simplification first iterates through the - // SimplifyIVUsers worklist, then determines whether the current IV should be - // widened. Widening adds a new phi to LoopPhis, inducing another round of - // simplification on the wide IV. + std::map<PHINode *, WideIVInfo> WideIVMap; + SmallVector<PHINode*, 8> LoopPhis; for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { LoopPhis.push_back(cast<PHINode>(I)); } + // Each round of simplification iterates through the SimplifyIVUsers worklist + // for all current phis, then determines whether any IVs can be + // widened. Widening adds new phis to LoopPhis, inducing another round of + // simplification on the wide IVs. while (!LoopPhis.empty()) { - CurrIV = LoopPhis.pop_back_val(); - Simplified.clear(); - assert(SimpleIVUsers.empty() && "expect empty IV users list"); - - WideIVInfo WI; - - pushIVUsers(CurrIV); - - while (!SimpleIVUsers.empty()) { - Instruction *UseInst, *Operand; - tie(UseInst, Operand) = SimpleIVUsers.pop_back_val(); - - if (EliminateIVUser(UseInst, Operand)) { - pushIVUsers(Operand); - continue; - } - if (CastInst *Cast = dyn_cast<CastInst>(UseInst)) { - bool IsSigned = Cast->getOpcode() == Instruction::SExt; - if (IsSigned || Cast->getOpcode() == Instruction::ZExt) { - CollectExtend(Cast, IsSigned, WI, SE, TD); + // Evaluate as many IV expressions as possible before widening any IVs. This + // forces SCEV to set no-wrap flags before evaluating sign/zero + // extension. The first time SCEV attempts to normalize sign/zero extension, + // the result becomes final. So for the most predictable results, we delay + // evaluation of sign/zero extend evaluation until needed, and avoid running + // other SCEV based analysis prior to SimplifyIVUsersNoRewrite. + do { + PHINode *CurrIV = LoopPhis.pop_back_val(); + + // Information about sign/zero extensions of CurrIV. + WideIVInfo WI; + + // Instructions processed by SimplifyIVUsers for CurrIV. + SmallPtrSet<Instruction*,16> Simplified; + + // Use-def pairs if IVUsers waiting to be processed for CurrIV. + SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers; + + // Push users of the current LoopPhi. In rare cases, pushIVUsers may be + // called multiple times for the same LoopPhi. This is the proper thing to + // do for loop header phis that use each other. + pushIVUsers(CurrIV, Simplified, SimpleIVUsers); + + while (!SimpleIVUsers.empty()) { + Instruction *UseInst, *Operand; + tie(UseInst, Operand) = SimpleIVUsers.pop_back_val(); + // Bypass back edges to avoid extra work. 
+ if (UseInst == CurrIV) continue; + + if (EliminateIVUser(UseInst, Operand)) { + pushIVUsers(Operand, Simplified, SimpleIVUsers); + continue; + } + if (CastInst *Cast = dyn_cast<CastInst>(UseInst)) { + bool IsSigned = Cast->getOpcode() == Instruction::SExt; + if (IsSigned || Cast->getOpcode() == Instruction::ZExt) { + CollectExtend(Cast, IsSigned, WI, SE, TD); + } + continue; + } + if (isSimpleIVUser(UseInst, L)) { + pushIVUsers(UseInst, Simplified, SimpleIVUsers); } - continue; } - if (isSimpleIVUser(UseInst, L)) { - pushIVUsers(UseInst); + if (WI.WidestNativeType) { + WideIVMap[CurrIV] = WI; } - } - if (WI.WidestNativeType) { - WidenIV Widener(CurrIV, WI, LI, SE, DT, DeadInsts); + } while(!LoopPhis.empty()); + + for (std::map<PHINode *, WideIVInfo>::const_iterator I = WideIVMap.begin(), + E = WideIVMap.end(); I != E; ++I) { + WidenIV Widener(I->first, I->second, LI, SE, DT, DeadInsts); if (PHINode *WidePhi = Widener.CreateWideIV(Rewriter)) { Changed = true; LoopPhis.push_back(WidePhi); } } + WideIVMap.clear(); } } @@ -1145,8 +1177,6 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { DT = &getAnalysis<DominatorTree>(); TD = getAnalysisIfAvailable<TargetData>(); - CurrIV = NULL; - Simplified.clear(); DeadInsts.clear(); Changed = false; @@ -1157,9 +1187,18 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); // Create a rewriter object which we'll use to transform the code with. - SCEVExpander Rewriter(*SE); - if (DisableIVRewrite) + SCEVExpander Rewriter(*SE, "indvars"); + + // Eliminate redundant IV users. + // + // Simplification works best when run before other consumers of SCEV. We + // attempt to avoid evaluating SCEVs for sign/zero extend operations until + // other expressions involving loop IVs have been evaluated. This helps SCEV + // set no-wrap flags before normalizing sign/zero extension. + if (DisableIVRewrite) { Rewriter.disableCanonicalMode(); + SimplifyIVUsersNoRewrite(L, Rewriter); + } // Check to see if this loop has a computable loop-invariant execution count. // If so, this means that we can compute the final value of any expressions @@ -1171,9 +1210,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { RewriteLoopExitValues(L, Rewriter); // Eliminate redundant IV users. 
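// (Aside: the new second argument to SCEVExpander, "indvars" here and
//  "loop-idiom" / "lsr" in the passes further down, is a name hint, so
//  induction variables materialized by the expander show up in dumps with
//  a recognizable name, e.g. %indvars.iv.)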
- if (DisableIVRewrite) - SimplifyIVUsersNoRewrite(L, Rewriter); - else + if (!DisableIVRewrite) SimplifyIVUsers(Rewriter); // Compute the type of the largest recurrence expression, and decide whether diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index cf18ff0..b500d5b 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -600,8 +600,10 @@ static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) { for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) { TestBB = BBTerm->getSuccessor(i); unsigned NumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB)); - if (NumPreds < MinNumPreds) + if (NumPreds < MinNumPreds) { MinSucc = i; + MinNumPreds = NumPreds; + } } return MinSucc; diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index dbf6eec..a7bc0e0 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -167,7 +167,7 @@ static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) { static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE) { if (Instruction *I = dyn_cast<Instruction>(V)) if (isInstructionTriviallyDead(I)) - deleteDeadInstruction(I, SE); + deleteDeadInstruction(I, SE); } bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { @@ -467,8 +467,8 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // header. This allows us to insert code for it in the preheader. BasicBlock *Preheader = CurLoop->getLoopPreheader(); IRBuilder<> Builder(Preheader->getTerminator()); - SCEVExpander Expander(*SE); - + SCEVExpander Expander(*SE, "loop-idiom"); + // Okay, we have a strided store "p[i]" of a splattable value. We can turn // this into a memset in the loop preheader now if we want. However, this // would be unsafe to do if there is anything else in the loop that may read @@ -488,7 +488,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, deleteIfDeadInstruction(BasePtr, *SE); return false; } - + // Okay, everything looks good, insert the memset. // The # stored bytes is (BECount+1)*Size. Expand the trip count out to @@ -556,8 +556,8 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, // header. This allows us to insert code for it in the preheader. BasicBlock *Preheader = CurLoop->getLoopPreheader(); IRBuilder<> Builder(Preheader->getTerminator()); - SCEVExpander Expander(*SE); - + SCEVExpander Expander(*SE, "loop-idiom"); + // Okay, we have a strided store "p[i]" of a loaded value. We can turn // this into a memcpy in the loop preheader now if we want. However, this // would be unsafe to do if there is anything else in the loop that may read @@ -568,7 +568,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, Expander.expandCodeFor(StoreEv->getStart(), Builder.getInt8PtrTy(SI->getPointerAddressSpace()), Preheader->getTerminator()); - + if (mayLoopAccessLocation(StoreBasePtr, AliasAnalysis::ModRef, CurLoop, BECount, StoreSize, getAnalysis<AliasAnalysis>(), SI)) { @@ -593,9 +593,9 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, deleteIfDeadInstruction(StoreBasePtr, *SE); return false; } - + // Okay, everything is safe, we can transform this! - + // The # stored bytes is (BECount+1)*Size. Expand the trip count out to // pointer size if it isn't already. 
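// Aside, a worked example of the trip-count math used by both the memset
// and memcpy paths: for
//
//   for (i = 0; i != n; ++i) p[i] = 0;    /* i8 stores, StoreSize = 1 */
//
// the backedge-taken count BECount is n-1, so the stored byte count
// (BECount + 1) * StoreSize equals n, and the preheader gets
// memset(p, 0, n).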
@@ -619,7 +619,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n" << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" << " from store ptr=" << *StoreEv << " at: " << *SI << "\n"); - + // Okay, the memset has been formed. Zap the original store and anything that // feeds into it. diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index afa0bf8..c6ca99a 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -3698,7 +3698,7 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution, // we can remove them after we are done working. SmallVector<WeakVH, 16> DeadInsts; - SCEVExpander Rewriter(SE); + SCEVExpander Rewriter(SE, "lsr"); Rewriter.disableCanonicalMode(); Rewriter.setIVIncInsertPos(L, IVIncInsertPos); diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 46ac948..87e364d 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -152,7 +152,8 @@ namespace { void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, SmallVector<AllocaInst*, 32> &NewElts); - static MemTransferInst *isOnlyCopiedFromConstantGlobal(AllocaInst *AI); + static MemTransferInst *isOnlyCopiedFromConstantGlobal( + AllocaInst *AI, SmallVector<Instruction*, 4> &ToDelete); }; // SROA_DT - SROA that uses DominatorTree. @@ -1302,7 +1303,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { LoadInst *TrueLoad = Builder.CreateLoad(SI->getTrueValue(), LI->getName()+".t"); LoadInst *FalseLoad = - Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".t"); + Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".f"); // Transfer alignment and TBAA info if present. TrueLoad->setAlignment(LI->getAlignment()); @@ -1443,8 +1444,8 @@ static bool ShouldAttemptScalarRepl(AllocaInst *AI) { // performScalarRepl - This algorithm is a simple worklist driven algorithm, -// which runs on all of the malloc/alloca instructions in the function, removing -// them if they are only used by getelementptr instructions. +// which runs on all of the alloca instructions in the function, removing them +// if they are only used by getelementptr instructions. // bool SROA::performScalarRepl(Function &F) { std::vector<AllocaInst*> WorkList; @@ -1478,12 +1479,15 @@ bool SROA::performScalarRepl(Function &F) { // the constant global instead. This is commonly produced by the CFE by // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' // is only subsequently read. - if (MemTransferInst *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) { + SmallVector<Instruction *, 4> ToDelete; + if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(AI, ToDelete)) { DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n'); - DEBUG(dbgs() << " memcpy = " << *TheCopy << '\n'); - Constant *TheSrc = cast<Constant>(TheCopy->getSource()); + DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); + for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) + ToDelete[i]->eraseFromParent(); + Constant *TheSrc = cast<Constant>(Copy->getSource()); AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType())); - TheCopy->eraseFromParent(); // Don't mutate the global. + Copy->eraseFromParent(); // Don't mutate the global. 
AI->eraseFromParent(); ++NumGlobals; Changed = true; @@ -2507,8 +2511,14 @@ static bool PointsToConstantGlobal(Value *V) { /// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to /// the alloca, and if the source pointer is a pointer to a constant global, we /// can optimize this. -static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, - bool isOffset) { +static bool +isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, + bool isOffset, + SmallVector<Instruction *, 4> &LifetimeMarkers) { + // We track lifetime intrinsics as we encounter them. If we decide to go + // ahead and replace the value with the global, this lets the caller quickly + // eliminate the markers. + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) { User *U = cast<Instruction>(*UI); @@ -2520,7 +2530,8 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { // If uses of the bitcast are ok, we are ok. - if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset)) + if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset, + LifetimeMarkers)) return false; continue; } @@ -2528,7 +2539,8 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, // If the GEP has all zero indices, it doesn't offset the pointer. If it // doesn't, it does. if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy, - isOffset || !GEP->hasAllZeroIndices())) + isOffset || !GEP->hasAllZeroIndices(), + LifetimeMarkers)) return false; continue; } @@ -2554,6 +2566,16 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, continue; } + // Lifetime intrinsics can be handled by the caller. + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) { + assert(II->use_empty() && "Lifetime markers have no result to use!"); + LifetimeMarkers.push_back(II); + continue; + } + } + // If this is isn't our memcpy/memmove, reject it as something we can't // handle. MemTransferInst *MI = dyn_cast<MemTransferInst>(U); @@ -2590,9 +2612,11 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, /// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only /// modified by a copy from a constant global. If we can prove this, we can /// replace any uses of the alloca with uses of the global directly. -MemTransferInst *SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI) { +MemTransferInst * +SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI, + SmallVector<Instruction*, 4> &ToDelete) { MemTransferInst *TheCopy = 0; - if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false)) + if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false, ToDelete)) return TheCopy; return 0; } diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 506e5e8..0f6d9ae 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -536,9 +536,9 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { /// TryToSimplifyUncondBranchFromEmptyBlock - BB is known to contain an /// unconditional branch, and contains no instructions other than PHI nodes, -/// potential debug intrinsics and the branch. If possible, eliminate BB by -/// rewriting all the predecessors to branch to the successor block and return -/// true. If we can't transform, return false. 
+/// potential side-effect free intrinsics and the branch. If possible, +/// eliminate BB by rewriting all the predecessors to branch to the successor +/// block and return true. If we can't transform, return false. bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) { assert(BB != &BB->getParent()->getEntryBlock() && "TryToSimplifyUncondBranchFromEmptyBlock called on entry block!"); @@ -613,13 +613,15 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) { } } - while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) { - if (Succ->getSinglePredecessor()) { - // BB is the only predecessor of Succ, so Succ will end up with exactly - // the same predecessors BB had. - Succ->getInstList().splice(Succ->begin(), - BB->getInstList(), BB->begin()); - } else { + if (Succ->getSinglePredecessor()) { + // BB is the only predecessor of Succ, so Succ will end up with exactly + // the same predecessors BB had. + + // Copy over any phi, debug or lifetime instruction. + BB->getTerminator()->eraseFromParent(); + Succ->getInstList().splice(Succ->getFirstNonPHI(), BB->getInstList()); + } else { + while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) { // We explicitly check for such uses in CanPropagatePredecessorsForPHIs. assert(PN->use_empty() && "There shouldn't be any uses here!"); PN->eraseFromParent(); @@ -642,7 +644,7 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { bool Changed = false; // This implementation doesn't currently consider undef operands - // specially. Theroetically, two phis which are identical except for + // specially. Theoretically, two phis which are identical except for // one having an undef where the other doesn't could be collapsed. // Map from PHI hash values to PHI nodes. If multiple PHIs have diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 32d1dcc..e5a00f4 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -38,6 +38,7 @@ #include "llvm/Analysis/DIBuilder.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" @@ -73,22 +74,6 @@ struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > { }; } -/// onlyUsedByLifetimeMarkers - Return true if the only users of this pointer -/// are lifetime markers. -/// -static bool onlyUsedByLifetimeMarkers(const Value *V) { - for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end(); - UI != UE; ++UI) { - const IntrinsicInst *II = dyn_cast<IntrinsicInst>(*UI); - if (!II) return false; - - if (II->getIntrinsicID() != Intrinsic::lifetime_start && - II->getIntrinsicID() != Intrinsic::lifetime_end) - return false; - } - return true; -} - /// isAllocaPromotable - Return true if this alloca is legal for promotion. /// This is true if there are only loads and stores to the alloca. /// diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 7b93b4a..49726d5 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -2604,7 +2604,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ BasicBlock *BB = BI->getParent(); // If the Terminator is the only non-phi instruction, simplify the block. 
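// Aside: the lifetime markers these hunks teach the cleanup passes to
// look through are calls of the form
//
//   call void @llvm.lifetime.start(i64 <size>, i8* <ptr>)
//   call void @llvm.lifetime.end(i64 <size>, i8* <ptr>)
//
// They produce no value (the assert above relies on use_empty()), so a
// block containing only PHIs, debug intrinsics and such markers is still
// effectively empty for this simplification.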
- BasicBlock::iterator I = BB->getFirstNonPHIOrDbg(); + BasicBlock::iterator I = BB->getFirstNonPHIOrDbgOrLifetime(); if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && TryToSimplifyUncondBranchFromEmptyBlock(BB)) return true; diff --git a/lib/VMCore/BasicBlock.cpp b/lib/VMCore/BasicBlock.cpp index 7d47044..70265c8 100644 --- a/lib/VMCore/BasicBlock.cpp +++ b/lib/VMCore/BasicBlock.cpp @@ -147,6 +147,26 @@ Instruction* BasicBlock::getFirstNonPHIOrDbg() { return &*i; } +Instruction* BasicBlock::getFirstNonPHIOrDbgOrLifetime() { + // All valid basic blocks should have a terminator, + // which is not a PHINode. If we have an invalid basic + // block we'll get an assertion failure when dereferencing + // a past-the-end iterator. + BasicBlock::iterator i = begin(); + for (;; ++i) { + if (isa<PHINode>(i) || isa<DbgInfoIntrinsic>(i)) + continue; + + const IntrinsicInst *II = dyn_cast<IntrinsicInst>(i); + if (!II) + break; + if (II->getIntrinsicID() != Intrinsic::lifetime_start && + II->getIntrinsicID() != Intrinsic::lifetime_end) + break; + } + return &*i; +} + void BasicBlock::dropAllReferences() { for(iterator I = begin(), E = end(); I != E; ++I) I->dropAllReferences(); diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp index 579d356..b7a1350 100644 --- a/lib/VMCore/ConstantFold.cpp +++ b/lib/VMCore/ConstantFold.cpp @@ -730,9 +730,12 @@ Constant *llvm::ConstantFoldSelectInstruction(Constant *Cond, } + if (isa<UndefValue>(Cond)) { + if (isa<UndefValue>(V1)) return V1; + return V2; + } if (isa<UndefValue>(V1)) return V2; if (isa<UndefValue>(V2)) return V1; - if (isa<UndefValue>(Cond)) return V1; if (V1 == V2) return V1; if (ConstantExpr *TrueVal = dyn_cast<ConstantExpr>(V1)) { @@ -1014,20 +1017,38 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, case Instruction::Add: case Instruction::Sub: return UndefValue::get(C1->getType()); - case Instruction::Mul: case Instruction::And: + if (isa<UndefValue>(C1) && isa<UndefValue>(C2)) // undef & undef -> undef + return C1; + return Constant::getNullValue(C1->getType()); // undef & X -> 0 + case Instruction::Mul: { + ConstantInt *CI; + // X * undef -> undef if X is odd or undef + if (((CI = dyn_cast<ConstantInt>(C1)) && CI->getValue()[0]) || + ((CI = dyn_cast<ConstantInt>(C2)) && CI->getValue()[0]) || + (isa<UndefValue>(C1) && isa<UndefValue>(C2))) + return UndefValue::get(C1->getType()); + + // X * undef -> 0 otherwise return Constant::getNullValue(C1->getType()); + } case Instruction::UDiv: case Instruction::SDiv: + // undef / 1 -> undef + if (Opcode == Instruction::UDiv || Opcode == Instruction::SDiv) + if (ConstantInt *CI2 = dyn_cast<ConstantInt>(C2)) + if (CI2->isOne()) + return C1; + // FALL THROUGH case Instruction::URem: case Instruction::SRem: if (!isa<UndefValue>(C2)) // undef / X -> 0 return Constant::getNullValue(C1->getType()); return C2; // X / undef -> undef case Instruction::Or: // X | undef -> -1 - if (const VectorType *PTy = dyn_cast<VectorType>(C1->getType())) - return Constant::getAllOnesValue(PTy); - return Constant::getAllOnesValue(C1->getType()); + if (isa<UndefValue>(C1) && isa<UndefValue>(C2)) // undef | undef -> undef + return C1; + return Constant::getAllOnesValue(C1->getType()); // undef | X -> ~0 case Instruction::LShr: if (isa<UndefValue>(C2) && isa<UndefValue>(C1)) return C1; // undef lshr undef -> undef @@ -1041,6 +1062,8 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, else return C1; // X ashr undef --> X case Instruction::Shl: + if (isa<UndefValue>(C2) 
&& isa<UndefValue>(C1)) + return C1; // undef shl undef -> undef // undef << X -> 0 or X << undef -> 0 return Constant::getNullValue(C1->getType()); } @@ -1831,7 +1854,9 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, if (isa<UndefValue>(C1) || isa<UndefValue>(C2)) { // For EQ and NE, we can always pick a value for the undef to make the // predicate pass or fail, so we can return undef. - if (ICmpInst::isEquality(ICmpInst::Predicate(pred))) + // Also, if both operands are undef, we can return undef. + if (ICmpInst::isEquality(ICmpInst::Predicate(pred)) || + (isa<UndefValue>(C1) && isa<UndefValue>(C2))) return UndefValue::get(ResultTy); // Otherwise, pick the same value as the non-undef operand, and fold // it to true or false. diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp index 87f2fe6..4e6e64d 100644 --- a/lib/VMCore/Constants.cpp +++ b/lib/VMCore/Constants.cpp @@ -1011,17 +1011,32 @@ bool ConstantArray::isCString() const { } -/// getAsString - If the sub-element type of this array is i8 -/// then this method converts the array to an std::string and returns it. -/// Otherwise, it asserts out. +/// convertToString - Helper function for getAsString() and getAsCString(). +static std::string convertToString(const User *U, unsigned len) +{ + std::string Result; + Result.reserve(len); + for (unsigned i = 0; i != len; ++i) + Result.push_back((char)cast<ConstantInt>(U->getOperand(i))->getZExtValue()); + return Result; +} + +/// getAsString - If this array is isString(), then this method converts the +/// array to an std::string and returns it. Otherwise, it asserts out. /// std::string ConstantArray::getAsString() const { assert(isString() && "Not a string!"); - std::string Result; - Result.reserve(getNumOperands()); - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) - Result.push_back((char)cast<ConstantInt>(getOperand(i))->getZExtValue()); - return Result; + return convertToString(this, getNumOperands()); +} + + +/// getAsCString - If this array is isCString(), then this method converts the +/// array (without the trailing null byte) to an std::string and returns it. +/// Otherwise, it asserts out. +/// +std::string ConstantArray::getAsCString() const { + assert(isCString() && "Not a string!"); + return convertToString(this, getNumOperands() - 1); }
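// Aside, a usage sketch for this pair of accessors (values hypothetical):
//
//   // CA is a ConstantArray of type [4 x i8] holding c"abc\00"
//   CA->getAsString();    // "abc" plus the NUL, all 4 elements
//   CA->getAsCString();   // "abc", the trailing NUL dropped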