diff options
author | Shih-wei Liao <sliao@google.com> | 2012-04-24 11:26:46 -0700 |
---|---|---|
committer | Shih-wei Liao <sliao@google.com> | 2012-04-24 11:26:46 -0700 |
commit | cf5a1461acaace0f3e7d11fbbcfbf635b8c8ea9d (patch) | |
tree | 557137810ae9efc96147d672d372e4dabd0a2440 /lib/Target | |
parent | 4c8fab82874a29dcd2b242533af3ebe7f66bfd74 (diff) | |
parent | fc728fbdc2631ce8f343cf9b7292d218fde7419f (diff) | |
download | external_llvm-cf5a1461acaace0f3e7d11fbbcfbf635b8c8ea9d.zip external_llvm-cf5a1461acaace0f3e7d11fbbcfbf635b8c8ea9d.tar.gz external_llvm-cf5a1461acaace0f3e7d11fbbcfbf635b8c8ea9d.tar.bz2 |
Merge with LLVM upstream r155090.
Conflicts:
lib/Support/Unix/PathV2.inc
Change-Id: I7b89833849f6cbcfa958a33a971d0f7754c9cb2c
Diffstat (limited to 'lib/Target')
214 files changed, 6116 insertions, 6252 deletions
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index b05fe62..9b0cb0c 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -38,9 +38,6 @@ def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true", def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", "Enable NEON instructions", [FeatureVFP3]>; -def FeatureNEON2 : SubtargetFeature<"neon2", "HasNEON2", "true", - "Enable Advanced SIMD2 instructions", - [FeatureNEON]>; def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true", "Enable Thumb2 instructions">; def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true", @@ -76,8 +73,6 @@ def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding", def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP", "true", "Use NEON for single precision FP">; -// Allow more precision in FP computation -def FPContractions : Predicate<"!TM.Options.NoExcessFPPrecision">; // Disable 32-bit to 16-bit narrowing for experimentation. def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true", diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index ca30716..410790a 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -16,7 +16,6 @@ #include "ARMAsmPrinter.h" #include "ARM.h" #include "ARMBuildAttrs.h" -#include "ARMBaseRegisterInfo.h" #include "ARMConstantPoolValue.h" #include "ARMMachineFunctionInfo.h" #include "ARMTargetMachine.h" @@ -35,7 +34,6 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCObjectStreamer.h" @@ -44,8 +42,6 @@ #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include 
"llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -732,8 +728,9 @@ void ARMAsmPrinter::emitAttributes() { if (Subtarget->hasNEON() && emitFPU) { /* NEON is not exactly a VFP architecture, but GAS emit one of * neon/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */ - if (Subtarget->hasNEON2()) - AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon-vfpv4"); + if (Subtarget->hasVFP4()) + AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, + "neon-vfpv4"); else AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon"); /* If emitted for NEON, omit from VFP below, since you can have both @@ -1270,7 +1267,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } // Darwin call instructions are just normal call instructions with different // clobber semantics (they clobber R9). - case ARM::BXr9_CALL: case ARM::BX_CALL: { { MCInst TmpInst; @@ -1292,7 +1288,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } return; } - case ARM::tBXr9_CALL: case ARM::tBX_CALL: { { MCInst TmpInst; @@ -1315,7 +1310,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } return; } - case ARM::BMOVPCRXr9_CALL: case ARM::BMOVPCRX_CALL: { { MCInst TmpInst; @@ -1343,7 +1337,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } return; } - case ARM::BMOVPCBr9_CALL: case ARM::BMOVPCB_CALL: { { MCInst TmpInst; @@ -1371,7 +1364,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } return; } - case ARM::t2BMOVPCBr9_CALL: case ARM::t2BMOVPCB_CALL: { { MCInst TmpInst; @@ -1984,10 +1976,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } { MCInst TmpInst; - TmpInst.setOpcode(ARM::tLDRr); + TmpInst.setOpcode(ARM::tLDRi); TmpInst.addOperand(MCOperand::CreateReg(ARM::R7)); TmpInst.addOperand(MCOperand::CreateReg(SrcReg)); - TmpInst.addOperand(MCOperand::CreateReg(0)); + TmpInst.addOperand(MCOperand::CreateImm(0)); // Predicate. 
TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); TmpInst.addOperand(MCOperand::CreateReg(0)); diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h index 4b276c5..af3f75a 100644 --- a/lib/Target/ARM/ARMAsmPrinter.h +++ b/lib/Target/ARM/ARMAsmPrinter.h @@ -107,7 +107,7 @@ public: if (!Subtarget->isTargetDarwin()) return 0; return Subtarget->isThumb() ? - llvm::ARM::DW_ISA_ARM_thumb : llvm::ARM::DW_ISA_ARM_arm; + ARM::DW_ISA_ARM_thumb : ARM::DW_ISA_ARM_arm; } MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol); diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 366e2fa..c6280f8 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -13,10 +13,10 @@ #include "ARMBaseInstrInfo.h" #include "ARM.h" +#include "ARMBaseRegisterInfo.h" #include "ARMConstantPoolValue.h" #include "ARMHazardRecognizer.h" #include "ARMMachineFunctionInfo.h" -#include "ARMRegisterInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/Constants.h" #include "llvm/Function.h" @@ -680,29 +680,51 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } - // Generate instructions for VMOVQQ and VMOVQQQQ pseudos in place. - if (ARM::QQPRRegClass.contains(DestReg, SrcReg) || - ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) { + // Handle register classes that require multiple instructions. + unsigned BeginIdx = 0; + unsigned SubRegs = 0; + unsigned Spacing = 1; + + // Use VORRq when possible. + if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) + Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 2; + else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) + Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 4; + // Fall back to VMOVD. 
+ else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) + Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2; + else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) + Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3; + else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) + Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4; + + else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) + Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2, Spacing = 2; + else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) + Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3, Spacing = 2; + else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) + Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4, Spacing = 2; + + if (Opc) { const TargetRegisterInfo *TRI = &getRegisterInfo(); - assert(ARM::qsub_0 + 3 == ARM::qsub_3 && "Expected contiguous enum."); - unsigned EndSubReg = ARM::QQPRRegClass.contains(DestReg, SrcReg) ? - ARM::qsub_1 : ARM::qsub_3; - for (unsigned i = ARM::qsub_0, e = EndSubReg + 1; i != e; ++i) { - unsigned Dst = TRI->getSubReg(DestReg, i); - unsigned Src = TRI->getSubReg(SrcReg, i); - MachineInstrBuilder Mov = - AddDefaultPred(BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VORRq)) - .addReg(Dst, RegState::Define) - .addReg(Src, getKillRegState(KillSrc)) - .addReg(Src, getKillRegState(KillSrc))); - if (i == EndSubReg) { - Mov->addRegisterDefined(DestReg, TRI); - if (KillSrc) - Mov->addRegisterKilled(SrcReg, TRI); - } + MachineInstrBuilder Mov; + for (unsigned i = 0; i != SubRegs; ++i) { + unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i*Spacing); + unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i*Spacing); + assert(Dst && Src && "Bad sub-register"); + Mov = AddDefaultPred(BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst) + .addReg(Src)); + // VORR takes two source operands. + if (Opc == ARM::VORRq) + Mov.addReg(Src); } + // Add implicit super-register defs and kills to the last instruction. 
+ Mov->addRegisterDefined(DestReg, TRI); + if (KillSrc) + Mov->addRegisterKilled(SrcReg, TRI); return; } + llvm_unreachable("Impossible reg-to-reg copy"); } @@ -757,7 +779,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, llvm_unreachable("Unknown reg class!"); break; case 16: - if (ARM::QPRRegClass.hasSubClassEq(RC)) { + if (ARM::DPairRegClass.hasSubClassEq(RC)) { // Use aligned spills if the stack can be realigned. if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64)) @@ -907,7 +929,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, llvm_unreachable("Unknown reg class!"); break; case 16: - if (ARM::QPRRegClass.hasSubClassEq(RC)) { + if (ARM::DPairRegClass.hasSubClassEq(RC)) { if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg) .addFrameIndex(FI).addImm(16) @@ -1478,6 +1500,29 @@ int llvm::getMatchingCondBranchOpcode(int Opc) { llvm_unreachable("Unknown unconditional branch opcode!"); } +/// commuteInstruction - Handle commutable instructions. +MachineInstr * +ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { + switch (MI->getOpcode()) { + case ARM::MOVCCr: + case ARM::t2MOVCCr: { + // MOVCC can be commuted by inverting the condition. + unsigned PredReg = 0; + ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg); + // MOVCC AL can't be inverted. Shouldn't happen. + if (CC == ARMCC::AL || PredReg != ARM::CPSR) + return NULL; + MI = TargetInstrInfoImpl::commuteInstruction(MI, NewMI); + if (!MI) + return NULL; + // After swapping the MOVCC operands, also invert the condition. + MI->getOperand(MI->findFirstPredOperandIdx()) + .setImm(ARMCC::getOppositeCondition(CC)); + return MI; + } + } + return TargetInstrInfoImpl::commuteInstruction(MI, NewMI); +} /// Map pseudo instructions that imply an 'S' bit onto real opcodes. 
Whether the /// instruction is encoded with an 'S' bit is determined by the optional CPSR @@ -1916,6 +1961,25 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI, if (!MRI->hasOneNonDBGUse(Reg)) return false; + const MCInstrDesc &DefMCID = DefMI->getDesc(); + if (DefMCID.hasOptionalDef()) { + unsigned NumOps = DefMCID.getNumOperands(); + const MachineOperand &MO = DefMI->getOperand(NumOps-1); + if (MO.getReg() == ARM::CPSR && !MO.isDead()) + // If DefMI defines CPSR and it is not dead, it's obviously not safe + // to delete DefMI. + return false; + } + + const MCInstrDesc &UseMCID = UseMI->getDesc(); + if (UseMCID.hasOptionalDef()) { + unsigned NumOps = UseMCID.getNumOperands(); + if (UseMI->getOperand(NumOps-1).getReg() == ARM::CPSR) + // If the instruction sets the flag, do not attempt this optimization + // since it may change the semantics of the code. + return false; + } + unsigned UseOpc = UseMI->getOpcode(); unsigned NewUseOpc = 0; uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm(); diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 314e317..2fe8507 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -139,6 +139,8 @@ public: MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const; + MachineInstr *commuteInstruction(MachineInstr*, bool=false) const; + virtual bool produceSameValue(const MachineInstr *MI0, const MachineInstr *MI1, const MachineRegisterInfo *MRI) const; diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 291369f..3907f75 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -15,7 +15,6 @@ #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMFrameLowering.h" -#include "ARMInstrInfo.h" #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" diff --git a/lib/Target/ARM/ARMCallingConv.h 
b/lib/Target/ARM/ARMCallingConv.h index 2b9c55d..0bd1c3e 100644 --- a/lib/Target/ARM/ARMCallingConv.h +++ b/lib/Target/ARM/ARMCallingConv.h @@ -17,7 +17,6 @@ #include "ARM.h" #include "ARMBaseInstrInfo.h" -#include "ARMRegisterInfo.h" #include "ARMSubtarget.h" #include "llvm/CallingConv.h" #include "llvm/CodeGen/CallingConvLower.h" diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index d33364b..b9a2512 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -9,10 +9,6 @@ // This describes the calling conventions for ARM architecture. //===----------------------------------------------------------------------===// -/// CCIfSubtarget - Match if the current subtarget has a feature F. -class CCIfSubtarget<string F, CCAction A>: - CCIf<!strconcat("State.getTarget().getSubtarget<ARMSubtarget>().", F), A>; - /// CCIfAlign - Match of the original alignment of the arg class CCIfAlign<string Align, CCAction A>: CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>; diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index e48d07a..bc681be 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -15,7 +15,7 @@ #define DEBUG_TYPE "jit" #include "ARM.h" #include "ARMConstantPoolValue.h" -#include "ARMInstrInfo.h" +#include "ARMBaseInstrInfo.h" #include "ARMRelocations.h" #include "ARMSubtarget.h" #include "ARMTargetMachine.h" @@ -46,7 +46,7 @@ namespace { class ARMCodeEmitter : public MachineFunctionPass { ARMJITInfo *JTI; - const ARMInstrInfo *II; + const ARMBaseInstrInfo *II; const TargetData *TD; const ARMSubtarget *Subtarget; TargetMachine &TM; @@ -66,7 +66,7 @@ namespace { public: ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) : MachineFunctionPass(ID), JTI(0), - II((const ARMInstrInfo *)tm.getInstrInfo()), + II((const ARMBaseInstrInfo *)tm.getInstrInfo()), TD(tm.getTargetData()), TM(tm), MCE(mce), MCPEs(0), MJTEs(0), 
IsPIC(TM.getRelocationModel() == Reloc::PIC_), IsThumb(false) {} @@ -383,9 +383,9 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) { assert((MF.getTarget().getRelocationModel() != Reloc::Default || MF.getTarget().getRelocationModel() != Reloc::Static) && "JIT relocation model must be set to static or default!"); - JTI = ((ARMTargetMachine &)MF.getTarget()).getJITInfo(); - II = ((const ARMTargetMachine &)MF.getTarget()).getInstrInfo(); - TD = ((const ARMTargetMachine &)MF.getTarget()).getTargetData(); + JTI = ((ARMBaseTargetMachine &)MF.getTarget()).getJITInfo(); + II = (const ARMBaseInstrInfo *)MF.getTarget().getInstrInfo(); + TD = MF.getTarget().getTargetData(); Subtarget = &TM.getSubtarget<ARMSubtarget>(); MCPEs = &MF.getConstantPool()->getConstants(); MJTEs = 0; @@ -917,9 +917,7 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) { emitMiscBranchInstruction(MI); break; case ARM::BX_CALL: - case ARM::BMOVPCRX_CALL: - case ARM::BXr9_CALL: - case ARM::BMOVPCRXr9_CALL: { + case ARM::BMOVPCRX_CALL: { // First emit mov lr, pc unsigned Binary = 0x01a0e00f; Binary |= II->getPredicate(&MI) << ARMII::CondShift; diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 2cdfd1e..fc35c7c 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -16,12 +16,12 @@ #define DEBUG_TYPE "arm-cp-islands" #include "ARM.h" #include "ARMMachineFunctionInfo.h" -#include "ARMInstrInfo.h" #include "Thumb2InstrInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/Debug.h" @@ -209,8 +209,9 @@ namespace { } /// getMaxDisp - Returns the maximum displacement supported by MI. 
/// Correct for unknown alignment. + /// Conservatively subtract 2 bytes to handle weird alignment effects. unsigned getMaxDisp() const { - return KnownAlignment ? MaxDisp : MaxDisp - 2; + return (KnownAlignment ? MaxDisp : MaxDisp - 2) - 2; } }; @@ -266,7 +267,7 @@ namespace { MachineFunction *MF; MachineConstantPool *MCP; - const ARMInstrInfo *TII; + const ARMBaseInstrInfo *TII; const ARMSubtarget *STI; ARMFunctionInfo *AFI; bool isThumb; @@ -283,51 +284,52 @@ namespace { } private: - void DoInitialPlacement(std::vector<MachineInstr*> &CPEMIs); + void doInitialPlacement(std::vector<MachineInstr*> &CPEMIs); CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI); unsigned getCPELogAlign(const MachineInstr *CPEMI); - void JumpTableFunctionScan(); - void InitialFunctionScan(const std::vector<MachineInstr*> &CPEMIs); - MachineBasicBlock *SplitBlockBeforeInstr(MachineInstr *MI); - void UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB); - void AdjustBBOffsetsAfter(MachineBasicBlock *BB); - bool DecrementOldEntry(unsigned CPI, MachineInstr* CPEMI); - int LookForExistingCPEntry(CPUser& U, unsigned UserOffset); - bool LookForWater(CPUser&U, unsigned UserOffset, water_iterator &WaterIter); - void CreateNewWater(unsigned CPUserIndex, unsigned UserOffset, + void scanFunctionJumpTables(); + void initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs); + MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI); + void updateForInsertedWaterBlock(MachineBasicBlock *NewBB); + void adjustBBOffsetsAfter(MachineBasicBlock *BB); + bool decrementCPEReferenceCount(unsigned CPI, MachineInstr* CPEMI); + int findInRangeCPEntry(CPUser& U, unsigned UserOffset); + bool findAvailableWater(CPUser&U, unsigned UserOffset, + water_iterator &WaterIter); + void createNewWater(unsigned CPUserIndex, unsigned UserOffset, MachineBasicBlock *&NewMBB); - bool HandleConstantPoolUser(unsigned CPUserIndex); - void RemoveDeadCPEMI(MachineInstr *CPEMI); - bool 
RemoveUnusedCPEntries(); - bool CPEIsInRange(MachineInstr *MI, unsigned UserOffset, - MachineInstr *CPEMI, unsigned Disp, bool NegOk, - bool DoDump = false); - bool WaterIsInRange(unsigned UserOffset, MachineBasicBlock *Water, + bool handleConstantPoolUser(unsigned CPUserIndex); + void removeDeadCPEMI(MachineInstr *CPEMI); + bool removeUnusedCPEntries(); + bool isCPEntryInRange(MachineInstr *MI, unsigned UserOffset, + MachineInstr *CPEMI, unsigned Disp, bool NegOk, + bool DoDump = false); + bool isWaterInRange(unsigned UserOffset, MachineBasicBlock *Water, CPUser &U, unsigned &Growth); - bool BBIsInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp); - bool FixUpImmediateBr(ImmBranch &Br); - bool FixUpConditionalBr(ImmBranch &Br); - bool FixUpUnconditionalBr(ImmBranch &Br); - bool UndoLRSpillRestore(); + bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp); + bool fixupImmediateBr(ImmBranch &Br); + bool fixupConditionalBr(ImmBranch &Br); + bool fixupUnconditionalBr(ImmBranch &Br); + bool undoLRSpillRestore(); bool mayOptimizeThumb2Instruction(const MachineInstr *MI) const; - bool OptimizeThumb2Instructions(); - bool OptimizeThumb2Branches(); - bool ReorderThumb2JumpTables(); - bool OptimizeThumb2JumpTables(); - MachineBasicBlock *AdjustJTTargetBlockForward(MachineBasicBlock *BB, + bool optimizeThumb2Instructions(); + bool optimizeThumb2Branches(); + bool reorderThumb2JumpTables(); + bool optimizeThumb2JumpTables(); + MachineBasicBlock *adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB); - void ComputeBlockSize(MachineBasicBlock *MBB); - unsigned GetOffsetOf(MachineInstr *MI) const; - unsigned GetUserOffset(CPUser&) const; + void computeBlockSize(MachineBasicBlock *MBB); + unsigned getOffsetOf(MachineInstr *MI) const; + unsigned getUserOffset(CPUser&) const; void dumpBBs(); void verify(); - bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset, + bool isOffsetInRange(unsigned UserOffset, unsigned 
TrialOffset, unsigned Disp, bool NegativeOK, bool IsSoImm = false); - bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset, + bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset, const CPUser &U) { - return OffsetIsInRange(UserOffset, TrialOffset, + return isOffsetInRange(UserOffset, TrialOffset, U.getMaxDisp(), U.NegOk, U.IsSoImm); } }; @@ -345,11 +347,21 @@ void ARMConstantIslands::verify() { assert(BBInfo[MBBId].Offset % (1u << Align) == 0); assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset); } + DEBUG(dbgs() << "Verifying " << CPUsers.size() << " CP users.\n"); for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) { CPUser &U = CPUsers[i]; - unsigned UserOffset = GetUserOffset(U); - assert(CPEIsInRange(U.MI, UserOffset, U.CPEMI, U.getMaxDisp(), U.NegOk) && - "Constant pool entry out of range!"); + unsigned UserOffset = getUserOffset(U); + // Verify offset using the real max displacement without the safety + // adjustment. + if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, U.getMaxDisp()+2, U.NegOk, + /* DoDump = */ true)) { + DEBUG(dbgs() << "OK\n"); + continue; + } + DEBUG(dbgs() << "Out of range.\n"); + dumpBBs(); + DEBUG(MF->dump()); + llvm_unreachable("Constant pool entry out of range!"); } #endif } @@ -382,7 +394,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { << MCP->getConstants().size() << " CP entries, aligned to " << MCP->getConstantPoolAlignment() << " bytes *****\n"); - TII = (const ARMInstrInfo*)MF->getTarget().getInstrInfo(); + TII = (const ARMBaseInstrInfo*)MF->getTarget().getInstrInfo(); AFI = MF->getInfo<ARMFunctionInfo>(); STI = &MF->getTarget().getSubtarget<ARMSubtarget>(); @@ -392,6 +404,9 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { HasFarJump = false; + // This pass invalidates liveness information when it splits basic blocks. 
+ MF->getRegInfo().invalidateLiveness(); + // Renumber all of the machine basic blocks in the function, guaranteeing that // the numbers agree with the position of the block in the function. MF->RenumberBlocks(); @@ -400,8 +415,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { // of the TB[BH] instructions. bool MadeChange = false; if (isThumb2 && AdjustJumpTableBlocks) { - JumpTableFunctionScan(); - MadeChange |= ReorderThumb2JumpTables(); + scanFunctionJumpTables(); + MadeChange |= reorderThumb2JumpTables(); // Data is out of date, so clear it. It'll be re-computed later. T2JumpTables.clear(); // Blocks may have shifted around. Keep the numbering up to date. @@ -419,7 +434,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { // we put them all at the end of the function. std::vector<MachineInstr*> CPEMIs; if (!MCP->isEmpty()) - DoInitialPlacement(CPEMIs); + doInitialPlacement(CPEMIs); /// The next UID to take is the first unused one. AFI->initPICLabelUId(CPEMIs.size()); @@ -427,13 +442,13 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { // Do the initial scan of the function, building up information about the // sizes of each block, the location of all the water, and finding all of the // constant pool users. - InitialFunctionScan(CPEMIs); + initializeFunctionInfo(CPEMIs); CPEMIs.clear(); DEBUG(dumpBBs()); /// Remove dead constant pool entries. - MadeChange |= RemoveUnusedCPEntries(); + MadeChange |= removeUnusedCPEntries(); // Iteratively place constant pool entries and fix up branches until there // is no change. 
@@ -442,7 +457,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { DEBUG(dbgs() << "Beginning CP iteration #" << NoCPIters << '\n'); bool CPChange = false; for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) - CPChange |= HandleConstantPoolUser(i); + CPChange |= handleConstantPoolUser(i); if (CPChange && ++NoCPIters > 30) report_fatal_error("Constant Island pass failed to converge!"); DEBUG(dumpBBs()); @@ -454,7 +469,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { DEBUG(dbgs() << "Beginning BR iteration #" << NoBRIters << '\n'); bool BRChange = false; for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) - BRChange |= FixUpImmediateBr(ImmBranches[i]); + BRChange |= fixupImmediateBr(ImmBranches[i]); if (BRChange && ++NoBRIters > 30) report_fatal_error("Branch Fix Up pass failed to converge!"); DEBUG(dumpBBs()); @@ -466,7 +481,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { // Shrink 32-bit Thumb2 branch, load, and store instructions. if (isThumb2 && !STI->prefers32BitThumb()) - MadeChange |= OptimizeThumb2Instructions(); + MadeChange |= optimizeThumb2Instructions(); // After a while, this might be made debug-only, but it is not expensive. verify(); @@ -474,7 +489,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { // If LR has been forced spilled and no far jump (i.e. BL) has been issued, // undo the spill / restore of LR if possible. if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump()) - MadeChange |= UndoLRSpillRestore(); + MadeChange |= undoLRSpillRestore(); // Save the mapping between original and cloned constpool entries. for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) { @@ -497,10 +512,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { return MadeChange; } -/// DoInitialPlacement - Perform the initial placement of the constant pool +/// doInitialPlacement - Perform the initial placement of the constant pool /// entries. 
To start with, we put them all at the end of the function. void -ARMConstantIslands::DoInitialPlacement(std::vector<MachineInstr*> &CPEMIs) { +ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) { // Create the basic block to hold the CPE's. MachineBasicBlock *BB = MF->CreateMachineBasicBlock(); MF->push_back(BB); @@ -610,10 +625,10 @@ unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) { return Log2_32(Align); } -/// JumpTableFunctionScan - Do a scan of the function, building up +/// scanFunctionJumpTables - Do a scan of the function, building up /// information about the sizes of each block and the locations of all /// the jump tables. -void ARMConstantIslands::JumpTableFunctionScan() { +void ARMConstantIslands::scanFunctionJumpTables() { for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); MBBI != E; ++MBBI) { MachineBasicBlock &MBB = *MBBI; @@ -625,11 +640,11 @@ void ARMConstantIslands::JumpTableFunctionScan() { } } -/// InitialFunctionScan - Do the initial scan of the function, building up +/// initializeFunctionInfo - Do the initial scan of the function, building up /// information about the sizes of each block, the location of all the water, /// and finding all of the constant pool users. void ARMConstantIslands:: -InitialFunctionScan(const std::vector<MachineInstr*> &CPEMIs) { +initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { BBInfo.clear(); BBInfo.resize(MF->getNumBlockIDs()); @@ -638,14 +653,14 @@ InitialFunctionScan(const std::vector<MachineInstr*> &CPEMIs) { // alignment assumptions, as we don't know for sure the size of any // instructions in the inline assembly. for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) - ComputeBlockSize(I); + computeBlockSize(I); // The known bits of the entry block offset are determined by the function // alignment. BBInfo.front().KnownBits = MF->getAlignment(); // Compute block offsets and known bits. 
- AdjustBBOffsetsAfter(MF->begin()); + adjustBBOffsetsAfter(MF->begin()); // Now go back through the instructions and build up our data structures. for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); @@ -790,9 +805,9 @@ InitialFunctionScan(const std::vector<MachineInstr*> &CPEMIs) { } } -/// ComputeBlockSize - Compute the size and some alignment information for MBB. +/// computeBlockSize - Compute the size and some alignment information for MBB. /// This function updates BBInfo directly. -void ARMConstantIslands::ComputeBlockSize(MachineBasicBlock *MBB) { +void ARMConstantIslands::computeBlockSize(MachineBasicBlock *MBB) { BasicBlockInfo &BBI = BBInfo[MBB->getNumber()]; BBI.Size = 0; BBI.Unalign = 0; @@ -817,10 +832,10 @@ void ARMConstantIslands::ComputeBlockSize(MachineBasicBlock *MBB) { } } -/// GetOffsetOf - Return the current offset of the specified machine instruction +/// getOffsetOf - Return the current offset of the specified machine instruction /// from the start of the function. This offset changes as stuff is moved /// around inside the function. -unsigned ARMConstantIslands::GetOffsetOf(MachineInstr *MI) const { +unsigned ARMConstantIslands::getOffsetOf(MachineInstr *MI) const { MachineBasicBlock *MBB = MI->getParent(); // The offset is composed of two things: the sum of the sizes of all MBB's @@ -843,10 +858,10 @@ static bool CompareMBBNumbers(const MachineBasicBlock *LHS, return LHS->getNumber() < RHS->getNumber(); } -/// UpdateForInsertedWaterBlock - When a block is newly inserted into the +/// updateForInsertedWaterBlock - When a block is newly inserted into the /// machine function, it upsets all of the block numbers. Renumber the blocks /// and update the arrays that parallel this numbering. -void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) { +void ARMConstantIslands::updateForInsertedWaterBlock(MachineBasicBlock *NewBB) { // Renumber the MBB's to keep them consecutive. 
NewBB->getParent()->RenumberBlocks(NewBB); @@ -866,7 +881,7 @@ void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) { /// Split the basic block containing MI into two blocks, which are joined by /// an unconditional branch. Update data structures and renumber blocks to /// account for this change and returns the newly created block. -MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { +MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) { MachineBasicBlock *OrigBB = MI->getParent(); // Create a new MBB for the code after the OrigBB. @@ -897,7 +912,7 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { OrigBB->addSuccessor(NewBB); // Update internal data structures to account for the newly inserted MBB. - // This is almost the same as UpdateForInsertedWaterBlock, except that + // This is almost the same as updateForInsertedWaterBlock, except that // the Water goes after OrigBB, not NewBB. MF->RenumberBlocks(NewBB); @@ -924,23 +939,23 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { // the new jump we added. (It should be possible to do this without // recounting everything, but it's very confusing, and this is rarely // executed.) - ComputeBlockSize(OrigBB); + computeBlockSize(OrigBB); // Figure out how large the NewMBB is. As the second half of the original // block, it may contain a tablejump. - ComputeBlockSize(NewBB); + computeBlockSize(NewBB); // All BBOffsets following these blocks must be modified. - AdjustBBOffsetsAfter(OrigBB); + adjustBBOffsetsAfter(OrigBB); return NewBB; } -/// GetUserOffset - Compute the offset of U.MI as seen by the hardware +/// getUserOffset - Compute the offset of U.MI as seen by the hardware /// displacement computation. Update U.KnownAlignment to match its current /// basic block location. 
-unsigned ARMConstantIslands::GetUserOffset(CPUser &U) const { - unsigned UserOffset = GetOffsetOf(U.MI); +unsigned ARMConstantIslands::getUserOffset(CPUser &U) const { + unsigned UserOffset = getOffsetOf(U.MI); const BasicBlockInfo &BBI = BBInfo[U.MI->getParent()->getNumber()]; unsigned KnownBits = BBI.internalKnownBits(); @@ -960,13 +975,13 @@ unsigned ARMConstantIslands::GetUserOffset(CPUser &U) const { return UserOffset; } -/// OffsetIsInRange - Checks whether UserOffset (the location of a constant pool +/// isOffsetInRange - Checks whether UserOffset (the location of a constant pool /// reference) is within MaxDisp of TrialOffset (a proposed location of a /// constant pool entry). -/// UserOffset is computed by GetUserOffset above to include PC adjustments. If +/// UserOffset is computed by getUserOffset above to include PC adjustments. If /// the mod 4 alignment of UserOffset is not known, the uncertainty must be /// subtracted from MaxDisp instead. CPUser::getMaxDisp() does that. -bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset, +bool ARMConstantIslands::isOffsetInRange(unsigned UserOffset, unsigned TrialOffset, unsigned MaxDisp, bool NegativeOK, bool IsSoImm) { if (UserOffset <= TrialOffset) { @@ -982,11 +997,11 @@ bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset, return false; } -/// WaterIsInRange - Returns true if a CPE placed after the specified +/// isWaterInRange - Returns true if a CPE placed after the specified /// Water (a basic block) will be in range for the specific MI. /// /// Compute how much the function will grow by inserting a CPE after Water. 
-bool ARMConstantIslands::WaterIsInRange(unsigned UserOffset, +bool ARMConstantIslands::isWaterInRange(unsigned UserOffset, MachineBasicBlock* Water, CPUser &U, unsigned &Growth) { unsigned CPELogAlign = getCPELogAlign(U.CPEMI); @@ -1013,7 +1028,7 @@ bool ARMConstantIslands::WaterIsInRange(unsigned UserOffset, Growth += OffsetToAlignment(CPEEnd, 1u << NextBlockAlignment); // If the CPE is to be inserted before the instruction, that will raise - // the offset of the instruction. Also account for unknown alignment padding + // the offset of the instruction. Also account for unknown alignment padding // in blocks between CPE and the user. if (CPEOffset < UserOffset) UserOffset += Growth + UnknownPadding(MF->getAlignment(), CPELogAlign); @@ -1021,15 +1036,15 @@ bool ARMConstantIslands::WaterIsInRange(unsigned UserOffset, // CPE fits in existing padding. Growth = 0; - return OffsetIsInRange(UserOffset, CPEOffset, U); + return isOffsetInRange(UserOffset, CPEOffset, U); } -/// CPEIsInRange - Returns true if the distance between specific MI and +/// isCPEntryInRange - Returns true if the distance between specific MI and /// specific ConstPool entry instruction can fit in MI's displacement field. 
-bool ARMConstantIslands::CPEIsInRange(MachineInstr *MI, unsigned UserOffset, +bool ARMConstantIslands::isCPEntryInRange(MachineInstr *MI, unsigned UserOffset, MachineInstr *CPEMI, unsigned MaxDisp, bool NegOk, bool DoDump) { - unsigned CPEOffset = GetOffsetOf(CPEMI); + unsigned CPEOffset = getOffsetOf(CPEMI); assert(CPEOffset % 4 == 0 && "Misaligned CPE"); if (DoDump) { @@ -1046,7 +1061,7 @@ bool ARMConstantIslands::CPEIsInRange(MachineInstr *MI, unsigned UserOffset, }); } - return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, NegOk); + return isOffsetInRange(UserOffset, CPEOffset, MaxDisp, NegOk); } #ifndef NDEBUG @@ -1066,7 +1081,7 @@ static bool BBIsJumpedOver(MachineBasicBlock *MBB) { } #endif // NDEBUG -void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB) { +void ARMConstantIslands::adjustBBOffsetsAfter(MachineBasicBlock *BB) { unsigned BBNum = BB->getNumber(); for(unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) { // Get the offset and known bits at the end of the layout predecessor. @@ -1088,17 +1103,18 @@ void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB) { } } -/// DecrementOldEntry - find the constant pool entry with index CPI +/// decrementCPEReferenceCount - find the constant pool entry with index CPI /// and instruction CPEMI, and decrement its refcount. If the refcount /// becomes 0 remove the entry and instruction. Returns true if we removed /// the entry, false if we didn't. -bool ARMConstantIslands::DecrementOldEntry(unsigned CPI, MachineInstr *CPEMI) { +bool ARMConstantIslands::decrementCPEReferenceCount(unsigned CPI, + MachineInstr *CPEMI) { // Find the old entry. Eliminate it if it is no longer used. 
CPEntry *CPE = findConstPoolEntry(CPI, CPEMI); assert(CPE && "Unexpected!"); if (--CPE->RefCount == 0) { - RemoveDeadCPEMI(CPEMI); + removeDeadCPEMI(CPEMI); CPE->CPEMI = NULL; --NumCPEs; return true; @@ -1112,13 +1128,14 @@ bool ARMConstantIslands::DecrementOldEntry(unsigned CPI, MachineInstr *CPEMI) { /// 0 = no existing entry found /// 1 = entry found, and there were no code insertions or deletions /// 2 = entry found, and there were code insertions or deletions -int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset) +int ARMConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset) { MachineInstr *UserMI = U.MI; MachineInstr *CPEMI = U.CPEMI; // Check to see if the CPE is already in-range. - if (CPEIsInRange(UserMI, UserOffset, CPEMI, U.getMaxDisp(), U.NegOk, true)) { + if (isCPEntryInRange(UserMI, UserOffset, CPEMI, U.getMaxDisp(), U.NegOk, + true)) { DEBUG(dbgs() << "In range\n"); return 1; } @@ -1133,7 +1150,7 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset) // Removing CPEs can leave empty entries, skip if (CPEs[i].CPEMI == NULL) continue; - if (CPEIsInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getMaxDisp(), + if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getMaxDisp(), U.NegOk)) { DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#" << CPEs[i].CPI << "\n"); @@ -1149,7 +1166,7 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset) CPEs[i].RefCount++; // ...and the original. If we didn't remove the old entry, none of the // addresses changed, so we don't need another pass. - return DecrementOldEntry(CPI, CPEMI) ? 2 : 1; + return decrementCPEReferenceCount(CPI, CPEMI) ? 
2 : 1; } } return 0; @@ -1170,7 +1187,7 @@ static inline unsigned getUnconditionalBrDisp(int Opc) { return ((1<<23)-1)*4; } -/// LookForWater - Look for an existing entry in the WaterList in which +/// findAvailableWater - Look for an existing entry in the WaterList in which /// we can place the CPE referenced from U so it's within range of U's MI. /// Returns true if found, false if not. If it returns true, WaterIter /// is set to the WaterList entry. For Thumb, prefer water that will not @@ -1178,7 +1195,7 @@ static inline unsigned getUnconditionalBrDisp(int Opc) { /// terminates, the CPE location for a particular CPUser is only allowed to /// move to a lower address, so search backward from the end of the list and /// prefer the first water that is in range. -bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset, +bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset, water_iterator &WaterIter) { if (WaterList.empty()) return false; @@ -1196,7 +1213,7 @@ bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset, // sure to take advantage of it for all the CPEs near that block, so that // we don't insert more branches than necessary. unsigned Growth; - if (WaterIsInRange(UserOffset, WaterBB, U, Growth) && + if (isWaterInRange(UserOffset, WaterBB, U, Growth) && (WaterBB->getNumber() < U.HighWaterMark->getNumber() || NewWaterList.count(WaterBB)) && Growth < BestGrowth) { // This is the least amount of required padding seen so far. @@ -1215,14 +1232,14 @@ bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset, return BestGrowth != ~0u; } -/// CreateNewWater - No existing WaterList entry will work for +/// createNewWater - No existing WaterList entry will work for /// CPUsers[CPUserIndex], so create a place to put the CPE. The end of the /// block is used if in range, and the conditional branch munged so control /// flow is correct. 
Otherwise the block is split to create a hole with an /// unconditional branch around it. In either case NewMBB is set to a /// block following which the new island can be inserted (the WaterList /// is not adjusted). -void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, +void ARMConstantIslands::createNewWater(unsigned CPUserIndex, unsigned UserOffset, MachineBasicBlock *&NewMBB) { CPUser &U = CPUsers[CPUserIndex]; @@ -1245,7 +1262,7 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, unsigned CPEOffset = WorstCaseAlign(UserBlockEnd, CPELogAlign, UserBBI.postKnownBits()); - if (OffsetIsInRange(UserOffset, CPEOffset, U)) { + if (isOffsetInRange(UserOffset, CPEOffset, U)) { DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber() << format(", expected CPE offset %#x\n", CPEOffset)); NewMBB = llvm::next(MachineFunction::iterator(UserMBB)); @@ -1264,7 +1281,7 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, ImmBranches.push_back(ImmBranch(&UserMBB->back(), MaxDisp, false, UncondBr)); BBInfo[UserMBB->getNumber()].Size += Delta; - AdjustBBOffsetsAfter(UserMBB); + adjustBBOffsetsAfter(UserMBB); return; } } @@ -1298,7 +1315,7 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, // The 4 in the following is for the unconditional branch we'll be inserting // (allows for long branch on Thumb1). Alignment of the island is handled - // inside OffsetIsInRange. + // inside isOffsetInRange. BaseInsertOffset -= 4; DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset) @@ -1327,7 +1344,7 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, MI = llvm::next(MI)) { if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) { CPUser &U = CPUsers[CPUIndex]; - if (!OffsetIsInRange(Offset, EndInsertOffset, U)) { + if (!isOffsetInRange(Offset, EndInsertOffset, U)) { // Shift intertion point by one unit of alignment so it is within reach. 
BaseInsertOffset -= 1u << LogAlign; EndInsertOffset -= 1u << LogAlign; @@ -1352,29 +1369,29 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, // Avoid splitting an IT block. if (LastIT) { unsigned PredReg = 0; - ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg); + ARMCC::CondCodes CC = getITInstrPredicate(MI, PredReg); if (CC != ARMCC::AL) MI = LastIT; } - NewMBB = SplitBlockBeforeInstr(MI); + NewMBB = splitBlockBeforeInstr(MI); } -/// HandleConstantPoolUser - Analyze the specified user, checking to see if it +/// handleConstantPoolUser - Analyze the specified user, checking to see if it /// is out-of-range. If so, pick up the constant pool value and move it some /// place in-range. Return true if we changed any addresses (thus must run /// another pass of branch lengthening), false otherwise. -bool ARMConstantIslands::HandleConstantPoolUser(unsigned CPUserIndex) { +bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) { CPUser &U = CPUsers[CPUserIndex]; MachineInstr *UserMI = U.MI; MachineInstr *CPEMI = U.CPEMI; unsigned CPI = CPEMI->getOperand(1).getIndex(); unsigned Size = CPEMI->getOperand(2).getImm(); // Compute this only once, it's expensive. - unsigned UserOffset = GetUserOffset(U); + unsigned UserOffset = getUserOffset(U); // See if the current entry is within range, or there is a clone of it // in range. 
- int result = LookForExistingCPEntry(U, UserOffset); + int result = findInRangeCPEntry(U, UserOffset); if (result==1) return false; else if (result==2) return true; @@ -1386,7 +1403,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(unsigned CPUserIndex) { MachineBasicBlock *NewIsland = MF->CreateMachineBasicBlock(); MachineBasicBlock *NewMBB; water_iterator IP; - if (LookForWater(U, UserOffset, IP)) { + if (findAvailableWater(U, UserOffset, IP)) { DEBUG(dbgs() << "Found water in range\n"); MachineBasicBlock *WaterBB = *IP; @@ -1403,9 +1420,9 @@ bool ARMConstantIslands::HandleConstantPoolUser(unsigned CPUserIndex) { } else { // No water found. DEBUG(dbgs() << "No water found\n"); - CreateNewWater(CPUserIndex, UserOffset, NewMBB); + createNewWater(CPUserIndex, UserOffset, NewMBB); - // SplitBlockBeforeInstr adds to WaterList, which is important when it is + // splitBlockBeforeInstr adds to WaterList, which is important when it is // called while handling branches so that the water will be seen on the // next iteration for constant pools, but in this context, we don't want // it. Check for this so it will be removed from the WaterList. @@ -1430,10 +1447,10 @@ bool ARMConstantIslands::HandleConstantPoolUser(unsigned CPUserIndex) { MF->insert(NewMBB, NewIsland); // Update internal data structures to account for the newly inserted MBB. - UpdateForInsertedWaterBlock(NewIsland); + updateForInsertedWaterBlock(NewIsland); // Decrement the old entry, and remove it if refcount becomes 0. - DecrementOldEntry(CPI, CPEMI); + decrementCPEReferenceCount(CPI, CPEMI); // Now that we have an island to add the CPE to, clone the original CPE and // add it to the island. @@ -1448,7 +1465,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(unsigned CPUserIndex) { // Increase the size of the island block to account for the new entry. 
BBInfo[NewIsland->getNumber()].Size += Size; - AdjustBBOffsetsAfter(llvm::prior(MachineFunction::iterator(NewIsland))); + adjustBBOffsetsAfter(llvm::prior(MachineFunction::iterator(NewIsland))); // Finally, change the CPI in the instruction operand to be ID. for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i) @@ -1463,9 +1480,9 @@ bool ARMConstantIslands::HandleConstantPoolUser(unsigned CPUserIndex) { return true; } -/// RemoveDeadCPEMI - Remove a dead constant pool entry instruction. Update +/// removeDeadCPEMI - Remove a dead constant pool entry instruction. Update /// sizes and offsets of impacted basic blocks. -void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) { +void ARMConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) { MachineBasicBlock *CPEBB = CPEMI->getParent(); unsigned Size = CPEMI->getOperand(2).getImm(); CPEMI->eraseFromParent(); @@ -1480,7 +1497,7 @@ void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) { // Entries are sorted by descending alignment, so realign from the front. CPEBB->setAlignment(getCPELogAlign(CPEBB->begin())); - AdjustBBOffsetsAfter(CPEBB); + adjustBBOffsetsAfter(CPEBB); // An island has only one predecessor BB and one successor BB. Check if // this BB's predecessor jumps directly to this BB's successor. This // shouldn't happen currently. @@ -1488,15 +1505,15 @@ void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) { // FIXME: remove the empty blocks after all the work is done? } -/// RemoveUnusedCPEntries - Remove constant pool entries whose refcounts +/// removeUnusedCPEntries - Remove constant pool entries whose refcounts /// are zero. 
-bool ARMConstantIslands::RemoveUnusedCPEntries() { +bool ARMConstantIslands::removeUnusedCPEntries() { unsigned MadeChange = false; for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) { std::vector<CPEntry> &CPEs = CPEntries[i]; for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) { if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) { - RemoveDeadCPEMI(CPEs[j].CPEMI); + removeDeadCPEMI(CPEs[j].CPEMI); CPEs[j].CPEMI = NULL; MadeChange = true; } @@ -1505,18 +1522,18 @@ bool ARMConstantIslands::RemoveUnusedCPEntries() { return MadeChange; } -/// BBIsInRange - Returns true if the distance between specific MI and +/// isBBInRange - Returns true if the distance between specific MI and /// specific BB can fit in MI's displacement field. -bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB, +bool ARMConstantIslands::isBBInRange(MachineInstr *MI,MachineBasicBlock *DestBB, unsigned MaxDisp) { unsigned PCAdj = isThumb ? 4 : 8; - unsigned BrOffset = GetOffsetOf(MI) + PCAdj; + unsigned BrOffset = getOffsetOf(MI) + PCAdj; unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset; DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber() << " from BB#" << MI->getParent()->getNumber() << " max delta=" << MaxDisp - << " from " << GetOffsetOf(MI) << " to " << DestOffset + << " from " << getOffsetOf(MI) << " to " << DestOffset << " offset " << int(DestOffset-BrOffset) << "\t" << *MI); if (BrOffset <= DestOffset) { @@ -1530,37 +1547,37 @@ bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB, return false; } -/// FixUpImmediateBr - Fix up an immediate branch whose destination is too far +/// fixupImmediateBr - Fix up an immediate branch whose destination is too far /// away to fit in its displacement field. 
-bool ARMConstantIslands::FixUpImmediateBr(ImmBranch &Br) { +bool ARMConstantIslands::fixupImmediateBr(ImmBranch &Br) { MachineInstr *MI = Br.MI; MachineBasicBlock *DestBB = MI->getOperand(0).getMBB(); // Check to see if the DestBB is already in-range. - if (BBIsInRange(MI, DestBB, Br.MaxDisp)) + if (isBBInRange(MI, DestBB, Br.MaxDisp)) return false; if (!Br.isCond) - return FixUpUnconditionalBr(Br); - return FixUpConditionalBr(Br); + return fixupUnconditionalBr(Br); + return fixupConditionalBr(Br); } -/// FixUpUnconditionalBr - Fix up an unconditional branch whose destination is +/// fixupUnconditionalBr - Fix up an unconditional branch whose destination is /// too far away to fit in its displacement field. If the LR register has been /// spilled in the epilogue, then we can use BL to implement a far jump. /// Otherwise, add an intermediate branch instruction to a branch. bool -ARMConstantIslands::FixUpUnconditionalBr(ImmBranch &Br) { +ARMConstantIslands::fixupUnconditionalBr(ImmBranch &Br) { MachineInstr *MI = Br.MI; MachineBasicBlock *MBB = MI->getParent(); if (!isThumb1) - llvm_unreachable("FixUpUnconditionalBr is Thumb1 only!"); + llvm_unreachable("fixupUnconditionalBr is Thumb1 only!"); // Use BL to implement far jump. Br.MaxDisp = (1 << 21) * 2; MI->setDesc(TII->get(ARM::tBfar)); BBInfo[MBB->getNumber()].Size += 2; - AdjustBBOffsetsAfter(MBB); + adjustBBOffsetsAfter(MBB); HasFarJump = true; ++NumUBrFixed; @@ -1569,11 +1586,11 @@ ARMConstantIslands::FixUpUnconditionalBr(ImmBranch &Br) { return true; } -/// FixUpConditionalBr - Fix up a conditional branch whose destination is too +/// fixupConditionalBr - Fix up a conditional branch whose destination is too /// far away to fit in its displacement field. It is converted to an inverse /// conditional branch + an unconditional branch to the destination. 
bool -ARMConstantIslands::FixUpConditionalBr(ImmBranch &Br) { +ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) { MachineInstr *MI = Br.MI; MachineBasicBlock *DestBB = MI->getOperand(0).getMBB(); @@ -1607,7 +1624,7 @@ ARMConstantIslands::FixUpConditionalBr(ImmBranch &Br) { // bne L2 // b L1 MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB(); - if (BBIsInRange(MI, NewDest, Br.MaxDisp)) { + if (isBBInRange(MI, NewDest, Br.MaxDisp)) { DEBUG(dbgs() << " Invert Bcc condition and swap its destination with " << *BMI); BMI->getOperand(0).setMBB(DestBB); @@ -1619,7 +1636,7 @@ ARMConstantIslands::FixUpConditionalBr(ImmBranch &Br) { } if (NeedSplit) { - SplitBlockBeforeInstr(MI); + splitBlockBeforeInstr(MI); // No need for the branch to the next block. We're adding an unconditional // branch to the destination. int delta = TII->GetInstSizeInBytes(&MBB->back()); @@ -1651,14 +1668,14 @@ ARMConstantIslands::FixUpConditionalBr(ImmBranch &Br) { // Remove the old conditional branch. It may or may not still be in MBB. BBInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(MI); MI->eraseFromParent(); - AdjustBBOffsetsAfter(MBB); + adjustBBOffsetsAfter(MBB); return true; } -/// UndoLRSpillRestore - Remove Thumb push / pop instructions that only spills +/// undoLRSpillRestore - Remove Thumb push / pop instructions that only spills /// LR / restores LR to pc. FIXME: This is done here because it's only possible /// to do this if tBfar is not used. -bool ARMConstantIslands::UndoLRSpillRestore() { +bool ARMConstantIslands::undoLRSpillRestore() { bool MadeChange = false; for (unsigned i = 0, e = PushPopMIs.size(); i != e; ++i) { MachineInstr *MI = PushPopMIs[i]; @@ -1677,26 +1694,26 @@ bool ARMConstantIslands::UndoLRSpillRestore() { return MadeChange; } -// mayOptimizeThumb2Instruction - Returns true if OptimizeThumb2Instructions +// mayOptimizeThumb2Instruction - Returns true if optimizeThumb2Instructions // below may shrink MI. 
bool ARMConstantIslands::mayOptimizeThumb2Instruction(const MachineInstr *MI) const { switch(MI->getOpcode()) { - // OptimizeThumb2Instructions. + // optimizeThumb2Instructions. case ARM::t2LEApcrel: case ARM::t2LDRpci: - // OptimizeThumb2Branches. + // optimizeThumb2Branches. case ARM::t2B: case ARM::t2Bcc: case ARM::tBcc: - // OptimizeThumb2JumpTables. + // optimizeThumb2JumpTables. case ARM::t2BR_JT: return true; } return false; } -bool ARMConstantIslands::OptimizeThumb2Instructions() { +bool ARMConstantIslands::optimizeThumb2Instructions() { bool MadeChange = false; // Shrink ADR and LDR from constantpool. @@ -1727,7 +1744,7 @@ bool ARMConstantIslands::OptimizeThumb2Instructions() { if (!NewOpc) continue; - unsigned UserOffset = GetUserOffset(U); + unsigned UserOffset = getUserOffset(U); unsigned MaxOffs = ((1 << Bits) - 1) * Scale; // Be conservative with inline asm. @@ -1735,22 +1752,23 @@ bool ARMConstantIslands::OptimizeThumb2Instructions() { MaxOffs -= 2; // FIXME: Check if offset is multiple of scale if scale is not 4. 
- if (CPEIsInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) { + if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) { + DEBUG(dbgs() << "Shrink: " << *U.MI); U.MI->setDesc(TII->get(NewOpc)); MachineBasicBlock *MBB = U.MI->getParent(); BBInfo[MBB->getNumber()].Size -= 2; - AdjustBBOffsetsAfter(MBB); + adjustBBOffsetsAfter(MBB); ++NumT2CPShrunk; MadeChange = true; } } - MadeChange |= OptimizeThumb2Branches(); - MadeChange |= OptimizeThumb2JumpTables(); + MadeChange |= optimizeThumb2Branches(); + MadeChange |= optimizeThumb2JumpTables(); return MadeChange; } -bool ARMConstantIslands::OptimizeThumb2Branches() { +bool ARMConstantIslands::optimizeThumb2Branches() { bool MadeChange = false; for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) { @@ -1776,11 +1794,12 @@ bool ARMConstantIslands::OptimizeThumb2Branches() { if (NewOpc) { unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale; MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB(); - if (BBIsInRange(Br.MI, DestBB, MaxOffs)) { + if (isBBInRange(Br.MI, DestBB, MaxOffs)) { + DEBUG(dbgs() << "Shrink branch: " << *Br.MI); Br.MI->setDesc(TII->get(NewOpc)); MachineBasicBlock *MBB = Br.MI->getParent(); BBInfo[MBB->getNumber()].Size -= 2; - AdjustBBOffsetsAfter(MBB); + adjustBBOffsetsAfter(MBB); ++NumT2BrShrunk; MadeChange = true; } @@ -1797,7 +1816,7 @@ bool ARMConstantIslands::OptimizeThumb2Branches() { NewOpc = 0; unsigned PredReg = 0; - ARMCC::CondCodes Pred = llvm::getInstrPredicate(Br.MI, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(Br.MI, PredReg); if (Pred == ARMCC::EQ) NewOpc = ARM::tCBZ; else if (Pred == ARMCC::NE) @@ -1807,7 +1826,7 @@ bool ARMConstantIslands::OptimizeThumb2Branches() { MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB(); // Check if the distance is within 126. Subtract starting offset by 2 // because the cmp will be eliminated. 
- unsigned BrOffset = GetOffsetOf(Br.MI) + 4 - 2; + unsigned BrOffset = getOffsetOf(Br.MI) + 4 - 2; unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset; if (BrOffset < DestOffset && (DestOffset - BrOffset) <= 126) { MachineBasicBlock::iterator CmpMI = Br.MI; @@ -1815,11 +1834,12 @@ bool ARMConstantIslands::OptimizeThumb2Branches() { --CmpMI; if (CmpMI->getOpcode() == ARM::tCMPi8) { unsigned Reg = CmpMI->getOperand(0).getReg(); - Pred = llvm::getInstrPredicate(CmpMI, PredReg); + Pred = getInstrPredicate(CmpMI, PredReg); if (Pred == ARMCC::AL && CmpMI->getOperand(1).getImm() == 0 && isARMLowRegister(Reg)) { MachineBasicBlock *MBB = Br.MI->getParent(); + DEBUG(dbgs() << "Fold: " << *CmpMI << " and: " << *Br.MI); MachineInstr *NewBR = BuildMI(*MBB, CmpMI, Br.MI->getDebugLoc(), TII->get(NewOpc)) .addReg(Reg).addMBB(DestBB,Br.MI->getOperand(0).getTargetFlags()); @@ -1827,7 +1847,7 @@ bool ARMConstantIslands::OptimizeThumb2Branches() { Br.MI->eraseFromParent(); Br.MI = NewBR; BBInfo[MBB->getNumber()].Size -= 2; - AdjustBBOffsetsAfter(MBB); + adjustBBOffsetsAfter(MBB); ++NumCBZ; MadeChange = true; } @@ -1839,9 +1859,9 @@ bool ARMConstantIslands::OptimizeThumb2Branches() { return MadeChange; } -/// OptimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller +/// optimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller /// jumptables when it's possible. 
-bool ARMConstantIslands::OptimizeThumb2JumpTables() { +bool ARMConstantIslands::optimizeThumb2JumpTables() { bool MadeChange = false; // FIXME: After the tables are shrunk, can we get rid some of the @@ -1861,7 +1881,7 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables() { bool ByteOk = true; bool HalfWordOk = true; - unsigned JTOffset = GetOffsetOf(MI) + 4; + unsigned JTOffset = getOffsetOf(MI) + 4; const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) { MachineBasicBlock *MBB = JTBBs[j]; @@ -1936,11 +1956,14 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables() { if (!OptOk) continue; + DEBUG(dbgs() << "Shrink JT: " << *MI << " addr: " << *AddrMI + << " lea: " << *LeaMI); unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT; MachineInstr *NewJTMI = BuildMI(MBB, MI->getDebugLoc(), TII->get(Opc)) .addReg(IdxReg, getKillRegState(IdxRegKill)) .addJumpTableIndex(JTI, JTOP.getTargetFlags()) .addImm(MI->getOperand(JTOpIdx+1).getImm()); + DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": " << *NewJTMI); // FIXME: Insert an "ALIGN" instruction to ensure the next instruction // is 2-byte aligned. For now, asm printer will fix it up. unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI); @@ -1954,7 +1977,7 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables() { int delta = OrigSize - NewSize; BBInfo[MBB->getNumber()].Size -= delta; - AdjustBBOffsetsAfter(MBB); + adjustBBOffsetsAfter(MBB); ++NumTBs; MadeChange = true; @@ -1964,9 +1987,9 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables() { return MadeChange; } -/// ReorderThumb2JumpTables - Adjust the function's block layout to ensure that +/// reorderThumb2JumpTables - Adjust the function's block layout to ensure that /// jump tables always branch forwards, since that's what tbb and tbh need. 
-bool ARMConstantIslands::ReorderThumb2JumpTables() { +bool ARMConstantIslands::reorderThumb2JumpTables() { bool MadeChange = false; MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); @@ -1995,7 +2018,7 @@ bool ARMConstantIslands::ReorderThumb2JumpTables() { // The destination precedes the switch. Try to move the block forward // so we have a positive offset. MachineBasicBlock *NewBB = - AdjustJTTargetBlockForward(MBB, MI->getParent()); + adjustJTTargetBlockForward(MBB, MI->getParent()); if (NewBB) MJTI->ReplaceMBBInJumpTable(JTI, JTBBs[j], NewBB); MadeChange = true; @@ -2007,8 +2030,7 @@ bool ARMConstantIslands::ReorderThumb2JumpTables() { } MachineBasicBlock *ARMConstantIslands:: -AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) -{ +adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) { // If the destination block is terminated by an unconditional branch, // try to move it; otherwise, create a new block following the jump // table that branches back to the actual target. 
This is a very simple diff --git a/lib/Target/ARM/ARMELFWriterInfo.h b/lib/Target/ARM/ARMELFWriterInfo.h index 1c4e532..6a84f8a 100644 --- a/lib/Target/ARM/ARMELFWriterInfo.h +++ b/lib/Target/ARM/ARMELFWriterInfo.h @@ -17,6 +17,7 @@ #include "llvm/Target/TargetELFWriterInfo.h" namespace llvm { + class TargetMachine; class ARMELFWriterInfo : public TargetELFWriterInfo { public: diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index c2b7816..5fc0360 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -19,7 +19,6 @@ #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMMachineFunctionInfo.h" -#include "ARMRegisterInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -613,7 +612,7 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, MachineInstr &MI = *MBBI; unsigned Opcode = MI.getOpcode(); unsigned PredReg = 0; - ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg); unsigned DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm; @@ -794,15 +793,15 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, "base pointer without frame pointer?"); if (AFI->isThumb2Function()) { - llvm::emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, - FramePtr, -NumBytes, ARMCC::AL, 0, *TII); + emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, + FramePtr, -NumBytes, ARMCC::AL, 0, *TII); } else if (AFI->isThumbFunction()) { - llvm::emitThumbRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, - FramePtr, -NumBytes, *TII, RI); + emitThumbRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, + FramePtr, -NumBytes, *TII, RI); } else { - llvm::emitARMRegPlusImmediate(MBB, MBBI, 
MI.getDebugLoc(), ARM::R6, - FramePtr, -NumBytes, ARMCC::AL, 0, - *TII); + emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, + FramePtr, -NumBytes, ARMCC::AL, 0, + *TII); } // If there's dynamic realignment, adjust for it. if (RI.needsStackRealignment(MF)) { diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index a24eab4..2e1eaca 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -16,7 +16,6 @@ #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMCallingConv.h" -#include "ARMRegisterInfo.h" #include "ARMTargetMachine.h" #include "ARMSubtarget.h" #include "ARMConstantPoolValue.h" @@ -2112,13 +2111,10 @@ bool ARMFastISel::SelectRet(const Instruction *I) { } unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) { - - // iOS needs the r9 versions of the opcodes. - bool isiOS = Subtarget->isTargetIOS(); if (isThumb2) { - return isiOS ? ARM::tBLr9 : ARM::tBL; + return ARM::tBL; } else { - return isiOS ? ARM::BLr9 : ARM::BL; + return ARM::BL; } } @@ -2177,8 +2173,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes)) return false; - // Issue the call, BLr9 for iOS, BL otherwise. - // TODO: Turn this into the table of arm call ops. + // Issue the call. MachineInstrBuilder MIB; unsigned CallOpc = ARMSelectCallOp(NULL); if (isThumb2) @@ -2303,8 +2298,7 @@ bool ARMFastISel::SelectCall(const Instruction *I, if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes)) return false; - // Issue the call, BLr9 for iOS, BL otherwise. - // TODO: Turn this into the table of arm call ops. + // Issue the call. MachineInstrBuilder MIB; unsigned CallOpc = ARMSelectCallOp(GV); // Explicitly adding the predicate here. 
@@ -2350,7 +2344,8 @@ bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) { return Len <= 16; } -bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len) { +bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src, + uint64_t Len) { // Make sure we don't bloat code by inlining very large memcpy's. if (!ARMIsMemCpySmall(Len)) return false; @@ -2639,7 +2634,7 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, } namespace llvm { - llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) { + FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) { // Completely untested on non-iOS. const TargetMachine &TM = funcInfo.MF->getTarget(); diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index bd4b2a9..402ecb0 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -422,17 +422,16 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, if (AFI->getGPRCalleeSavedArea1Size()) MBBI++; } - if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND || - RetOpcode == ARM::TCRETURNri || RetOpcode == ARM::TCRETURNriND) { + if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri) { // Tail call return: adjust the stack pointer and jump to callee. MBBI = MBB.getLastNonDebugInstr(); MachineOperand &JumpTarget = MBBI->getOperand(0); // Jump to label or value in register. - if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND) { - unsigned TCOpcode = (RetOpcode == ARM::TCRETURNdi) - ? (STI.isThumb() ? ARM::tTAILJMPd : ARM::TAILJMPd) - : (STI.isThumb() ? ARM::tTAILJMPdND : ARM::TAILJMPdND); + if (RetOpcode == ARM::TCRETURNdi) { + unsigned TCOpcode = STI.isThumb() ? + (STI.isTargetIOS() ? 
ARM::tTAILJMPd : ARM::tTAILJMPdND) : + ARM::TAILJMPd; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode)); if (JumpTarget.isGlobal()) MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), @@ -449,10 +448,6 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, BuildMI(MBB, MBBI, dl, TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)). addReg(JumpTarget.getReg(), RegState::Kill); - } else if (RetOpcode == ARM::TCRETURNriND) { - BuildMI(MBB, MBBI, dl, - TII.get(STI.isThumb() ? ARM::tTAILJMPrND : ARM::TAILJMPrND)). - addReg(JumpTarget.getReg(), RegState::Kill); } MachineInstr *NewMI = prior(MBBI); @@ -648,9 +643,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, DebugLoc DL = MI->getDebugLoc(); unsigned RetOpcode = MI->getOpcode(); bool isTailCall = (RetOpcode == ARM::TCRETURNdi || - RetOpcode == ARM::TCRETURNdiND || - RetOpcode == ARM::TCRETURNri || - RetOpcode == ARM::TCRETURNriND); + RetOpcode == ARM::TCRETURNri); SmallVector<unsigned, 4> Regs; unsigned i = CSI.size(); diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index ffb9acb..1eafbbc 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -2825,7 +2825,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case MVT::v8i8: Opc = ARM::VZIPd8; break; case MVT::v4i16: Opc = ARM::VZIPd16; break; case MVT::v2f32: - case MVT::v2i32: Opc = ARM::VZIPd32; break; + // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. + case MVT::v2i32: Opc = ARM::VTRNd32; break; case MVT::v16i8: Opc = ARM::VZIPq8; break; case MVT::v8i16: Opc = ARM::VZIPq16; break; case MVT::v4f32: @@ -2844,7 +2845,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case MVT::v8i8: Opc = ARM::VUZPd8; break; case MVT::v4i16: Opc = ARM::VUZPd16; break; case MVT::v2f32: - case MVT::v2i32: Opc = ARM::VUZPd32; break; + // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 
+ case MVT::v2i32: Opc = ARM::VTRNd32; break; case MVT::v16i8: Opc = ARM::VUZPq8; break; case MVT::v8i16: Opc = ARM::VUZPq16; break; case MVT::v4f32: diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index e26dd22..a103c94 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -19,7 +19,6 @@ #include "ARMConstantPoolValue.h" #include "ARMMachineFunctionInfo.h" #include "ARMPerfectShuffle.h" -#include "ARMRegisterInfo.h" #include "ARMSubtarget.h" #include "ARMTargetMachine.h" #include "ARMTargetObjectFile.h" @@ -508,7 +507,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FRINT, MVT::v2f64, Expand); setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand); setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand); - + setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); setOperationAction(ISD::FSIN, MVT::v4f32, Expand); setOperationAction(ISD::FCOS, MVT::v4f32, Expand); @@ -770,8 +769,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FPOW, MVT::f64, Expand); setOperationAction(ISD::FPOW, MVT::f32, Expand); - setOperationAction(ISD::FMA, MVT::f64, Expand); - setOperationAction(ISD::FMA, MVT::f32, Expand); + if (!Subtarget->hasVFP4()) { + setOperationAction(ISD::FMA, MVT::f64, Expand); + setOperationAction(ISD::FMA, MVT::f32, Expand); + } // Various VFP goodness if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) { @@ -1642,7 +1643,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, /// and then confiscate the rest of the parameter registers to insure /// this. 
void -llvm::ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const { +ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const { unsigned reg = State->AllocateReg(GPRArgRegs, 4); assert((State->getCallOrPrologue() == Prologue || State->getCallOrPrologue() == Call) && @@ -1672,7 +1673,7 @@ llvm::ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const { static bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, MachineFrameInfo *MFI, const MachineRegisterInfo *MRI, - const ARMInstrInfo *TII) { + const TargetInstrInfo *TII) { unsigned Bytes = Arg.getValueType().getSizeInBits() / 8; int FI = INT_MAX; if (Arg.getOpcode() == ISD::CopyFromReg) { @@ -1807,8 +1808,7 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // the caller's fixed stack objects. MachineFrameInfo *MFI = MF.getFrameInfo(); const MachineRegisterInfo *MRI = &MF.getRegInfo(); - const ARMInstrInfo *TII = - ((ARMTargetMachine&)getTargetMachine()).getInstrInfo(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e; ++i, ++realArgIdx) { @@ -1936,63 +1936,72 @@ ARMTargetLowering::LowerReturn(SDValue Chain, return result; } -bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const { +bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { if (N->getNumValues() != 1) return false; if (!N->hasNUsesOfValue(1, 0)) return false; - unsigned NumCopies = 0; - SDNode* Copies[2] = { 0, 0 }; - SDNode *Use = *N->use_begin(); - if (Use->getOpcode() == ISD::CopyToReg) { - Copies[NumCopies++] = Use; - } else if (Use->getOpcode() == ARMISD::VMOVRRD) { + SDValue TCChain = Chain; + SDNode *Copy = *N->use_begin(); + if (Copy->getOpcode() == ISD::CopyToReg) { + // If the copy has a glue operand, we conservatively assume it isn't safe to + // perform a tail call. 
+ if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) + return false; + TCChain = Copy->getOperand(0); + } else if (Copy->getOpcode() == ARMISD::VMOVRRD) { + SDNode *VMov = Copy; // f64 returned in a pair of GPRs. - for (SDNode::use_iterator UI = Use->use_begin(), UE = Use->use_end(); + SmallPtrSet<SDNode*, 2> Copies; + for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end(); UI != UE; ++UI) { if (UI->getOpcode() != ISD::CopyToReg) return false; - Copies[UI.getUse().getResNo()] = *UI; - ++NumCopies; + Copies.insert(*UI); } - } else if (Use->getOpcode() == ISD::BITCAST) { + if (Copies.size() > 2) + return false; + + for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end(); + UI != UE; ++UI) { + SDValue UseChain = UI->getOperand(0); + if (Copies.count(UseChain.getNode())) + // Second CopyToReg + Copy = *UI; + else + // First CopyToReg + TCChain = UseChain; + } + } else if (Copy->getOpcode() == ISD::BITCAST) { // f32 returned in a single GPR. 
- if (!Use->hasNUsesOfValue(1, 0)) + if (!Copy->hasOneUse()) return false; - Use = *Use->use_begin(); - if (Use->getOpcode() != ISD::CopyToReg || !Use->hasNUsesOfValue(1, 0)) + Copy = *Copy->use_begin(); + if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0)) return false; - Copies[NumCopies++] = Use; + Chain = Copy->getOperand(0); } else { return false; } - if (NumCopies != 1 && NumCopies != 2) - return false; - bool HasRet = false; - for (unsigned i = 0; i < NumCopies; ++i) { - SDNode *Copy = Copies[i]; - for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); - UI != UE; ++UI) { - if (UI->getOpcode() == ISD::CopyToReg) { - SDNode *Use = *UI; - if (Use == Copies[0] || ((NumCopies == 2) && (Use == Copies[1]))) - continue; - return false; - } - if (UI->getOpcode() != ARMISD::RET_FLAG) - return false; - HasRet = true; - } + for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); + UI != UE; ++UI) { + if (UI->getOpcode() != ARMISD::RET_FLAG) + return false; + HasRet = true; } - return HasRet; + if (!HasRet) + return false; + + Chain = TCChain; + return true; } bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { - if (!EnableARMTailCalls) + if (!EnableARMTailCalls && !Subtarget->supportsTailCall()) return false; if (!CI->isTailCall()) @@ -3674,27 +3683,6 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { return Result; } -SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, - const ARMSubtarget *ST) const { - if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16()) - return SDValue(); - - ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op); - assert(Op.getValueType() == MVT::f32 && - "ConstantFP custom lowering should only occur for f32."); - - APFloat FPVal = CFP->getValueAPF(); - int ImmVal = ARM_AM::getFP32Imm(FPVal); - if (ImmVal == -1) - return SDValue(); - - DebugLoc DL = Op.getDebugLoc(); - SDValue NewVal = DAG.getTargetConstant(ImmVal, 
MVT::i32); - SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32, NewVal); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant, - DAG.getConstant(0, MVT::i32)); -} - /// isNEONModifiedImm - Check if the specified splat value corresponds to a /// valid vector constant for a NEON instruction with a "modified immediate" /// operand (e.g., VMOV). If so, return the encoded value. @@ -3831,6 +3819,58 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, return DAG.getTargetConstant(EncodedVal, MVT::i32); } +SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) const { + if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16()) + return SDValue(); + + ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op); + assert(Op.getValueType() == MVT::f32 && + "ConstantFP custom lowering should only occur for f32."); + + // Try splatting with a VMOV.f32... + APFloat FPVal = CFP->getValueAPF(); + int ImmVal = ARM_AM::getFP32Imm(FPVal); + if (ImmVal != -1) { + DebugLoc DL = Op.getDebugLoc(); + SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32); + SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32, + NewVal); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant, + DAG.getConstant(0, MVT::i32)); + } + + // If that fails, try a VMOV.i32 + EVT VMovVT; + unsigned iVal = FPVal.bitcastToAPInt().getZExtValue(); + SDValue NewVal = isNEONModifiedImm(iVal, 0, 32, DAG, VMovVT, false, + VMOVModImm); + if (NewVal != SDValue()) { + DebugLoc DL = Op.getDebugLoc(); + SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT, + NewVal); + SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, + VecConstant); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant, + DAG.getConstant(0, MVT::i32)); + } + + // Finally, try a VMVN.i32 + NewVal = isNEONModifiedImm(~iVal & 0xffffffff, 0, 32, DAG, VMovVT, false, + 
VMVNModImm); + if (NewVal != SDValue()) { + DebugLoc DL = Op.getDebugLoc(); + SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal); + SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, + VecConstant); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant, + DAG.getConstant(0, MVT::i32)); + } + + return SDValue(); +} + + static bool isVEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseVEXT, unsigned &Imm) { unsigned NumElts = VT.getVectorNumElements(); @@ -5795,7 +5835,8 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad; unsigned MaxCSNum = 0; MachineModuleInfo &MMI = MF->getMMI(); - for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E; ++BB) { + for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E; + ++BB) { if (!BB->isLandingPad()) continue; // FIXME: We should assert that the EH_LABEL is the first MI in the landing @@ -5871,7 +5912,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { BuildMI(DispatchBB, dl, TII->get(ARM::tInt_eh_sjlj_dispatchsetup)); else if (!Subtarget->hasVFP2()) BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup_nofp)); - else + else BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup)); unsigned NumLPads = LPadList.size(); @@ -7308,15 +7349,99 @@ static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) { /// ISD::STORE. static SDValue PerformSTORECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { - // Bitcast an i64 store extracted from a vector to f64. - // Otherwise, the i64 value will be legalized to a pair of i32 values. StoreSDNode *St = cast<StoreSDNode>(N); + if (St->isVolatile()) + return SDValue(); + + // Optimize trunc store (of multiple scalars) to shuffle and store. First, + // pack all of the elements in one place. Next, store to memory in fewer + // chunks. 
SDValue StVal = St->getValue(); - if (!ISD::isNormalStore(St) || St->isVolatile()) + EVT VT = StVal.getValueType(); + if (St->isTruncatingStore() && VT.isVector()) { + SelectionDAG &DAG = DCI.DAG; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT StVT = St->getMemoryVT(); + unsigned NumElems = VT.getVectorNumElements(); + assert(StVT != VT && "Cannot truncate to the same type"); + unsigned FromEltSz = VT.getVectorElementType().getSizeInBits(); + unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits(); + + // From, To sizes and ElemCount must be pow of two + if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue(); + + // We are going to use the original vector elt for storing. + // Accumulated smaller vector elements must be a multiple of the store size. + if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue(); + + unsigned SizeRatio = FromEltSz / ToEltSz; + assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits()); + + // Create a type on which we perform the shuffle. + EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(), + NumElems*SizeRatio); + assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); + + DebugLoc DL = St->getDebugLoc(); + SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal); + SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1); + for (unsigned i = 0; i < NumElems; ++i) ShuffleVec[i] = i * SizeRatio; + + // Can't shuffle using an illegal type. + if (!TLI.isTypeLegal(WideVecVT)) return SDValue(); + + SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec, + DAG.getUNDEF(WideVec.getValueType()), + ShuffleVec.data()); + // At this point all of the data is stored at the bottom of the + // register. We now need to save it to mem. 
+ + // Find the largest store unit + MVT StoreType = MVT::i8; + for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE; + tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) { + MVT Tp = (MVT::SimpleValueType)tp; + if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz) + StoreType = Tp; + } + // Didn't find a legal store type. + if (!TLI.isTypeLegal(StoreType)) + return SDValue(); + + // Bitcast the original vector into a vector of store-size units + EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(), + StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits()); + assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); + SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff); + SmallVector<SDValue, 8> Chains; + SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8, + TLI.getPointerTy()); + SDValue BasePtr = St->getBasePtr(); + + // Perform one or more big stores into memory. + unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits(); + for (unsigned I = 0; I < E; I++) { + SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + StoreType, ShuffWide, + DAG.getIntPtrConstant(I)); + SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr, + St->getPointerInfo(), St->isVolatile(), + St->isNonTemporal(), St->getAlignment()); + BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, + Increment); + Chains.push_back(Ch); + } + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Chains[0], + Chains.size()); + } + + if (!ISD::isNormalStore(St)) return SDValue(); + // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and + // ARM stores of arguments in the same cache line. 
if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR && - StVal.getNode()->hasOneUse() && !St->isVolatile()) { + StVal.getNode()->hasOneUse()) { SelectionDAG &DAG = DCI.DAG; DebugLoc DL = St->getDebugLoc(); SDValue BasePtr = St->getBasePtr(); @@ -7337,6 +7462,8 @@ static SDValue PerformSTORECombine(SDNode *N, StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT) return SDValue(); + // Bitcast an i64 store extracted from a vector to f64. + // Otherwise, the i64 value will be legalized to a pair of i32 values. SelectionDAG &DAG = DCI.DAG; DebugLoc dl = StVal.getDebugLoc(); SDValue IntVec = StVal.getOperand(0); @@ -8259,8 +8386,7 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { if (Res.getNode()) { APInt KnownZero, KnownOne; - APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); - DAG.ComputeMaskedBits(SDValue(N,0), Mask, KnownZero, KnownOne); + DAG.ComputeMaskedBits(SDValue(N,0), KnownZero, KnownOne); // Capture demanded bits information that would be otherwise lost. if (KnownZero == 0xfffffffe) Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, @@ -8586,10 +8712,12 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, /// a register against the immediate without having to materialize the /// immediate into a register. bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const { + // Thumb2 and ARM modes can use cmn for negative immediates. if (!Subtarget->isThumb()) - return ARM_AM::getSOImmVal(Imm) != -1; + return ARM_AM::getSOImmVal(llvm::abs64(Imm)) != -1; if (Subtarget->isThumb2()) - return ARM_AM::getT2SOImmVal(Imm) != -1; + return ARM_AM::getT2SOImmVal(llvm::abs64(Imm)) != -1; + // Thumb1 doesn't have cmn, and only 8-bit immediates. 
return Imm >= 0 && Imm <= 255; } @@ -8776,22 +8904,20 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, } void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth) const { - KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); + KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); switch (Op.getOpcode()) { default: break; case ARMISD::CMOV: { // Bits are known zero/one if known on the LHS and RHS. - DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); + DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); if (KnownZero == 0 && KnownOne == 0) return; APInt KnownZeroRHS, KnownOneRHS; - DAG.ComputeMaskedBits(Op.getOperand(1), Mask, - KnownZeroRHS, KnownOneRHS, Depth+1); + DAG.ComputeMaskedBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1); KnownZero &= KnownZeroRHS; KnownOne &= KnownOneRHS; return; diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index a71b74e..352d980 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -315,7 +315,6 @@ namespace llvm { SelectionDAG &DAG) const; virtual void computeMaskedBitsForTargetNode(const SDValue Op, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, @@ -494,7 +493,7 @@ namespace llvm { const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; - virtual bool isUsedByReturnOnly(SDNode *N) const; + virtual bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const; virtual bool mayBeEmittedAsTailCall(CallInst *CI) const; diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 1d38bcf..f04926a 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -532,6 +532,7 @@ class AIswp<bit b, dag oops, dag iops, string opc, list<dag> pattern> let Inst{11-4} = 
0b00001001; let Inst{3-0} = Rt2; + let Unpredictable{11-8} = 0b1111; let DecoderMethod = "DecodeSwap"; } diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h index 72af535..5d3e059 100644 --- a/lib/Target/ARM/ARMInstrInfo.h +++ b/lib/Target/ARM/ARMInstrInfo.h @@ -18,7 +18,6 @@ #include "ARMBaseInstrInfo.h" #include "ARMRegisterInfo.h" #include "ARMSubtarget.h" -#include "llvm/Target/TargetInstrInfo.h" namespace llvm { class ARMSubtarget; diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 8196582..1eb561d 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -181,12 +181,8 @@ def HasVFP3 : Predicate<"Subtarget->hasVFP3()">, AssemblerPredicate<"FeatureVFP3">; def HasVFP4 : Predicate<"Subtarget->hasVFP4()">, AssemblerPredicate<"FeatureVFP4">; -def NoVFP4 : Predicate<"!Subtarget->hasVFP4()">; def HasNEON : Predicate<"Subtarget->hasNEON()">, AssemblerPredicate<"FeatureNEON">; -def HasNEON2 : Predicate<"Subtarget->hasNEON2()">, - AssemblerPredicate<"FeatureNEON2">; -def NoNEON2 : Predicate<"!Subtarget->hasNEON2()">; def HasFP16 : Predicate<"Subtarget->hasFP16()">, AssemblerPredicate<"FeatureFP16">; def HasDivide : Predicate<"Subtarget->hasDivide()">, @@ -221,6 +217,14 @@ def UseMovt : Predicate<"Subtarget->useMovt()">; def DontUseMovt : Predicate<"!Subtarget->useMovt()">; def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">; +// Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available. +// But only select them if more precision in FP computation is allowed. +// Do not use them for Darwin platforms. +def UseFusedMAC : Predicate<"!TM.Options.NoExcessFPPrecision && " + "!Subtarget->isTargetDarwin()">; +def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4() || " + "Subtarget->isTargetDarwin()">; + //===----------------------------------------------------------------------===// // ARM Flag Definitions. 
@@ -251,7 +255,8 @@ def imm16_31 : ImmLeaf<i32, [{ def so_imm_neg_asmoperand : AsmOperandClass { let Name = "ARMSOImmNeg"; } def so_imm_neg : Operand<i32>, PatLeaf<(imm), [{ - return ARM_AM::getSOImmVal(-(uint32_t)N->getZExtValue()) != -1; + int64_t Value = -(int)N->getZExtValue(); + return Value && ARM_AM::getSOImmVal(Value) != -1; }], so_imm_neg_XFORM> { let ParserMatchClass = so_imm_neg_asmoperand; } @@ -736,7 +741,7 @@ def postidx_reg : Operand<i32> { let DecoderMethod = "DecodePostIdxReg"; let PrintMethod = "printPostIdxRegOperand"; let ParserMatchClass = PostIdxRegAsmOperand; - let MIOperandInfo = (ops GPR, i32imm); + let MIOperandInfo = (ops GPRnopc, i32imm); } @@ -903,6 +908,11 @@ def p_imm : Operand<i32> { let DecoderMethod = "DecodeCoprocessor"; } +def pf_imm : Operand<i32> { + let PrintMethod = "printPImmediate"; + let ParserMatchClass = CoprocNumAsmOperand; +} + def CoprocRegAsmOperand : AsmOperandClass { let Name = "CoprocReg"; let ParserMethod = "parseCoprocRegOperand"; @@ -1182,6 +1192,8 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, let Inst{19-16} = Rn; let Inst{15-12} = 0b0000; let Inst{11-0} = imm; + + let Unpredictable{15-12} = 0b1111; } def rr : AI1<opcod, (outs), (ins GPR:$Rn, GPR:$Rm), DPFrm, iir, opc, "\t$Rn, $Rm", @@ -1195,6 +1207,8 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, let Inst{15-12} = 0b0000; let Inst{11-4} = 0b00000000; let Inst{3-0} = Rm; + + let Unpredictable{15-12} = 0b1111; } def rsi : AI1<opcod, (outs), (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, iis, @@ -1209,11 +1223,13 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, let Inst{11-5} = shift{11-5}; let Inst{4} = 0; let Inst{3-0} = shift{3-0}; + + let Unpredictable{15-12} = 0b1111; } def rsr : AI1<opcod, (outs), - (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, iis, + (ins GPRnopc:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, iis, opc, "\t$Rn, $shift", - [(opnode GPR:$Rn, so_reg_reg:$shift)]> { + [(opnode GPRnopc:$Rn, so_reg_reg:$shift)]> { bits<4> Rn; 
bits<12> shift; let Inst{25} = 0; @@ -1225,6 +1241,8 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, let Inst{6-5} = shift{6-5}; let Inst{4} = 1; let Inst{3-0} = shift{3-0}; + + let Unpredictable{15-12} = 0b1111; } } @@ -1330,10 +1348,10 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{4} = 0; let Inst{3-0} = shift{3-0}; } - def rsr : AsI1<opcod, (outs GPR:$Rd), - (ins GPR:$Rn, so_reg_reg:$shift), + def rsr : AsI1<opcod, (outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift", - [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_reg:$shift, CPSR))]>, + [(set GPRnopc:$Rd, CPSR, (opnode GPRnopc:$Rn, so_reg_reg:$shift, CPSR))]>, Requires<[IsARM]> { bits<4> Rd; bits<4> Rn; @@ -1367,7 +1385,7 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, cc_out:$s)>, Requires<[IsARM]>; def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"), - (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPR:$Rdn, GPR:$Rdn, + (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPRnopc:$Rdn, GPRnopc:$Rdn, so_reg_reg:$shift, pred:$p, cc_out:$s)>, Requires<[IsARM]>; @@ -1907,7 +1925,7 @@ let isCall = 1, def BL : ABXI<0b1011, (outs), (ins bl_target:$func, variable_ops), IIC_Br, "bl\t$func", [(ARMcall tglobaladdr:$func)]>, - Requires<[IsARM, IsNotIOS]> { + Requires<[IsARM]> { let Inst{31-28} = 0b1110; bits<24> func; let Inst{23-0} = func; @@ -1917,7 +1935,7 @@ let isCall = 1, def BL_pred : ABI<0b1011, (outs), (ins bl_target:$func, variable_ops), IIC_Br, "bl", "\t$func", [(ARMcall_pred tglobaladdr:$func)]>, - Requires<[IsARM, IsNotIOS]> { + Requires<[IsARM]> { bits<24> func; let Inst{23-0} = func; let DecoderMethod = "DecodeBranchImmInstruction"; @@ -1927,7 +1945,7 @@ let isCall = 1, def BLX : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm, IIC_Br, "blx\t$func", [(ARMcall GPR:$func)]>, - Requires<[IsARM, HasV5T, IsNotIOS]> { + Requires<[IsARM, HasV5T]> { bits<4> func; let Inst{31-4} = 
0b1110000100101111111111110011; let Inst{3-0} = func; @@ -1936,7 +1954,7 @@ let isCall = 1, def BLX_pred : AI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm, IIC_Br, "blx", "\t$func", [(ARMcall_pred GPR:$func)]>, - Requires<[IsARM, HasV5T, IsNotIOS]> { + Requires<[IsARM, HasV5T]> { bits<4> func; let Inst{27-4} = 0b000100101111111111110011; let Inst{3-0} = func; @@ -1946,67 +1964,19 @@ let isCall = 1, // Note: Restrict $func to the tGPR regclass to prevent it being in LR. def BX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops), 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsARM, HasV4T, IsNotIOS]>; + Requires<[IsARM, HasV4T]>; // ARMv4 def BMOVPCRX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops), 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsARM, NoV4T, IsNotIOS]>; + Requires<[IsARM, NoV4T]>; // mov lr, pc; b if callee is marked noreturn to avoid confusing the // return stack predictor. def BMOVPCB_CALL : ARMPseudoInst<(outs), (ins bl_target:$func, variable_ops), 8, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>, - Requires<[IsARM, IsNotIOS]>; -} - -let isCall = 1, - // On IOS R9 is call-clobbered. - // R7 is marked as a use to prevent frame-pointer assignments from being - // moved above / below calls. 
- Defs = [LR], Uses = [R7, SP] in { - def BLr9 : ARMPseudoExpand<(outs), (ins bl_target:$func, variable_ops), - 4, IIC_Br, - [(ARMcall tglobaladdr:$func)], (BL bl_target:$func)>, - Requires<[IsARM, IsIOS]>; - - def BLr9_pred : ARMPseudoExpand<(outs), - (ins bl_target:$func, pred:$p, variable_ops), - 4, IIC_Br, - [(ARMcall_pred tglobaladdr:$func)], - (BL_pred bl_target:$func, pred:$p)>, - Requires<[IsARM, IsIOS]>; - - // ARMv5T and above - def BLXr9 : ARMPseudoExpand<(outs), (ins GPR:$func, variable_ops), - 4, IIC_Br, - [(ARMcall GPR:$func)], - (BLX GPR:$func)>, - Requires<[IsARM, HasV5T, IsIOS]>; - - def BLXr9_pred: ARMPseudoExpand<(outs), (ins GPR:$func, pred:$p,variable_ops), - 4, IIC_Br, - [(ARMcall_pred GPR:$func)], - (BLX_pred GPR:$func, pred:$p)>, - Requires<[IsARM, HasV5T, IsIOS]>; - - // ARMv4T - // Note: Restrict $func to the tGPR regclass to prevent it being in LR. - def BXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops), - 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsARM, HasV4T, IsIOS]>; - - // ARMv4 - def BMOVPCRXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops), - 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsARM, NoV4T, IsIOS]>; - - // mov lr, pc; b if callee is marked noreturn to avoid confusing the - // return stack predictor. - def BMOVPCBr9_CALL : ARMPseudoInst<(outs),(ins bl_target:$func, variable_ops), - 8, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>, - Requires<[IsARM, IsIOS]>; + Requires<[IsARM]>; } let isBranch = 1, isTerminator = 1 in { @@ -2073,45 +2043,22 @@ def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func", // Tail calls. -let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { - // IOS versions. 
- let Uses = [SP] in { - def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops), - IIC_Br, []>, Requires<[IsIOS]>; - - def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops), - IIC_Br, []>, Requires<[IsIOS]>; - - def TAILJMPd : ARMPseudoExpand<(outs), (ins br_target:$dst, variable_ops), - 4, IIC_Br, [], - (Bcc br_target:$dst, (ops 14, zero_reg))>, - Requires<[IsARM, IsIOS]>; +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in { + def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops), + IIC_Br, []>; - def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops), - 4, IIC_Br, [], - (BX GPR:$dst)>, - Requires<[IsARM, IsIOS]>; + def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops), + IIC_Br, []>; - } - - // Non-IOS versions (the difference is R9). - let Uses = [SP] in { - def TCRETURNdiND : PseudoInst<(outs), (ins i32imm:$dst, variable_ops), - IIC_Br, []>, Requires<[IsNotIOS]>; - - def TCRETURNriND : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops), - IIC_Br, []>, Requires<[IsNotIOS]>; + def TAILJMPd : ARMPseudoExpand<(outs), (ins br_target:$dst, variable_ops), + 4, IIC_Br, [], + (Bcc br_target:$dst, (ops 14, zero_reg))>, + Requires<[IsARM]>; - def TAILJMPdND : ARMPseudoExpand<(outs), (ins brtarget:$dst, variable_ops), - 4, IIC_Br, [], - (Bcc br_target:$dst, (ops 14, zero_reg))>, - Requires<[IsARM, IsNotIOS]>; - - def TAILJMPrND : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops), - 4, IIC_Br, [], - (BX GPR:$dst)>, - Requires<[IsARM, IsNotIOS]>; - } + def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops), + 4, IIC_Br, [], + (BX GPR:$dst)>, + Requires<[IsARM]>; } // Secure Monitor Call is a system instruction. 
@@ -2484,7 +2431,7 @@ multiclass AI3ldrT<bits<4> op, string opc> { let Inst{3-0} = offset{3-0}; let AsmMatchConverter = "cvtLdExtTWriteBackImm"; } - def r : AI3ldstidxT<op, 1, (outs GPR:$Rt, GPR:$base_wb), + def r : AI3ldstidxT<op, 1, (outs GPRnopc:$Rt, GPRnopc:$base_wb), (ins addr_offset_none:$addr, postidx_reg:$Rm), IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru, opc, "\t$Rt, $addr, $Rm", "$addr.base = $base_wb", []> { @@ -2492,8 +2439,10 @@ multiclass AI3ldrT<bits<4> op, string opc> { let Inst{23} = Rm{4}; let Inst{22} = 0; let Inst{11-8} = 0; + let Unpredictable{11-8} = 0b1111; let Inst{3-0} = Rm{3-0}; let AsmMatchConverter = "cvtLdExtTWriteBackReg"; + let DecoderMethod = "DecodeLDR"; } } @@ -3241,6 +3190,8 @@ class AAI<bits<8> op27_20, bits<8> op11_4, string opc, let Inst{19-16} = Rn; let Inst{15-12} = Rd; let Inst{3-0} = Rm; + + let Unpredictable{11-8} = 0b1111; } // Saturating add/subtract @@ -3533,19 +3484,20 @@ class AsMul1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin, // property. Remove them when it's possible to add those properties // on an individual MachineInstr, not just an instuction description. 
let isCommutable = 1 in { -def MUL : AsMul1I32<0b0000000, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), +def MUL : AsMul1I32<0b0000000, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm), IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))]>, + [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))]>, Requires<[IsARM, HasV6]> { let Inst{15-12} = 0b0000; + let Unpredictable{15-12} = 0b1111; } let Constraints = "@earlyclobber $Rd" in -def MULv5: ARMPseudoExpand<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, +def MULv5: ARMPseudoExpand<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s), 4, IIC_iMUL32, - [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))], - (MUL GPR:$Rd, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, + [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))], + (MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s)>, Requires<[IsARM, NoV6]>; } @@ -4040,10 +3992,13 @@ def BCCZi64 : PseudoInst<(outs), // FIXME: should be able to write a pattern for ARMcmov, but can't use // a two-value operand where a dag node expects two operands. :( let neverHasSideEffects = 1 in { + +let isCommutable = 1 in def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p), 4, IIC_iCMOVr, [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $Rd">; + def MOVCCsi : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, so_reg_imm:$shift, pred:$p), 4, IIC_iCMOVsr, @@ -4164,7 +4119,7 @@ def ISB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, let Inst{3-0} = opt; } -// Pseudo isntruction that combines movs + predicated rsbmi +// Pseudo instruction that combines movs + predicated rsbmi // to implement integer ABS let usesCustomInserter = 1, Defs = [CPSR] in { def ABS : ARMPseudoInst< @@ -4325,9 +4280,9 @@ def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", []>, // SWP/SWPB are deprecated in V6/V7. 
let mayLoad = 1, mayStore = 1 in { -def SWP : AIswp<0, (outs GPR:$Rt), (ins GPR:$Rt2, addr_offset_none:$addr), +def SWP : AIswp<0, (outs GPRnopc:$Rt), (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swp", []>; -def SWPB: AIswp<1, (outs GPR:$Rt), (ins GPR:$Rt2, addr_offset_none:$addr), +def SWPB: AIswp<1, (outs GPRnopc:$Rt), (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swpb", []>; } @@ -4356,7 +4311,7 @@ def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, let Inst{23-20} = opc1; } -def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, +def CDP2 : ABXI<0b1110, (outs), (ins pf_imm:$cop, imm0_15:$opc1, c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", [(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn, @@ -4635,7 +4590,7 @@ def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, class MovRRCopro<string opc, bit direction, list<dag> pattern = []> : ABI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1, - GPR:$Rt, GPR:$Rt2, c_imm:$CRm), + GPRnopc:$Rt, GPRnopc:$Rt2, c_imm:$CRm), NoItinerary, opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", pattern> { let Inst{23-21} = 0b010; let Inst{20} = direction; @@ -4654,13 +4609,13 @@ class MovRRCopro<string opc, bit direction, list<dag> pattern = []> } def MCRR : MovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */, - [(int_arm_mcrr imm:$cop, imm:$opc1, GPR:$Rt, GPR:$Rt2, + [(int_arm_mcrr imm:$cop, imm:$opc1, GPRnopc:$Rt, GPRnopc:$Rt2, imm:$CRm)]>; def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>; class MovRRCopro2<string opc, bit direction, list<dag> pattern = []> : ABXI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1, - GPR:$Rt, GPR:$Rt2, c_imm:$CRm), NoItinerary, + GPRnopc:$Rt, GPRnopc:$Rt2, c_imm:$CRm), NoItinerary, !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern> { let Inst{31-28} = 0b1111; let Inst{23-21} = 0b010; @@ -4677,10 +4632,12 @@ class MovRRCopro2<string opc, bit direction, list<dag> pattern = []> 
let Inst{11-8} = cop; let Inst{7-4} = opc1; let Inst{3-0} = CRm; + + let DecoderMethod = "DecodeMRRC2"; } def MCRR2 : MovRRCopro2<"mcrr2", 0 /* from ARM core register to coprocessor */, - [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPR:$Rt, GPR:$Rt2, + [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPRnopc:$Rt, GPRnopc:$Rt2, imm:$CRm)]>; def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */>; @@ -4689,22 +4646,32 @@ def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */>; // // Move to ARM core register from Special Register -def MRS : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary, +def MRS : ABI<0b0001, (outs GPRnopc:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, apsr", []> { bits<4> Rd; let Inst{23-16} = 0b00001111; + let Unpredictable{19-17} = 0b111; + let Inst{15-12} = Rd; - let Inst{7-4} = 0b0000; + + let Inst{11-0} = 0b000000000000; + let Unpredictable{11-0} = 0b110100001111; } -def : InstAlias<"mrs${p} $Rd, cpsr", (MRS GPR:$Rd, pred:$p)>, Requires<[IsARM]>; +def : InstAlias<"mrs${p} $Rd, cpsr", (MRS GPRnopc:$Rd, pred:$p)>, Requires<[IsARM]>; -def MRSsys : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary, +// The MRSsys instruction is the MRS instruction from the ARM ARM, +// section B9.3.9, with the R bit set to 1. +def MRSsys : ABI<0b0001, (outs GPRnopc:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr", []> { bits<4> Rd; let Inst{23-16} = 0b01001111; + let Unpredictable{19-16} = 0b1111; + let Inst{15-12} = Rd; - let Inst{7-4} = 0b0000; + + let Inst{11-0} = 0b000000000000; + let Unpredictable{11-0} = 0b110100001111; } // Move from ARM core register to Special Register @@ -4868,36 +4835,15 @@ def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id), // TODO: add,sub,and, 3-instr forms? 
-// Tail calls -def : ARMPat<(ARMtcret tcGPR:$dst), - (TCRETURNri tcGPR:$dst)>, Requires<[IsIOS]>; - -def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)), - (TCRETURNdi texternalsym:$dst)>, Requires<[IsIOS]>; - -def : ARMPat<(ARMtcret (i32 texternalsym:$dst)), - (TCRETURNdi texternalsym:$dst)>, Requires<[IsIOS]>; - -def : ARMPat<(ARMtcret tcGPR:$dst), - (TCRETURNriND tcGPR:$dst)>, Requires<[IsNotIOS]>; - -def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)), - (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotIOS]>; - -def : ARMPat<(ARMtcret (i32 texternalsym:$dst)), - (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotIOS]>; +// Tail calls. These patterns also apply to Thumb mode. +def : Pat<(ARMtcret tcGPR:$dst), (TCRETURNri tcGPR:$dst)>; +def : Pat<(ARMtcret (i32 tglobaladdr:$dst)), (TCRETURNdi texternalsym:$dst)>; +def : Pat<(ARMtcret (i32 texternalsym:$dst)), (TCRETURNdi texternalsym:$dst)>; // Direct calls -def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>, - Requires<[IsARM, IsNotIOS]>; -def : ARMPat<(ARMcall texternalsym:$func), (BLr9 texternalsym:$func)>, - Requires<[IsARM, IsIOS]>; -def : ARMPat<(ARMcall_nolink texternalsym:$func), - (BMOVPCB_CALL texternalsym:$func)>, - Requires<[IsARM, IsNotIOS]>; +def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>; def : ARMPat<(ARMcall_nolink texternalsym:$func), - (BMOVPCBr9_CALL texternalsym:$func)>, - Requires<[IsARM, IsIOS]>; + (BMOVPCB_CALL texternalsym:$func)>; // zextload i1 -> zextload i8 def : ARMPat<(zextloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>; diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index f61eb2b..fd8ac0b 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -530,16 +530,16 @@ def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{ // Use VLDM to load a Q register as a D register pair. // This is a pseudo instruction that is expanded to VLDMD after reg alloc. 
def VLDMQIA - : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn), + : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn), IIC_fpLoad_m, "", - [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>; + [(set DPair:$dst, (v2f64 (load GPR:$Rn)))]>; // Use VSTM to store a Q register as a D register pair. // This is a pseudo instruction that is expanded to VSTMD after reg alloc. def VSTMQIA - : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn), + : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn), IIC_fpStore_m, "", - [(store (v2f64 QPR:$src), GPR:$Rn)]>; + [(store (v2f64 DPair:$src), GPR:$Rn)]>; // Classes for VLD* pseudo-instructions with multi-register operands. // These are expanded to real instructions after register allocation. @@ -1938,20 +1938,11 @@ class VSTQQQQLNWBPseudo<InstrItinClass itin> // VST1LN : Vector Store (single element from one lane) class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, - PatFrag StoreOp, SDNode ExtractOp> + PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode> : NLdStLn<1, 0b00, op11_8, op7_4, (outs), - (ins addrmode6:$Rn, DPR:$Vd, nohash_imm:$lane), + (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane), IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "", - [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6:$Rn)]> { - let Rm = 0b1111; - let DecoderMethod = "DecodeVST1LN"; -} -class VST1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, - PatFrag StoreOp, SDNode ExtractOp> - : NLdStLn<1, 0b00, op11_8, op7_4, (outs), - (ins addrmode6oneL32:$Rn, DPR:$Vd, nohash_imm:$lane), - IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "", - [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6oneL32:$Rn)]>{ + [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]> { let Rm = 0b1111; let DecoderMethod = "DecodeVST1LN"; } @@ -1962,16 +1953,17 @@ class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp> } def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8, - NEONvgetlaneu> { + NEONvgetlaneu, addrmode6> { let 
Inst{7-5} = lane{2-0}; } def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16, - NEONvgetlaneu> { + NEONvgetlaneu, addrmode6> { let Inst{7-6} = lane{1-0}; let Inst{4} = Rn{5}; } -def VST1LNd32 : VST1LN32<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt> { +def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt, + addrmode6oneL32> { let Inst{7} = lane{0}; let Inst{5-4} = Rn{5-4}; } @@ -1987,14 +1979,14 @@ def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr), // ...with address register writeback: class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, - PatFrag StoreOp, SDNode ExtractOp> + PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode> : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, + (ins AdrMode:$Rn, am6offset:$Rm, DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn$Rm", "$Rn.addr = $wb", [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), - addrmode6:$Rn, am6offset:$Rm))]> { + AdrMode:$Rn, am6offset:$Rm))]> { let DecoderMethod = "DecodeVST1LN"; } class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp> @@ -2004,16 +1996,16 @@ class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp> } def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8, - NEONvgetlaneu> { + NEONvgetlaneu, addrmode6> { let Inst{7-5} = lane{2-0}; } def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16, - NEONvgetlaneu> { + NEONvgetlaneu, addrmode6> { let Inst{7-6} = lane{1-0}; let Inst{4} = Rn{5}; } def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store, - extractelt> { + extractelt, addrmode6oneL32> { let Inst{7} = lane{0}; let Inst{5-4} = Rn{5-4}; } @@ -3642,7 +3634,7 @@ multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, } multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - 
SDNode OpNode> { + string baseOpc, SDNode OpNode> { // 64-bit vector types. def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8, OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> { @@ -3676,6 +3668,33 @@ multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64, OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>; // imm6 = xxxxxx + + // Aliases for two-operand forms (source and dest regs the same). + def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "8 $Vdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "v8i8")) + DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>; + def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "16 $Vdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "v4i16")) + DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>; + def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "32 $Vdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "v2i32")) + DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>; + def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "64 $Vdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "v1i64")) + DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>; + + def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "8 $Vdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "v16i8")) + QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>; + def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "16 $Vdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "v8i16")) + QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>; + def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "32 $Vdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "v4i32")) + QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>; + def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "64 $Vdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "v2i64")) + QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>; } // Neon Shift-Accumulate vector operations, @@ -3986,10 +4005,10 @@ defm VMLA : 
N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32", v2f32, fmul_su, fadd_mlx>, - Requires<[HasNEON, UseFPVMLx, NoNEON2]>; + Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32", v4f32, fmul_su, fadd_mlx>, - Requires<[HasNEON, UseFPVMLx, NoNEON2]>; + Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32", @@ -4044,10 +4063,10 @@ defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32", v2f32, fmul_su, fsub_mlx>, - Requires<[HasNEON, UseFPVMLx, NoNEON2]>; + Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32", v4f32, fmul_su, fsub_mlx>, - Requires<[HasNEON, UseFPVMLx, NoNEON2]>; + Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32", @@ -4096,23 +4115,36 @@ defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D, "vqdmlsl", "s", int_arm_neon_vqdmlsl>; defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>; - // Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations. 
def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32", v2f32, fmul_su, fadd_mlx>, - Requires<[HasNEON2,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32", v4f32, fmul_su, fadd_mlx>, - Requires<[HasNEON2,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; // Fused Vector Multiply Subtract (floating-point) def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32", v2f32, fmul_su, fsub_mlx>, - Requires<[HasNEON2,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32", v4f32, fmul_su, fsub_mlx>, - Requires<[HasNEON2,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; + +// Match @llvm.fma.* intrinsics +def : Pat<(v2f32 (fma DPR:$src1, DPR:$Vn, DPR:$Vm)), + (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasVFP4]>; +def : Pat<(v4f32 (fma QPR:$src1, QPR:$Vn, QPR:$Vm)), + (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasVFP4]>; +def : Pat<(v2f32 (fma (fneg DPR:$src1), DPR:$Vn, DPR:$Vm)), + (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasVFP4]>; +def : Pat<(v4f32 (fma (fneg QPR:$src1), QPR:$Vn, QPR:$Vm)), + (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasVFP4]>; // Vector Subtract Operations. 
@@ -4614,8 +4646,10 @@ defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm, defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>; // VSHR : Vector Shift Right (Immediate) -defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s",NEONvshrs>; -defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u",NEONvshru>; +defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs", + NEONvshrs>; +defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu", + NEONvshru>; // VSHLL : Vector Shift Left Long defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>; @@ -4649,8 +4683,10 @@ defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, "vrshl", "u", int_arm_neon_vrshiftu>; // VRSHR : Vector Rounding Shift Right -defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s",NEONvrshrs>; -defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u",NEONvrshru>; +defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs", + NEONvrshrs>; +defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu", + NEONvrshru>; // VRSHRN : Vector Rounding Shift Right and Narrow defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i", @@ -4795,12 +4831,12 @@ def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, // Vector Swap def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0, - (outs DPR:$Vd, DPR:$Vd1), (ins DPR:$Vm, DPR:$Vm1), - NoItinerary, "vswp", "$Vd, $Vd1", "$Vm = $Vd, $Vm1 = $Vd1", + (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2), + NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm", []>; def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0, - (outs QPR:$Vd, QPR:$Vd1), (ins QPR:$Vm, QPR:$Vm1), - NoItinerary, "vswp", "$Vd, $Vd1", "$Vm = $Vd, $Vm1 = $Vd1", + (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2), + NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = 
$Vm", []>; // Vector Move Operations. @@ -5342,7 +5378,9 @@ def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">; def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp", "8">; def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">; -def VUZPd32 : N2VDShuffle<0b10, 0b00010, "vuzp", "32">; +// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. +def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm", + (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>; def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">; def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">; @@ -5352,7 +5390,9 @@ def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">; def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip", "8">; def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">; -def VZIPd32 : N2VDShuffle<0b10, 0b00011, "vzip", "32">; +// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. +def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm", + (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>; def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">; def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">; @@ -5462,13 +5502,13 @@ def : N3VSPat<fadd, VADDfd>; def : N3VSPat<fsub, VSUBfd>; def : N3VSPat<fmul, VMULfd>; def : N3VSMulOpPat<fmul, fadd, VMLAfd>, - Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEON2]>; + Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>; def : N3VSMulOpPat<fmul, fsub, VMLSfd>, - Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEON2]>; + Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>; def : N3VSMulOpPat<fmul, fadd, VFMAfd>, - Requires<[HasNEON2, UseNEONForFP,FPContractions]>; + Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>; def : N3VSMulOpPat<fmul, fsub, VFMSfd>, - Requires<[HasNEON2, UseNEONForFP,FPContractions]>; + Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>; def : N2VSPat<fabs, VABSfd>; def : N2VSPat<fneg, VNEGfd>; def : N3VSPat<NEONfmax, VMAXfd>; @@ -5594,6 +5634,7 @@ multiclass 
Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy, // extload, zextload and sextload for a lengthening load followed by another // lengthening load, to quadruple the initial length. +// // Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32", qsub_0> = // Pat<(v4i32 (extloadvi8 addrmode5:$addr)) // (EXTRACT_SUBREG (VMOVLuv4i32 @@ -5604,28 +5645,63 @@ multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy, // qsub_0)>; multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy, string Insn1Lanes, string Insn1Ty, string Insn2Lanes, - string Insn2Ty, SubRegIndex RegType> { + string Insn2Ty> { + def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)), + (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), + ssub_0)), dsub_0))>; + def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)), + (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), + ssub_0)), dsub_0))>; + def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)), + (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty) + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), + ssub_0)), dsub_0))>; +} + +// extload, zextload and sextload for a lengthening load followed by another +// lengthening load, to quadruple the initial length, but which ends up only +// requiring half the available lanes (a 64-bit outcome instead of a 128-bit). 
+// +// Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> = +// Pat<(v4i32 (extloadvi8 addrmode5:$addr)) +// (EXTRACT_SUBREG (VMOVLuv4i32 +// (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), +// (VLDRS addrmode5:$addr), +// ssub_0)), +// dsub_0)), +// dsub_0)>; +multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy, + string Insn1Lanes, string Insn1Ty, string Insn2Lanes, + string Insn2Ty> { def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)), (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)), dsub_0)), - RegType)>; + dsub_0)>; def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)), (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)), dsub_0)), - RegType)>; + dsub_0)>; def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)), (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty) (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty) (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)), dsub_0)), - RegType)>; + dsub_0)>; } defm : Lengthen_Single<"8", "i16", "i8">; // v8i8 -> v8i16 @@ -5636,12 +5712,12 @@ defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16 defm : Lengthen_HalfSingle<"2", "i16", "i8", "8", "i16">; // v2i8 -> v2i16 defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32 -// Double lengthening - v4i8 -> v4i16 -> v4i32 -defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32", qsub_0>; +// Double lengthening - v4i8 -> 
v4i16 -> v4i32 +defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">; // v2i8 -> v2i16 -> v2i32 -defm : Lengthen_Double<"2", "i32", "i8", "8", "i16", "4", "i32", dsub_0>; +defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">; // v2i16 -> v2i32 -> v2i64 -defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64", qsub_0>; +defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">; // Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64 def : Pat<(v2i64 (extloadvi8 addrmode5:$addr)), @@ -5911,7 +5987,7 @@ def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm", def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm", (VSHLuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -// VSHL (immediate) two-operand aliases. +// VSHR (immediate) two-operand aliases. def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm", (VSHRsv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>; def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm", @@ -5948,6 +6024,41 @@ def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm", def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm", (VSHRuv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>; +// VRSHL two-operand aliases. 
+def : NEONInstAlias<"vrshl${p}.s8 $Vdn, $Vm", + (VRSHLsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.s16 $Vdn, $Vm", + (VRSHLsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.s32 $Vdn, $Vm", + (VRSHLsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.s64 $Vdn, $Vm", + (VRSHLsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.u8 $Vdn, $Vm", + (VRSHLuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.u16 $Vdn, $Vm", + (VRSHLuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.u32 $Vdn, $Vm", + (VRSHLuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.u64 $Vdn, $Vm", + (VRSHLuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vrshl${p}.s8 $Vdn, $Vm", + (VRSHLsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.s16 $Vdn, $Vm", + (VRSHLsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.s32 $Vdn, $Vm", + (VRSHLsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.s64 $Vdn, $Vm", + (VRSHLsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.u8 $Vdn, $Vm", + (VRSHLuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.u16 $Vdn, $Vm", + (VRSHLuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.u32 $Vdn, $Vm", + (VRSHLuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.u64 $Vdn, $Vm", + (VRSHLuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + // VLD1 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. 
def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr", @@ -6911,6 +7022,100 @@ def : NEONInstAlias<"vsli${p}.32 $Vdm, $imm", def : NEONInstAlias<"vsli${p}.64 $Vdm, $imm", (VSLIv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>; +// Two-operand variants for VHSUB. + // Signed. +def : NEONInstAlias<"vhsub${p}.s8 $Vdn, $Vm", + (VHSUBsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhsub${p}.s16 $Vdn, $Vm", + (VHSUBsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhsub${p}.s32 $Vdn, $Vm", + (VHSUBsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vhsub${p}.s8 $Vdn, $Vm", + (VHSUBsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhsub${p}.s16 $Vdn, $Vm", + (VHSUBsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhsub${p}.s32 $Vdn, $Vm", + (VHSUBsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + + // Unsigned. +def : NEONInstAlias<"vhsub${p}.u8 $Vdn, $Vm", + (VHSUBuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhsub${p}.u16 $Vdn, $Vm", + (VHSUBuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhsub${p}.u32 $Vdn, $Vm", + (VHSUBuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vhsub${p}.u8 $Vdn, $Vm", + (VHSUBuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhsub${p}.u16 $Vdn, $Vm", + (VHSUBuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhsub${p}.u32 $Vdn, $Vm", + (VHSUBuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + + +// Two-operand variants for VHADD. + // Signed. 
+def : NEONInstAlias<"vhadd${p}.s8 $Vdn, $Vm", + (VHADDsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhadd${p}.s16 $Vdn, $Vm", + (VHADDsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhadd${p}.s32 $Vdn, $Vm", + (VHADDsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vhadd${p}.s8 $Vdn, $Vm", + (VHADDsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhadd${p}.s16 $Vdn, $Vm", + (VHADDsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhadd${p}.s32 $Vdn, $Vm", + (VHADDsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + + // Unsigned. +def : NEONInstAlias<"vhadd${p}.u8 $Vdn, $Vm", + (VHADDuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhadd${p}.u16 $Vdn, $Vm", + (VHADDuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhadd${p}.u32 $Vdn, $Vm", + (VHADDuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vhadd${p}.u8 $Vdn, $Vm", + (VHADDuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhadd${p}.u16 $Vdn, $Vm", + (VHADDuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhadd${p}.u32 $Vdn, $Vm", + (VHADDuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// Two-operand variants for VRHADD. + // Signed. +def : NEONInstAlias<"vrhadd${p}.s8 $Vdn, $Rm", + (VRHADDsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Rm, pred:$p)>; +def : NEONInstAlias<"vrhadd${p}.s16 $Vdn, $Rm", + (VRHADDsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Rm, pred:$p)>; +def : NEONInstAlias<"vrhadd${p}.s32 $Vdn, $Rm", + (VRHADDsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Rm, pred:$p)>; + +def : NEONInstAlias<"vrhadd${p}.s8 $Vdn, $Rm", + (VRHADDsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Rm, pred:$p)>; +def : NEONInstAlias<"vrhadd${p}.s16 $Vdn, $Rm", + (VRHADDsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Rm, pred:$p)>; +def : NEONInstAlias<"vrhadd${p}.s32 $Vdn, $Rm", + (VRHADDsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Rm, pred:$p)>; + + // Unsigned. 
+def : NEONInstAlias<"vrhadd${p}.u8 $Vdn, $Rm", + (VRHADDuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Rm, pred:$p)>; +def : NEONInstAlias<"vrhadd${p}.u16 $Vdn, $Rm", + (VRHADDuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Rm, pred:$p)>; +def : NEONInstAlias<"vrhadd${p}.u32 $Vdn, $Rm", + (VRHADDuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Rm, pred:$p)>; + +def : NEONInstAlias<"vrhadd${p}.u8 $Vdn, $Rm", + (VRHADDuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Rm, pred:$p)>; +def : NEONInstAlias<"vrhadd${p}.u16 $Vdn, $Rm", + (VRHADDuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Rm, pred:$p)>; +def : NEONInstAlias<"vrhadd${p}.u32 $Vdn, $Rm", + (VRHADDuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Rm, pred:$p)>; + // VSWP allows, but does not require, a type suffix. defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm", (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>; diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index ba1791b..6335229 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -91,6 +91,12 @@ def t_imm0_508s4 : Operand<i32> { let ParserMatchClass = t_imm0_508s4_asmoperand; let OperandType = "OPERAND_IMMEDIATE"; } +// Alias use only, so no printer is necessary. +def t_imm0_508s4_neg_asmoperand: AsmOperandClass { let Name = "Imm0_508s4Neg"; } +def t_imm0_508s4_neg : Operand<i32> { + let ParserMatchClass = t_imm0_508s4_neg_asmoperand; + let OperandType = "OPERAND_IMMEDIATE"; +} // Define Thumb specific addressing modes. @@ -345,6 +351,11 @@ def tSUBspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm), let DecoderMethod = "DecodeThumbAddSPImm"; } +def : tInstAlias<"add${p} sp, $imm", + (tSUBspi SP, t_imm0_508s4_neg:$imm, pred:$p)>; +def : tInstAlias<"add${p} sp, sp, $imm", + (tSUBspi SP, t_imm0_508s4_neg:$imm, pred:$p)>; + // Can optionally specify SP as a three operand instruction. 
def : tInstAlias<"add${p} sp, sp, $imm", (tADDspi SP, t_imm0_508s4:$imm, pred:$p)>; @@ -405,14 +416,13 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in { // prevent stack-pointer assignments that appear immediately before calls from // potentially appearing dead. let isCall = 1, - // On non-IOS platforms R9 is callee-saved. Defs = [LR], Uses = [SP] in { // Also used for Thumb2 def tBL : TIx2<0b11110, 0b11, 1, (outs), (ins pred:$p, t_bltarget:$func, variable_ops), IIC_Br, "bl${p}\t$func", [(ARMtcall tglobaladdr:$func)]>, - Requires<[IsThumb, IsNotIOS]> { + Requires<[IsThumb]> { bits<22> func; let Inst{26} = func{21}; let Inst{25-16} = func{20-11}; @@ -426,7 +436,7 @@ let isCall = 1, (outs), (ins pred:$p, t_blxtarget:$func, variable_ops), IIC_Br, "blx${p}\t$func", [(ARMcall tglobaladdr:$func)]>, - Requires<[IsThumb, HasV5T, IsNotIOS]> { + Requires<[IsThumb, HasV5T]> { bits<21> func; let Inst{25-16} = func{20-11}; let Inst{13} = 1; @@ -439,7 +449,7 @@ let isCall = 1, def tBLXr : TI<(outs), (ins pred:$p, GPR:$func, variable_ops), IIC_Br, "blx${p}\t$func", [(ARMtcall GPR:$func)]>, - Requires<[IsThumb, HasV5T, IsNotIOS]>, + Requires<[IsThumb, HasV5T]>, T1Special<{1,1,1,?}> { // A6.2.3 & A8.6.24; bits<4> func; let Inst{6-3} = func; @@ -450,37 +460,7 @@ let isCall = 1, def tBX_CALL : tPseudoInst<(outs), (ins tGPR:$func, variable_ops), 4, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsThumb, IsThumb1Only, IsNotIOS]>; -} - -let isCall = 1, - // On IOS R9 is call-clobbered. - // R7 is marked as a use to prevent frame-pointer assignments from being - // moved above / below calls. 
- Defs = [LR], Uses = [R7, SP] in { - // Also used for Thumb2 - def tBLr9 : tPseudoExpand<(outs), (ins pred:$p, t_bltarget:$func, variable_ops), - 4, IIC_Br, [(ARMtcall tglobaladdr:$func)], - (tBL pred:$p, t_bltarget:$func)>, - Requires<[IsThumb, IsIOS]>; - - // ARMv5T and above, also used for Thumb2 - def tBLXi_r9 : tPseudoExpand<(outs), (ins pred:$p, t_blxtarget:$func, variable_ops), - 4, IIC_Br, [(ARMcall tglobaladdr:$func)], - (tBLXi pred:$p, t_blxtarget:$func)>, - Requires<[IsThumb, HasV5T, IsIOS]>; - - // Also used for Thumb2 - def tBLXr_r9 : tPseudoExpand<(outs), (ins pred:$p, GPR:$func, variable_ops), - 2, IIC_Br, [(ARMtcall GPR:$func)], - (tBLXr pred:$p, GPR:$func)>, - Requires<[IsThumb, HasV5T, IsIOS]>; - - // ARMv4T - def tBXr9_CALL : tPseudoInst<(outs), (ins tGPR:$func, variable_ops), - 4, IIC_Br, - [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsThumb, IsThumb1Only, IsIOS]>; + Requires<[IsThumb, IsThumb1Only]>; } let isBranch = 1, isTerminator = 1, isBarrier = 1 in { @@ -524,24 +504,20 @@ let isBranch = 1, isTerminator = 1 in let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { // IOS versions. let Uses = [SP] in { - // tTAILJMPd: IOS version uses a Thumb2 branch (no Thumb1 tail calls - // on IOS), so it's in ARMInstrThumb2.td. def tTAILJMPr : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops), 4, IIC_Br, [], (tBX GPR:$dst, (ops 14, zero_reg))>, - Requires<[IsThumb, IsIOS]>; + Requires<[IsThumb]>; } - // Non-IOS versions (the difference is R9). + // tTAILJMPd: IOS version uses a Thumb2 branch (no Thumb1 tail calls + // on IOS), so it's in ARMInstrThumb2.td. 
+ // Non-IOS version: let Uses = [SP] in { def tTAILJMPdND : tPseudoExpand<(outs), (ins t_brtarget:$dst, pred:$p, variable_ops), 4, IIC_Br, [], (tB t_brtarget:$dst, pred:$p)>, Requires<[IsThumb, IsNotIOS]>; - def tTAILJMPrND : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops), - 4, IIC_Br, [], - (tBX GPR:$dst, (ops 14, zero_reg))>, - Requires<[IsThumb, IsNotIOS]>; } } @@ -1307,20 +1283,14 @@ def : T1Pat<(ARMWrapperJT tjumptable:$dst, imm:$id), // Direct calls def : T1Pat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>, - Requires<[IsThumb, IsNotIOS]>; -def : T1Pat<(ARMtcall texternalsym:$func), (tBLr9 texternalsym:$func)>, - Requires<[IsThumb, IsIOS]>; + Requires<[IsThumb]>; def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>, - Requires<[IsThumb, HasV5T, IsNotIOS]>; -def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi_r9 texternalsym:$func)>, - Requires<[IsThumb, HasV5T, IsIOS]>; + Requires<[IsThumb, HasV5T]>; // Indirect calls to ARM routines def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr GPR:$dst)>, - Requires<[IsThumb, HasV5T, IsNotIOS]>; -def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr_r9 GPR:$dst)>, - Requires<[IsThumb, HasV5T, IsIOS]>; + Requires<[IsThumb, HasV5T]>; // zextload i1 -> zextload i8 def : T1Pat<(zextloadi1 t_addrmode_rrs1:$addr), @@ -1437,3 +1407,11 @@ def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>; def : tInstAlias<"neg${s}${p} $Rd, $Rm", (tRSB tGPR:$Rd, s_cc_out:$s, tGPR:$Rm, pred:$p)>; + +// Implied destination operand forms for shifts. 
+def : tInstAlias<"lsl${s}${p} $Rdm, $imm", + (tLSLri tGPR:$Rdm, cc_out:$s, tGPR:$Rdm, imm0_31:$imm, pred:$p)>; +def : tInstAlias<"lsr${s}${p} $Rdm, $imm", + (tLSRri tGPR:$Rdm, cc_out:$s, tGPR:$Rdm, imm_sr:$imm, pred:$p)>; +def : tInstAlias<"asr${s}${p} $Rdm, $imm", + (tASRri tGPR:$Rdm, cc_out:$s, tGPR:$Rdm, imm_sr:$imm, pred:$p)>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 1f7edc1..e6fb9d5 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -89,20 +89,26 @@ def t2_so_imm_not : Operand<i32>, PatLeaf<(imm), [{ // t2_so_imm_neg - Match an immediate that is a negation of a t2_so_imm. def t2_so_imm_neg_asmoperand : AsmOperandClass { let Name = "T2SOImmNeg"; } def t2_so_imm_neg : Operand<i32>, PatLeaf<(imm), [{ - return ARM_AM::getT2SOImmVal(-((uint32_t)N->getZExtValue())) != -1; + int64_t Value = -(int)N->getZExtValue(); + return Value && ARM_AM::getT2SOImmVal(Value) != -1; }], t2_so_imm_neg_XFORM> { let ParserMatchClass = t2_so_imm_neg_asmoperand; } /// imm0_4095 predicate - True if the 32-bit immediate is in the range [0.4095]. -def imm0_4095 : Operand<i32>, - ImmLeaf<i32, [{ +def imm0_4095_asmoperand: ImmAsmOperand { let Name = "Imm0_4095"; } +def imm0_4095 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 4096; -}]>; +}]> { + let ParserMatchClass = imm0_4095_asmoperand; +} -def imm0_4095_neg : PatLeaf<(i32 imm), [{ +def imm0_4095_neg_asmoperand: AsmOperandClass { let Name = "Imm0_4095Neg"; } +def imm0_4095_neg : Operand<i32>, PatLeaf<(i32 imm), [{ return (uint32_t)(-N->getZExtValue()) < 4096; -}], imm_neg_XFORM>; +}], imm_neg_XFORM> { + let ParserMatchClass = imm0_4095_neg_asmoperand; +} def imm0_255_neg : PatLeaf<(i32 imm), [{ return (uint32_t)(-N->getZExtValue()) < 255; @@ -2871,6 +2877,8 @@ defm t2TEQ : T2I_cmp_irs<0b0100, "teq", // FIXME: should be able to write a pattern for ARMcmov, but can't use // a two-value operand where a dag node expects two operands. 
:( let neverHasSideEffects = 1 in { + +let isCommutable = 1 in def t2MOVCCr : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$false, rGPR:$Rm, pred:$p), 4, IIC_iCMOVr, @@ -3189,6 +3197,7 @@ def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br, let Inst{13} = target{17}; let Inst{21-16} = target{16-11}; let Inst{10-0} = target{10-0}; + let DecoderMethod = "DecodeT2BInstruction"; } let isNotDuplicable = 1, isIndirectBranch = 1 in { @@ -3268,37 +3277,19 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { Requires<[IsThumb2, IsIOS]>; } -let isCall = 1, - // On non-IOS platforms R9 is callee-saved. - Defs = [LR], Uses = [SP] in { +let isCall = 1, Defs = [LR], Uses = [SP] in { // mov lr, pc; b if callee is marked noreturn to avoid confusing the // return stack predictor. def t2BMOVPCB_CALL : tPseudoInst<(outs), (ins t_bltarget:$func, variable_ops), 6, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>, - Requires<[IsThumb, IsNotIOS]>; -} - -let isCall = 1, - // On IOS R9 is call-clobbered. - // R7 is marked as a use to prevent frame-pointer assignments from being - // moved above / below calls. - Defs = [LR], Uses = [R7, SP] in { - // mov lr, pc; b if callee is marked noreturn to avoid confusing the - // return stack predictor. - def t2BMOVPCBr9_CALL : tPseudoInst<(outs), - (ins t_bltarget:$func, variable_ops), - 6, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>, - Requires<[IsThumb, IsIOS]>; + Requires<[IsThumb]>; } // Direct calls def : T2Pat<(ARMcall_nolink texternalsym:$func), (t2BMOVPCB_CALL texternalsym:$func)>, - Requires<[IsThumb, IsNotIOS]>; -def : T2Pat<(ARMcall_nolink texternalsym:$func), - (t2BMOVPCBr9_CALL texternalsym:$func)>, - Requires<[IsThumb, IsIOS]>; + Requires<[IsThumb]>; // IT block let Defs = [ITSTATE] in @@ -3966,6 +3957,19 @@ def : t2InstAlias<"add${s}${p} $Rdn, $ShiftedRm", (t2ADDrs GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>; +// add w/ negative immediates is just a sub. 
+def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm", + (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, pred:$p, + cc_out:$s)>; +def : t2InstAlias<"add${p} $Rd, $Rn, $imm", + (t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>; +def : t2InstAlias<"add${s}${p} $Rdn, $imm", + (t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm_neg:$imm, pred:$p, + cc_out:$s)>; +def : t2InstAlias<"add${p} $Rdn, $imm", + (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095_neg:$imm, pred:$p)>; + + // Aliases for SUB without the ".w" optional width specifier. def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $imm", (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; @@ -3981,13 +3985,14 @@ def : t2InstAlias<"sub${s}${p} $Rdn, $imm", (t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; def : t2InstAlias<"sub${p} $Rdn, $imm", (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095:$imm, pred:$p)>; +def : t2InstAlias<"sub${s}${p}.w $Rdn, $Rm", + (t2SUBrr GPRnopc:$Rdn, GPRnopc:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>; def : t2InstAlias<"sub${s}${p} $Rdn, $Rm", (t2SUBrr GPRnopc:$Rdn, GPRnopc:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>; def : t2InstAlias<"sub${s}${p} $Rdn, $ShiftedRm", (t2SUBrs GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>; - // Alias for compares without the ".w" optional width specifier. 
def : t2InstAlias<"cmn${p} $Rn, $Rm", (t2CMNzrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>; diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index e9d5720..3600b88 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -950,7 +950,7 @@ def VMLAD : ADbI<0b11100, 0b00, 0, 0, [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def VMLAS : ASbIn<0b11100, 0b00, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -958,7 +958,7 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0, [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -966,10 +966,10 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0, def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VMLAS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx,DontUseFusedMAC]>; def VMLSD : ADbI<0b11100, 0b00, 1, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -977,7 +977,7 @@ def VMLSD : ADbI<0b11100, 0b00, 1, 0, [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def VMLSS : ASbIn<0b11100, 0b00, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -985,7 +985,7 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0, [(set SPR:$Sd, (fadd_mlx 
(fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -993,10 +993,10 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0, def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>; def VNMLAD : ADbI<0b11100, 0b01, 1, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -1004,7 +1004,7 @@ def VNMLAD : ADbI<0b11100, 0b01, 1, 0, [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def VNMLAS : ASbI<0b11100, 0b01, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1012,7 +1012,7 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0, [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
let D = VFPNeonA8Domain; @@ -1020,10 +1020,10 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0, def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin), (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>; def VNMLSD : ADbI<0b11100, 0b01, 0, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -1031,14 +1031,14 @@ def VNMLSD : ADbI<0b11100, 0b01, 0, 0, [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def VNMLSS : ASbI<0b11100, 0b01, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -1046,10 +1046,10 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0, def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), (VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), (VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>; //===----------------------------------------------------------------------===// // Fused FP Multiply-Accumulate Operations. 
@@ -1060,7 +1060,7 @@ def VFMAD : ADbI<0b11101, 0b10, 0, 0, [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def VFMAS : ASbIn<0b11101, 0b10, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1068,17 +1068,25 @@ def VFMAS : ASbIn<0b11101, 0b10, 0, 0, [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> { + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines. } def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VFMAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP4,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VFMAS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>; + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; + +// Match @llvm.fma.* intrinsics +def : Pat<(f64 (fma DPR:$Ddin, DPR:$Dn, DPR:$Dm)), + (VFMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(f32 (fma SPR:$Sdin, SPR:$Sn, SPR:$Sm)), + (VFMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; def VFMSD : ADbI<0b11101, 0b10, 1, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -1086,7 +1094,7 @@ def VFMSD : ADbI<0b11101, 0b10, 1, 0, [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def VFMSS : ASbIn<0b11101, 0b10, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1094,17 +1102,33 @@ def VFMSS : ASbIn<0b11101, 0b10, 1, 0, [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> { + 
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines. } def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VFMSD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP4,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VFMSS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>; + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; + +// Match @llvm.fma.* intrinsics +// (fma (fneg x), y, z) -> (vfms x, y, z) +def : Pat<(f64 (fma (fneg DPR:$Ddin), DPR:$Dn, DPR:$Dm)), + (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(f32 (fma (fneg SPR:$Sdin), SPR:$Sn, SPR:$Sm)), + (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; +// (fneg (fma x, (fneg y), z) -> (vfms x, y, z) +def : Pat<(fneg (f64 (fma DPR:$Ddin, (fneg DPR:$Dn), DPR:$Dm))), + (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(fneg (f32 (fma SPR:$Sdin, (fneg SPR:$Sn), SPR:$Sm))), + (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; def VFNMAD : ADbI<0b11101, 0b01, 1, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -1112,7 +1136,7 @@ def VFNMAD : ADbI<0b11101, 0b01, 1, 0, [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def VFNMAS : ASbI<0b11101, 0b01, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1120,17 +1144,33 @@ def VFNMAS : ASbI<0b11101, 0b01, 1, 0, [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> { + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines. 
} def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), (VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP4,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin), (VFNMAS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>; + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; + +// Match @llvm.fma.* intrinsics +// (fneg (fma x, y, z)) -> (vfnma x, y, z) +def : Pat<(fneg (fma (f64 DPR:$Ddin), (f64 DPR:$Dn), (f64 DPR:$Dm))), + (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(fneg (fma (f32 SPR:$Sdin), (f32 SPR:$Sn), (f32 SPR:$Sm))), + (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; +// (fma (fneg x), y, (fneg z)) -> (vfnma x, y, z) +def : Pat<(f64 (fma (fneg DPR:$Ddin), DPR:$Dn, (fneg DPR:$Dm))), + (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(f32 (fma (fneg SPR:$Sdin), SPR:$Sn, (fneg SPR:$Sm))), + (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; def VFNMSD : ADbI<0b11101, 0b01, 0, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -1138,24 +1178,40 @@ def VFNMSD : ADbI<0b11101, 0b01, 0, 0, [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def VFNMSS : ASbI<0b11101, 0b01, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), IIC_fpFMAC32, "vfnms", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> { + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines. 
} def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), (VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP4,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), (VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>; + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; + +// Match @llvm.fma.* intrinsics +// (fneg (fma (fneg x), y, z)) -> (vnfms x, y, z) +def : Pat<(fneg (f64 (fma (fneg DPR:$Ddin), DPR:$Dn, DPR:$Dm))), + (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(fneg (f32 (fma (fneg SPR:$Sdin), SPR:$Sn, SPR:$Sm))), + (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; +// (fma x, (fneg y), z) -> (vnfms x, y, z) +def : Pat<(f64 (fma DPR:$Ddin, (fneg DPR:$Dn), DPR:$Dm)), + (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(f32 (fma SPR:$Sdin, (fneg SPR:$Sn), SPR:$Sm)), + (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; //===----------------------------------------------------------------------===// // FP Conditional moves. 
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp index 753e578..c5db211 100644 --- a/lib/Target/ARM/ARMJITInfo.cpp +++ b/lib/Target/ARM/ARMJITInfo.cpp @@ -13,7 +13,7 @@ #define DEBUG_TYPE "jit" #include "ARMJITInfo.h" -#include "ARMInstrInfo.h" +#include "ARM.h" #include "ARMConstantPoolValue.h" #include "ARMRelocations.h" #include "ARMSubtarget.h" diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 0f6dc04..9ef2ace 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -15,8 +15,8 @@ #define DEBUG_TYPE "arm-ldst-opt" #include "ARM.h" #include "ARMBaseInstrInfo.h" +#include "ARMBaseRegisterInfo.h" #include "ARMMachineFunctionInfo.h" -#include "ARMRegisterInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -93,7 +93,9 @@ namespace { bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, int Offset, unsigned Base, bool BaseKill, int Opcode, ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, - DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs); + DebugLoc dl, + ArrayRef<std::pair<unsigned, bool> > Regs, + ArrayRef<unsigned> ImpDefs); void MergeOpsUpdate(MachineBasicBlock &MBB, MemOpQueue &MemOps, unsigned memOpsBegin, @@ -282,7 +284,8 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, int Offset, unsigned Base, bool BaseKill, int Opcode, ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, DebugLoc dl, - SmallVector<std::pair<unsigned, bool>, 8> &Regs) { + ArrayRef<std::pair<unsigned, bool> > Regs, + ArrayRef<unsigned> ImpDefs) { // Only a single register to load / store. Don't bother. unsigned NumRegs = Regs.size(); if (NumRegs <= 1) @@ -350,6 +353,10 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef) | getKillRegState(Regs[i].second)); + // Add implicit defs for super-registers. 
+ for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i) + MIB.addReg(ImpDefs[i], RegState::ImplicitDefine); + return true; } @@ -384,19 +391,29 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, } SmallVector<std::pair<unsigned, bool>, 8> Regs; + SmallVector<unsigned, 8> ImpDefs; for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { unsigned Reg = memOps[i].Reg; // If we are inserting the merged operation after an operation that // uses the same register, make sure to transfer any kill flag. bool isKill = memOps[i].isKill || KilledRegs.count(Reg); Regs.push_back(std::make_pair(Reg, isKill)); + + // Collect any implicit defs of super-registers. They must be preserved. + for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) { + if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead()) + continue; + unsigned DefReg = MO->getReg(); + if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end()) + ImpDefs.push_back(DefReg); + } } // Try to do the merge. MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI; ++Loc; if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode, - Pred, PredReg, Scratch, dl, Regs)) + Pred, PredReg, Scratch, dl, Regs, ImpDefs)) return; // Merge succeeded, update records. 
@@ -537,7 +554,7 @@ static bool isMatchingDecrement(MachineInstr *MI, unsigned Base, if (!(MI->getOperand(0).getReg() == Base && MI->getOperand(1).getReg() == Base && (MI->getOperand(2).getImm()*Scale) == Bytes && - llvm::getInstrPredicate(MI, MyPredReg) == Pred && + getInstrPredicate(MI, MyPredReg) == Pred && MyPredReg == PredReg)) return false; @@ -570,7 +587,7 @@ static bool isMatchingIncrement(MachineInstr *MI, unsigned Base, if (!(MI->getOperand(0).getReg() == Base && MI->getOperand(1).getReg() == Base && (MI->getOperand(2).getImm()*Scale) == Bytes && - llvm::getInstrPredicate(MI, MyPredReg) == Pred && + getInstrPredicate(MI, MyPredReg) == Pred && MyPredReg == PredReg)) return false; @@ -701,7 +718,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, bool BaseKill = MI->getOperand(0).isKill(); unsigned Bytes = getLSMultipleTransferSize(MI); unsigned PredReg = 0; - ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); int Opcode = MI->getOpcode(); DebugLoc dl = MI->getDebugLoc(); @@ -854,7 +871,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, return false; unsigned PredReg = 0; - ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); bool DoMerge = false; ARM_AM::AddrOpc AddSub = ARM_AM::add; unsigned NewOpc = 0; @@ -1112,7 +1129,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef(); int OffImm = getMemoryOpOffset(MI); unsigned PredReg = 0; - ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); if (OddRegNum > EvenRegNum && OffImm == 0) { // Ascending register numbers and no offset. It's safe to change it to a @@ -1143,6 +1160,11 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, unsigned NewOpc = (isLd) ? (isT2 ? 
(OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12) : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12); + // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset, + // so adjust and use t2LDRi12 here for that. + unsigned NewOpc2 = (isLd) + ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12) + : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12); DebugLoc dl = MBBI->getDebugLoc(); // If this is a load and base register is killed, it may have been // re-defed by the load, make sure the first load does not clobber it. @@ -1150,11 +1172,13 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, (BaseKill || OffKill) && (TRI->regsOverlap(EvenReg, BaseReg))) { assert(!TRI->regsOverlap(OddReg, BaseReg)); - InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, + InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, OddReg, OddDeadKill, false, BaseReg, false, BaseUndef, false, OffUndef, Pred, PredReg, TII, isT2); NewBBI = llvm::prior(MBBI); + if (isT2 && NewOpc == ARM::t2LDRi8 && OffImm+4 >= 0) + NewOpc = ARM::t2LDRi12; InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill, false, BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, @@ -1167,12 +1191,16 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, EvenDeadKill = false; OddDeadKill = true; } + // Never kill the base register in the first instruction. 
+ // <rdar://problem/11101911> + if (EvenReg == BaseReg) + EvenDeadKill = false; InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill, EvenUndef, BaseReg, false, BaseUndef, false, OffUndef, Pred, PredReg, TII, isT2); NewBBI = llvm::prior(MBBI); - InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, + InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, OddReg, OddDeadKill, OddUndef, BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, Pred, PredReg, TII, isT2); @@ -1223,7 +1251,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { bool isKill = MO.isDef() ? false : MO.isKill(); unsigned Base = MBBI->getOperand(1).getReg(); unsigned PredReg = 0; - ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg); int Offset = getMemoryOpOffset(MBBI); // Watch out for: // r4 := ldr [r5] @@ -1599,7 +1627,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, if (EvenReg == OddReg) return false; BaseReg = Op0->getOperand(1).getReg(); - Pred = llvm::getInstrPredicate(Op0, PredReg); + Pred = getInstrPredicate(Op0, PredReg); dl = Op0->getDebugLoc(); return true; } @@ -1796,7 +1824,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { if (!isMemoryOp(MI)) continue; unsigned PredReg = 0; - if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL) + if (getInstrPredicate(MI, PredReg) != ARMCC::AL) continue; int Opc = MI->getOpcode(); diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 1327fb8..1466e98 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -314,7 +314,8 @@ def TuplesOE2D : RegisterTuples<[dsub_0, dsub_1], def DPair : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128, (interleave QPR, TuplesOE2D)> { // Allocate starting at non-VFP2 registers D16-D31 first. 
- let AltOrders = [(rotl DPair, 16)]; + // Prefer even-odd pairs as they are easier to copy. + let AltOrders = [(add (rotl QPR, 8), (rotl DPair, 16))]; let AltOrderSelect = [{ return 1; }]; } diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td index 8d86c01..8b1fb93 100644 --- a/lib/Target/ARM/ARMScheduleA8.td +++ b/lib/Target/ARM/ARMScheduleA8.td @@ -324,6 +324,15 @@ def CortexA8Itineraries : ProcessorItineraries< InstrStage<19, [A8_NPipe], 0>, InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>, // + // Single-precision Fused FP MAC + InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>, + // + // Double-precision Fused FP MAC + InstrItinData<IIC_fpFMAC64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<19, [A8_NPipe], 0>, + InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>, + // // Single-precision FP DIV InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<20, [A8_NPipe], 0>, @@ -860,6 +869,16 @@ def CortexA8Itineraries : ProcessorItineraries< InstrItinData<IIC_VMACQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>, // + // Double-register Fused FP Multiple-Accumulate + InstrItinData<IIC_VFMACD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>, + // + // Quad-register Fused FP Multiple-Accumulate + // Result written in N9, but that is relative to the last cycle of multicycle, + // so we use 10 for those cases + InstrItinData<IIC_VFMACQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>, + // // Double-register Reciprical Step InstrItinData<IIC_VRECSD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NPipe]>], [9, 2, 2]>, diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index 49fedf6..0d710cc 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -604,6 +604,22 @@ def 
CortexA9Itineraries : ProcessorItineraries< InstrStage<2, [A9_NPipe]>], [9, 1, 1, 1]>, // + // Single-precision Fused FP MAC + InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<9, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_NPipe]>], + [8, 1, 1, 1]>, + // + // Double-precision Fused FP MAC + InstrItinData<IIC_fpFMAC64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<10, [A9_DRegsN], 0, Reserved>, + InstrStage<2, [A9_NPipe]>], + [9, 1, 1, 1]>, + // // Single-precision FP DIV InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, @@ -1697,6 +1713,26 @@ def CortexA9Itineraries : ProcessorItineraries< InstrStage<4, [A9_NPipe]>], [8, 4, 2, 1]>, // + // Double-register Fused FP Multiple-Accumulate + InstrItinData<IIC_VFMACD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe]>], + [6, 3, 2, 1]>, + // + // Quad-register Fused FP Multiple-Accumulate + // Result written in N9, but that is relative to the last cycle of multicycle, + // so we use 10 for those cases + InstrItinData<IIC_VFMACQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 9 cycles + InstrStage<10, [A9_DRegsVFP], 0, Reserved>, + InstrStage<4, [A9_NPipe]>], + [8, 4, 2, 1]>, + // // Double-register Reciprical Step InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td index 4d959f5..0ace9bc 100644 --- a/lib/Target/ARM/ARMScheduleV6.td +++ 
b/lib/Target/ARM/ARMScheduleV6.td @@ -243,6 +243,12 @@ def ARMV6Itineraries : ProcessorItineraries< // Double-precision FP MAC InstrItinData<IIC_fpMAC64 , [InstrStage<2, [V6_Pipe]>], [9, 2, 2, 2]>, // + // Single-precision Fused FP MAC + InstrItinData<IIC_fpFMAC32, [InstrStage<1, [V6_Pipe]>], [9, 2, 2, 2]>, + // + // Double-precision Fused FP MAC + InstrItinData<IIC_fpFMAC64, [InstrStage<2, [V6_Pipe]>], [9, 2, 2, 2]>, + // // Single-precision FP DIV InstrItinData<IIC_fpDIV32 , [InstrStage<15, [V6_Pipe]>], [20, 2, 2]>, // diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 1e8cda5..ca172ed 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -16,7 +16,6 @@ #include "llvm/GlobalValue.h" #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Support/CommandLine.h" -#include "llvm/ADT/SmallVector.h" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR @@ -49,7 +48,6 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, , HasVFPv3(false) , HasVFPv4(false) , HasNEON(false) - , HasNEON2(false) , UseNEONForSinglePrecisionFP(false) , SlowFPVMLx(false) , HasVMLxForwarding(false) diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 3d9c03d..e72b06f 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -45,13 +45,12 @@ protected: bool HasV6T2Ops; bool HasV7Ops; - /// HasVFPv2, HasVFPv3, HasVFPv4, HasNEON, HasNEONVFPv4 - Specify what + /// HasVFPv2, HasVFPv3, HasVFPv4, HasNEON - Specify what /// floating point ISAs are supported. bool HasVFPv2; bool HasVFPv3; bool HasVFPv4; bool HasNEON; - bool HasNEON2; /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been /// specified. 
Use the method useNEONForSinglePrecisionFP() to @@ -205,7 +204,6 @@ protected: bool hasVFP3() const { return HasVFPv3; } bool hasVFP4() const { return HasVFPv4; } bool hasNEON() const { return HasNEON; } - bool hasNEON2() const { return HasNEON2 || (HasNEON && HasVFPv4); } bool useNEONForSinglePrecisionFP() const { return hasNEON() && UseNEONForSinglePrecisionFP; } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 44229ad..047efc2 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -158,8 +158,10 @@ bool ARMPassConfig::addPreRegAlloc() { bool ARMPassConfig::addPreSched2() { // FIXME: temporarily disabling load / store optimization pass for Thumb1. if (getOptLevel() != CodeGenOpt::None) { - if (!getARMSubtarget().isThumb1Only()) + if (!getARMSubtarget().isThumb1Only()) { PM.add(createARMLoadStoreOptimizationPass()); + printAndVerify("After ARM load / store optimizer"); + } if (getARMSubtarget().hasNEON()) PM.add(createExecutionDependencyFixPass(&ARM::DPRRegClass)); } @@ -192,7 +194,8 @@ bool ARMPassConfig::addPreEmitPass() { return true; } -bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE) { +bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM, + JITCodeEmitter &JCE) { // Machine code emitter pass for ARM. 
PM.add(createARMJITCodeEmitterPass(*this, JCE)); return false; diff --git a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp index eb8aaf2..fda8536 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp @@ -17,8 +17,6 @@ #include "llvm/Support/TargetRegistry.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringSwitch.h" #include <string> diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 911eb13..2c53e3f 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -82,8 +82,14 @@ class ARMAsmParser : public MCTargetAsmParser { MCAsmParser &getParser() const { return Parser; } MCAsmLexer &getLexer() const { return Parser.getLexer(); } - void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); } - bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } + bool Warning(SMLoc L, const Twine &Msg, + ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) { + return Parser.Warning(L, Msg, Ranges); + } + bool Error(SMLoc L, const Twine &Msg, + ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) { + return Parser.Error(L, Msg, Ranges); + } int tryParseRegister(); bool tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &); @@ -478,6 +484,8 @@ public: /// getEndLoc - Get the location of the last token of this operand. 
SMLoc getEndLoc() const { return EndLoc; } + SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); } + ARMCC::CondCodes getCondCode() const { assert(Kind == k_CondCode && "Invalid access!"); return CC.Val; @@ -579,6 +587,14 @@ public: int64_t Value = CE->getValue(); return ((Value & 3) == 0) && Value >= 0 && Value <= 508; } + bool isImm0_508s4Neg() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = -CE->getValue(); + // explicitly exclude zero. we want that to use the normal 0_508 version. + return ((Value & 3) == 0) && Value > 0 && Value <= 508; + } bool isImm0_255() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); @@ -586,6 +602,20 @@ public: int64_t Value = CE->getValue(); return Value >= 0 && Value < 256; } + bool isImm0_4095() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 0 && Value < 4096; + } + bool isImm0_4095Neg() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = -CE->getValue(); + return Value > 0 && Value < 4096; + } bool isImm0_1() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); @@ -782,7 +812,9 @@ public: const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); - return ARM_AM::getSOImmVal(-Value) != -1; + // Only use this when not representable as a plain so_imm. 
+ return ARM_AM::getSOImmVal(Value) == -1 && + ARM_AM::getSOImmVal(-Value) != -1; } bool isT2SOImm() const { if (!isImm()) return false; @@ -803,7 +835,9 @@ public: const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); - return ARM_AM::getT2SOImmVal(-Value) != -1; + // Only use this when not representable as a plain so_imm. + return ARM_AM::getT2SOImmVal(Value) == -1 && + ARM_AM::getT2SOImmVal(-Value) != -1; } bool isSetEndImm() const { if (!isImm()) return false; @@ -1495,6 +1529,14 @@ public: Inst.addOperand(MCOperand::CreateImm(CE->getValue() / 4)); } + void addImm0_508s4NegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The immediate is scaled by four in the encoding and is stored + // in the MCInst as such. Lop off the low two bits here. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(-(CE->getValue() / 4))); + } + void addImm0_508s4Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The immediate is scaled by four in the encoding and is stored @@ -1553,6 +1595,14 @@ public: Inst.addOperand(MCOperand::CreateImm(-CE->getValue())); } + void addImm0_4095NegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The operand is actually an imm0_4095, but we have its + // negation in the assembly source, so twiddle it here. 
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(-CE->getValue())); + } + void addARMSOImmNotOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The operand is actually a so_imm, but we have its bitwise @@ -3324,7 +3374,8 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { FlagsVal = 8; // No flag } } else if (SpecReg == "cpsr" || SpecReg == "spsr") { - if (Flags == "all") // cpsr_all is an alias for cpsr_fc + // cpsr_all is an alias for cpsr_fc, as is plain cpsr. + if (Flags == "all" || Flags == "") Flags = "fc"; for (int i = 0, e = Flags.size(); i != e; ++i) { unsigned Flag = StringSwitch<unsigned>(Flags.substr(i, 1)) @@ -4475,22 +4526,26 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, case AsmToken::Dollar: case AsmToken::Hash: { // #42 -> immediate. - // TODO: ":lower16:" and ":upper16:" modifiers after # before immediate S = Parser.getTok().getLoc(); Parser.Lex(); - bool isNegative = Parser.getTok().is(AsmToken::Minus); - const MCExpr *ImmVal; - if (getParser().ParseExpression(ImmVal)) - return true; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmVal); - if (CE) { - int32_t Val = CE->getValue(); - if (isNegative && Val == 0) - ImmVal = MCConstantExpr::Create(INT32_MIN, getContext()); + + if (Parser.getTok().isNot(AsmToken::Colon)) { + bool isNegative = Parser.getTok().is(AsmToken::Minus); + const MCExpr *ImmVal; + if (getParser().ParseExpression(ImmVal)) + return true; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmVal); + if (CE) { + int32_t Val = CE->getValue(); + if (isNegative && Val == 0) + ImmVal = MCConstantExpr::Create(INT32_MIN, getContext()); + } + E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E)); + return false; } - E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); - 
Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E)); - return false; + // w/ a ':' after the '#', it's just like a plain ':'. + // FALLTHROUGH } case AsmToken::Colon: { // ":lower16:" and ":upper16:" expression prefixes @@ -4616,6 +4671,7 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, Mnemonic == "fmrs" || Mnemonic == "fsqrts" || Mnemonic == "fsubs" || Mnemonic == "fsts" || Mnemonic == "fcpys" || Mnemonic == "fdivs" || Mnemonic == "fmuls" || Mnemonic == "fcmps" || Mnemonic == "fcmpzs" || + Mnemonic == "vfms" || Mnemonic == "vfnms" || (Mnemonic == "movs" && isThumb()))) { Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1); CarrySetting = true; @@ -4659,6 +4715,7 @@ getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet, Mnemonic == "orr" || Mnemonic == "mvn" || Mnemonic == "rsb" || Mnemonic == "rsc" || Mnemonic == "orn" || Mnemonic == "sbc" || Mnemonic == "eor" || Mnemonic == "neg" || + Mnemonic == "vfm" || Mnemonic == "vfnm" || (!isThumb() && (Mnemonic == "smull" || Mnemonic == "mov" || Mnemonic == "mla" || Mnemonic == "smlal" || Mnemonic == "umlal" || Mnemonic == "umull"))) { @@ -4727,7 +4784,7 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, static_cast<ARMOperand*>(Operands[4])->isReg() && static_cast<ARMOperand*>(Operands[4])->getReg() == ARM::SP && static_cast<ARMOperand*>(Operands[1])->getReg() == 0 && - (static_cast<ARMOperand*>(Operands[5])->isReg() || + ((Mnemonic == "add" &&static_cast<ARMOperand*>(Operands[5])->isReg()) || static_cast<ARMOperand*>(Operands[5])->isImm0_1020s4())) return true; // For Thumb2, add/sub immediate does not have a cc_out operand for the @@ -4811,7 +4868,10 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, (Operands.size() == 5 || Operands.size() == 6) && static_cast<ARMOperand*>(Operands[3])->isReg() && static_cast<ARMOperand*>(Operands[3])->getReg() == ARM::SP && - static_cast<ARMOperand*>(Operands[1])->getReg() == 0) + static_cast<ARMOperand*>(Operands[1])->getReg() == 
0 && + (static_cast<ARMOperand*>(Operands[4])->isImm() || + (Operands.size() == 6 && + static_cast<ARMOperand*>(Operands[5])->isImm()))) return true; return false; @@ -6602,6 +6662,37 @@ processInstruction(MCInst &Inst, return true; } + // Handle encoding choice for the shift-immediate instructions. + case ARM::t2LSLri: + case ARM::t2LSRri: + case ARM::t2ASRri: { + if (isARMLowRegister(Inst.getOperand(0).getReg()) && + Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() && + Inst.getOperand(5).getReg() == (inITBlock() ? 0 : ARM::CPSR) && + !(static_cast<ARMOperand*>(Operands[3])->isToken() && + static_cast<ARMOperand*>(Operands[3])->getToken() == ".w")) { + unsigned NewOpc; + switch (Inst.getOpcode()) { + default: llvm_unreachable("unexpected opcode"); + case ARM::t2LSLri: NewOpc = ARM::tLSLri; break; + case ARM::t2LSRri: NewOpc = ARM::tLSRri; break; + case ARM::t2ASRri: NewOpc = ARM::tASRri; break; + } + // The Thumb1 operands aren't in the same order. Awesome, eh? + MCInst TmpInst; + TmpInst.setOpcode(NewOpc); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(5)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(Inst.getOperand(3)); + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + return false; + } + // Handle the Thumb2 mode MOV complex aliases. case ARM::t2MOVsr: case ARM::t2MOVSsr: { @@ -6833,7 +6924,7 @@ processInstruction(MCInst &Inst, // explicitly specified. From the ARM ARM: "Encoding T1 is preferred // to encoding T2 if <Rd> is specified and encoding T2 is preferred // to encoding T1 if <Rd> is omitted." - if (Inst.getOperand(3).getImm() < 8 && Operands.size() == 6) { + if ((unsigned)Inst.getOperand(3).getImm() < 8 && Operands.size() == 6) { Inst.setOpcode(ARM::tADDi3); return true; } @@ -6843,11 +6934,37 @@ processInstruction(MCInst &Inst, // explicitly specified. 
From the ARM ARM: "Encoding T1 is preferred // to encoding T2 if <Rd> is specified and encoding T2 is preferred // to encoding T1 if <Rd> is omitted." - if (Inst.getOperand(3).getImm() < 8 && Operands.size() == 6) { + if ((unsigned)Inst.getOperand(3).getImm() < 8 && Operands.size() == 6) { Inst.setOpcode(ARM::tSUBi3); return true; } break; + case ARM::t2ADDri: + case ARM::t2SUBri: { + // If the destination and first source operand are the same, and + // the flags are compatible with the current IT status, use encoding T2 + // instead of T3. For compatibility with the system 'as'. Make sure the + // wide encoding wasn't explicit. + if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() || + !isARMLowRegister(Inst.getOperand(0).getReg()) || + (unsigned)Inst.getOperand(2).getImm() > 255 || + ((!inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR) || + (inITBlock() && Inst.getOperand(5).getReg() != 0)) || + (static_cast<ARMOperand*>(Operands[3])->isToken() && + static_cast<ARMOperand*>(Operands[3])->getToken() == ".w")) + break; + MCInst TmpInst; + TmpInst.setOpcode(Inst.getOpcode() == ARM::t2ADDri ? + ARM::tADDi8 : ARM::tSUBi8); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(5)); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(Inst.getOperand(3)); + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } case ARM::t2ADDrr: { // If the destination and first source operand are the same, and // there's no setting of the flags, use encoding T2 instead of T3. @@ -6964,7 +7081,7 @@ processInstruction(MCInst &Inst, // If we can use the 16-bit encoding and the user didn't explicitly // request the 32-bit variant, transform it here. 
if (isARMLowRegister(Inst.getOperand(0).getReg()) && - Inst.getOperand(1).getImm() <= 255 && + (unsigned)Inst.getOperand(1).getImm() <= 255 && ((!inITBlock() && Inst.getOperand(2).getImm() == ARMCC::AL && Inst.getOperand(4).getReg() == ARM::CPSR) || (inITBlock() && Inst.getOperand(4).getReg() == 0)) && @@ -7216,7 +7333,8 @@ MatchAndEmitInstruction(SMLoc IDLoc, return Error(ErrorLoc, "invalid operand for instruction"); } case Match_MnemonicFail: - return Error(IDLoc, "invalid instruction"); + return Error(IDLoc, "invalid instruction", + ((ARMOperand*)Operands[0])->getLocRange()); case Match_ConversionFail: // The converter function will have already emited a diagnostic. return true; diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index ce4587b..912935d 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -9,8 +9,6 @@ #define DEBUG_TYPE "arm-disassembler" -#include "ARM.h" -#include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMMCExpr.h" #include "MCTargetDesc/ARMBaseInfo.h" @@ -20,6 +18,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MemoryObject.h" #include "llvm/Support/ErrorHandling.h" @@ -103,228 +102,232 @@ static bool Check(DecodeStatus &Out, DecodeStatus In) { // Forward declare these because the autogenerated code will reference them. // Definitions are further down. 
-static DecodeStatus DecodeGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeGPRnopcRegisterClass(llvm::MCInst &Inst, +static DecodeStatus DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodetGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodetcGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecoderGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeDPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeDPR_8RegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeDPR_8RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeDPR_VFP2RegisterClass(llvm::MCInst &Inst, +static DecodeStatus DecodeDPR_VFP2RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeDPairRegisterClass(llvm::MCInst 
&Inst, unsigned RegNo, +static DecodeStatus DecodeDPairRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeDPairSpacedRegisterClass(llvm::MCInst &Inst, +static DecodeStatus DecodeDPairSpacedRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodePredicateOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeCCOutOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSOImmOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeSOImmOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeRegListOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSPRRegListOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeDPRRegListOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeBitfieldMaskOperand(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeCopMemInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeAddrMode2IdxInstruction(llvm::MCInst &Inst, +static DecodeStatus DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn, 
uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSORegMemOperand(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeAddrMode3Instruction(llvm::MCInst &Inst,unsigned Insn, +static DecodeStatus DecodeAddrMode3Instruction(MCInst &Inst,unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSORegImmOperand(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSORegRegOperand(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeSORegRegOperand(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeMemMultipleWritebackInstruction(llvm::MCInst & Inst, +static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst & Inst, unsigned Insn, uint64_t Adddress, const void *Decoder); -static DecodeStatus DecodeT2MOVTWInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeArmMOVTWInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSMLAInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeCPSInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2CPSInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static 
DecodeStatus DecodeAddrModeImm12Operand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeAddrMode5Operand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeAddrMode7Operand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeBranchImmInstruction(llvm::MCInst &Inst,unsigned Insn, +static DecodeStatus DecodeT2BInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeAddrMode6Operand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeBranchImmInstruction(MCInst &Inst,unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLD3DupInstruction(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLD4DupInstruction(llvm::MCInst &Inst, unsigned Val, +static 
DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeNEONModImmInstruction(llvm::MCInst &Inst,unsigned Val, +static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVSHLMaxInstruction(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeNEONModImmInstruction(MCInst &Inst,unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeShiftRight8Imm(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeShiftRight16Imm(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeShiftRight8Imm(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeShiftRight32Imm(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeShiftRight16Imm(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeShiftRight64Imm(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeShiftRight32Imm(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeShiftRight64Imm(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodePostIdxReg(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeCoprocessor(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeMemBarrierOption(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static 
DecodeStatus DecodeMSRMask(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeDoubleRegLoad(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeDoubleRegStore(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeLDRPreImm(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeLDRPreReg(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeLDRPreImm(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSTRPreImm(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSTRPreReg(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLD1LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeSTRPreReg(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLD2LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLD3LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLD4LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVST1LN(llvm::MCInst 
&Inst, unsigned Insn, +static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVST2LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVST3LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVST4LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVMOVSRR(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVMOVRRS(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSwap(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVCVTD(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVCVTQ(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbAddSpecialReg(llvm::MCInst &Inst, uint16_t Insn, +static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbBROperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static 
DecodeStatus DecodeT2BROperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbCmpBROperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbAddrModeRR(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbAddrModeRR(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbAddrModeIS(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbAddrModeIS(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbAddrModePC(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbAddrModePC(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbAddrModeSP(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbAddrModeSP(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2AddrModeSOReg(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2LoadShift(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2Imm8S4(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2AddrModeImm8s4(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2AddrModeImm0_1020s4(llvm::MCInst &Inst,unsigned Val, +static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst 
&Inst,unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2Imm8(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2AddrModeImm8(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbAddSPImm(llvm::MCInst &Inst, uint16_t Val, +static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbAddSPReg(llvm::MCInst &Inst, uint16_t Insn, +static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbCPS(llvm::MCInst &Inst, uint16_t Insn, +static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbBLXOffset(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2AddrModeImm12(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbTableBranch(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbTableBranch(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumb2BCCInstruction(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2SOImm(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbBCCTargetOperand(llvm::MCInst &Inst,unsigned Val, +static 
DecodeStatus DecodeThumbBCCTargetOperand(MCInst &Inst,unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbBLTargetOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeIT(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeIT(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2LDRDPreInstruction(llvm::MCInst &Inst,unsigned Insn, +static DecodeStatus DecodeT2LDRDPreInstruction(MCInst &Inst,unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2STRDPreInstruction(llvm::MCInst &Inst,unsigned Insn, +static DecodeStatus DecodeT2STRDPreInstruction(MCInst &Inst,unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2Adr(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2Adr(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2LdStPre(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2ShifterImmOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); - - +static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); #include "ARMGenDisassemblerTables.inc" #include "ARMGenInstrInfo.inc" #include "ARMGenEDInfo.inc" @@ -856,7 +859,7 @@ static const uint16_t GPRDecoderTable[] = { ARM::R12, ARM::SP, ARM::LR, ARM::PC }; -static DecodeStatus DecodeGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const 
void *Decoder) { if (RegNo > 15) return MCDisassembler::Fail; @@ -867,7 +870,7 @@ static DecodeStatus DecodeGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, } static DecodeStatus -DecodeGPRnopcRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -879,14 +882,14 @@ DecodeGPRnopcRegisterClass(llvm::MCInst &Inst, unsigned RegNo, return S; } -static DecodeStatus DecodetGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo > 7) return MCDisassembler::Fail; return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder); } -static DecodeStatus DecodetcGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { unsigned Register = 0; switch (RegNo) { @@ -916,7 +919,7 @@ static DecodeStatus DecodetcGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, return MCDisassembler::Success; } -static DecodeStatus DecoderGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo == 13 || RegNo == 15) return MCDisassembler::Fail; return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder); @@ -933,7 +936,7 @@ static const uint16_t SPRDecoderTable[] = { ARM::S28, ARM::S29, ARM::S30, ARM::S31 }; -static DecodeStatus DecodeSPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; @@ -954,7 +957,7 @@ static const uint16_t DPRDecoderTable[] = { ARM::D28, ARM::D29, ARM::D30, ARM::D31 }; -static DecodeStatus DecodeDPRRegisterClass(llvm::MCInst &Inst, 
unsigned RegNo, +static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; @@ -964,7 +967,7 @@ static DecodeStatus DecodeDPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, return MCDisassembler::Success; } -static DecodeStatus DecodeDPR_8RegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeDPR_8RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo > 7) return MCDisassembler::Fail; @@ -972,7 +975,7 @@ static DecodeStatus DecodeDPR_8RegisterClass(llvm::MCInst &Inst, unsigned RegNo, } static DecodeStatus -DecodeDPR_VFP2RegisterClass(llvm::MCInst &Inst, unsigned RegNo, +DecodeDPR_VFP2RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo > 15) return MCDisassembler::Fail; @@ -987,7 +990,7 @@ static const uint16_t QPRDecoderTable[] = { }; -static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; @@ -1007,7 +1010,7 @@ static const uint16_t DPairDecoderTable[] = { ARM::Q15 }; -static DecodeStatus DecodeDPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeDPairRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo > 30) return MCDisassembler::Fail; @@ -1028,7 +1031,7 @@ static const uint16_t DPairSpacedDecoderTable[] = { ARM::D28_D30, ARM::D29_D31 }; -static DecodeStatus DecodeDPairSpacedRegisterClass(llvm::MCInst &Inst, +static DecodeStatus DecodeDPairSpacedRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { @@ -1040,7 +1043,7 @@ static DecodeStatus DecodeDPairSpacedRegisterClass(llvm::MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodePredicateOperand(llvm::MCInst 
&Inst, unsigned Val, +static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { if (Val == 0xF) return MCDisassembler::Fail; // AL predicate is not allowed on Thumb1 branches. @@ -1054,7 +1057,7 @@ static DecodeStatus DecodePredicateOperand(llvm::MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus DecodeCCOutOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { if (Val) Inst.addOperand(MCOperand::CreateReg(ARM::CPSR)); @@ -1063,7 +1066,7 @@ static DecodeStatus DecodeCCOutOperand(llvm::MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus DecodeSOImmOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeSOImmOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { uint32_t imm = Val & 0xFF; uint32_t rot = (Val & 0xF00) >> 7; @@ -1072,7 +1075,7 @@ static DecodeStatus DecodeSOImmOperand(llvm::MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus DecodeSORegImmOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1109,7 +1112,7 @@ static DecodeStatus DecodeSORegImmOperand(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeSORegRegOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeSORegRegOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1144,7 +1147,7 @@ static DecodeStatus DecodeSORegRegOperand(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeRegListOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = 
MCDisassembler::Success; @@ -1179,7 +1182,7 @@ static DecodeStatus DecodeRegListOperand(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeSPRRegListOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1196,7 +1199,7 @@ static DecodeStatus DecodeSPRRegListOperand(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeDPRRegListOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1213,7 +1216,7 @@ static DecodeStatus DecodeDPRRegListOperand(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeBitfieldMaskOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { // This operand encodes a mask of contiguous zeros between a specified MSB // and LSB. 
To decode it, we create the mask of all bits MSB-and-lower, @@ -1234,7 +1237,7 @@ static DecodeStatus DecodeBitfieldMaskOperand(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeCopMemInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1379,7 +1382,7 @@ static DecodeStatus DecodeCopMemInstruction(llvm::MCInst &Inst, unsigned Insn, } static DecodeStatus -DecodeAddrMode2IdxInstruction(llvm::MCInst &Inst, unsigned Insn, +DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1482,7 +1485,7 @@ DecodeAddrMode2IdxInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeSORegMemOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1523,7 +1526,7 @@ static DecodeStatus DecodeSORegMemOperand(llvm::MCInst &Inst, unsigned Val, } static DecodeStatus -DecodeAddrMode3Instruction(llvm::MCInst &Inst, unsigned Insn, +DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1536,6 +1539,7 @@ DecodeAddrMode3Instruction(llvm::MCInst &Inst, unsigned Insn, unsigned pred = fieldFromInstruction32(Insn, 28, 4); unsigned W = fieldFromInstruction32(Insn, 21, 1); unsigned P = fieldFromInstruction32(Insn, 24, 1); + unsigned Rt2 = Rt + 1; bool writeback = (W == 1) | (P == 0); @@ -1547,7 +1551,86 @@ DecodeAddrMode3Instruction(llvm::MCInst &Inst, unsigned Insn, case ARM::LDRD: case ARM::LDRD_PRE: case ARM::LDRD_POST: - if (Rt & 0x1) return MCDisassembler::Fail; + if (Rt & 0x1) S = MCDisassembler::SoftFail; + break; + default: + break; + } + switch (Inst.getOpcode()) { + case 
ARM::STRD: + case ARM::STRD_PRE: + case ARM::STRD_POST: + if (P == 0 && W == 1) + S = MCDisassembler::SoftFail; + + if (writeback && (Rn == 15 || Rn == Rt || Rn == Rt2)) + S = MCDisassembler::SoftFail; + if (type && Rm == 15) + S = MCDisassembler::SoftFail; + if (Rt2 == 15) + S = MCDisassembler::SoftFail; + if (!type && fieldFromInstruction32(Insn, 8, 4)) + S = MCDisassembler::SoftFail; + break; + case ARM::STRH: + case ARM::STRH_PRE: + case ARM::STRH_POST: + if (Rt == 15) + S = MCDisassembler::SoftFail; + if (writeback && (Rn == 15 || Rn == Rt)) + S = MCDisassembler::SoftFail; + if (!type && Rm == 15) + S = MCDisassembler::SoftFail; + break; + case ARM::LDRD: + case ARM::LDRD_PRE: + case ARM::LDRD_POST: + if (type && Rn == 15){ + if (Rt2 == 15) + S = MCDisassembler::SoftFail; + break; + } + if (P == 0 && W == 1) + S = MCDisassembler::SoftFail; + if (!type && (Rt2 == 15 || Rm == 15 || Rm == Rt || Rm == Rt2)) + S = MCDisassembler::SoftFail; + if (!type && writeback && Rn == 15) + S = MCDisassembler::SoftFail; + if (writeback && (Rn == Rt || Rn == Rt2)) + S = MCDisassembler::SoftFail; + break; + case ARM::LDRH: + case ARM::LDRH_PRE: + case ARM::LDRH_POST: + if (type && Rn == 15){ + if (Rt == 15) + S = MCDisassembler::SoftFail; + break; + } + if (Rt == 15) + S = MCDisassembler::SoftFail; + if (!type && Rm == 15) + S = MCDisassembler::SoftFail; + if (!type && writeback && (Rn == 15 || Rn == Rt)) + S = MCDisassembler::SoftFail; + break; + case ARM::LDRSH: + case ARM::LDRSH_PRE: + case ARM::LDRSH_POST: + case ARM::LDRSB: + case ARM::LDRSB_PRE: + case ARM::LDRSB_POST: + if (type && Rn == 15){ + if (Rt == 15) + S = MCDisassembler::SoftFail; + break; + } + if (type && (Rt == 15 || (writeback && Rn == Rt))) + S = MCDisassembler::SoftFail; + if (!type && (Rt == 15 || Rm == 15)) + S = MCDisassembler::SoftFail; + if (!type && writeback && (Rn == 15 || Rn == Rt)) + S = MCDisassembler::SoftFail; break; default: break; @@ -1634,7 +1717,7 @@ DecodeAddrMode3Instruction(llvm::MCInst 
&Inst, unsigned Insn, return S; } -static DecodeStatus DecodeRFEInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeRFEInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1663,7 +1746,7 @@ static DecodeStatus DecodeRFEInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeMemMultipleWritebackInstruction(llvm::MCInst &Inst, +static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1748,7 +1831,7 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(llvm::MCInst &Inst, return S; } -static DecodeStatus DecodeCPSInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { unsigned imod = fieldFromInstruction32(Insn, 18, 2); unsigned M = fieldFromInstruction32(Insn, 17, 1); @@ -1788,7 +1871,7 @@ static DecodeStatus DecodeCPSInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeT2CPSInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { unsigned imod = fieldFromInstruction32(Insn, 9, 2); unsigned M = fieldFromInstruction32(Insn, 8, 1); @@ -1828,7 +1911,7 @@ static DecodeStatus DecodeT2CPSInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeT2MOVTWInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1852,7 +1935,7 @@ static DecodeStatus DecodeT2MOVTWInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeArmMOVTWInstruction(llvm::MCInst &Inst, unsigned Insn, +static 
DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1878,7 +1961,7 @@ static DecodeStatus DecodeArmMOVTWInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeSMLAInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1906,7 +1989,7 @@ static DecodeStatus DecodeSMLAInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeAddrModeImm12Operand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1926,7 +2009,7 @@ static DecodeStatus DecodeAddrModeImm12Operand(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeAddrMode5Operand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1945,13 +2028,28 @@ static DecodeStatus DecodeAddrMode5Operand(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeAddrMode7Operand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { return DecodeGPRRegisterClass(Inst, Val, Address, Decoder); } static DecodeStatus -DecodeBranchImmInstruction(llvm::MCInst &Inst, unsigned Insn, +DecodeT2BInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + unsigned imm = (fieldFromInstruction32(Insn, 0, 11) << 0) | + (fieldFromInstruction32(Insn, 11, 1) << 18) | + (fieldFromInstruction32(Insn, 13, 1) << 17) | + (fieldFromInstruction32(Insn, 16, 6) << 11) | + 
(fieldFromInstruction32(Insn, 26, 1) << 19); + if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<20>(imm<<1) + 4, + true, 4, Inst, Decoder)) + Inst.addOperand(MCOperand::CreateImm(SignExtend32<20>(imm << 1))); + return S; +} + +static DecodeStatus +DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1977,7 +2075,7 @@ DecodeBranchImmInstruction(llvm::MCInst &Inst, unsigned Insn, } -static DecodeStatus DecodeAddrMode6Operand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1994,7 +2092,7 @@ static DecodeStatus DecodeAddrMode6Operand(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2183,6 +2281,8 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, case ARM::VLD2b8wb_register: case ARM::VLD2b16wb_register: case ARM::VLD2b32wb_register: + Inst.addOperand(MCOperand::CreateImm(0)); + break; case ARM::VLD3d8_UPD: case ARM::VLD3d16_UPD: case ARM::VLD3d32_UPD: @@ -2251,12 +2351,22 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, !Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) return MCDisassembler::Fail; break; + case ARM::VLD2d8wb_fixed: + case ARM::VLD2d16wb_fixed: + case ARM::VLD2d32wb_fixed: + case ARM::VLD2b8wb_fixed: + case ARM::VLD2b16wb_fixed: + case ARM::VLD2b32wb_fixed: + case ARM::VLD2q8wb_fixed: + case ARM::VLD2q16wb_fixed: + case ARM::VLD2q32wb_fixed: + break; } return S; } -static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Insn, 
uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2319,6 +2429,8 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, case ARM::VST2b8wb_register: case ARM::VST2b16wb_register: case ARM::VST2b32wb_register: + if (Rm == 0xF) + return MCDisassembler::Fail; Inst.addOperand(MCOperand::CreateImm(0)); break; case ARM::VST3d8_UPD: @@ -2525,7 +2637,7 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2570,7 +2682,7 @@ static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2580,7 +2692,6 @@ static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Insn, unsigned Rm = fieldFromInstruction32(Insn, 0, 4); unsigned align = fieldFromInstruction32(Insn, 4, 1); unsigned size = 1 << fieldFromInstruction32(Insn, 6, 2); - unsigned pred = fieldFromInstruction32(Insn, 22, 4); align *= 2*size; switch (Inst.getOpcode()) { @@ -2611,20 +2722,15 @@ static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Insn, return MCDisassembler::Fail; Inst.addOperand(MCOperand::CreateImm(align)); - if (Rm == 0xD) - Inst.addOperand(MCOperand::CreateReg(0)); - else if (Rm != 0xF) { + if (Rm != 0xD && Rm != 0xF) { if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) return MCDisassembler::Fail; } - if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) - return MCDisassembler::Fail; - return S; } -static DecodeStatus 
DecodeVLD3DupInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2659,7 +2765,7 @@ static DecodeStatus DecodeVLD3DupInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVLD4DupInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2712,7 +2818,7 @@ static DecodeStatus DecodeVLD4DupInstruction(llvm::MCInst &Inst, unsigned Insn, } static DecodeStatus -DecodeNEONModImmInstruction(llvm::MCInst &Inst, unsigned Insn, +DecodeNEONModImmInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2757,7 +2863,7 @@ DecodeNEONModImmInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVSHLMaxInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2776,31 +2882,31 @@ static DecodeStatus DecodeVSHLMaxInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeShiftRight8Imm(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeShiftRight8Imm(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { Inst.addOperand(MCOperand::CreateImm(8 - Val)); return MCDisassembler::Success; } -static DecodeStatus DecodeShiftRight16Imm(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeShiftRight16Imm(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { Inst.addOperand(MCOperand::CreateImm(16 - Val)); return MCDisassembler::Success; } -static DecodeStatus DecodeShiftRight32Imm(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus 
DecodeShiftRight32Imm(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { Inst.addOperand(MCOperand::CreateImm(32 - Val)); return MCDisassembler::Success; } -static DecodeStatus DecodeShiftRight64Imm(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeShiftRight64Imm(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { Inst.addOperand(MCOperand::CreateImm(64 - Val)); return MCDisassembler::Success; } -static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2836,7 +2942,7 @@ static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeThumbAddSpecialReg(llvm::MCInst &Inst, uint16_t Insn, +static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2860,25 +2966,31 @@ static DecodeStatus DecodeThumbAddSpecialReg(llvm::MCInst &Inst, uint16_t Insn, return S; } -static DecodeStatus DecodeThumbBROperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(SignExtend32<12>(Val << 1))); + if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<12>(Val<<1) + 4, + true, 2, Inst, Decoder)) + Inst.addOperand(MCOperand::CreateImm(SignExtend32<12>(Val << 1))); return MCDisassembler::Success; } -static DecodeStatus DecodeT2BROperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(SignExtend32<21>(Val))); + if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<22>(Val<<1) + 4, + true, 4, Inst, Decoder)) + 
Inst.addOperand(MCOperand::CreateImm(SignExtend32<21>(Val))); return MCDisassembler::Success; } -static DecodeStatus DecodeThumbCmpBROperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(SignExtend32<7>(Val << 1))); + if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<7>(Val<<1) + 4, + true, 2, Inst, Decoder)) + Inst.addOperand(MCOperand::CreateImm(SignExtend32<7>(Val << 1))); return MCDisassembler::Success; } -static DecodeStatus DecodeThumbAddrModeRR(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbAddrModeRR(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2893,7 +3005,7 @@ static DecodeStatus DecodeThumbAddrModeRR(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeThumbAddrModeIS(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbAddrModeIS(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2907,7 +3019,7 @@ static DecodeStatus DecodeThumbAddrModeIS(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeThumbAddrModePC(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbAddrModePC(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { unsigned imm = Val << 2; @@ -2917,7 +3029,7 @@ static DecodeStatus DecodeThumbAddrModePC(llvm::MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus DecodeThumbAddrModeSP(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbAddrModeSP(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { Inst.addOperand(MCOperand::CreateReg(ARM::SP)); Inst.addOperand(MCOperand::CreateImm(Val)); @@ -2925,7 +3037,7 @@ static DecodeStatus DecodeThumbAddrModeSP(llvm::MCInst &Inst, unsigned Val, return 
MCDisassembler::Success; } -static DecodeStatus DecodeT2AddrModeSOReg(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2942,7 +3054,7 @@ static DecodeStatus DecodeT2AddrModeSOReg(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeT2LoadShift(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2997,7 +3109,7 @@ static DecodeStatus DecodeT2LoadShift(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeT2Imm8S4(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { int imm = Val & 0xFF; if (!(Val & 0x100)) imm *= -1; @@ -3006,7 +3118,7 @@ static DecodeStatus DecodeT2Imm8S4(llvm::MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus DecodeT2AddrModeImm8s4(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3021,7 +3133,7 @@ static DecodeStatus DecodeT2AddrModeImm8s4(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeT2AddrModeImm0_1020s4(llvm::MCInst &Inst,unsigned Val, +static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3036,7 +3148,7 @@ static DecodeStatus DecodeT2AddrModeImm0_1020s4(llvm::MCInst &Inst,unsigned Val, return S; } -static DecodeStatus DecodeT2Imm8(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { int imm = Val & 0xFF; if (Val == 0) @@ -3049,7 +3161,7 @@ static 
DecodeStatus DecodeT2Imm8(llvm::MCInst &Inst, unsigned Val, } -static DecodeStatus DecodeT2AddrModeImm8(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3080,7 +3192,7 @@ static DecodeStatus DecodeT2AddrModeImm8(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeT2LdStPre(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3110,7 +3222,7 @@ static DecodeStatus DecodeT2LdStPre(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeT2AddrModeImm12(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3125,7 +3237,7 @@ static DecodeStatus DecodeT2AddrModeImm12(llvm::MCInst &Inst, unsigned Val, } -static DecodeStatus DecodeThumbAddSPImm(llvm::MCInst &Inst, uint16_t Insn, +static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder) { unsigned imm = fieldFromInstruction16(Insn, 0, 7); @@ -3136,7 +3248,7 @@ static DecodeStatus DecodeThumbAddSPImm(llvm::MCInst &Inst, uint16_t Insn, return MCDisassembler::Success; } -static DecodeStatus DecodeThumbAddSPReg(llvm::MCInst &Inst, uint16_t Insn, +static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3161,7 +3273,7 @@ static DecodeStatus DecodeThumbAddSPReg(llvm::MCInst &Inst, uint16_t Insn, return S; } -static DecodeStatus DecodeThumbCPS(llvm::MCInst &Inst, uint16_t Insn, +static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder) { unsigned imod = fieldFromInstruction16(Insn, 4, 1) | 0x2; 
unsigned flags = fieldFromInstruction16(Insn, 0, 3); @@ -3172,20 +3284,20 @@ static DecodeStatus DecodeThumbCPS(llvm::MCInst &Inst, uint16_t Insn, return MCDisassembler::Success; } -static DecodeStatus DecodePostIdxReg(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rm = fieldFromInstruction32(Insn, 0, 4); unsigned add = fieldFromInstruction32(Insn, 4, 1); - if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder))) return MCDisassembler::Fail; Inst.addOperand(MCOperand::CreateImm(add)); return S; } -static DecodeStatus DecodeThumbBLXOffset(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { if (!tryAddingSymbolicOperand(Address, (Address & ~2u) + SignExtend32<22>(Val << 1) + 4, @@ -3194,7 +3306,7 @@ static DecodeStatus DecodeThumbBLXOffset(llvm::MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus DecodeCoprocessor(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { if (Val == 0xA || Val == 0xB) return MCDisassembler::Fail; @@ -3204,7 +3316,7 @@ static DecodeStatus DecodeCoprocessor(llvm::MCInst &Inst, unsigned Val, } static DecodeStatus -DecodeThumbTableBranch(llvm::MCInst &Inst, unsigned Insn, +DecodeThumbTableBranch(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3220,7 +3332,7 @@ DecodeThumbTableBranch(llvm::MCInst &Inst, unsigned Insn, } static DecodeStatus -DecodeThumb2BCCInstruction(llvm::MCInst &Inst, unsigned Insn, +DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ 
-3262,7 +3374,7 @@ DecodeThumb2BCCInstruction(llvm::MCInst &Inst, unsigned Insn, // Decode a shifted immediate operand. These basically consist // of an 8-bit value, and a 4-bit directive that specifies either // a splat operation or a rotation. -static DecodeStatus DecodeT2SOImm(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { unsigned ctrl = fieldFromInstruction32(Val, 10, 2); if (ctrl == 0) { @@ -3294,13 +3406,15 @@ static DecodeStatus DecodeT2SOImm(llvm::MCInst &Inst, unsigned Val, } static DecodeStatus -DecodeThumbBCCTargetOperand(llvm::MCInst &Inst, unsigned Val, +DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder){ - Inst.addOperand(MCOperand::CreateImm(Val << 1)); + if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<8>(Val<<1) + 4, + true, 2, Inst, Decoder)) + Inst.addOperand(MCOperand::CreateImm(SignExtend32<8>(Val << 1))); return MCDisassembler::Success; } -static DecodeStatus DecodeThumbBLTargetOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder){ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<22>(Val<<1) + 4, true, 4, Inst, Decoder)) @@ -3308,7 +3422,7 @@ static DecodeStatus DecodeThumbBLTargetOperand(llvm::MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus DecodeMemBarrierOption(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { switch (Val) { default: @@ -3328,14 +3442,14 @@ static DecodeStatus DecodeMemBarrierOption(llvm::MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus DecodeMSRMask(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { if (!Val) 
return MCDisassembler::Fail; Inst.addOperand(MCOperand::CreateImm(Val)); return MCDisassembler::Success; } -static DecodeStatus DecodeDoubleRegLoad(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3358,7 +3472,7 @@ static DecodeStatus DecodeDoubleRegLoad(llvm::MCInst &Inst, unsigned Insn, } -static DecodeStatus DecodeDoubleRegStore(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder){ DecodeStatus S = MCDisassembler::Success; @@ -3385,7 +3499,7 @@ static DecodeStatus DecodeDoubleRegStore(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeLDRPreImm(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeLDRPreImm(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3410,7 +3524,7 @@ static DecodeStatus DecodeLDRPreImm(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeLDRPreReg(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3438,7 +3552,7 @@ static DecodeStatus DecodeLDRPreReg(llvm::MCInst &Inst, unsigned Insn, } -static DecodeStatus DecodeSTRPreImm(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3463,7 +3577,7 @@ static DecodeStatus DecodeSTRPreImm(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeSTRPreReg(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeSTRPreReg(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3488,7 +3602,7 @@ static 
DecodeStatus DecodeSTRPreReg(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVLD1LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3547,7 +3661,7 @@ static DecodeStatus DecodeVLD1LN(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVST1LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3605,7 +3719,7 @@ static DecodeStatus DecodeVST1LN(llvm::MCInst &Inst, unsigned Insn, } -static DecodeStatus DecodeVLD2LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3672,7 +3786,7 @@ static DecodeStatus DecodeVLD2LN(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVST2LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3736,7 +3850,7 @@ static DecodeStatus DecodeVST2LN(llvm::MCInst &Inst, unsigned Insn, } -static DecodeStatus DecodeVLD3LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3806,7 +3920,7 @@ static DecodeStatus DecodeVLD3LN(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVST3LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3870,7 +3984,7 @@ static DecodeStatus DecodeVST3LN(llvm::MCInst &Inst, unsigned Insn, } -static DecodeStatus DecodeVLD4LN(llvm::MCInst 
&Inst, unsigned Insn, +static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3944,7 +4058,7 @@ static DecodeStatus DecodeVLD4LN(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVST4LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -4009,7 +4123,7 @@ static DecodeStatus DecodeVST4LN(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVMOVSRR(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rt = fieldFromInstruction32(Insn, 12, 4); @@ -4035,7 +4149,7 @@ static DecodeStatus DecodeVMOVSRR(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVMOVRRS(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rt = fieldFromInstruction32(Insn, 12, 4); @@ -4061,7 +4175,7 @@ static DecodeStatus DecodeVMOVRRS(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeIT(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeIT(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned pred = fieldFromInstruction16(Insn, 4, 4); @@ -4088,7 +4202,7 @@ static DecodeStatus DecodeIT(llvm::MCInst &Inst, unsigned Insn, } static DecodeStatus -DecodeT2LDRDPreInstruction(llvm::MCInst &Inst, unsigned Insn, +DecodeT2LDRDPreInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -4125,7 +4239,7 @@ DecodeT2LDRDPreInstruction(llvm::MCInst &Inst, unsigned Insn, } static 
DecodeStatus -DecodeT2STRDPreInstruction(llvm::MCInst &Inst, unsigned Insn, +DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -4159,7 +4273,7 @@ DecodeT2STRDPreInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeT2Adr(llvm::MCInst &Inst, uint32_t Insn, +static DecodeStatus DecodeT2Adr(MCInst &Inst, uint32_t Insn, uint64_t Address, const void *Decoder) { unsigned sign1 = fieldFromInstruction32(Insn, 21, 1); unsigned sign2 = fieldFromInstruction32(Insn, 23, 1); @@ -4174,7 +4288,7 @@ static DecodeStatus DecodeT2Adr(llvm::MCInst &Inst, uint32_t Insn, return MCDisassembler::Success; } -static DecodeStatus DecodeT2ShifterImmOperand(llvm::MCInst &Inst, uint32_t Val, +static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, uint32_t Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -4185,7 +4299,7 @@ static DecodeStatus DecodeT2ShifterImmOperand(llvm::MCInst &Inst, uint32_t Val, return S; } -static DecodeStatus DecodeSwap(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { unsigned Rt = fieldFromInstruction32(Insn, 12, 4); unsigned Rt2 = fieldFromInstruction32(Insn, 0, 4); @@ -4196,6 +4310,10 @@ static DecodeStatus DecodeSwap(llvm::MCInst &Inst, unsigned Insn, return DecodeCPSInstruction(Inst, Insn, Address, Decoder); DecodeStatus S = MCDisassembler::Success; + + if (Rt == Rn || Rn == Rt2) + S = MCDisassembler::SoftFail; + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder))) return MCDisassembler::Fail; if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt2, Address, Decoder))) @@ -4208,7 +4326,7 @@ static DecodeStatus DecodeSwap(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVCVTD(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, 
uint64_t Address, const void *Decoder) { unsigned Vd = (fieldFromInstruction32(Insn, 12, 4) << 0); Vd |= (fieldFromInstruction32(Insn, 22, 1) << 4); @@ -4236,7 +4354,7 @@ static DecodeStatus DecodeVCVTD(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVCVTQ(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { unsigned Vd = (fieldFromInstruction32(Insn, 12, 4) << 0); Vd |= (fieldFromInstruction32(Insn, 22, 1) << 4); @@ -4263,3 +4381,59 @@ static DecodeStatus DecodeVCVTQ(llvm::MCInst &Inst, unsigned Insn, return S; } + +static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Val, 16, 4); + unsigned Rt = fieldFromInstruction32(Val, 12, 4); + unsigned Rm = fieldFromInstruction32(Val, 0, 4); + Rm |= (fieldFromInstruction32(Val, 23, 1) << 4); + unsigned Cond = fieldFromInstruction32(Val, 28, 4); + + if (fieldFromInstruction32(Val, 8, 4) != 0 || Rn == Rt) + S = MCDisassembler::SoftFail; + + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeAddrMode7Operand(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodePostIdxReg(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodePredicateOperand(Inst, Cond, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + + DecodeStatus S = MCDisassembler::Success; + + unsigned CRm = fieldFromInstruction32(Val, 0, 4); + unsigned opc1 = fieldFromInstruction32(Val, 4, 4); + unsigned cop = fieldFromInstruction32(Val, 8, 4); + unsigned Rt = 
fieldFromInstruction32(Val, 12, 4); + unsigned Rt2 = fieldFromInstruction32(Val, 16, 4); + + if ((cop & ~0x1) == 0xa) + return MCDisassembler::Fail; + + if (Rt == Rt2) + S = MCDisassembler::SoftFail; + + Inst.addOperand(MCOperand::CreateImm(cop)); + Inst.addOperand(MCOperand::CreateImm(opc1)); + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt2, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(CRm)); + + return S; +} + diff --git a/lib/Target/ARM/Disassembler/LLVMBuild.txt b/lib/Target/ARM/Disassembler/LLVMBuild.txt index 94075a9..52d8338 100644 --- a/lib/Target/ARM/Disassembler/LLVMBuild.txt +++ b/lib/Target/ARM/Disassembler/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = ARMDisassembler parent = ARM -required_libraries = ARMCodeGen ARMDesc ARMInfo MC Support +required_libraries = ARMDesc ARMInfo MC Support add_to_library_groups = ARM diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 2b994df..cbd81c1 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -18,11 +18,11 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -#define GET_INSTRUCTION_NAME #include "ARMGenAsmWriter.inc" /// translateShiftImm - Convert shift immediate from 0-31 to 1-32 for printing. @@ -36,17 +36,14 @@ static unsigned translateShiftImm(unsigned imm) { ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI, + const MCInstrInfo &MII, const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) : - MCInstPrinter(MAI, MRI) { + MCInstPrinter(MAI, MII, MRI) { // Initialize the set of available features. 
setAvailableFeatures(STI.getFeatureBits()); } -StringRef ARMInstPrinter::getOpcodeName(unsigned Opcode) const { - return getInstructionName(Opcode); -} - void ARMInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { OS << getRegisterName(RegNo); } @@ -212,12 +209,12 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); // If a symbolic branch target was added as a constant expression then print - // that address in hex. + // that address in hex. And only print 32 unsigned bits for the address. const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr()); int64_t Address; if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) { O << "0x"; - O.write_hex(Address); + O.write_hex((uint32_t)Address); } else { // Otherwise, just print the expression. diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index e9cd407..8acb7ee 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -23,15 +23,12 @@ class MCOperand; class ARMInstPrinter : public MCInstPrinter { public: - ARMInstPrinter(const MCAsmInfo &MAI, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI); + ARMInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); - virtual StringRef getOpcodeName(unsigned Opcode) const; virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; - static const char *getInstructionName(unsigned Opcode); - // Autogenerated by tblgen. 
void printInstruction(const MCInst *MI, raw_ostream &O); static const char *getRegisterName(unsigned RegNo); diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 25849ee..d10bfc1 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -11,11 +11,11 @@ #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMFixupKinds.h" #include "MCTargetDesc/ARMAddressingModes.h" -#include "llvm/ADT/Twine.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionELF.h" @@ -78,7 +78,8 @@ public: { "fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_bl", 0, 24, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_uncondbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_condbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_arm_blx", 0, 24, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, @@ -115,6 +116,9 @@ public: // twiddled. 
if ((unsigned)Fixup.getKind() != ARM::fixup_arm_ldst_pcrel_12 && (unsigned)Fixup.getKind() != ARM::fixup_t2_ldst_pcrel_12 && + (unsigned)Fixup.getKind() != ARM::fixup_arm_adr_pcrel_12 && + (unsigned)Fixup.getKind() != ARM::fixup_thumb_adr_pcrel_10 && + (unsigned)Fixup.getKind() != ARM::fixup_t2_adr_pcrel_12 && (unsigned)Fixup.getKind() != ARM::fixup_arm_thumb_cp) { if (A) { const MCSymbol &Sym = A->getSymbol().AliasedSymbol(); @@ -128,7 +132,8 @@ public: if (A && ((unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_blx || (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl || (unsigned)Fixup.getKind() == ARM::fixup_arm_blx || - (unsigned)Fixup.getKind() == ARM::fixup_arm_bl)) + (unsigned)Fixup.getKind() == ARM::fixup_arm_uncondbl || + (unsigned)Fixup.getKind() == ARM::fixup_arm_condbl)) IsResolved = false; } @@ -366,7 +371,8 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { case ARM::fixup_arm_condbranch: case ARM::fixup_arm_uncondbranch: - case ARM::fixup_arm_bl: + case ARM::fixup_arm_uncondbl: + case ARM::fixup_arm_condbl: case ARM::fixup_arm_blx: // These values don't encode the low two bits since they're always zero. // Offset by 8 just as above. @@ -466,7 +472,9 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { Value = -Value; isAdd = false; } + // The value has the low 4 bits encoded in [3:0] and the high 4 in [11:8]. 
assert ((Value < 256) && "Out of range pc-relative fixup value!"); + Value = (Value & 0xf) | ((Value & 0xf0) << 4); return Value | (isAdd << 23); } case ARM::fixup_arm_pcrel_10: @@ -577,7 +585,8 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { case ARM::fixup_arm_ldst_pcrel_12: case ARM::fixup_arm_pcrel_10: case ARM::fixup_arm_adr_pcrel_12: - case ARM::fixup_arm_bl: + case ARM::fixup_arm_uncondbl: + case ARM::fixup_arm_condbl: case ARM::fixup_arm_blx: case ARM::fixup_arm_condbranch: case ARM::fixup_arm_uncondbranch: diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index 5476a46..aa649ba 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -11,6 +11,7 @@ #include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/MC/MCELFObjectWriter.h" @@ -177,7 +178,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, break; } break; - case ARM::fixup_arm_bl: + case ARM::fixup_arm_uncondbl: case ARM::fixup_arm_blx: case ARM::fixup_arm_uncondbranch: switch (Modifier) { @@ -189,6 +190,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, break; } break; + case ARM::fixup_arm_condbl: case ARM::fixup_arm_condbranch: Type = ELF::R_ARM_JUMP24; break; diff --git a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h index 1827986..0085feb 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h +++ b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h @@ -59,8 +59,21 @@ enum Fixups { // fixup_arm_thumb_br - 12-bit fixup for Thumb B instructions. fixup_arm_thumb_br, - // fixup_arm_bl - Fixup for ARM BL instructions. - fixup_arm_bl, + // The following fixups handle the ARM BL instructions. 
These can be + // conditionalised; however, the ARM ELF ABI requires a different relocation + // in that case: R_ARM_JUMP24 instead of R_ARM_CALL. The difference is that + // R_ARM_CALL is allowed to change the instruction to a BLX inline, which has + // no conditional version; R_ARM_JUMP24 would have to insert a veneer. + // + // MachO does not draw a distinction between the two cases, so it will treat + // fixup_arm_uncondbl and fixup_arm_condbl as identical fixups. + + // fixup_arm_uncondbl - Fixup for unconditional ARM BL instructions. + fixup_arm_uncondbl, + + // fixup_arm_condbl - Fixup for ARM BL instructions with nontrivial + // conditionalisation. + fixup_arm_condbl, // fixup_arm_blx - Fixup for ARM BLX instructions. fixup_arm_blx, diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 4445dcd..10d1c48 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -597,8 +597,12 @@ uint32_t ARMMCCodeEmitter:: getARMBLTargetOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const { const MCOperand MO = MI.getOperand(OpIdx); - if (MO.isExpr()) - return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_bl, Fixups); + if (MO.isExpr()) { + if (HasConditionalBranch(MI)) + return ::getBranchTargetOpValue(MI, OpIdx, + ARM::fixup_arm_condbl, Fixups); + return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_uncondbl, Fixups); + } return MO.getImm() >> 2; } @@ -1330,8 +1334,8 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op, // LDM/STM: // {15-0} = Bitfield of GPRs. 
unsigned Reg = MI.getOperand(Op).getReg(); - bool SPRRegs = llvm::ARMMCRegisterClasses[ARM::SPRRegClassID].contains(Reg); - bool DPRRegs = llvm::ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg); + bool SPRRegs = ARMMCRegisterClasses[ARM::SPRRegClassID].contains(Reg); + bool DPRRegs = ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg); unsigned Binary = 0; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index ed27f9f..e3512cd 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -163,10 +163,11 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, static MCInstPrinter *createARMMCInstPrinter(const Target &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, + const MCInstrInfo &MII, const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) { if (SyntaxVariant == 0) - return new ARMInstPrinter(MAI, MRI, STI); + return new ARMInstPrinter(MAI, MII, MRI, STI); return 0; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index 9d3da14..8057cb6 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -82,7 +82,8 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType, case ARM::fixup_arm_adr_pcrel_12: case ARM::fixup_arm_condbranch: case ARM::fixup_arm_uncondbranch: - case ARM::fixup_arm_bl: + case ARM::fixup_arm_uncondbl: + case ARM::fixup_arm_condbl: case ARM::fixup_arm_blx: RelocType = unsigned(macho::RIT_ARM_Branch24Bit); // Report as 'long', even though that is not quite accurate. diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt index 4fcaecf..3eddda8 100644 --- a/lib/Target/ARM/README.txt +++ b/lib/Target/ARM/README.txt @@ -501,11 +501,6 @@ those operations and the ARMv6 scalar versions. 
//===---------------------------------------------------------------------===// -ARM::MOVCCr is commutable (by flipping the condition). But we need to implement -ARMInstrInfo::commuteInstruction() to support it. - -//===---------------------------------------------------------------------===// - Split out LDR (literal) from normal ARM LDR instruction. Also consider spliting LDR into imm12 and so_reg forms. This allows us to clean up some code. e.g. ARMLoadStoreOptimizer does not need to look at LDR (literal) and LDR (so_reg) diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index 8cf7cac..e03e758 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -17,7 +17,6 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/MC/MCInst.h" using namespace llvm; diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h index 27fce9b..36af204 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.h +++ b/lib/Target/ARM/Thumb1InstrInfo.h @@ -17,7 +17,6 @@ #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "Thumb1RegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" namespace llvm { class ARMSubtarget; diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index def75dd..ecb4c2f 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -154,7 +154,7 @@ Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI, ++I; if (I != E) { unsigned NPredReg = 0; - ARMCC::CondCodes NCC = llvm::getITInstrPredicate(I, NPredReg); + ARMCC::CondCodes NCC = getITInstrPredicate(I, NPredReg); if (NCC == CC || NCC == OCC) return true; } @@ -171,7 +171,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) { MachineInstr *MI = &*MBBI; DebugLoc dl = MI->getDebugLoc(); unsigned PredReg = 0; - 
ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg); + ARMCC::CondCodes CC = getITInstrPredicate(MI, PredReg); if (CC == ARMCC::AL) { ++MBBI; continue; @@ -207,7 +207,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) { MI = NMI; unsigned NPredReg = 0; - ARMCC::CondCodes NCC = llvm::getITInstrPredicate(NMI, NPredReg); + ARMCC::CondCodes NCC = getITInstrPredicate(NMI, NPredReg); if (NCC == CC || NCC == OCC) { Mask |= (NCC & 1) << Pos; // Add implicit use of ITSTATE. diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 2fe4b85..8ab486b 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -19,7 +19,6 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/CommandLine.h" @@ -59,7 +58,7 @@ Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, // If the first instruction of Tail is predicated, we may have to update // the IT instruction. unsigned PredReg = 0; - ARMCC::CondCodes CC = llvm::getInstrPredicate(Tail, PredReg); + ARMCC::CondCodes CC = getInstrPredicate(Tail, PredReg); MachineBasicBlock::iterator MBBI = Tail; if (CC != ARMCC::AL) // Expecting at least the t2IT instruction before it. 
@@ -107,7 +106,7 @@ Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB, } unsigned PredReg = 0; - return llvm::getITInstrPredicate(MBBI, PredReg) == ARMCC::AL; + return getITInstrPredicate(MBBI, PredReg) == ARMCC::AL; } void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB, @@ -574,7 +573,7 @@ Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI, return; unsigned PredReg = 0; - ARMCC::CondCodes CC = llvm::getInstrPredicate(UseMI, PredReg); + ARMCC::CondCodes CC = getInstrPredicate(UseMI, PredReg); if (CC == ARMCC::AL || PredReg != ARM::CPSR) return; @@ -590,7 +589,7 @@ Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI, continue; MachineInstr *NMI = &*MBBI; - ARMCC::CondCodes NCC = llvm::getInstrPredicate(NMI, PredReg); + ARMCC::CondCodes NCC = getInstrPredicate(NMI, PredReg); if (!(NCC == CC || NCC == OCC) || NMI->modifiesRegister(SrcReg, &TRI) || NMI->modifiesRegister(ARM::CPSR, &TRI)) @@ -611,5 +610,5 @@ llvm::getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg) { unsigned Opc = MI->getOpcode(); if (Opc == ARM::tBcc || Opc == ARM::t2Bcc) return ARMCC::AL; - return llvm::getInstrPredicate(MI, PredReg); + return getInstrPredicate(MI, PredReg); } diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h index 1ae2ef1..0911f8a 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.h +++ b/lib/Target/ARM/Thumb2InstrInfo.h @@ -15,9 +15,8 @@ #define THUMB2INSTRUCTIONINFO_H #include "ARM.h" -#include "ARMInstrInfo.h" +#include "ARMBaseInstrInfo.h" #include "Thumb2RegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" namespace llvm { class ARMSubtarget; diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index fb9d93b..b5a397e 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -851,7 +851,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { // If this BB loops back to itself, conservatively avoid narrowing the // first 
instruction that does partial flag update. bool IsSelfLoop = MBB.isSuccessor(&MBB); - MachineBasicBlock::instr_iterator MII = MBB.instr_begin(), E = MBB.instr_end(); + MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),E = MBB.instr_end(); MachineBasicBlock::instr_iterator NextMII; for (; MII != E; MII = NextMII) { NextMII = llvm::next(MII); diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp deleted file mode 100644 index b6b209e..0000000 --- a/lib/Target/CBackend/CBackend.cpp +++ /dev/null @@ -1,3616 +0,0 @@ -//===-- CBackend.cpp - Library for converting LLVM code to C --------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This library converts LLVM code to C code, compilable by GCC and other C -// compilers. -// -//===----------------------------------------------------------------------===// - -#include "CTargetMachine.h" -#include "llvm/CallingConv.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Module.h" -#include "llvm/Instructions.h" -#include "llvm/Pass.h" -#include "llvm/PassManager.h" -#include "llvm/Intrinsics.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/InlineAsm.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Analysis/ConstantsScanner.h" -#include "llvm/Analysis/FindUsedTypes.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/IntrinsicLowering.h" -#include "llvm/Target/Mangler.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCObjectFileInfo.h" -#include "llvm/MC/MCRegisterInfo.h" -#include 
"llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Support/CallSite.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FormattedStream.h" -#include "llvm/Support/GetElementPtrTypeIterator.h" -#include "llvm/Support/InstVisitor.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/Host.h" -#include "llvm/Config/config.h" -#include <algorithm> -// Some ms header decided to define setjmp as _setjmp, undo this for this file. -#ifdef _MSC_VER -#undef setjmp -#endif -using namespace llvm; - -extern "C" void LLVMInitializeCBackendTarget() { - // Register the target. - RegisterTargetMachine<CTargetMachine> X(TheCBackendTarget); -} - -namespace { - class CBEMCAsmInfo : public MCAsmInfo { - public: - CBEMCAsmInfo() { - GlobalPrefix = ""; - PrivateGlobalPrefix = ""; - } - }; - - /// CWriter - This class is the main chunk of code that converts an LLVM - /// module to a C translation unit. - class CWriter : public FunctionPass, public InstVisitor<CWriter> { - formatted_raw_ostream &Out; - IntrinsicLowering *IL; - Mangler *Mang; - LoopInfo *LI; - const Module *TheModule; - const MCAsmInfo* TAsm; - const MCRegisterInfo *MRI; - const MCObjectFileInfo *MOFI; - MCContext *TCtx; - const TargetData* TD; - - std::map<const ConstantFP *, unsigned> FPConstantMap; - std::set<Function*> intrinsicPrototypesAlreadyGenerated; - std::set<const Argument*> ByValParams; - unsigned FPCounter; - unsigned OpaqueCounter; - DenseMap<const Value*, unsigned> AnonValueNumbers; - unsigned NextAnonValueNumber; - - /// UnnamedStructIDs - This contains a unique ID for each struct that is - /// either anonymous or has no name. 
- DenseMap<StructType*, unsigned> UnnamedStructIDs; - - public: - static char ID; - explicit CWriter(formatted_raw_ostream &o) - : FunctionPass(ID), Out(o), IL(0), Mang(0), LI(0), - TheModule(0), TAsm(0), MRI(0), MOFI(0), TCtx(0), TD(0), - OpaqueCounter(0), NextAnonValueNumber(0) { - initializeLoopInfoPass(*PassRegistry::getPassRegistry()); - FPCounter = 0; - } - - virtual const char *getPassName() const { return "C backend"; } - - void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<LoopInfo>(); - AU.setPreservesAll(); - } - - virtual bool doInitialization(Module &M); - - bool runOnFunction(Function &F) { - // Do not codegen any 'available_externally' functions at all, they have - // definitions outside the translation unit. - if (F.hasAvailableExternallyLinkage()) - return false; - - LI = &getAnalysis<LoopInfo>(); - - // Get rid of intrinsics we can't handle. - lowerIntrinsics(F); - - // Output all floating point constants that cannot be printed accurately. - printFloatingPointConstants(F); - - printFunction(F); - return false; - } - - virtual bool doFinalization(Module &M) { - // Free memory... - delete IL; - delete TD; - delete Mang; - delete TCtx; - delete TAsm; - delete MRI; - delete MOFI; - FPConstantMap.clear(); - ByValParams.clear(); - intrinsicPrototypesAlreadyGenerated.clear(); - UnnamedStructIDs.clear(); - return false; - } - - raw_ostream &printType(raw_ostream &Out, Type *Ty, - bool isSigned = false, - const std::string &VariableName = "", - bool IgnoreName = false, - const AttrListPtr &PAL = AttrListPtr()); - raw_ostream &printSimpleType(raw_ostream &Out, Type *Ty, - bool isSigned, - const std::string &NameSoFar = ""); - - void printStructReturnPointerFunctionType(raw_ostream &Out, - const AttrListPtr &PAL, - PointerType *Ty); - - std::string getStructName(StructType *ST); - - /// writeOperandDeref - Print the result of dereferencing the specified - /// operand with '*'. 
This is equivalent to printing '*' then using - /// writeOperand, but avoids excess syntax in some cases. - void writeOperandDeref(Value *Operand) { - if (isAddressExposed(Operand)) { - // Already something with an address exposed. - writeOperandInternal(Operand); - } else { - Out << "*("; - writeOperand(Operand); - Out << ")"; - } - } - - void writeOperand(Value *Operand, bool Static = false); - void writeInstComputationInline(Instruction &I); - void writeOperandInternal(Value *Operand, bool Static = false); - void writeOperandWithCast(Value* Operand, unsigned Opcode); - void writeOperandWithCast(Value* Operand, const ICmpInst &I); - bool writeInstructionCast(const Instruction &I); - - void writeMemoryAccess(Value *Operand, Type *OperandType, - bool IsVolatile, unsigned Alignment); - - private : - std::string InterpretASMConstraint(InlineAsm::ConstraintInfo& c); - - void lowerIntrinsics(Function &F); - /// Prints the definition of the intrinsic function F. Supports the - /// intrinsics which need to be explicitly defined in the CBackend. 
- void printIntrinsicDefinition(const Function &F, raw_ostream &Out); - - void printModuleTypes(); - void printContainedStructs(Type *Ty, SmallPtrSet<Type *, 16> &); - void printFloatingPointConstants(Function &F); - void printFloatingPointConstants(const Constant *C); - void printFunctionSignature(const Function *F, bool Prototype); - - void printFunction(Function &); - void printBasicBlock(BasicBlock *BB); - void printLoop(Loop *L); - - void printCast(unsigned opcode, Type *SrcTy, Type *DstTy); - void printConstant(Constant *CPV, bool Static); - void printConstantWithCast(Constant *CPV, unsigned Opcode); - bool printConstExprCast(const ConstantExpr *CE, bool Static); - void printConstantArray(ConstantArray *CPA, bool Static); - void printConstantVector(ConstantVector *CV, bool Static); - void printConstantDataSequential(ConstantDataSequential *CDS, bool Static); - - - /// isAddressExposed - Return true if the specified value's name needs to - /// have its address taken in order to get a C value of the correct type. - /// This happens for global variables, byval parameters, and direct allocas. - bool isAddressExposed(const Value *V) const { - if (const Argument *A = dyn_cast<Argument>(V)) - return ByValParams.count(A); - return isa<GlobalVariable>(V) || isDirectAlloca(V); - } - - // isInlinableInst - Attempt to inline instructions into their uses to build - // trees as much as possible. To do this, we have to consistently decide - // what is acceptable to inline, so that variable declarations don't get - // printed and an extra copy of the expr is not emitted. - // - static bool isInlinableInst(const Instruction &I) { - // Always inline cmp instructions, even if they are shared by multiple - // expressions. GCC generates horrible code if we don't. - if (isa<CmpInst>(I)) - return true; - - // Must be an expression, must be used exactly once. If it is dead, we - // emit it inline where it would go. 
- if (I.getType() == Type::getVoidTy(I.getContext()) || !I.hasOneUse() || - isa<TerminatorInst>(I) || isa<CallInst>(I) || isa<PHINode>(I) || - isa<LoadInst>(I) || isa<VAArgInst>(I) || isa<InsertElementInst>(I) || - isa<InsertValueInst>(I)) - // Don't inline a load across a store or other bad things! - return false; - - // Must not be used in inline asm, extractelement, or shufflevector. - if (I.hasOneUse()) { - const Instruction &User = cast<Instruction>(*I.use_back()); - if (isInlineAsm(User) || isa<ExtractElementInst>(User) || - isa<ShuffleVectorInst>(User)) - return false; - } - - // Only inline instruction it if it's use is in the same BB as the inst. - return I.getParent() == cast<Instruction>(I.use_back())->getParent(); - } - - // isDirectAlloca - Define fixed sized allocas in the entry block as direct - // variables which are accessed with the & operator. This causes GCC to - // generate significantly better code than to emit alloca calls directly. - // - static const AllocaInst *isDirectAlloca(const Value *V) { - const AllocaInst *AI = dyn_cast<AllocaInst>(V); - if (!AI) return 0; - if (AI->isArrayAllocation()) - return 0; // FIXME: we can also inline fixed size array allocas! - if (AI->getParent() != &AI->getParent()->getParent()->getEntryBlock()) - return 0; - return AI; - } - - // isInlineAsm - Check if the instruction is a call to an inline asm chunk. 
- static bool isInlineAsm(const Instruction& I) { - if (const CallInst *CI = dyn_cast<CallInst>(&I)) - return isa<InlineAsm>(CI->getCalledValue()); - return false; - } - - // Instruction visitation functions - friend class InstVisitor<CWriter>; - - void visitReturnInst(ReturnInst &I); - void visitBranchInst(BranchInst &I); - void visitSwitchInst(SwitchInst &I); - void visitIndirectBrInst(IndirectBrInst &I); - void visitInvokeInst(InvokeInst &I) { - llvm_unreachable("Lowerinvoke pass didn't work!"); - } - void visitResumeInst(ResumeInst &I) { - llvm_unreachable("DwarfEHPrepare pass didn't work!"); - } - void visitUnreachableInst(UnreachableInst &I); - - void visitPHINode(PHINode &I); - void visitBinaryOperator(Instruction &I); - void visitICmpInst(ICmpInst &I); - void visitFCmpInst(FCmpInst &I); - - void visitCastInst (CastInst &I); - void visitSelectInst(SelectInst &I); - void visitCallInst (CallInst &I); - void visitInlineAsm(CallInst &I); - bool visitBuiltinCall(CallInst &I, Intrinsic::ID ID, bool &WroteCallee); - - void visitAllocaInst(AllocaInst &I); - void visitLoadInst (LoadInst &I); - void visitStoreInst (StoreInst &I); - void visitGetElementPtrInst(GetElementPtrInst &I); - void visitVAArgInst (VAArgInst &I); - - void visitInsertElementInst(InsertElementInst &I); - void visitExtractElementInst(ExtractElementInst &I); - void visitShuffleVectorInst(ShuffleVectorInst &SVI); - - void visitInsertValueInst(InsertValueInst &I); - void visitExtractValueInst(ExtractValueInst &I); - - void visitInstruction(Instruction &I) { -#ifndef NDEBUG - errs() << "C Writer does not know about " << I; -#endif - llvm_unreachable(0); - } - - void outputLValue(Instruction *I) { - Out << " " << GetValueName(I) << " = "; - } - - bool isGotoCodeNecessary(BasicBlock *From, BasicBlock *To); - void printPHICopiesForSuccessor(BasicBlock *CurBlock, - BasicBlock *Successor, unsigned Indent); - void printBranchToBlock(BasicBlock *CurBlock, BasicBlock *SuccBlock, - unsigned Indent); - void 
printGEPExpression(Value *Ptr, gep_type_iterator I, - gep_type_iterator E, bool Static); - - std::string GetValueName(const Value *Operand); - }; -} - -char CWriter::ID = 0; - - - -static std::string CBEMangle(const std::string &S) { - std::string Result; - - for (unsigned i = 0, e = S.size(); i != e; ++i) - if (isalnum(S[i]) || S[i] == '_') { - Result += S[i]; - } else { - Result += '_'; - Result += 'A'+(S[i]&15); - Result += 'A'+((S[i]>>4)&15); - Result += '_'; - } - return Result; -} - -std::string CWriter::getStructName(StructType *ST) { - if (!ST->isLiteral() && !ST->getName().empty()) - return CBEMangle("l_"+ST->getName().str()); - - return "l_unnamed_" + utostr(UnnamedStructIDs[ST]); -} - - -/// printStructReturnPointerFunctionType - This is like printType for a struct -/// return type, except, instead of printing the type as void (*)(Struct*, ...) -/// print it as "Struct (*)(...)", for struct return functions. -void CWriter::printStructReturnPointerFunctionType(raw_ostream &Out, - const AttrListPtr &PAL, - PointerType *TheTy) { - FunctionType *FTy = cast<FunctionType>(TheTy->getElementType()); - std::string tstr; - raw_string_ostream FunctionInnards(tstr); - FunctionInnards << " (*) ("; - bool PrintedType = false; - - FunctionType::param_iterator I = FTy->param_begin(), E = FTy->param_end(); - Type *RetTy = cast<PointerType>(*I)->getElementType(); - unsigned Idx = 1; - for (++I, ++Idx; I != E; ++I, ++Idx) { - if (PrintedType) - FunctionInnards << ", "; - Type *ArgTy = *I; - if (PAL.paramHasAttr(Idx, Attribute::ByVal)) { - assert(ArgTy->isPointerTy()); - ArgTy = cast<PointerType>(ArgTy)->getElementType(); - } - printType(FunctionInnards, ArgTy, - /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt), ""); - PrintedType = true; - } - if (FTy->isVarArg()) { - if (!PrintedType) - FunctionInnards << " int"; //dummy argument for empty vararg functs - FunctionInnards << ", ..."; - } else if (!PrintedType) { - FunctionInnards << "void"; - } - FunctionInnards << ')'; 
- printType(Out, RetTy, - /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), FunctionInnards.str()); -} - -raw_ostream & -CWriter::printSimpleType(raw_ostream &Out, Type *Ty, bool isSigned, - const std::string &NameSoFar) { - assert((Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) && - "Invalid type for printSimpleType"); - switch (Ty->getTypeID()) { - case Type::VoidTyID: return Out << "void " << NameSoFar; - case Type::IntegerTyID: { - unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); - if (NumBits == 1) - return Out << "bool " << NameSoFar; - else if (NumBits <= 8) - return Out << (isSigned?"signed":"unsigned") << " char " << NameSoFar; - else if (NumBits <= 16) - return Out << (isSigned?"signed":"unsigned") << " short " << NameSoFar; - else if (NumBits <= 32) - return Out << (isSigned?"signed":"unsigned") << " int " << NameSoFar; - else if (NumBits <= 64) - return Out << (isSigned?"signed":"unsigned") << " long long "<< NameSoFar; - else { - assert(NumBits <= 128 && "Bit widths > 128 not implemented yet"); - return Out << (isSigned?"llvmInt128":"llvmUInt128") << " " << NameSoFar; - } - } - case Type::FloatTyID: return Out << "float " << NameSoFar; - case Type::DoubleTyID: return Out << "double " << NameSoFar; - // Lacking emulation of FP80 on PPC, etc., we assume whichever of these is - // present matches host 'long double'. 
- case Type::X86_FP80TyID: - case Type::PPC_FP128TyID: - case Type::FP128TyID: return Out << "long double " << NameSoFar; - - case Type::X86_MMXTyID: - return printSimpleType(Out, Type::getInt32Ty(Ty->getContext()), isSigned, - " __attribute__((vector_size(64))) " + NameSoFar); - - case Type::VectorTyID: { - VectorType *VTy = cast<VectorType>(Ty); - return printSimpleType(Out, VTy->getElementType(), isSigned, - " __attribute__((vector_size(" + - utostr(TD->getTypeAllocSize(VTy)) + " ))) " + NameSoFar); - } - - default: -#ifndef NDEBUG - errs() << "Unknown primitive type: " << *Ty << "\n"; -#endif - llvm_unreachable(0); - } -} - -// Pass the Type* and the variable name and this prints out the variable -// declaration. -// -raw_ostream &CWriter::printType(raw_ostream &Out, Type *Ty, - bool isSigned, const std::string &NameSoFar, - bool IgnoreName, const AttrListPtr &PAL) { - if (Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) { - printSimpleType(Out, Ty, isSigned, NameSoFar); - return Out; - } - - switch (Ty->getTypeID()) { - case Type::FunctionTyID: { - FunctionType *FTy = cast<FunctionType>(Ty); - std::string tstr; - raw_string_ostream FunctionInnards(tstr); - FunctionInnards << " (" << NameSoFar << ") ("; - unsigned Idx = 1; - for (FunctionType::param_iterator I = FTy->param_begin(), - E = FTy->param_end(); I != E; ++I) { - Type *ArgTy = *I; - if (PAL.paramHasAttr(Idx, Attribute::ByVal)) { - assert(ArgTy->isPointerTy()); - ArgTy = cast<PointerType>(ArgTy)->getElementType(); - } - if (I != FTy->param_begin()) - FunctionInnards << ", "; - printType(FunctionInnards, ArgTy, - /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt), ""); - ++Idx; - } - if (FTy->isVarArg()) { - if (!FTy->getNumParams()) - FunctionInnards << " int"; //dummy argument for empty vaarg functs - FunctionInnards << ", ..."; - } else if (!FTy->getNumParams()) { - FunctionInnards << "void"; - } - FunctionInnards << ')'; - printType(Out, FTy->getReturnType(), - 
/*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), FunctionInnards.str()); - return Out; - } - case Type::StructTyID: { - StructType *STy = cast<StructType>(Ty); - - // Check to see if the type is named. - if (!IgnoreName) - return Out << getStructName(STy) << ' ' << NameSoFar; - - Out << NameSoFar + " {\n"; - unsigned Idx = 0; - for (StructType::element_iterator I = STy->element_begin(), - E = STy->element_end(); I != E; ++I) { - Out << " "; - printType(Out, *I, false, "field" + utostr(Idx++)); - Out << ";\n"; - } - Out << '}'; - if (STy->isPacked()) - Out << " __attribute__ ((packed))"; - return Out; - } - - case Type::PointerTyID: { - PointerType *PTy = cast<PointerType>(Ty); - std::string ptrName = "*" + NameSoFar; - - if (PTy->getElementType()->isArrayTy() || - PTy->getElementType()->isVectorTy()) - ptrName = "(" + ptrName + ")"; - - if (!PAL.isEmpty()) - // Must be a function ptr cast! - return printType(Out, PTy->getElementType(), false, ptrName, true, PAL); - return printType(Out, PTy->getElementType(), false, ptrName); - } - - case Type::ArrayTyID: { - ArrayType *ATy = cast<ArrayType>(Ty); - unsigned NumElements = ATy->getNumElements(); - if (NumElements == 0) NumElements = 1; - // Arrays are wrapped in structs to allow them to have normal - // value semantics (avoiding the array "decay"). 
- Out << NameSoFar << " { "; - printType(Out, ATy->getElementType(), false, - "array[" + utostr(NumElements) + "]"); - return Out << "; }"; - } - - default: - llvm_unreachable("Unhandled case in getTypeProps!"); - } -} - -void CWriter::printConstantArray(ConstantArray *CPA, bool Static) { - Out << "{ "; - printConstant(cast<Constant>(CPA->getOperand(0)), Static); - for (unsigned i = 1, e = CPA->getNumOperands(); i != e; ++i) { - Out << ", "; - printConstant(cast<Constant>(CPA->getOperand(i)), Static); - } - Out << " }"; -} - -void CWriter::printConstantVector(ConstantVector *CP, bool Static) { - Out << "{ "; - printConstant(cast<Constant>(CP->getOperand(0)), Static); - for (unsigned i = 1, e = CP->getNumOperands(); i != e; ++i) { - Out << ", "; - printConstant(cast<Constant>(CP->getOperand(i)), Static); - } - Out << " }"; -} - -void CWriter::printConstantDataSequential(ConstantDataSequential *CDS, - bool Static) { - // As a special case, print the array as a string if it is an array of - // ubytes or an array of sbytes with positive values. - // - if (CDS->isCString()) { - Out << '\"'; - // Keep track of whether the last number was a hexadecimal escape. - bool LastWasHex = false; - - StringRef Bytes = CDS->getAsCString(); - - // Do not include the last character, which we know is null - for (unsigned i = 0, e = Bytes.size(); i != e; ++i) { - unsigned char C = Bytes[i]; - - // Print it out literally if it is a printable character. The only thing - // to be careful about is when the last letter output was a hex escape - // code, in which case we have to be careful not to print out hex digits - // explicitly (the C compiler thinks it is a continuation of the previous - // character, sheesh...) 
- // - if (isprint(C) && (!LastWasHex || !isxdigit(C))) { - LastWasHex = false; - if (C == '"' || C == '\\') - Out << "\\" << (char)C; - else - Out << (char)C; - } else { - LastWasHex = false; - switch (C) { - case '\n': Out << "\\n"; break; - case '\t': Out << "\\t"; break; - case '\r': Out << "\\r"; break; - case '\v': Out << "\\v"; break; - case '\a': Out << "\\a"; break; - case '\"': Out << "\\\""; break; - case '\'': Out << "\\\'"; break; - default: - Out << "\\x"; - Out << (char)(( C/16 < 10) ? ( C/16 +'0') : ( C/16 -10+'A')); - Out << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A')); - LastWasHex = true; - break; - } - } - } - Out << '\"'; - } else { - Out << "{ "; - printConstant(CDS->getElementAsConstant(0), Static); - for (unsigned i = 1, e = CDS->getNumElements(); i != e; ++i) { - Out << ", "; - printConstant(CDS->getElementAsConstant(i), Static); - } - Out << " }"; - } -} - - -// isFPCSafeToPrint - Returns true if we may assume that CFP may be written out -// textually as a double (rather than as a reference to a stack-allocated -// variable). We decide this by converting CFP to a string and back into a -// double, and then checking whether the conversion results in a bit-equal -// double to the original value of CFP. This depends on us and the target C -// compiler agreeing on the conversion process (which is pretty likely since we -// only deal in IEEE FP). -// -static bool isFPCSafeToPrint(const ConstantFP *CFP) { - bool ignored; - // Do long doubles in hex for now. 
- if (CFP->getType() != Type::getFloatTy(CFP->getContext()) && - CFP->getType() != Type::getDoubleTy(CFP->getContext())) - return false; - APFloat APF = APFloat(CFP->getValueAPF()); // copy - if (CFP->getType() == Type::getFloatTy(CFP->getContext())) - APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored); -#if HAVE_PRINTF_A && ENABLE_CBE_PRINTF_A - char Buffer[100]; - sprintf(Buffer, "%a", APF.convertToDouble()); - if (!strncmp(Buffer, "0x", 2) || - !strncmp(Buffer, "-0x", 3) || - !strncmp(Buffer, "+0x", 3)) - return APF.bitwiseIsEqual(APFloat(atof(Buffer))); - return false; -#else - std::string StrVal = ftostr(APF); - - while (StrVal[0] == ' ') - StrVal.erase(StrVal.begin()); - - // Check to make sure that the stringized number is not some string like "Inf" - // or NaN. Check that the string matches the "[-+]?[0-9]" regex. - if ((StrVal[0] >= '0' && StrVal[0] <= '9') || - ((StrVal[0] == '-' || StrVal[0] == '+') && - (StrVal[1] >= '0' && StrVal[1] <= '9'))) - // Reparse stringized version! - return APF.bitwiseIsEqual(APFloat(atof(StrVal.c_str()))); - return false; -#endif -} - -/// Print out the casting for a cast operation. This does the double casting -/// necessary for conversion to the destination type, if necessary. 
-/// @brief Print a cast -void CWriter::printCast(unsigned opc, Type *SrcTy, Type *DstTy) { - // Print the destination type cast - switch (opc) { - case Instruction::UIToFP: - case Instruction::SIToFP: - case Instruction::IntToPtr: - case Instruction::Trunc: - case Instruction::BitCast: - case Instruction::FPExt: - case Instruction::FPTrunc: // For these the DstTy sign doesn't matter - Out << '('; - printType(Out, DstTy); - Out << ')'; - break; - case Instruction::ZExt: - case Instruction::PtrToInt: - case Instruction::FPToUI: // For these, make sure we get an unsigned dest - Out << '('; - printSimpleType(Out, DstTy, false); - Out << ')'; - break; - case Instruction::SExt: - case Instruction::FPToSI: // For these, make sure we get a signed dest - Out << '('; - printSimpleType(Out, DstTy, true); - Out << ')'; - break; - default: - llvm_unreachable("Invalid cast opcode"); - } - - // Print the source type cast - switch (opc) { - case Instruction::UIToFP: - case Instruction::ZExt: - Out << '('; - printSimpleType(Out, SrcTy, false); - Out << ')'; - break; - case Instruction::SIToFP: - case Instruction::SExt: - Out << '('; - printSimpleType(Out, SrcTy, true); - Out << ')'; - break; - case Instruction::IntToPtr: - case Instruction::PtrToInt: - // Avoid "cast to pointer from integer of different size" warnings - Out << "(unsigned long)"; - break; - case Instruction::Trunc: - case Instruction::BitCast: - case Instruction::FPExt: - case Instruction::FPTrunc: - case Instruction::FPToSI: - case Instruction::FPToUI: - break; // These don't need a source cast. - default: - llvm_unreachable("Invalid cast opcode"); - } -} - -// printConstant - The LLVM Constant to C Constant converter. 
-void CWriter::printConstant(Constant *CPV, bool Static) { - if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CPV)) { - switch (CE->getOpcode()) { - case Instruction::Trunc: - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::FPTrunc: - case Instruction::FPExt: - case Instruction::UIToFP: - case Instruction::SIToFP: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - case Instruction::BitCast: - Out << "("; - printCast(CE->getOpcode(), CE->getOperand(0)->getType(), CE->getType()); - if (CE->getOpcode() == Instruction::SExt && - CE->getOperand(0)->getType() == Type::getInt1Ty(CPV->getContext())) { - // Make sure we really sext from bool here by subtracting from 0 - Out << "0-"; - } - printConstant(CE->getOperand(0), Static); - if (CE->getType() == Type::getInt1Ty(CPV->getContext()) && - (CE->getOpcode() == Instruction::Trunc || - CE->getOpcode() == Instruction::FPToUI || - CE->getOpcode() == Instruction::FPToSI || - CE->getOpcode() == Instruction::PtrToInt)) { - // Make sure we really truncate to bool here by anding with 1 - Out << "&1u"; - } - Out << ')'; - return; - - case Instruction::GetElementPtr: - Out << "("; - printGEPExpression(CE->getOperand(0), gep_type_begin(CPV), - gep_type_end(CPV), Static); - Out << ")"; - return; - case Instruction::Select: - Out << '('; - printConstant(CE->getOperand(0), Static); - Out << '?'; - printConstant(CE->getOperand(1), Static); - Out << ':'; - printConstant(CE->getOperand(2), Static); - Out << ')'; - return; - case Instruction::Add: - case Instruction::FAdd: - case Instruction::Sub: - case Instruction::FSub: - case Instruction::Mul: - case Instruction::FMul: - case Instruction::SDiv: - case Instruction::UDiv: - case Instruction::FDiv: - case Instruction::URem: - case Instruction::SRem: - case Instruction::FRem: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - case Instruction::ICmp: - case Instruction::Shl: - 
case Instruction::LShr: - case Instruction::AShr: - { - Out << '('; - bool NeedsClosingParens = printConstExprCast(CE, Static); - printConstantWithCast(CE->getOperand(0), CE->getOpcode()); - switch (CE->getOpcode()) { - case Instruction::Add: - case Instruction::FAdd: Out << " + "; break; - case Instruction::Sub: - case Instruction::FSub: Out << " - "; break; - case Instruction::Mul: - case Instruction::FMul: Out << " * "; break; - case Instruction::URem: - case Instruction::SRem: - case Instruction::FRem: Out << " % "; break; - case Instruction::UDiv: - case Instruction::SDiv: - case Instruction::FDiv: Out << " / "; break; - case Instruction::And: Out << " & "; break; - case Instruction::Or: Out << " | "; break; - case Instruction::Xor: Out << " ^ "; break; - case Instruction::Shl: Out << " << "; break; - case Instruction::LShr: - case Instruction::AShr: Out << " >> "; break; - case Instruction::ICmp: - switch (CE->getPredicate()) { - case ICmpInst::ICMP_EQ: Out << " == "; break; - case ICmpInst::ICMP_NE: Out << " != "; break; - case ICmpInst::ICMP_SLT: - case ICmpInst::ICMP_ULT: Out << " < "; break; - case ICmpInst::ICMP_SLE: - case ICmpInst::ICMP_ULE: Out << " <= "; break; - case ICmpInst::ICMP_SGT: - case ICmpInst::ICMP_UGT: Out << " > "; break; - case ICmpInst::ICMP_SGE: - case ICmpInst::ICMP_UGE: Out << " >= "; break; - default: llvm_unreachable("Illegal ICmp predicate"); - } - break; - default: llvm_unreachable("Illegal opcode here!"); - } - printConstantWithCast(CE->getOperand(1), CE->getOpcode()); - if (NeedsClosingParens) - Out << "))"; - Out << ')'; - return; - } - case Instruction::FCmp: { - Out << '('; - bool NeedsClosingParens = printConstExprCast(CE, Static); - if (CE->getPredicate() == FCmpInst::FCMP_FALSE) - Out << "0"; - else if (CE->getPredicate() == FCmpInst::FCMP_TRUE) - Out << "1"; - else { - const char* op = 0; - switch (CE->getPredicate()) { - default: llvm_unreachable("Illegal FCmp predicate"); - case FCmpInst::FCMP_ORD: op = "ord"; break; 
- case FCmpInst::FCMP_UNO: op = "uno"; break; - case FCmpInst::FCMP_UEQ: op = "ueq"; break; - case FCmpInst::FCMP_UNE: op = "une"; break; - case FCmpInst::FCMP_ULT: op = "ult"; break; - case FCmpInst::FCMP_ULE: op = "ule"; break; - case FCmpInst::FCMP_UGT: op = "ugt"; break; - case FCmpInst::FCMP_UGE: op = "uge"; break; - case FCmpInst::FCMP_OEQ: op = "oeq"; break; - case FCmpInst::FCMP_ONE: op = "one"; break; - case FCmpInst::FCMP_OLT: op = "olt"; break; - case FCmpInst::FCMP_OLE: op = "ole"; break; - case FCmpInst::FCMP_OGT: op = "ogt"; break; - case FCmpInst::FCMP_OGE: op = "oge"; break; - } - Out << "llvm_fcmp_" << op << "("; - printConstantWithCast(CE->getOperand(0), CE->getOpcode()); - Out << ", "; - printConstantWithCast(CE->getOperand(1), CE->getOpcode()); - Out << ")"; - } - if (NeedsClosingParens) - Out << "))"; - Out << ')'; - return; - } - default: -#ifndef NDEBUG - errs() << "CWriter Error: Unhandled constant expression: " - << *CE << "\n"; -#endif - llvm_unreachable(0); - } - } else if (isa<UndefValue>(CPV) && CPV->getType()->isSingleValueType()) { - Out << "(("; - printType(Out, CPV->getType()); // sign doesn't matter - Out << ")/*UNDEF*/"; - if (!CPV->getType()->isVectorTy()) { - Out << "0)"; - } else { - Out << "{})"; - } - return; - } - - if (ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) { - Type* Ty = CI->getType(); - if (Ty == Type::getInt1Ty(CPV->getContext())) - Out << (CI->getZExtValue() ? 
'1' : '0'); - else if (Ty == Type::getInt32Ty(CPV->getContext())) - Out << CI->getZExtValue() << 'u'; - else if (Ty->getPrimitiveSizeInBits() > 32) - Out << CI->getZExtValue() << "ull"; - else { - Out << "(("; - printSimpleType(Out, Ty, false) << ')'; - if (CI->isMinValue(true)) - Out << CI->getZExtValue() << 'u'; - else - Out << CI->getSExtValue(); - Out << ')'; - } - return; - } - - switch (CPV->getType()->getTypeID()) { - case Type::FloatTyID: - case Type::DoubleTyID: - case Type::X86_FP80TyID: - case Type::PPC_FP128TyID: - case Type::FP128TyID: { - ConstantFP *FPC = cast<ConstantFP>(CPV); - std::map<const ConstantFP*, unsigned>::iterator I = FPConstantMap.find(FPC); - if (I != FPConstantMap.end()) { - // Because of FP precision problems we must load from a stack allocated - // value that holds the value in hex. - Out << "(*(" << (FPC->getType() == Type::getFloatTy(CPV->getContext()) ? - "float" : - FPC->getType() == Type::getDoubleTy(CPV->getContext()) ? - "double" : - "long double") - << "*)&FPConstant" << I->second << ')'; - } else { - double V; - if (FPC->getType() == Type::getFloatTy(CPV->getContext())) - V = FPC->getValueAPF().convertToFloat(); - else if (FPC->getType() == Type::getDoubleTy(CPV->getContext())) - V = FPC->getValueAPF().convertToDouble(); - else { - // Long double. Convert the number to double, discarding precision. - // This is not awesome, but it at least makes the CBE output somewhat - // useful. - APFloat Tmp = FPC->getValueAPF(); - bool LosesInfo; - Tmp.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &LosesInfo); - V = Tmp.convertToDouble(); - } - - if (IsNAN(V)) { - // The value is NaN - - // FIXME the actual NaN bits should be emitted. - // The prefix for a quiet NaN is 0x7FF8. For a signalling NaN, - // it's 0x7ff4. 
- const unsigned long QuietNaN = 0x7ff8UL; - //const unsigned long SignalNaN = 0x7ff4UL; - - // We need to grab the first part of the FP # - char Buffer[100]; - - uint64_t ll = DoubleToBits(V); - sprintf(Buffer, "0x%llx", static_cast<long long>(ll)); - - std::string Num(&Buffer[0], &Buffer[6]); - unsigned long Val = strtoul(Num.c_str(), 0, 16); - - if (FPC->getType() == Type::getFloatTy(FPC->getContext())) - Out << "LLVM_NAN" << (Val == QuietNaN ? "" : "S") << "F(\"" - << Buffer << "\") /*nan*/ "; - else - Out << "LLVM_NAN" << (Val == QuietNaN ? "" : "S") << "(\"" - << Buffer << "\") /*nan*/ "; - } else if (IsInf(V)) { - // The value is Inf - if (V < 0) Out << '-'; - Out << "LLVM_INF" << - (FPC->getType() == Type::getFloatTy(FPC->getContext()) ? "F" : "") - << " /*inf*/ "; - } else { - std::string Num; -#if HAVE_PRINTF_A && ENABLE_CBE_PRINTF_A - // Print out the constant as a floating point number. - char Buffer[100]; - sprintf(Buffer, "%a", V); - Num = Buffer; -#else - Num = ftostr(FPC->getValueAPF()); -#endif - Out << Num; - } - } - break; - } - - case Type::ArrayTyID: - // Use C99 compound expression literal initializer syntax. - if (!Static) { - Out << "("; - printType(Out, CPV->getType()); - Out << ")"; - } - Out << "{ "; // Arrays are wrapped in struct types. - if (ConstantArray *CA = dyn_cast<ConstantArray>(CPV)) { - printConstantArray(CA, Static); - } else if (ConstantDataSequential *CDS = - dyn_cast<ConstantDataSequential>(CPV)) { - printConstantDataSequential(CDS, Static); - } else { - assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV)); - ArrayType *AT = cast<ArrayType>(CPV->getType()); - Out << '{'; - if (AT->getNumElements()) { - Out << ' '; - Constant *CZ = Constant::getNullValue(AT->getElementType()); - printConstant(CZ, Static); - for (unsigned i = 1, e = AT->getNumElements(); i != e; ++i) { - Out << ", "; - printConstant(CZ, Static); - } - } - Out << " }"; - } - Out << " }"; // Arrays are wrapped in struct types. 
- break; - - case Type::VectorTyID: - // Use C99 compound expression literal initializer syntax. - if (!Static) { - Out << "("; - printType(Out, CPV->getType()); - Out << ")"; - } - if (ConstantVector *CV = dyn_cast<ConstantVector>(CPV)) { - printConstantVector(CV, Static); - } else if (ConstantDataSequential *CDS = - dyn_cast<ConstantDataSequential>(CPV)) { - printConstantDataSequential(CDS, Static); - } else { - assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV)); - VectorType *VT = cast<VectorType>(CPV->getType()); - Out << "{ "; - Constant *CZ = Constant::getNullValue(VT->getElementType()); - printConstant(CZ, Static); - for (unsigned i = 1, e = VT->getNumElements(); i != e; ++i) { - Out << ", "; - printConstant(CZ, Static); - } - Out << " }"; - } - break; - - case Type::StructTyID: - // Use C99 compound expression literal initializer syntax. - if (!Static) { - Out << "("; - printType(Out, CPV->getType()); - Out << ")"; - } - if (isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV)) { - StructType *ST = cast<StructType>(CPV->getType()); - Out << '{'; - if (ST->getNumElements()) { - Out << ' '; - printConstant(Constant::getNullValue(ST->getElementType(0)), Static); - for (unsigned i = 1, e = ST->getNumElements(); i != e; ++i) { - Out << ", "; - printConstant(Constant::getNullValue(ST->getElementType(i)), Static); - } - } - Out << " }"; - } else { - Out << '{'; - if (CPV->getNumOperands()) { - Out << ' '; - printConstant(cast<Constant>(CPV->getOperand(0)), Static); - for (unsigned i = 1, e = CPV->getNumOperands(); i != e; ++i) { - Out << ", "; - printConstant(cast<Constant>(CPV->getOperand(i)), Static); - } - } - Out << " }"; - } - break; - - case Type::PointerTyID: - if (isa<ConstantPointerNull>(CPV)) { - Out << "(("; - printType(Out, CPV->getType()); // sign doesn't matter - Out << ")/*NULL*/0)"; - break; - } else if (GlobalValue *GV = dyn_cast<GlobalValue>(CPV)) { - writeOperand(GV, Static); - break; - } - // FALL THROUGH - default: -#ifndef 
NDEBUG - errs() << "Unknown constant type: " << *CPV << "\n"; -#endif - llvm_unreachable(0); - } -} - -// Some constant expressions need to be casted back to the original types -// because their operands were casted to the expected type. This function takes -// care of detecting that case and printing the cast for the ConstantExpr. -bool CWriter::printConstExprCast(const ConstantExpr* CE, bool Static) { - bool NeedsExplicitCast = false; - Type *Ty = CE->getOperand(0)->getType(); - bool TypeIsSigned = false; - switch (CE->getOpcode()) { - case Instruction::Add: - case Instruction::Sub: - case Instruction::Mul: - // We need to cast integer arithmetic so that it is always performed - // as unsigned, to avoid undefined behavior on overflow. - case Instruction::LShr: - case Instruction::URem: - case Instruction::UDiv: NeedsExplicitCast = true; break; - case Instruction::AShr: - case Instruction::SRem: - case Instruction::SDiv: NeedsExplicitCast = true; TypeIsSigned = true; break; - case Instruction::SExt: - Ty = CE->getType(); - NeedsExplicitCast = true; - TypeIsSigned = true; - break; - case Instruction::ZExt: - case Instruction::Trunc: - case Instruction::FPTrunc: - case Instruction::FPExt: - case Instruction::UIToFP: - case Instruction::SIToFP: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - case Instruction::BitCast: - Ty = CE->getType(); - NeedsExplicitCast = true; - break; - default: break; - } - if (NeedsExplicitCast) { - Out << "(("; - if (Ty->isIntegerTy() && Ty != Type::getInt1Ty(Ty->getContext())) - printSimpleType(Out, Ty, TypeIsSigned); - else - printType(Out, Ty); // not integer, sign doesn't matter - Out << ")("; - } - return NeedsExplicitCast; -} - -// Print a constant assuming that it is the operand for a given Opcode. The -// opcodes that care about sign need to cast their operands to the expected -// type before the operation proceeds. This function does the casting. 
-void CWriter::printConstantWithCast(Constant* CPV, unsigned Opcode) { - - // Extract the operand's type, we'll need it. - Type* OpTy = CPV->getType(); - - // Indicate whether to do the cast or not. - bool shouldCast = false; - bool typeIsSigned = false; - - // Based on the Opcode for which this Constant is being written, determine - // the new type to which the operand should be casted by setting the value - // of OpTy. If we change OpTy, also set shouldCast to true so it gets - // casted below. - switch (Opcode) { - default: - // for most instructions, it doesn't matter - break; - case Instruction::Add: - case Instruction::Sub: - case Instruction::Mul: - // We need to cast integer arithmetic so that it is always performed - // as unsigned, to avoid undefined behavior on overflow. - case Instruction::LShr: - case Instruction::UDiv: - case Instruction::URem: - shouldCast = true; - break; - case Instruction::AShr: - case Instruction::SDiv: - case Instruction::SRem: - shouldCast = true; - typeIsSigned = true; - break; - } - - // Write out the casted constant if we should, otherwise just write the - // operand. - if (shouldCast) { - Out << "(("; - printSimpleType(Out, OpTy, typeIsSigned); - Out << ")"; - printConstant(CPV, false); - Out << ")"; - } else - printConstant(CPV, false); -} - -std::string CWriter::GetValueName(const Value *Operand) { - - // Resolve potential alias. - if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(Operand)) { - if (const Value *V = GA->resolveAliasedGlobal(false)) - Operand = V; - } - - // Mangle globals with the standard mangler interface for LLC compatibility. - if (const GlobalValue *GV = dyn_cast<GlobalValue>(Operand)) { - SmallString<128> Str; - Mang->getNameWithPrefix(Str, GV, false); - return CBEMangle(Str.str().str()); - } - - std::string Name = Operand->getName(); - - if (Name.empty()) { // Assign unique names to local temporaries. 
- unsigned &No = AnonValueNumbers[Operand]; - if (No == 0) - No = ++NextAnonValueNumber; - Name = "tmp__" + utostr(No); - } - - std::string VarName; - VarName.reserve(Name.capacity()); - - for (std::string::iterator I = Name.begin(), E = Name.end(); - I != E; ++I) { - char ch = *I; - - if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || - (ch >= '0' && ch <= '9') || ch == '_')) { - char buffer[5]; - sprintf(buffer, "_%x_", ch); - VarName += buffer; - } else - VarName += ch; - } - - return "llvm_cbe_" + VarName; -} - -/// writeInstComputationInline - Emit the computation for the specified -/// instruction inline, with no destination provided. -void CWriter::writeInstComputationInline(Instruction &I) { - // We can't currently support integer types other than 1, 8, 16, 32, 64. - // Validate this. - Type *Ty = I.getType(); - if (Ty->isIntegerTy() && (Ty!=Type::getInt1Ty(I.getContext()) && - Ty!=Type::getInt8Ty(I.getContext()) && - Ty!=Type::getInt16Ty(I.getContext()) && - Ty!=Type::getInt32Ty(I.getContext()) && - Ty!=Type::getInt64Ty(I.getContext()))) { - report_fatal_error("The C backend does not currently support integer " - "types of widths other than 1, 8, 16, 32, 64.\n" - "This is being tracked as PR 4158."); - } - - // If this is a non-trivial bool computation, make sure to truncate down to - // a 1 bit value. This is important because we want "add i1 x, y" to return - // "0" when x and y are true, not "2" for example. - bool NeedBoolTrunc = false; - if (I.getType() == Type::getInt1Ty(I.getContext()) && - !isa<ICmpInst>(I) && !isa<FCmpInst>(I)) - NeedBoolTrunc = true; - - if (NeedBoolTrunc) - Out << "(("; - - visit(I); - - if (NeedBoolTrunc) - Out << ")&1)"; -} - - -void CWriter::writeOperandInternal(Value *Operand, bool Static) { - if (Instruction *I = dyn_cast<Instruction>(Operand)) - // Should we inline this instruction to build a tree? 
- if (isInlinableInst(*I) && !isDirectAlloca(I)) { - Out << '('; - writeInstComputationInline(*I); - Out << ')'; - return; - } - - Constant* CPV = dyn_cast<Constant>(Operand); - - if (CPV && !isa<GlobalValue>(CPV)) - printConstant(CPV, Static); - else - Out << GetValueName(Operand); -} - -void CWriter::writeOperand(Value *Operand, bool Static) { - bool isAddressImplicit = isAddressExposed(Operand); - if (isAddressImplicit) - Out << "(&"; // Global variables are referenced as their addresses by llvm - - writeOperandInternal(Operand, Static); - - if (isAddressImplicit) - Out << ')'; -} - -// Some instructions need to have their result value casted back to the -// original types because their operands were casted to the expected type. -// This function takes care of detecting that case and printing the cast -// for the Instruction. -bool CWriter::writeInstructionCast(const Instruction &I) { - Type *Ty = I.getOperand(0)->getType(); - switch (I.getOpcode()) { - case Instruction::Add: - case Instruction::Sub: - case Instruction::Mul: - // We need to cast integer arithmetic so that it is always performed - // as unsigned, to avoid undefined behavior on overflow. - case Instruction::LShr: - case Instruction::URem: - case Instruction::UDiv: - Out << "(("; - printSimpleType(Out, Ty, false); - Out << ")("; - return true; - case Instruction::AShr: - case Instruction::SRem: - case Instruction::SDiv: - Out << "(("; - printSimpleType(Out, Ty, true); - Out << ")("; - return true; - default: break; - } - return false; -} - -// Write the operand with a cast to another type based on the Opcode being used. -// This will be used in cases where an instruction has specific type -// requirements (usually signedness) for its operands. -void CWriter::writeOperandWithCast(Value* Operand, unsigned Opcode) { - - // Extract the operand's type, we'll need it. - Type* OpTy = Operand->getType(); - - // Indicate whether to do the cast or not. 
- bool shouldCast = false; - - // Indicate whether the cast should be to a signed type or not. - bool castIsSigned = false; - - // Based on the Opcode for which this Operand is being written, determine - // the new type to which the operand should be casted by setting the value - // of OpTy. If we change OpTy, also set shouldCast to true. - switch (Opcode) { - default: - // for most instructions, it doesn't matter - break; - case Instruction::Add: - case Instruction::Sub: - case Instruction::Mul: - // We need to cast integer arithmetic so that it is always performed - // as unsigned, to avoid undefined behavior on overflow. - case Instruction::LShr: - case Instruction::UDiv: - case Instruction::URem: // Cast to unsigned first - shouldCast = true; - castIsSigned = false; - break; - case Instruction::GetElementPtr: - case Instruction::AShr: - case Instruction::SDiv: - case Instruction::SRem: // Cast to signed first - shouldCast = true; - castIsSigned = true; - break; - } - - // Write out the casted operand if we should, otherwise just write the - // operand. - if (shouldCast) { - Out << "(("; - printSimpleType(Out, OpTy, castIsSigned); - Out << ")"; - writeOperand(Operand); - Out << ")"; - } else - writeOperand(Operand); -} - -// Write the operand with a cast to another type based on the icmp predicate -// being used. -void CWriter::writeOperandWithCast(Value* Operand, const ICmpInst &Cmp) { - // This has to do a cast to ensure the operand has the right signedness. - // Also, if the operand is a pointer, we make sure to cast to an integer when - // doing the comparison both for signedness and so that the C compiler doesn't - // optimize things like "p < NULL" to false (p may contain an integer value - // f.e.). - bool shouldCast = Cmp.isRelational(); - - // Write out the casted operand if we should, otherwise just write the - // operand. - if (!shouldCast) { - writeOperand(Operand); - return; - } - - // Should this be a signed comparison? If so, convert to signed. 
- bool castIsSigned = Cmp.isSigned(); - - // If the operand was a pointer, convert to a large integer type. - Type* OpTy = Operand->getType(); - if (OpTy->isPointerTy()) - OpTy = TD->getIntPtrType(Operand->getContext()); - - Out << "(("; - printSimpleType(Out, OpTy, castIsSigned); - Out << ")"; - writeOperand(Operand); - Out << ")"; -} - -// generateCompilerSpecificCode - This is where we add conditional compilation -// directives to cater to specific compilers as need be. -// -static void generateCompilerSpecificCode(formatted_raw_ostream& Out, - const TargetData *TD) { - // Alloca is hard to get, and we don't want to include stdlib.h here. - Out << "/* get a declaration for alloca */\n" - << "#if defined(__CYGWIN__) || defined(__MINGW32__)\n" - << "#define alloca(x) __builtin_alloca((x))\n" - << "#define _alloca(x) __builtin_alloca((x))\n" - << "#elif defined(__APPLE__)\n" - << "extern void *__builtin_alloca(unsigned long);\n" - << "#define alloca(x) __builtin_alloca(x)\n" - << "#define longjmp _longjmp\n" - << "#define setjmp _setjmp\n" - << "#elif defined(__sun__)\n" - << "#if defined(__sparcv9)\n" - << "extern void *__builtin_alloca(unsigned long);\n" - << "#else\n" - << "extern void *__builtin_alloca(unsigned int);\n" - << "#endif\n" - << "#define alloca(x) __builtin_alloca(x)\n" - << "#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__arm__)\n" - << "#define alloca(x) __builtin_alloca(x)\n" - << "#elif defined(_MSC_VER)\n" - << "#define inline _inline\n" - << "#define alloca(x) _alloca(x)\n" - << "#else\n" - << "#include <alloca.h>\n" - << "#endif\n\n"; - - // We output GCC specific attributes to preserve 'linkonce'ness on globals. - // If we aren't being compiled with GCC, just drop these attributes. 
- Out << "#ifndef __GNUC__ /* Can only support \"linkonce\" vars with GCC */\n" - << "#define __attribute__(X)\n" - << "#endif\n\n"; - - // On Mac OS X, "external weak" is spelled "__attribute__((weak_import))". - Out << "#if defined(__GNUC__) && defined(__APPLE_CC__)\n" - << "#define __EXTERNAL_WEAK__ __attribute__((weak_import))\n" - << "#elif defined(__GNUC__)\n" - << "#define __EXTERNAL_WEAK__ __attribute__((weak))\n" - << "#else\n" - << "#define __EXTERNAL_WEAK__\n" - << "#endif\n\n"; - - // For now, turn off the weak linkage attribute on Mac OS X. (See above.) - Out << "#if defined(__GNUC__) && defined(__APPLE_CC__)\n" - << "#define __ATTRIBUTE_WEAK__\n" - << "#elif defined(__GNUC__)\n" - << "#define __ATTRIBUTE_WEAK__ __attribute__((weak))\n" - << "#else\n" - << "#define __ATTRIBUTE_WEAK__\n" - << "#endif\n\n"; - - // Add hidden visibility support. FIXME: APPLE_CC? - Out << "#if defined(__GNUC__)\n" - << "#define __HIDDEN__ __attribute__((visibility(\"hidden\")))\n" - << "#endif\n\n"; - - // Define NaN and Inf as GCC builtins if using GCC, as 0 otherwise - // From the GCC documentation: - // - // double __builtin_nan (const char *str) - // - // This is an implementation of the ISO C99 function nan. - // - // Since ISO C99 defines this function in terms of strtod, which we do - // not implement, a description of the parsing is in order. The string is - // parsed as by strtol; that is, the base is recognized by leading 0 or - // 0x prefixes. The number parsed is placed in the significand such that - // the least significant bit of the number is at the least significant - // bit of the significand. The number is truncated to fit the significand - // field provided. The significand is forced to be a quiet NaN. - // - // This function, if given a string literal, is evaluated early enough - // that it is considered a compile-time constant. - // - // float __builtin_nanf (const char *str) - // - // Similar to __builtin_nan, except the return type is float. 
- // - // double __builtin_inf (void) - // - // Similar to __builtin_huge_val, except a warning is generated if the - // target floating-point format does not support infinities. This - // function is suitable for implementing the ISO C99 macro INFINITY. - // - // float __builtin_inff (void) - // - // Similar to __builtin_inf, except the return type is float. - Out << "#ifdef __GNUC__\n" - << "#define LLVM_NAN(NanStr) __builtin_nan(NanStr) /* Double */\n" - << "#define LLVM_NANF(NanStr) __builtin_nanf(NanStr) /* Float */\n" - << "#define LLVM_NANS(NanStr) __builtin_nans(NanStr) /* Double */\n" - << "#define LLVM_NANSF(NanStr) __builtin_nansf(NanStr) /* Float */\n" - << "#define LLVM_INF __builtin_inf() /* Double */\n" - << "#define LLVM_INFF __builtin_inff() /* Float */\n" - << "#define LLVM_PREFETCH(addr,rw,locality) " - "__builtin_prefetch(addr,rw,locality)\n" - << "#define __ATTRIBUTE_CTOR__ __attribute__((constructor))\n" - << "#define __ATTRIBUTE_DTOR__ __attribute__((destructor))\n" - << "#define LLVM_ASM __asm__\n" - << "#else\n" - << "#define LLVM_NAN(NanStr) ((double)0.0) /* Double */\n" - << "#define LLVM_NANF(NanStr) 0.0F /* Float */\n" - << "#define LLVM_NANS(NanStr) ((double)0.0) /* Double */\n" - << "#define LLVM_NANSF(NanStr) 0.0F /* Float */\n" - << "#define LLVM_INF ((double)0.0) /* Double */\n" - << "#define LLVM_INFF 0.0F /* Float */\n" - << "#define LLVM_PREFETCH(addr,rw,locality) /* PREFETCH */\n" - << "#define __ATTRIBUTE_CTOR__\n" - << "#define __ATTRIBUTE_DTOR__\n" - << "#define LLVM_ASM(X)\n" - << "#endif\n\n"; - - Out << "#if __GNUC__ < 4 /* Old GCC's, or compilers not GCC */ \n" - << "#define __builtin_stack_save() 0 /* not implemented */\n" - << "#define __builtin_stack_restore(X) /* noop */\n" - << "#endif\n\n"; - - // Output typedefs for 128-bit integers. If these are needed with a - // 32-bit target or with a C compiler that doesn't support mode(TI), - // more drastic measures will be needed. 
- Out << "#if __GNUC__ && __LP64__ /* 128-bit integer types */\n" - << "typedef int __attribute__((mode(TI))) llvmInt128;\n" - << "typedef unsigned __attribute__((mode(TI))) llvmUInt128;\n" - << "#endif\n\n"; - - // Output target-specific code that should be inserted into main. - Out << "#define CODE_FOR_MAIN() /* Any target-specific code for main()*/\n"; -} - -/// FindStaticTors - Given a static ctor/dtor list, unpack its contents into -/// the StaticTors set. -static void FindStaticTors(GlobalVariable *GV, std::set<Function*> &StaticTors){ - ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); - if (!InitList) return; - - for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) - if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){ - if (CS->getNumOperands() != 2) return; // Not array of 2-element structs. - - if (CS->getOperand(1)->isNullValue()) - return; // Found a null terminator, exit printing. - Constant *FP = CS->getOperand(1); - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(FP)) - if (CE->isCast()) - FP = CE->getOperand(0); - if (Function *F = dyn_cast<Function>(FP)) - StaticTors.insert(F); - } -} - -enum SpecialGlobalClass { - NotSpecial = 0, - GlobalCtors, GlobalDtors, - NotPrinted -}; - -/// getGlobalVariableClass - If this is a global that is specially recognized -/// by LLVM, return a code that indicates how we should handle it. -static SpecialGlobalClass getGlobalVariableClass(const GlobalVariable *GV) { - // If this is a global ctors/dtors list, handle it now. - if (GV->hasAppendingLinkage() && GV->use_empty()) { - if (GV->getName() == "llvm.global_ctors") - return GlobalCtors; - else if (GV->getName() == "llvm.global_dtors") - return GlobalDtors; - } - - // Otherwise, if it is other metadata, don't print it. This catches things - // like debug information. 
- if (GV->getSection() == "llvm.metadata") - return NotPrinted; - - return NotSpecial; -} - -// PrintEscapedString - Print each character of the specified string, escaping -// it if it is not printable or if it is an escape char. -static void PrintEscapedString(const char *Str, unsigned Length, - raw_ostream &Out) { - for (unsigned i = 0; i != Length; ++i) { - unsigned char C = Str[i]; - if (isprint(C) && C != '\\' && C != '"') - Out << C; - else if (C == '\\') - Out << "\\\\"; - else if (C == '\"') - Out << "\\\""; - else if (C == '\t') - Out << "\\t"; - else - Out << "\\x" << hexdigit(C >> 4) << hexdigit(C & 0x0F); - } -} - -// PrintEscapedString - Print each character of the specified string, escaping -// it if it is not printable or if it is an escape char. -static void PrintEscapedString(const std::string &Str, raw_ostream &Out) { - PrintEscapedString(Str.c_str(), Str.size(), Out); -} - -bool CWriter::doInitialization(Module &M) { - FunctionPass::doInitialization(M); - - // Initialize - TheModule = &M; - - TD = new TargetData(&M); - IL = new IntrinsicLowering(*TD); - IL->AddPrototypes(M); - -#if 0 - std::string Triple = TheModule->getTargetTriple(); - if (Triple.empty()) - Triple = llvm::sys::getDefaultTargetTriple(); - - std::string E; - if (const Target *Match = TargetRegistry::lookupTarget(Triple, E)) - TAsm = Match->createMCAsmInfo(Triple); -#endif - TAsm = new CBEMCAsmInfo(); - MRI = new MCRegisterInfo(); - TCtx = new MCContext(*TAsm, *MRI, NULL); - Mang = new Mangler(*TCtx, *TD); - - // Keep track of which functions are static ctors/dtors so they can have - // an attribute added to their prototypes. 
- std::set<Function*> StaticCtors, StaticDtors; - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) { - switch (getGlobalVariableClass(I)) { - default: break; - case GlobalCtors: - FindStaticTors(I, StaticCtors); - break; - case GlobalDtors: - FindStaticTors(I, StaticDtors); - break; - } - } - - // get declaration for alloca - Out << "/* Provide Declarations */\n"; - Out << "#include <stdarg.h>\n"; // Varargs support - Out << "#include <setjmp.h>\n"; // Unwind support - Out << "#include <limits.h>\n"; // With overflow intrinsics support. - generateCompilerSpecificCode(Out, TD); - - // Provide a definition for `bool' if not compiling with a C++ compiler. - Out << "\n" - << "#ifndef __cplusplus\ntypedef unsigned char bool;\n#endif\n" - - << "\n\n/* Support for floating point constants */\n" - << "typedef unsigned long long ConstantDoubleTy;\n" - << "typedef unsigned int ConstantFloatTy;\n" - << "typedef struct { unsigned long long f1; unsigned short f2; " - "unsigned short pad[3]; } ConstantFP80Ty;\n" - // This is used for both kinds of 128-bit long double; meaning differs. - << "typedef struct { unsigned long long f1; unsigned long long f2; }" - " ConstantFP128Ty;\n" - << "\n\n/* Global Declarations */\n"; - - // First output all the declarations for the program, because C requires - // Functions & globals to be declared before they are used. - // - if (!M.getModuleInlineAsm().empty()) { - Out << "/* Module asm statements */\n" - << "asm("; - - // Split the string into lines, to make it easier to read the .ll file. - std::string Asm = M.getModuleInlineAsm(); - size_t CurPos = 0; - size_t NewLine = Asm.find_first_of('\n', CurPos); - while (NewLine != std::string::npos) { - // We found a newline, print the portion of the asm string from the - // last newline up to this newline. 
- Out << "\""; - PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.begin()+NewLine), - Out); - Out << "\\n\"\n"; - CurPos = NewLine+1; - NewLine = Asm.find_first_of('\n', CurPos); - } - Out << "\""; - PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.end()), Out); - Out << "\");\n" - << "/* End Module asm statements */\n"; - } - - // Loop over the symbol table, emitting all named constants. - printModuleTypes(); - - // Global variable declarations... - if (!M.global_empty()) { - Out << "\n/* External Global Variable Declarations */\n"; - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) { - - if (I->hasExternalLinkage() || I->hasExternalWeakLinkage() || - I->hasCommonLinkage()) - Out << "extern "; - else if (I->hasDLLImportLinkage()) - Out << "__declspec(dllimport) "; - else - continue; // Internal Global - - // Thread Local Storage - if (I->isThreadLocal()) - Out << "__thread "; - - printType(Out, I->getType()->getElementType(), false, GetValueName(I)); - - if (I->hasExternalWeakLinkage()) - Out << " __EXTERNAL_WEAK__"; - Out << ";\n"; - } - } - - // Function declarations - Out << "\n/* Function Declarations */\n"; - Out << "double fmod(double, double);\n"; // Support for FP rem - Out << "float fmodf(float, float);\n"; - Out << "long double fmodl(long double, long double);\n"; - - // Store the intrinsics which will be declared/defined below. - SmallVector<const Function*, 8> intrinsicsToDefine; - - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { - // Don't print declarations for intrinsic functions. - // Store the used intrinsics, which need to be explicitly defined. 
- if (I->isIntrinsic()) { - switch (I->getIntrinsicID()) { - default: - break; - case Intrinsic::uadd_with_overflow: - case Intrinsic::sadd_with_overflow: - intrinsicsToDefine.push_back(I); - break; - } - continue; - } - - if (I->getName() == "setjmp" || - I->getName() == "longjmp" || I->getName() == "_setjmp") - continue; - - if (I->hasExternalWeakLinkage()) - Out << "extern "; - printFunctionSignature(I, true); - if (I->hasWeakLinkage() || I->hasLinkOnceLinkage()) - Out << " __ATTRIBUTE_WEAK__"; - if (I->hasExternalWeakLinkage()) - Out << " __EXTERNAL_WEAK__"; - if (StaticCtors.count(I)) - Out << " __ATTRIBUTE_CTOR__"; - if (StaticDtors.count(I)) - Out << " __ATTRIBUTE_DTOR__"; - if (I->hasHiddenVisibility()) - Out << " __HIDDEN__"; - - if (I->hasName() && I->getName()[0] == 1) - Out << " LLVM_ASM(\"" << I->getName().substr(1) << "\")"; - - Out << ";\n"; - } - - // Output the global variable declarations - if (!M.global_empty()) { - Out << "\n\n/* Global Variable Declarations */\n"; - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - if (!I->isDeclaration()) { - // Ignore special globals, such as debug info. - if (getGlobalVariableClass(I)) - continue; - - if (I->hasLocalLinkage()) - Out << "static "; - else - Out << "extern "; - - // Thread Local Storage - if (I->isThreadLocal()) - Out << "__thread "; - - printType(Out, I->getType()->getElementType(), false, - GetValueName(I)); - - if (I->hasLinkOnceLinkage()) - Out << " __attribute__((common))"; - else if (I->hasCommonLinkage()) // FIXME is this right? - Out << " __ATTRIBUTE_WEAK__"; - else if (I->hasWeakLinkage()) - Out << " __ATTRIBUTE_WEAK__"; - else if (I->hasExternalWeakLinkage()) - Out << " __EXTERNAL_WEAK__"; - if (I->hasHiddenVisibility()) - Out << " __HIDDEN__"; - Out << ";\n"; - } - } - - // Output the global variable definitions and contents... 
- if (!M.global_empty()) { - Out << "\n\n/* Global Variable Definitions and Initialization */\n"; - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - if (!I->isDeclaration()) { - // Ignore special globals, such as debug info. - if (getGlobalVariableClass(I)) - continue; - - if (I->hasLocalLinkage()) - Out << "static "; - else if (I->hasDLLImportLinkage()) - Out << "__declspec(dllimport) "; - else if (I->hasDLLExportLinkage()) - Out << "__declspec(dllexport) "; - - // Thread Local Storage - if (I->isThreadLocal()) - Out << "__thread "; - - printType(Out, I->getType()->getElementType(), false, - GetValueName(I)); - if (I->hasLinkOnceLinkage()) - Out << " __attribute__((common))"; - else if (I->hasWeakLinkage()) - Out << " __ATTRIBUTE_WEAK__"; - else if (I->hasCommonLinkage()) - Out << " __ATTRIBUTE_WEAK__"; - - if (I->hasHiddenVisibility()) - Out << " __HIDDEN__"; - - // If the initializer is not null, emit the initializer. If it is null, - // we try to avoid emitting large amounts of zeros. The problem with - // this, however, occurs when the variable has weak linkage. In this - // case, the assembler will complain about the variable being both weak - // and common, so we disable this optimization. - // FIXME common linkage should avoid this problem. - if (!I->getInitializer()->isNullValue()) { - Out << " = " ; - writeOperand(I->getInitializer(), true); - } else if (I->hasWeakLinkage()) { - // We have to specify an initializer, but it doesn't have to be - // complete. If the value is an aggregate, print out { 0 }, and let - // the compiler figure out the rest of the zeros. - Out << " = " ; - if (I->getInitializer()->getType()->isStructTy() || - I->getInitializer()->getType()->isVectorTy()) { - Out << "{ 0 }"; - } else if (I->getInitializer()->getType()->isArrayTy()) { - // As with structs and vectors, but with an extra set of braces - // because arrays are wrapped in structs. 
- Out << "{ { 0 } }"; - } else { - // Just print it out normally. - writeOperand(I->getInitializer(), true); - } - } - Out << ";\n"; - } - } - - if (!M.empty()) - Out << "\n\n/* Function Bodies */\n"; - - // Emit some helper functions for dealing with FCMP instruction's - // predicates - Out << "static inline int llvm_fcmp_ord(double X, double Y) { "; - Out << "return X == X && Y == Y; }\n"; - Out << "static inline int llvm_fcmp_uno(double X, double Y) { "; - Out << "return X != X || Y != Y; }\n"; - Out << "static inline int llvm_fcmp_ueq(double X, double Y) { "; - Out << "return X == Y || llvm_fcmp_uno(X, Y); }\n"; - Out << "static inline int llvm_fcmp_une(double X, double Y) { "; - Out << "return X != Y; }\n"; - Out << "static inline int llvm_fcmp_ult(double X, double Y) { "; - Out << "return X < Y || llvm_fcmp_uno(X, Y); }\n"; - Out << "static inline int llvm_fcmp_ugt(double X, double Y) { "; - Out << "return X > Y || llvm_fcmp_uno(X, Y); }\n"; - Out << "static inline int llvm_fcmp_ule(double X, double Y) { "; - Out << "return X <= Y || llvm_fcmp_uno(X, Y); }\n"; - Out << "static inline int llvm_fcmp_uge(double X, double Y) { "; - Out << "return X >= Y || llvm_fcmp_uno(X, Y); }\n"; - Out << "static inline int llvm_fcmp_oeq(double X, double Y) { "; - Out << "return X == Y ; }\n"; - Out << "static inline int llvm_fcmp_one(double X, double Y) { "; - Out << "return X != Y && llvm_fcmp_ord(X, Y); }\n"; - Out << "static inline int llvm_fcmp_olt(double X, double Y) { "; - Out << "return X < Y ; }\n"; - Out << "static inline int llvm_fcmp_ogt(double X, double Y) { "; - Out << "return X > Y ; }\n"; - Out << "static inline int llvm_fcmp_ole(double X, double Y) { "; - Out << "return X <= Y ; }\n"; - Out << "static inline int llvm_fcmp_oge(double X, double Y) { "; - Out << "return X >= Y ; }\n"; - - // Emit definitions of the intrinsics. 
- for (SmallVector<const Function*, 8>::const_iterator - I = intrinsicsToDefine.begin(), - E = intrinsicsToDefine.end(); I != E; ++I) { - printIntrinsicDefinition(**I, Out); - } - - return false; -} - - -/// Output all floating point constants that cannot be printed accurately... -void CWriter::printFloatingPointConstants(Function &F) { - // Scan the module for floating point constants. If any FP constant is used - // in the function, we want to redirect it here so that we do not depend on - // the precision of the printed form, unless the printed form preserves - // precision. - // - for (constant_iterator I = constant_begin(&F), E = constant_end(&F); - I != E; ++I) - printFloatingPointConstants(*I); - - Out << '\n'; -} - -void CWriter::printFloatingPointConstants(const Constant *C) { - // If this is a constant expression, recursively check for constant fp values. - if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { - for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i) - printFloatingPointConstants(CE->getOperand(i)); - return; - } - - // Otherwise, check for a FP constant that we need to print. - const ConstantFP *FPC = dyn_cast<ConstantFP>(C); - if (FPC == 0 || - // Do not put in FPConstantMap if safe. - isFPCSafeToPrint(FPC) || - // Already printed this constant? - FPConstantMap.count(FPC)) - return; - - FPConstantMap[FPC] = FPCounter; // Number the FP constants - - if (FPC->getType() == Type::getDoubleTy(FPC->getContext())) { - double Val = FPC->getValueAPF().convertToDouble(); - uint64_t i = FPC->getValueAPF().bitcastToAPInt().getZExtValue(); - Out << "static const ConstantDoubleTy FPConstant" << FPCounter++ - << " = 0x" << utohexstr(i) - << "ULL; /* " << Val << " */\n"; - } else if (FPC->getType() == Type::getFloatTy(FPC->getContext())) { - float Val = FPC->getValueAPF().convertToFloat(); - uint32_t i = (uint32_t)FPC->getValueAPF().bitcastToAPInt(). 
- getZExtValue(); - Out << "static const ConstantFloatTy FPConstant" << FPCounter++ - << " = 0x" << utohexstr(i) - << "U; /* " << Val << " */\n"; - } else if (FPC->getType() == Type::getX86_FP80Ty(FPC->getContext())) { - // api needed to prevent premature destruction - APInt api = FPC->getValueAPF().bitcastToAPInt(); - const uint64_t *p = api.getRawData(); - Out << "static const ConstantFP80Ty FPConstant" << FPCounter++ - << " = { 0x" << utohexstr(p[0]) - << "ULL, 0x" << utohexstr((uint16_t)p[1]) << ",{0,0,0}" - << "}; /* Long double constant */\n"; - } else if (FPC->getType() == Type::getPPC_FP128Ty(FPC->getContext()) || - FPC->getType() == Type::getFP128Ty(FPC->getContext())) { - APInt api = FPC->getValueAPF().bitcastToAPInt(); - const uint64_t *p = api.getRawData(); - Out << "static const ConstantFP128Ty FPConstant" << FPCounter++ - << " = { 0x" - << utohexstr(p[0]) << ", 0x" << utohexstr(p[1]) - << "}; /* Long double constant */\n"; - - } else { - llvm_unreachable("Unknown float type!"); - } -} - - -/// printSymbolTable - Run through symbol table looking for type names. If a -/// type name is found, emit its declaration... -/// -void CWriter::printModuleTypes() { - Out << "/* Helper union for bitcasts */\n"; - Out << "typedef union {\n"; - Out << " unsigned int Int32;\n"; - Out << " unsigned long long Int64;\n"; - Out << " float Float;\n"; - Out << " double Double;\n"; - Out << "} llvmBitCastUnion;\n"; - - // Get all of the struct types used in the module. - std::vector<StructType*> StructTypes; - TheModule->findUsedStructTypes(StructTypes); - - if (StructTypes.empty()) return; - - Out << "/* Structure forward decls */\n"; - - unsigned NextTypeID = 0; - - // If any of them are missing names, add a unique ID to UnnamedStructIDs. - // Print out forward declarations for structure types. 
- for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) { - StructType *ST = StructTypes[i]; - - if (ST->isLiteral() || ST->getName().empty()) - UnnamedStructIDs[ST] = NextTypeID++; - - std::string Name = getStructName(ST); - - Out << "typedef struct " << Name << ' ' << Name << ";\n"; - } - - Out << '\n'; - - // Keep track of which structures have been printed so far. - SmallPtrSet<Type *, 16> StructPrinted; - - // Loop over all structures then push them into the stack so they are - // printed in the correct order. - // - Out << "/* Structure contents */\n"; - for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) - if (StructTypes[i]->isStructTy()) - // Only print out used types! - printContainedStructs(StructTypes[i], StructPrinted); -} - -// Push the struct onto the stack and recursively push all structs -// this one depends on. -// -// TODO: Make this work properly with vector types -// -void CWriter::printContainedStructs(Type *Ty, - SmallPtrSet<Type *, 16> &StructPrinted) { - // Don't walk through pointers. - if (Ty->isPointerTy() || Ty->isPrimitiveType() || Ty->isIntegerTy()) - return; - - // Print all contained types first. - for (Type::subtype_iterator I = Ty->subtype_begin(), - E = Ty->subtype_end(); I != E; ++I) - printContainedStructs(*I, StructPrinted); - - if (StructType *ST = dyn_cast<StructType>(Ty)) { - // Check to see if we have already printed this struct. - if (!StructPrinted.insert(Ty)) return; - - // Print structure type out. - printType(Out, ST, false, getStructName(ST), true); - Out << ";\n\n"; - } -} - -void CWriter::printFunctionSignature(const Function *F, bool Prototype) { - /// isStructReturn - Should this function actually return a struct by-value? 
- bool isStructReturn = F->hasStructRetAttr(); - - if (F->hasLocalLinkage()) Out << "static "; - if (F->hasDLLImportLinkage()) Out << "__declspec(dllimport) "; - if (F->hasDLLExportLinkage()) Out << "__declspec(dllexport) "; - switch (F->getCallingConv()) { - case CallingConv::X86_StdCall: - Out << "__attribute__((stdcall)) "; - break; - case CallingConv::X86_FastCall: - Out << "__attribute__((fastcall)) "; - break; - case CallingConv::X86_ThisCall: - Out << "__attribute__((thiscall)) "; - break; - default: - break; - } - - // Loop over the arguments, printing them... - FunctionType *FT = cast<FunctionType>(F->getFunctionType()); - const AttrListPtr &PAL = F->getAttributes(); - - std::string tstr; - raw_string_ostream FunctionInnards(tstr); - - // Print out the name... - FunctionInnards << GetValueName(F) << '('; - - bool PrintedArg = false; - if (!F->isDeclaration()) { - if (!F->arg_empty()) { - Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); - unsigned Idx = 1; - - // If this is a struct-return function, don't print the hidden - // struct-return argument. - if (isStructReturn) { - assert(I != E && "Invalid struct return function!"); - ++I; - ++Idx; - } - - std::string ArgName; - for (; I != E; ++I) { - if (PrintedArg) FunctionInnards << ", "; - if (I->hasName() || !Prototype) - ArgName = GetValueName(I); - else - ArgName = ""; - Type *ArgTy = I->getType(); - if (PAL.paramHasAttr(Idx, Attribute::ByVal)) { - ArgTy = cast<PointerType>(ArgTy)->getElementType(); - ByValParams.insert(I); - } - printType(FunctionInnards, ArgTy, - /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt), - ArgName); - PrintedArg = true; - ++Idx; - } - } - } else { - // Loop over the arguments, printing them. - FunctionType::param_iterator I = FT->param_begin(), E = FT->param_end(); - unsigned Idx = 1; - - // If this is a struct-return function, don't print the hidden - // struct-return argument. 
- if (isStructReturn) { - assert(I != E && "Invalid struct return function!"); - ++I; - ++Idx; - } - - for (; I != E; ++I) { - if (PrintedArg) FunctionInnards << ", "; - Type *ArgTy = *I; - if (PAL.paramHasAttr(Idx, Attribute::ByVal)) { - assert(ArgTy->isPointerTy()); - ArgTy = cast<PointerType>(ArgTy)->getElementType(); - } - printType(FunctionInnards, ArgTy, - /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt)); - PrintedArg = true; - ++Idx; - } - } - - if (!PrintedArg && FT->isVarArg()) { - FunctionInnards << "int vararg_dummy_arg"; - PrintedArg = true; - } - - // Finish printing arguments... if this is a vararg function, print the ..., - // unless there are no known types, in which case, we just emit (). - // - if (FT->isVarArg() && PrintedArg) { - FunctionInnards << ",..."; // Output varargs portion of signature! - } else if (!FT->isVarArg() && !PrintedArg) { - FunctionInnards << "void"; // ret() -> ret(void) in C. - } - FunctionInnards << ')'; - - // Get the return tpe for the function. - Type *RetTy; - if (!isStructReturn) - RetTy = F->getReturnType(); - else { - // If this is a struct-return function, print the struct-return type. - RetTy = cast<PointerType>(FT->getParamType(0))->getElementType(); - } - - // Print out the return type and the signature built above. - printType(Out, RetTy, - /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), - FunctionInnards.str()); -} - -static inline bool isFPIntBitCast(const Instruction &I) { - if (!isa<BitCastInst>(I)) - return false; - Type *SrcTy = I.getOperand(0)->getType(); - Type *DstTy = I.getType(); - return (SrcTy->isFloatingPointTy() && DstTy->isIntegerTy()) || - (DstTy->isFloatingPointTy() && SrcTy->isIntegerTy()); -} - -void CWriter::printFunction(Function &F) { - /// isStructReturn - Should this function actually return a struct by-value? 
- bool isStructReturn = F.hasStructRetAttr(); - - printFunctionSignature(&F, false); - Out << " {\n"; - - // If this is a struct return function, handle the result with magic. - if (isStructReturn) { - Type *StructTy = - cast<PointerType>(F.arg_begin()->getType())->getElementType(); - Out << " "; - printType(Out, StructTy, false, "StructReturn"); - Out << "; /* Struct return temporary */\n"; - - Out << " "; - printType(Out, F.arg_begin()->getType(), false, - GetValueName(F.arg_begin())); - Out << " = &StructReturn;\n"; - } - - bool PrintedVar = false; - - // print local variable information for the function - for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) { - if (const AllocaInst *AI = isDirectAlloca(&*I)) { - Out << " "; - printType(Out, AI->getAllocatedType(), false, GetValueName(AI)); - Out << "; /* Address-exposed local */\n"; - PrintedVar = true; - } else if (I->getType() != Type::getVoidTy(F.getContext()) && - !isInlinableInst(*I)) { - Out << " "; - printType(Out, I->getType(), false, GetValueName(&*I)); - Out << ";\n"; - - if (isa<PHINode>(*I)) { // Print out PHI node temporaries as well... - Out << " "; - printType(Out, I->getType(), false, - GetValueName(&*I)+"__PHI_TEMPORARY"); - Out << ";\n"; - } - PrintedVar = true; - } - // We need a temporary for the BitCast to use so it can pluck a value out - // of a union to do the BitCast. This is separate from the need for a - // variable to hold the result of the BitCast. 
- if (isFPIntBitCast(*I)) { - Out << " llvmBitCastUnion " << GetValueName(&*I) - << "__BITCAST_TEMPORARY;\n"; - PrintedVar = true; - } - } - - if (PrintedVar) - Out << '\n'; - - if (F.hasExternalLinkage() && F.getName() == "main") - Out << " CODE_FOR_MAIN();\n"; - - // print the basic blocks - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - if (Loop *L = LI->getLoopFor(BB)) { - if (L->getHeader() == BB && L->getParentLoop() == 0) - printLoop(L); - } else { - printBasicBlock(BB); - } - } - - Out << "}\n\n"; -} - -void CWriter::printLoop(Loop *L) { - Out << " do { /* Syntactic loop '" << L->getHeader()->getName() - << "' to make GCC happy */\n"; - for (unsigned i = 0, e = L->getBlocks().size(); i != e; ++i) { - BasicBlock *BB = L->getBlocks()[i]; - Loop *BBLoop = LI->getLoopFor(BB); - if (BBLoop == L) - printBasicBlock(BB); - else if (BB == BBLoop->getHeader() && BBLoop->getParentLoop() == L) - printLoop(BBLoop); - } - Out << " } while (1); /* end of syntactic loop '" - << L->getHeader()->getName() << "' */\n"; -} - -void CWriter::printBasicBlock(BasicBlock *BB) { - - // Don't print the label for the basic block if there are no uses, or if - // the only terminator use is the predecessor basic block's terminator. - // We have to scan the use list because PHI nodes use basic blocks too but - // do not require a label to be generated. - // - bool NeedsLabel = false; - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) - if (isGotoCodeNecessary(*PI, BB)) { - NeedsLabel = true; - break; - } - - if (NeedsLabel) Out << GetValueName(BB) << ":\n"; - - // Output all of the instructions in the basic block... 
- for (BasicBlock::iterator II = BB->begin(), E = --BB->end(); II != E; - ++II) { - if (!isInlinableInst(*II) && !isDirectAlloca(II)) { - if (II->getType() != Type::getVoidTy(BB->getContext()) && - !isInlineAsm(*II)) - outputLValue(II); - else - Out << " "; - writeInstComputationInline(*II); - Out << ";\n"; - } - } - - // Don't emit prefix or suffix for the terminator. - visit(*BB->getTerminator()); -} - - -// Specific Instruction type classes... note that all of the casts are -// necessary because we use the instruction classes as opaque types... -// -void CWriter::visitReturnInst(ReturnInst &I) { - // If this is a struct return function, return the temporary struct. - bool isStructReturn = I.getParent()->getParent()->hasStructRetAttr(); - - if (isStructReturn) { - Out << " return StructReturn;\n"; - return; - } - - // Don't output a void return if this is the last basic block in the function - if (I.getNumOperands() == 0 && - &*--I.getParent()->getParent()->end() == I.getParent() && - !I.getParent()->size() == 1) { - return; - } - - Out << " return"; - if (I.getNumOperands()) { - Out << ' '; - writeOperand(I.getOperand(0)); - } - Out << ";\n"; -} - -void CWriter::visitSwitchInst(SwitchInst &SI) { - - Value* Cond = SI.getCondition(); - - Out << " switch ("; - writeOperand(Cond); - Out << ") {\n default:\n"; - printPHICopiesForSuccessor (SI.getParent(), SI.getDefaultDest(), 2); - printBranchToBlock(SI.getParent(), SI.getDefaultDest(), 2); - Out << ";\n"; - - // Skip the first item since that's the default case. 
- for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) { - ConstantInt* CaseVal = i.getCaseValue(); - BasicBlock* Succ = i.getCaseSuccessor(); - Out << " case "; - writeOperand(CaseVal); - Out << ":\n"; - printPHICopiesForSuccessor (SI.getParent(), Succ, 2); - printBranchToBlock(SI.getParent(), Succ, 2); - if (Function::iterator(Succ) == - llvm::next(Function::iterator(SI.getParent()))) - Out << " break;\n"; - } - - Out << " }\n"; -} - -void CWriter::visitIndirectBrInst(IndirectBrInst &IBI) { - Out << " goto *(void*)("; - writeOperand(IBI.getOperand(0)); - Out << ");\n"; -} - -void CWriter::visitUnreachableInst(UnreachableInst &I) { - Out << " /*UNREACHABLE*/;\n"; -} - -bool CWriter::isGotoCodeNecessary(BasicBlock *From, BasicBlock *To) { - /// FIXME: This should be reenabled, but loop reordering safe!! - return true; - - if (llvm::next(Function::iterator(From)) != Function::iterator(To)) - return true; // Not the direct successor, we need a goto. - - //isa<SwitchInst>(From->getTerminator()) - - if (LI->getLoopFor(From) != LI->getLoopFor(To)) - return true; - return false; -} - -void CWriter::printPHICopiesForSuccessor (BasicBlock *CurBlock, - BasicBlock *Successor, - unsigned Indent) { - for (BasicBlock::iterator I = Successor->begin(); isa<PHINode>(I); ++I) { - PHINode *PN = cast<PHINode>(I); - // Now we have to do the printing. - Value *IV = PN->getIncomingValueForBlock(CurBlock); - if (!isa<UndefValue>(IV)) { - Out << std::string(Indent, ' '); - Out << " " << GetValueName(I) << "__PHI_TEMPORARY = "; - writeOperand(IV); - Out << "; /* for PHI node */\n"; - } - } -} - -void CWriter::printBranchToBlock(BasicBlock *CurBB, BasicBlock *Succ, - unsigned Indent) { - if (isGotoCodeNecessary(CurBB, Succ)) { - Out << std::string(Indent, ' ') << " goto "; - writeOperand(Succ); - Out << ";\n"; - } -} - -// Branch instruction printing - Avoid printing out a branch to a basic block -// that immediately succeeds the current one. 
-// -void CWriter::visitBranchInst(BranchInst &I) { - - if (I.isConditional()) { - if (isGotoCodeNecessary(I.getParent(), I.getSuccessor(0))) { - Out << " if ("; - writeOperand(I.getCondition()); - Out << ") {\n"; - - printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(0), 2); - printBranchToBlock(I.getParent(), I.getSuccessor(0), 2); - - if (isGotoCodeNecessary(I.getParent(), I.getSuccessor(1))) { - Out << " } else {\n"; - printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(1), 2); - printBranchToBlock(I.getParent(), I.getSuccessor(1), 2); - } - } else { - // First goto not necessary, assume second one is... - Out << " if (!"; - writeOperand(I.getCondition()); - Out << ") {\n"; - - printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(1), 2); - printBranchToBlock(I.getParent(), I.getSuccessor(1), 2); - } - - Out << " }\n"; - } else { - printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(0), 0); - printBranchToBlock(I.getParent(), I.getSuccessor(0), 0); - } - Out << "\n"; -} - -// PHI nodes get copied into temporary values at the end of predecessor basic -// blocks. We now need to copy these temporary values into the REAL value for -// the PHI. -void CWriter::visitPHINode(PHINode &I) { - writeOperand(&I); - Out << "__PHI_TEMPORARY"; -} - - -void CWriter::visitBinaryOperator(Instruction &I) { - // binary instructions, shift instructions, setCond instructions. - assert(!I.getType()->isPointerTy()); - - // We must cast the results of binary operations which might be promoted. - bool needsCast = false; - if ((I.getType() == Type::getInt8Ty(I.getContext())) || - (I.getType() == Type::getInt16Ty(I.getContext())) - || (I.getType() == Type::getFloatTy(I.getContext()))) { - needsCast = true; - Out << "(("; - printType(Out, I.getType(), false); - Out << ")("; - } - - // If this is a negation operation, print it out as such. For FP, we don't - // want to print "-0.0 - X". 
- if (BinaryOperator::isNeg(&I)) { - Out << "-("; - writeOperand(BinaryOperator::getNegArgument(cast<BinaryOperator>(&I))); - Out << ")"; - } else if (BinaryOperator::isFNeg(&I)) { - Out << "-("; - writeOperand(BinaryOperator::getFNegArgument(cast<BinaryOperator>(&I))); - Out << ")"; - } else if (I.getOpcode() == Instruction::FRem) { - // Output a call to fmod/fmodf instead of emitting a%b - if (I.getType() == Type::getFloatTy(I.getContext())) - Out << "fmodf("; - else if (I.getType() == Type::getDoubleTy(I.getContext())) - Out << "fmod("; - else // all 3 flavors of long double - Out << "fmodl("; - writeOperand(I.getOperand(0)); - Out << ", "; - writeOperand(I.getOperand(1)); - Out << ")"; - } else { - - // Write out the cast of the instruction's value back to the proper type - // if necessary. - bool NeedsClosingParens = writeInstructionCast(I); - - // Certain instructions require the operand to be forced to a specific type - // so we use writeOperandWithCast here instead of writeOperand. Similarly - // below for operand 1 - writeOperandWithCast(I.getOperand(0), I.getOpcode()); - - switch (I.getOpcode()) { - case Instruction::Add: - case Instruction::FAdd: Out << " + "; break; - case Instruction::Sub: - case Instruction::FSub: Out << " - "; break; - case Instruction::Mul: - case Instruction::FMul: Out << " * "; break; - case Instruction::URem: - case Instruction::SRem: - case Instruction::FRem: Out << " % "; break; - case Instruction::UDiv: - case Instruction::SDiv: - case Instruction::FDiv: Out << " / "; break; - case Instruction::And: Out << " & "; break; - case Instruction::Or: Out << " | "; break; - case Instruction::Xor: Out << " ^ "; break; - case Instruction::Shl : Out << " << "; break; - case Instruction::LShr: - case Instruction::AShr: Out << " >> "; break; - default: -#ifndef NDEBUG - errs() << "Invalid operator type!" 
<< I; -#endif - llvm_unreachable(0); - } - - writeOperandWithCast(I.getOperand(1), I.getOpcode()); - if (NeedsClosingParens) - Out << "))"; - } - - if (needsCast) { - Out << "))"; - } -} - -void CWriter::visitICmpInst(ICmpInst &I) { - // We must cast the results of icmp which might be promoted. - bool needsCast = false; - - // Write out the cast of the instruction's value back to the proper type - // if necessary. - bool NeedsClosingParens = writeInstructionCast(I); - - // Certain icmp predicate require the operand to be forced to a specific type - // so we use writeOperandWithCast here instead of writeOperand. Similarly - // below for operand 1 - writeOperandWithCast(I.getOperand(0), I); - - switch (I.getPredicate()) { - case ICmpInst::ICMP_EQ: Out << " == "; break; - case ICmpInst::ICMP_NE: Out << " != "; break; - case ICmpInst::ICMP_ULE: - case ICmpInst::ICMP_SLE: Out << " <= "; break; - case ICmpInst::ICMP_UGE: - case ICmpInst::ICMP_SGE: Out << " >= "; break; - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_SLT: Out << " < "; break; - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_SGT: Out << " > "; break; - default: -#ifndef NDEBUG - errs() << "Invalid icmp predicate!" 
<< I; -#endif - llvm_unreachable(0); - } - - writeOperandWithCast(I.getOperand(1), I); - if (NeedsClosingParens) - Out << "))"; - - if (needsCast) { - Out << "))"; - } -} - -void CWriter::visitFCmpInst(FCmpInst &I) { - if (I.getPredicate() == FCmpInst::FCMP_FALSE) { - Out << "0"; - return; - } - if (I.getPredicate() == FCmpInst::FCMP_TRUE) { - Out << "1"; - return; - } - - const char* op = 0; - switch (I.getPredicate()) { - default: llvm_unreachable("Illegal FCmp predicate"); - case FCmpInst::FCMP_ORD: op = "ord"; break; - case FCmpInst::FCMP_UNO: op = "uno"; break; - case FCmpInst::FCMP_UEQ: op = "ueq"; break; - case FCmpInst::FCMP_UNE: op = "une"; break; - case FCmpInst::FCMP_ULT: op = "ult"; break; - case FCmpInst::FCMP_ULE: op = "ule"; break; - case FCmpInst::FCMP_UGT: op = "ugt"; break; - case FCmpInst::FCMP_UGE: op = "uge"; break; - case FCmpInst::FCMP_OEQ: op = "oeq"; break; - case FCmpInst::FCMP_ONE: op = "one"; break; - case FCmpInst::FCMP_OLT: op = "olt"; break; - case FCmpInst::FCMP_OLE: op = "ole"; break; - case FCmpInst::FCMP_OGT: op = "ogt"; break; - case FCmpInst::FCMP_OGE: op = "oge"; break; - } - - Out << "llvm_fcmp_" << op << "("; - // Write the first operand - writeOperand(I.getOperand(0)); - Out << ", "; - // Write the second operand - writeOperand(I.getOperand(1)); - Out << ")"; -} - -static const char * getFloatBitCastField(Type *Ty) { - switch (Ty->getTypeID()) { - default: llvm_unreachable("Invalid Type"); - case Type::FloatTyID: return "Float"; - case Type::DoubleTyID: return "Double"; - case Type::IntegerTyID: { - unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); - if (NumBits <= 32) - return "Int32"; - else - return "Int64"; - } - } -} - -void CWriter::visitCastInst(CastInst &I) { - Type *DstTy = I.getType(); - Type *SrcTy = I.getOperand(0)->getType(); - if (isFPIntBitCast(I)) { - Out << '('; - // These int<->float and long<->double casts need to be handled specially - Out << GetValueName(&I) << "__BITCAST_TEMPORARY." 
- << getFloatBitCastField(I.getOperand(0)->getType()) << " = "; - writeOperand(I.getOperand(0)); - Out << ", " << GetValueName(&I) << "__BITCAST_TEMPORARY." - << getFloatBitCastField(I.getType()); - Out << ')'; - return; - } - - Out << '('; - printCast(I.getOpcode(), SrcTy, DstTy); - - // Make a sext from i1 work by subtracting the i1 from 0 (an int). - if (SrcTy == Type::getInt1Ty(I.getContext()) && - I.getOpcode() == Instruction::SExt) - Out << "0-"; - - writeOperand(I.getOperand(0)); - - if (DstTy == Type::getInt1Ty(I.getContext()) && - (I.getOpcode() == Instruction::Trunc || - I.getOpcode() == Instruction::FPToUI || - I.getOpcode() == Instruction::FPToSI || - I.getOpcode() == Instruction::PtrToInt)) { - // Make sure we really get a trunc to bool by anding the operand with 1 - Out << "&1u"; - } - Out << ')'; -} - -void CWriter::visitSelectInst(SelectInst &I) { - Out << "(("; - writeOperand(I.getCondition()); - Out << ") ? ("; - writeOperand(I.getTrueValue()); - Out << ") : ("; - writeOperand(I.getFalseValue()); - Out << "))"; -} - -// Returns the macro name or value of the max or min of an integer type -// (as defined in limits.h). -static void printLimitValue(IntegerType &Ty, bool isSigned, bool isMax, - raw_ostream &Out) { - const char* type; - const char* sprefix = ""; - - unsigned NumBits = Ty.getBitWidth(); - if (NumBits <= 8) { - type = "CHAR"; - sprefix = "S"; - } else if (NumBits <= 16) { - type = "SHRT"; - } else if (NumBits <= 32) { - type = "INT"; - } else if (NumBits <= 64) { - type = "LLONG"; - } else { - llvm_unreachable("Bit widths > 64 not implemented yet"); - } - - if (isSigned) - Out << sprefix << type << (isMax ? "_MAX" : "_MIN"); - else - Out << "U" << type << (isMax ? 
"_MAX" : "0"); -} - -#ifndef NDEBUG -static bool isSupportedIntegerSize(IntegerType &T) { - return T.getBitWidth() == 8 || T.getBitWidth() == 16 || - T.getBitWidth() == 32 || T.getBitWidth() == 64; -} -#endif - -void CWriter::printIntrinsicDefinition(const Function &F, raw_ostream &Out) { - FunctionType *funT = F.getFunctionType(); - Type *retT = F.getReturnType(); - IntegerType *elemT = cast<IntegerType>(funT->getParamType(1)); - - assert(isSupportedIntegerSize(*elemT) && - "CBackend does not support arbitrary size integers."); - assert(cast<StructType>(retT)->getElementType(0) == elemT && - elemT == funT->getParamType(0) && funT->getNumParams() == 2); - - switch (F.getIntrinsicID()) { - default: - llvm_unreachable("Unsupported Intrinsic."); - case Intrinsic::uadd_with_overflow: - // static inline Rty uadd_ixx(unsigned ixx a, unsigned ixx b) { - // Rty r; - // r.field0 = a + b; - // r.field1 = (r.field0 < a); - // return r; - // } - Out << "static inline "; - printType(Out, retT); - Out << GetValueName(&F); - Out << "("; - printSimpleType(Out, elemT, false); - Out << "a,"; - printSimpleType(Out, elemT, false); - Out << "b) {\n "; - printType(Out, retT); - Out << "r;\n"; - Out << " r.field0 = a + b;\n"; - Out << " r.field1 = (r.field0 < a);\n"; - Out << " return r;\n}\n"; - break; - - case Intrinsic::sadd_with_overflow: - // static inline Rty sadd_ixx(ixx a, ixx b) { - // Rty r; - // r.field1 = (b > 0 && a > XX_MAX - b) || - // (b < 0 && a < XX_MIN - b); - // r.field0 = r.field1 ? 0 : a + b; - // return r; - // } - Out << "static "; - printType(Out, retT); - Out << GetValueName(&F); - Out << "("; - printSimpleType(Out, elemT, true); - Out << "a,"; - printSimpleType(Out, elemT, true); - Out << "b) {\n "; - printType(Out, retT); - Out << "r;\n"; - Out << " r.field1 = (b > 0 && a > "; - printLimitValue(*elemT, true, true, Out); - Out << " - b) || (b < 0 && a < "; - printLimitValue(*elemT, true, false, Out); - Out << " - b);\n"; - Out << " r.field0 = r.field1 ? 
0 : a + b;\n"; - Out << " return r;\n}\n"; - break; - } -} - -void CWriter::lowerIntrinsics(Function &F) { - // This is used to keep track of intrinsics that get generated to a lowered - // function. We must generate the prototypes before the function body which - // will only be expanded on first use (by the loop below). - std::vector<Function*> prototypesToGen; - - // Examine all the instructions in this function to find the intrinsics that - // need to be lowered. - for (Function::iterator BB = F.begin(), EE = F.end(); BB != EE; ++BB) - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) - if (CallInst *CI = dyn_cast<CallInst>(I++)) - if (Function *F = CI->getCalledFunction()) - switch (F->getIntrinsicID()) { - case Intrinsic::not_intrinsic: - case Intrinsic::vastart: - case Intrinsic::vacopy: - case Intrinsic::vaend: - case Intrinsic::returnaddress: - case Intrinsic::frameaddress: - case Intrinsic::setjmp: - case Intrinsic::longjmp: - case Intrinsic::prefetch: - case Intrinsic::powi: - case Intrinsic::x86_sse_cmp_ss: - case Intrinsic::x86_sse_cmp_ps: - case Intrinsic::x86_sse2_cmp_sd: - case Intrinsic::x86_sse2_cmp_pd: - case Intrinsic::ppc_altivec_lvsl: - case Intrinsic::uadd_with_overflow: - case Intrinsic::sadd_with_overflow: - // We directly implement these intrinsics - break; - default: - // If this is an intrinsic that directly corresponds to a GCC - // builtin, we handle it. - const char *BuiltinName = ""; -#define GET_GCC_BUILTIN_NAME -#include "llvm/Intrinsics.gen" -#undef GET_GCC_BUILTIN_NAME - // If we handle it, don't lower it. - if (BuiltinName[0]) break; - - // All other intrinsic calls we must lower. 
- Instruction *Before = 0; - if (CI != &BB->front()) - Before = prior(BasicBlock::iterator(CI)); - - IL->LowerIntrinsicCall(CI); - if (Before) { // Move iterator to instruction after call - I = Before; ++I; - } else { - I = BB->begin(); - } - // If the intrinsic got lowered to another call, and that call has - // a definition then we need to make sure its prototype is emitted - // before any calls to it. - if (CallInst *Call = dyn_cast<CallInst>(I)) - if (Function *NewF = Call->getCalledFunction()) - if (!NewF->isDeclaration()) - prototypesToGen.push_back(NewF); - - break; - } - - // We may have collected some prototypes to emit in the loop above. - // Emit them now, before the function that uses them is emitted. But, - // be careful not to emit them twice. - std::vector<Function*>::iterator I = prototypesToGen.begin(); - std::vector<Function*>::iterator E = prototypesToGen.end(); - for ( ; I != E; ++I) { - if (intrinsicPrototypesAlreadyGenerated.insert(*I).second) { - Out << '\n'; - printFunctionSignature(*I, true); - Out << ";\n"; - } - } -} - -void CWriter::visitCallInst(CallInst &I) { - if (isa<InlineAsm>(I.getCalledValue())) - return visitInlineAsm(I); - - bool WroteCallee = false; - - // Handle intrinsic function calls first... - if (Function *F = I.getCalledFunction()) - if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID()) - if (visitBuiltinCall(I, ID, WroteCallee)) - return; - - Value *Callee = I.getCalledValue(); - - PointerType *PTy = cast<PointerType>(Callee->getType()); - FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); - - // If this is a call to a struct-return function, assign to the first - // parameter instead of passing it to the call. 
- const AttrListPtr &PAL = I.getAttributes(); - bool hasByVal = I.hasByValArgument(); - bool isStructRet = I.hasStructRetAttr(); - if (isStructRet) { - writeOperandDeref(I.getArgOperand(0)); - Out << " = "; - } - - if (I.isTailCall()) Out << " /*tail*/ "; - - if (!WroteCallee) { - // If this is an indirect call to a struct return function, we need to cast - // the pointer. Ditto for indirect calls with byval arguments. - bool NeedsCast = (hasByVal || isStructRet) && !isa<Function>(Callee); - - // GCC is a real PITA. It does not permit codegening casts of functions to - // function pointers if they are in a call (it generates a trap instruction - // instead!). We work around this by inserting a cast to void* in between - // the function and the function pointer cast. Unfortunately, we can't just - // form the constant expression here, because the folder will immediately - // nuke it. - // - // Note finally, that this is completely unsafe. ANSI C does not guarantee - // that void* and function pointers have the same size. :( To deal with this - // in the common case, we handle casts where the number of arguments passed - // match exactly. - // - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Callee)) - if (CE->isCast()) - if (Function *RF = dyn_cast<Function>(CE->getOperand(0))) { - NeedsCast = true; - Callee = RF; - } - - if (NeedsCast) { - // Ok, just cast the pointer type. 
- Out << "(("; - if (isStructRet) - printStructReturnPointerFunctionType(Out, PAL, - cast<PointerType>(I.getCalledValue()->getType())); - else if (hasByVal) - printType(Out, I.getCalledValue()->getType(), false, "", true, PAL); - else - printType(Out, I.getCalledValue()->getType()); - Out << ")(void*)"; - } - writeOperand(Callee); - if (NeedsCast) Out << ')'; - } - - Out << '('; - - bool PrintedArg = false; - if(FTy->isVarArg() && !FTy->getNumParams()) { - Out << "0 /*dummy arg*/"; - PrintedArg = true; - } - - unsigned NumDeclaredParams = FTy->getNumParams(); - CallSite CS(&I); - CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); - unsigned ArgNo = 0; - if (isStructRet) { // Skip struct return argument. - ++AI; - ++ArgNo; - } - - - for (; AI != AE; ++AI, ++ArgNo) { - if (PrintedArg) Out << ", "; - if (ArgNo < NumDeclaredParams && - (*AI)->getType() != FTy->getParamType(ArgNo)) { - Out << '('; - printType(Out, FTy->getParamType(ArgNo), - /*isSigned=*/PAL.paramHasAttr(ArgNo+1, Attribute::SExt)); - Out << ')'; - } - // Check if the argument is expected to be passed by value. - if (I.paramHasAttr(ArgNo+1, Attribute::ByVal)) - writeOperandDeref(*AI); - else - writeOperand(*AI); - PrintedArg = true; - } - Out << ')'; -} - -/// visitBuiltinCall - Handle the call to the specified builtin. Returns true -/// if the entire call is handled, return false if it wasn't handled, and -/// optionally set 'WroteCallee' if the callee has already been printed out. -bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, - bool &WroteCallee) { - switch (ID) { - default: { - // If this is an intrinsic that directly corresponds to a GCC - // builtin, we emit it here. 
- const char *BuiltinName = ""; - Function *F = I.getCalledFunction(); -#define GET_GCC_BUILTIN_NAME -#include "llvm/Intrinsics.gen" -#undef GET_GCC_BUILTIN_NAME - assert(BuiltinName[0] && "Unknown LLVM intrinsic!"); - - Out << BuiltinName; - WroteCallee = true; - return false; - } - case Intrinsic::vastart: - Out << "0; "; - - Out << "va_start(*(va_list*)"; - writeOperand(I.getArgOperand(0)); - Out << ", "; - // Output the last argument to the enclosing function. - if (I.getParent()->getParent()->arg_empty()) - Out << "vararg_dummy_arg"; - else - writeOperand(--I.getParent()->getParent()->arg_end()); - Out << ')'; - return true; - case Intrinsic::vaend: - if (!isa<ConstantPointerNull>(I.getArgOperand(0))) { - Out << "0; va_end(*(va_list*)"; - writeOperand(I.getArgOperand(0)); - Out << ')'; - } else { - Out << "va_end(*(va_list*)0)"; - } - return true; - case Intrinsic::vacopy: - Out << "0; "; - Out << "va_copy(*(va_list*)"; - writeOperand(I.getArgOperand(0)); - Out << ", *(va_list*)"; - writeOperand(I.getArgOperand(1)); - Out << ')'; - return true; - case Intrinsic::returnaddress: - Out << "__builtin_return_address("; - writeOperand(I.getArgOperand(0)); - Out << ')'; - return true; - case Intrinsic::frameaddress: - Out << "__builtin_frame_address("; - writeOperand(I.getArgOperand(0)); - Out << ')'; - return true; - case Intrinsic::powi: - Out << "__builtin_powi("; - writeOperand(I.getArgOperand(0)); - Out << ", "; - writeOperand(I.getArgOperand(1)); - Out << ')'; - return true; - case Intrinsic::setjmp: - Out << "setjmp(*(jmp_buf*)"; - writeOperand(I.getArgOperand(0)); - Out << ')'; - return true; - case Intrinsic::longjmp: - Out << "longjmp(*(jmp_buf*)"; - writeOperand(I.getArgOperand(0)); - Out << ", "; - writeOperand(I.getArgOperand(1)); - Out << ')'; - return true; - case Intrinsic::prefetch: - Out << "LLVM_PREFETCH((const void *)"; - writeOperand(I.getArgOperand(0)); - Out << ", "; - writeOperand(I.getArgOperand(1)); - Out << ", "; - 
writeOperand(I.getArgOperand(2)); - Out << ")"; - return true; - case Intrinsic::stacksave: - // Emit this as: Val = 0; *((void**)&Val) = __builtin_stack_save() - // to work around GCC bugs (see PR1809). - Out << "0; *((void**)&" << GetValueName(&I) - << ") = __builtin_stack_save()"; - return true; - case Intrinsic::x86_sse_cmp_ss: - case Intrinsic::x86_sse_cmp_ps: - case Intrinsic::x86_sse2_cmp_sd: - case Intrinsic::x86_sse2_cmp_pd: - Out << '('; - printType(Out, I.getType()); - Out << ')'; - // Multiple GCC builtins multiplex onto this intrinsic. - switch (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue()) { - default: llvm_unreachable("Invalid llvm.x86.sse.cmp!"); - case 0: Out << "__builtin_ia32_cmpeq"; break; - case 1: Out << "__builtin_ia32_cmplt"; break; - case 2: Out << "__builtin_ia32_cmple"; break; - case 3: Out << "__builtin_ia32_cmpunord"; break; - case 4: Out << "__builtin_ia32_cmpneq"; break; - case 5: Out << "__builtin_ia32_cmpnlt"; break; - case 6: Out << "__builtin_ia32_cmpnle"; break; - case 7: Out << "__builtin_ia32_cmpord"; break; - } - if (ID == Intrinsic::x86_sse_cmp_ps || ID == Intrinsic::x86_sse2_cmp_pd) - Out << 'p'; - else - Out << 's'; - if (ID == Intrinsic::x86_sse_cmp_ss || ID == Intrinsic::x86_sse_cmp_ps) - Out << 's'; - else - Out << 'd'; - - Out << "("; - writeOperand(I.getArgOperand(0)); - Out << ", "; - writeOperand(I.getArgOperand(1)); - Out << ")"; - return true; - case Intrinsic::ppc_altivec_lvsl: - Out << '('; - printType(Out, I.getType()); - Out << ')'; - Out << "__builtin_altivec_lvsl(0, (void*)"; - writeOperand(I.getArgOperand(0)); - Out << ")"; - return true; - case Intrinsic::uadd_with_overflow: - case Intrinsic::sadd_with_overflow: - Out << GetValueName(I.getCalledFunction()) << "("; - writeOperand(I.getArgOperand(0)); - Out << ", "; - writeOperand(I.getArgOperand(1)); - Out << ")"; - return true; - } -} - -//This converts the llvm constraint string to something gcc is expecting. 
-//TODO: work out platform independent constraints and factor those out -// of the per target tables -// handle multiple constraint codes -std::string CWriter::InterpretASMConstraint(InlineAsm::ConstraintInfo& c) { - assert(c.Codes.size() == 1 && "Too many asm constraint codes to handle"); - - // Grab the translation table from MCAsmInfo if it exists. - const MCAsmInfo *TargetAsm; - std::string Triple = TheModule->getTargetTriple(); - if (Triple.empty()) - Triple = llvm::sys::getDefaultTargetTriple(); - - std::string E; - if (const Target *Match = TargetRegistry::lookupTarget(Triple, E)) - TargetAsm = Match->createMCAsmInfo(Triple); - else - return c.Codes[0]; - - const char *const *table = TargetAsm->getAsmCBE(); - - // Search the translation table if it exists. - for (int i = 0; table && table[i]; i += 2) - if (c.Codes[0] == table[i]) { - delete TargetAsm; - return table[i+1]; - } - - // Default is identity. - delete TargetAsm; - return c.Codes[0]; -} - -//TODO: import logic from AsmPrinter.cpp -static std::string gccifyAsm(std::string asmstr) { - for (std::string::size_type i = 0; i != asmstr.size(); ++i) - if (asmstr[i] == '\n') - asmstr.replace(i, 1, "\\n"); - else if (asmstr[i] == '\t') - asmstr.replace(i, 1, "\\t"); - else if (asmstr[i] == '$') { - if (asmstr[i + 1] == '{') { - std::string::size_type a = asmstr.find_first_of(':', i + 1); - std::string::size_type b = asmstr.find_first_of('}', i + 1); - std::string n = "%" + - asmstr.substr(a + 1, b - a - 1) + - asmstr.substr(i + 2, a - i - 2); - asmstr.replace(i, b - i + 1, n); - i += n.size() - 1; - } else - asmstr.replace(i, 1, "%"); - } - else if (asmstr[i] == '%')//grr - { asmstr.replace(i, 1, "%%"); ++i;} - - return asmstr; -} - -//TODO: assumptions about what consume arguments from the call are likely wrong -// handle communitivity -void CWriter::visitInlineAsm(CallInst &CI) { - InlineAsm* as = cast<InlineAsm>(CI.getCalledValue()); - InlineAsm::ConstraintInfoVector Constraints = as->ParseConstraints(); 
- - std::vector<std::pair<Value*, int> > ResultVals; - if (CI.getType() == Type::getVoidTy(CI.getContext())) - ; - else if (StructType *ST = dyn_cast<StructType>(CI.getType())) { - for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) - ResultVals.push_back(std::make_pair(&CI, (int)i)); - } else { - ResultVals.push_back(std::make_pair(&CI, -1)); - } - - // Fix up the asm string for gcc and emit it. - Out << "__asm__ volatile (\"" << gccifyAsm(as->getAsmString()) << "\"\n"; - Out << " :"; - - unsigned ValueCount = 0; - bool IsFirst = true; - - // Convert over all the output constraints. - for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(), - E = Constraints.end(); I != E; ++I) { - - if (I->Type != InlineAsm::isOutput) { - ++ValueCount; - continue; // Ignore non-output constraints. - } - - assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle"); - std::string C = InterpretASMConstraint(*I); - if (C.empty()) continue; - - if (!IsFirst) { - Out << ", "; - IsFirst = false; - } - - // Unpack the dest. - Value *DestVal; - int DestValNo = -1; - - if (ValueCount < ResultVals.size()) { - DestVal = ResultVals[ValueCount].first; - DestValNo = ResultVals[ValueCount].second; - } else - DestVal = CI.getArgOperand(ValueCount-ResultVals.size()); - - if (I->isEarlyClobber) - C = "&"+C; - - Out << "\"=" << C << "\"(" << GetValueName(DestVal); - if (DestValNo != -1) - Out << ".field" << DestValNo; // Multiple retvals. - Out << ")"; - ++ValueCount; - } - - - // Convert over all the input constraints. - Out << "\n :"; - IsFirst = true; - ValueCount = 0; - for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(), - E = Constraints.end(); I != E; ++I) { - if (I->Type != InlineAsm::isInput) { - ++ValueCount; - continue; // Ignore non-input constraints. 
- } - - assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle"); - std::string C = InterpretASMConstraint(*I); - if (C.empty()) continue; - - if (!IsFirst) { - Out << ", "; - IsFirst = false; - } - - assert(ValueCount >= ResultVals.size() && "Input can't refer to result"); - Value *SrcVal = CI.getArgOperand(ValueCount-ResultVals.size()); - - Out << "\"" << C << "\"("; - if (!I->isIndirect) - writeOperand(SrcVal); - else - writeOperandDeref(SrcVal); - Out << ")"; - } - - // Convert over the clobber constraints. - IsFirst = true; - for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(), - E = Constraints.end(); I != E; ++I) { - if (I->Type != InlineAsm::isClobber) - continue; // Ignore non-input constraints. - - assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle"); - std::string C = InterpretASMConstraint(*I); - if (C.empty()) continue; - - if (!IsFirst) { - Out << ", "; - IsFirst = false; - } - - Out << '\"' << C << '"'; - } - - Out << ")"; -} - -void CWriter::visitAllocaInst(AllocaInst &I) { - Out << '('; - printType(Out, I.getType()); - Out << ") alloca(sizeof("; - printType(Out, I.getType()->getElementType()); - Out << ')'; - if (I.isArrayAllocation()) { - Out << " * " ; - writeOperand(I.getOperand(0)); - } - Out << ')'; -} - -void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I, - gep_type_iterator E, bool Static) { - - // If there are no indices, just print out the pointer. - if (I == E) { - writeOperand(Ptr); - return; - } - - // Find out if the last index is into a vector. If so, we have to print this - // specially. Since vectors can't have elements of indexable type, only the - // last index could possibly be of a vector element. 
- VectorType *LastIndexIsVector = 0; - { - for (gep_type_iterator TmpI = I; TmpI != E; ++TmpI) - LastIndexIsVector = dyn_cast<VectorType>(*TmpI); - } - - Out << "("; - - // If the last index is into a vector, we can't print it as &a[i][j] because - // we can't index into a vector with j in GCC. Instead, emit this as - // (((float*)&a[i])+j) - if (LastIndexIsVector) { - Out << "(("; - printType(Out, PointerType::getUnqual(LastIndexIsVector->getElementType())); - Out << ")("; - } - - Out << '&'; - - // If the first index is 0 (very typical) we can do a number of - // simplifications to clean up the code. - Value *FirstOp = I.getOperand(); - if (!isa<Constant>(FirstOp) || !cast<Constant>(FirstOp)->isNullValue()) { - // First index isn't simple, print it the hard way. - writeOperand(Ptr); - } else { - ++I; // Skip the zero index. - - // Okay, emit the first operand. If Ptr is something that is already address - // exposed, like a global, avoid emitting (&foo)[0], just emit foo instead. - if (isAddressExposed(Ptr)) { - writeOperandInternal(Ptr, Static); - } else if (I != E && (*I)->isStructTy()) { - // If we didn't already emit the first operand, see if we can print it as - // P->f instead of "P[0].f" - writeOperand(Ptr); - Out << "->field" << cast<ConstantInt>(I.getOperand())->getZExtValue(); - ++I; // eat the struct index as well. - } else { - // Instead of emitting P[0][1], emit (*P)[1], which is more idiomatic. - Out << "(*"; - writeOperand(Ptr); - Out << ")"; - } - } - - for (; I != E; ++I) { - if ((*I)->isStructTy()) { - Out << ".field" << cast<ConstantInt>(I.getOperand())->getZExtValue(); - } else if ((*I)->isArrayTy()) { - Out << ".array["; - writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr); - Out << ']'; - } else if (!(*I)->isVectorTy()) { - Out << '['; - writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr); - Out << ']'; - } else { - // If the last index is into a vector, then print it out as "+j)". 
This - // works with the 'LastIndexIsVector' code above. - if (isa<Constant>(I.getOperand()) && - cast<Constant>(I.getOperand())->isNullValue()) { - Out << "))"; // avoid "+0". - } else { - Out << ")+("; - writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr); - Out << "))"; - } - } - } - Out << ")"; -} - -void CWriter::writeMemoryAccess(Value *Operand, Type *OperandType, - bool IsVolatile, unsigned Alignment) { - - bool IsUnaligned = Alignment && - Alignment < TD->getABITypeAlignment(OperandType); - - if (!IsUnaligned) - Out << '*'; - if (IsVolatile || IsUnaligned) { - Out << "(("; - if (IsUnaligned) - Out << "struct __attribute__ ((packed, aligned(" << Alignment << "))) {"; - printType(Out, OperandType, false, IsUnaligned ? "data" : "volatile*"); - if (IsUnaligned) { - Out << "; } "; - if (IsVolatile) Out << "volatile "; - Out << "*"; - } - Out << ")"; - } - - writeOperand(Operand); - - if (IsVolatile || IsUnaligned) { - Out << ')'; - if (IsUnaligned) - Out << "->data"; - } -} - -void CWriter::visitLoadInst(LoadInst &I) { - writeMemoryAccess(I.getOperand(0), I.getType(), I.isVolatile(), - I.getAlignment()); - -} - -void CWriter::visitStoreInst(StoreInst &I) { - writeMemoryAccess(I.getPointerOperand(), I.getOperand(0)->getType(), - I.isVolatile(), I.getAlignment()); - Out << " = "; - Value *Operand = I.getOperand(0); - Constant *BitMask = 0; - if (IntegerType* ITy = dyn_cast<IntegerType>(Operand->getType())) - if (!ITy->isPowerOf2ByteWidth()) - // We have a bit width that doesn't match an even power-of-2 byte - // size. 
Consequently we must & the value with the type's bit mask - BitMask = ConstantInt::get(ITy, ITy->getBitMask()); - if (BitMask) - Out << "(("; - writeOperand(Operand); - if (BitMask) { - Out << ") & "; - printConstant(BitMask, false); - Out << ")"; - } -} - -void CWriter::visitGetElementPtrInst(GetElementPtrInst &I) { - printGEPExpression(I.getPointerOperand(), gep_type_begin(I), - gep_type_end(I), false); -} - -void CWriter::visitVAArgInst(VAArgInst &I) { - Out << "va_arg(*(va_list*)"; - writeOperand(I.getOperand(0)); - Out << ", "; - printType(Out, I.getType()); - Out << ");\n "; -} - -void CWriter::visitInsertElementInst(InsertElementInst &I) { - Type *EltTy = I.getType()->getElementType(); - writeOperand(I.getOperand(0)); - Out << ";\n "; - Out << "(("; - printType(Out, PointerType::getUnqual(EltTy)); - Out << ")(&" << GetValueName(&I) << "))["; - writeOperand(I.getOperand(2)); - Out << "] = ("; - writeOperand(I.getOperand(1)); - Out << ")"; -} - -void CWriter::visitExtractElementInst(ExtractElementInst &I) { - // We know that our operand is not inlined. - Out << "(("; - Type *EltTy = - cast<VectorType>(I.getOperand(0)->getType())->getElementType(); - printType(Out, PointerType::getUnqual(EltTy)); - Out << ")(&" << GetValueName(I.getOperand(0)) << "))["; - writeOperand(I.getOperand(1)); - Out << "]"; -} - -void CWriter::visitShuffleVectorInst(ShuffleVectorInst &SVI) { - Out << "("; - printType(Out, SVI.getType()); - Out << "){ "; - VectorType *VT = SVI.getType(); - unsigned NumElts = VT->getNumElements(); - Type *EltTy = VT->getElementType(); - - for (unsigned i = 0; i != NumElts; ++i) { - if (i) Out << ", "; - int SrcVal = SVI.getMaskValue(i); - if ((unsigned)SrcVal >= NumElts*2) { - Out << " 0/*undef*/ "; - } else { - Value *Op = SVI.getOperand((unsigned)SrcVal >= NumElts); - if (isa<Instruction>(Op)) { - // Do an extractelement of this value from the appropriate input. 
- Out << "(("; - printType(Out, PointerType::getUnqual(EltTy)); - Out << ")(&" << GetValueName(Op) - << "))[" << (SrcVal & (NumElts-1)) << "]"; - } else if (isa<ConstantAggregateZero>(Op) || isa<UndefValue>(Op)) { - Out << "0"; - } else { - printConstant(cast<ConstantVector>(Op)->getOperand(SrcVal & - (NumElts-1)), - false); - } - } - } - Out << "}"; -} - -void CWriter::visitInsertValueInst(InsertValueInst &IVI) { - // Start by copying the entire aggregate value into the result variable. - writeOperand(IVI.getOperand(0)); - Out << ";\n "; - - // Then do the insert to update the field. - Out << GetValueName(&IVI); - for (const unsigned *b = IVI.idx_begin(), *i = b, *e = IVI.idx_end(); - i != e; ++i) { - Type *IndexedTy = - ExtractValueInst::getIndexedType(IVI.getOperand(0)->getType(), - makeArrayRef(b, i+1)); - if (IndexedTy->isArrayTy()) - Out << ".array[" << *i << "]"; - else - Out << ".field" << *i; - } - Out << " = "; - writeOperand(IVI.getOperand(1)); -} - -void CWriter::visitExtractValueInst(ExtractValueInst &EVI) { - Out << "("; - if (isa<UndefValue>(EVI.getOperand(0))) { - Out << "("; - printType(Out, EVI.getType()); - Out << ") 0/*UNDEF*/"; - } else { - Out << GetValueName(EVI.getOperand(0)); - for (const unsigned *b = EVI.idx_begin(), *i = b, *e = EVI.idx_end(); - i != e; ++i) { - Type *IndexedTy = - ExtractValueInst::getIndexedType(EVI.getOperand(0)->getType(), - makeArrayRef(b, i+1)); - if (IndexedTy->isArrayTy()) - Out << ".array[" << *i << "]"; - else - Out << ".field" << *i; - } - } - Out << ")"; -} - -//===----------------------------------------------------------------------===// -// External Interface declaration -//===----------------------------------------------------------------------===// - -bool CTargetMachine::addPassesToEmitFile(PassManagerBase &PM, - formatted_raw_ostream &o, - CodeGenFileType FileType, - bool DisableVerify) { - if (FileType != TargetMachine::CGFT_AssemblyFile) return true; - - PM.add(createGCLoweringPass()); - 
PM.add(createLowerInvokePass()); - PM.add(createCFGSimplificationPass()); // clean up after lower invoke. - PM.add(new CWriter(o)); - PM.add(createGCInfoDeleter()); - return false; -} diff --git a/lib/Target/CBackend/CMakeLists.txt b/lib/Target/CBackend/CMakeLists.txt deleted file mode 100644 index fa819a4..0000000 --- a/lib/Target/CBackend/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -add_llvm_target(CBackendCodeGen - CBackend.cpp - ) - -add_subdirectory(TargetInfo) diff --git a/lib/Target/CBackend/CTargetMachine.h b/lib/Target/CBackend/CTargetMachine.h deleted file mode 100644 index 8b2286e..0000000 --- a/lib/Target/CBackend/CTargetMachine.h +++ /dev/null @@ -1,42 +0,0 @@ -//===-- CTargetMachine.h - TargetMachine for the C backend ------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the TargetMachine that is used by the C backend. 
-// -//===----------------------------------------------------------------------===// - -#ifndef CTARGETMACHINE_H -#define CTARGETMACHINE_H - -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetData.h" - -namespace llvm { - -struct CTargetMachine : public TargetMachine { - CTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) - : TargetMachine(T, TT, CPU, FS, Options) { } - - virtual bool addPassesToEmitFile(PassManagerBase &PM, - formatted_raw_ostream &Out, - CodeGenFileType FileType, - bool DisableVerify); - - virtual const TargetData *getTargetData() const { return 0; } -}; - -extern Target TheCBackendTarget; - -} // End llvm namespace - - -#endif diff --git a/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp b/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp deleted file mode 100644 index e8274ff..0000000 --- a/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp +++ /dev/null @@ -1,21 +0,0 @@ -//===-- CBackendTargetInfo.cpp - CBackend Target Implementation -----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#include "CTargetMachine.h" -#include "llvm/Module.h" -#include "llvm/Support/TargetRegistry.h" -using namespace llvm; - -Target llvm::TheCBackendTarget; - -extern "C" void LLVMInitializeCBackendTargetInfo() { - RegisterTarget<> X(TheCBackendTarget, "c", "C backend"); -} - -extern "C" void LLVMInitializeCBackendTargetMC() {} diff --git a/lib/Target/CBackend/TargetInfo/CMakeLists.txt b/lib/Target/CBackend/TargetInfo/CMakeLists.txt deleted file mode 100644 index 6203616..0000000 --- a/lib/Target/CBackend/TargetInfo/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMCBackendInfo - CBackendTargetInfo.cpp - ) diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt index d8bc743..5913a9c 100644 --- a/lib/Target/CMakeLists.txt +++ b/lib/Target/CMakeLists.txt @@ -9,6 +9,7 @@ add_llvm_library(LLVMTarget TargetLibraryInfo.cpp TargetLoweringObjectFile.cpp TargetMachine.cpp + TargetMachineC.cpp TargetRegisterInfo.cpp TargetSubtargetInfo.cpp ) diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h index a3717b0..d26449e 100644 --- a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h +++ b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h @@ -15,9 +15,7 @@ #define SPUMCTARGETDESC_H namespace llvm { -class MCSubtargetInfo; class Target; -class StringRef; extern Target TheCellSPUTarget; diff --git a/lib/Target/CellSPU/SPUCallingConv.td b/lib/Target/CellSPU/SPUCallingConv.td index 9f9692b..9bc6be7 100644 --- a/lib/Target/CellSPU/SPUCallingConv.td +++ b/lib/Target/CellSPU/SPUCallingConv.td @@ -11,10 +11,6 @@ // //===----------------------------------------------------------------------===// -/// CCIfSubtarget - Match if the current subtarget has a feature F. 
-class CCIfSubtarget<string F, CCAction A> - : CCIf<!strconcat("State.getTarget().getSubtarget<PPCSubtarget>().", F), A>; - //===----------------------------------------------------------------------===// // Return Value Calling Convention //===----------------------------------------------------------------------===// diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index 55b3f72..0623741 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -3158,7 +3158,6 @@ SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, //! Compute used/known bits for a SPU operand void SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, @@ -3224,7 +3223,7 @@ bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, return (V > -(1 << 18) && V < (1 << 18) - 1); } -bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const { +bool SPUTargetLowering::isLegalAddressImmediate(GlobalValue* GV) const { return false; } diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h index 25c5355..e3db7b2 100644 --- a/lib/Target/CellSPU/SPUISelLowering.h +++ b/lib/Target/CellSPU/SPUISelLowering.h @@ -121,7 +121,6 @@ namespace llvm { virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; virtual void computeMaskedBitsForTargetNode(const SDValue Op, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, diff --git a/lib/Target/CellSPU/SPUSubtarget.cpp b/lib/Target/CellSPU/SPUSubtarget.cpp index ebfefe2..eec2d25 100644 --- a/lib/Target/CellSPU/SPUSubtarget.cpp +++ b/lib/Target/CellSPU/SPUSubtarget.cpp @@ -15,7 +15,6 @@ #include "SPU.h" #include "SPURegisterInfo.h" #include "llvm/Support/TargetRegistry.h" -#include "llvm/ADT/SmallVector.h" #define GET_SUBTARGETINFO_TARGET_DESC #define 
GET_SUBTARGETINFO_CTOR diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 107c6cc..69f0ff8 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -33,8 +33,9 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Config/config.h" #include <algorithm> -#include <set> +#include <cstdio> #include <map> +#include <set> using namespace llvm; static cl::opt<std::string> @@ -195,6 +196,18 @@ void CppWriter::error(const std::string& msg) { report_fatal_error(msg); } +static inline std::string ftostr(const APFloat& V) { + std::string Buf; + if (&V.getSemantics() == &APFloat::IEEEdouble) { + raw_string_ostream(Buf) << V.convertToDouble(); + return Buf; + } else if (&V.getSemantics() == &APFloat::IEEEsingle) { + raw_string_ostream(Buf) << (double)V.convertToFloat(); + return Buf; + } + return "<unknown format in ftostr>"; // error +} + // printCFP - Print a floating point constant .. very carefully :) // This makes sure that conversion to/from floating yields the same binary // result so that we don't lose precision. 
diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt index 6c5da72..af9e813 100644 --- a/lib/Target/Hexagon/CMakeLists.txt +++ b/lib/Target/Hexagon/CMakeLists.txt @@ -16,6 +16,7 @@ add_llvm_target(HexagonCodeGen HexagonExpandPredSpillCode.cpp HexagonFrameLowering.cpp HexagonHardwareLoops.cpp + HexagonMCInstLower.cpp HexagonInstrInfo.cpp HexagonISelDAGToDAG.cpp HexagonISelLowering.cpp @@ -27,8 +28,9 @@ add_llvm_target(HexagonCodeGen HexagonSubtarget.cpp HexagonTargetMachine.cpp HexagonTargetObjectFile.cpp - ) +) add_subdirectory(TargetInfo) +add_subdirectory(InstPrinter) add_subdirectory(MCTargetDesc) diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h index 270c7a7..0808323 100644 --- a/lib/Target/Hexagon/Hexagon.h +++ b/lib/Target/Hexagon/Hexagon.h @@ -17,10 +17,14 @@ #include "MCTargetDesc/HexagonMCTargetDesc.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" namespace llvm { class FunctionPass; class TargetMachine; + class MachineInstr; + class MCInst; + class HexagonAsmPrinter; class HexagonTargetMachine; class raw_ostream; @@ -30,13 +34,23 @@ namespace llvm { FunctionPass *createHexagonRemoveExtendOps(HexagonTargetMachine &TM); FunctionPass *createHexagonCFGOptimizer(HexagonTargetMachine &TM); - FunctionPass* createHexagonSplitTFRCondSets(HexagonTargetMachine &TM); - FunctionPass* createHexagonExpandPredSpillCode(HexagonTargetMachine &TM); + FunctionPass *createHexagonSplitTFRCondSets(HexagonTargetMachine &TM); + FunctionPass *createHexagonExpandPredSpillCode(HexagonTargetMachine &TM); FunctionPass *createHexagonHardwareLoops(); FunctionPass *createHexagonPeephole(); FunctionPass *createHexagonFixupHwLoops(); +/* TODO: object output. + MCCodeEmitter *createHexagonMCCodeEmitter(const Target &, + TargetMachine &TM, + MCContext &Ctx); +*/ +/* TODO: assembler input. 
+ TargetAsmBackend *createHexagonAsmBackend(const Target &, const std::string &); +*/ + void HexagonLowerToMC(const MachineInstr *MI, MCInst &MCI, + HexagonAsmPrinter &AP); } // end namespace llvm; #define Hexagon_POINTER_SIZE 4 @@ -50,4 +64,10 @@ namespace llvm { // a new stack frame. This takes 8 bytes. #define HEXAGON_LRFP_SIZE 8 +// Normal instruction size (in bytes). +#define HEXAGON_INSTR_SIZE 4 + +// Maximum number of words in a packet (in instructions). +#define HEXAGON_PACKET_SIZE 4 + #endif diff --git a/lib/Target/Hexagon/Hexagon.td b/lib/Target/Hexagon/Hexagon.td index ab5093d..4a50d16 100644 --- a/lib/Target/Hexagon/Hexagon.td +++ b/lib/Target/Hexagon/Hexagon.td @@ -39,10 +39,7 @@ include "HexagonInstrInfo.td" include "HexagonIntrinsics.td" include "HexagonIntrinsicsDerived.td" - -def HexagonInstrInfo : InstrInfo { - // Define how we want to layout our target-specific information field. -} +def HexagonInstrInfo : InstrInfo; //===----------------------------------------------------------------------===// // Hexagon processors supported. @@ -56,6 +53,13 @@ def : Proc<"hexagonv2", HexagonItineraries, [ArchV2]>; def : Proc<"hexagonv3", HexagonItineraries, [ArchV2, ArchV3]>; def : Proc<"hexagonv4", HexagonItinerariesV4, [ArchV2, ArchV3, ArchV4]>; +// Hexagon Uses the MC printer for assembler output, so make sure the TableGen +// AsmWriter bits get associated with the correct class. 
+def HexagonAsmWriter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + bit isMCAsmWriter = 1; +} + //===----------------------------------------------------------------------===// // Declare the target which we are implementing //===----------------------------------------------------------------------===// @@ -63,4 +67,6 @@ def : Proc<"hexagonv4", HexagonItinerariesV4, [ArchV2, ArchV3, ArchV4]>; def Hexagon : Target { // Pull in Instruction Info: let InstructionSet = HexagonInstrInfo; + + let AssemblyWriters = [HexagonAsmWriter]; } diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp index bf333b7..39bf45d 100644 --- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -16,25 +16,33 @@ #define DEBUG_TYPE "asm-printer" #include "Hexagon.h" +#include "HexagonAsmPrinter.h" +#include "HexagonMachineFunctionInfo.h" #include "HexagonTargetMachine.h" #include "HexagonSubtarget.h" -#include "HexagonMachineFunctionInfo.h" +#include "InstPrinter/HexagonInstPrinter.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/Mangler.h" @@ -43,8 +51,8 @@ 
#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" using namespace llvm; @@ -53,163 +61,9 @@ static cl::opt<bool> AlignCalls( "hexagon-align-calls", cl::Hidden, cl::init(true), cl::desc("Insert falign after call instruction for Hexagon target")); - -namespace { - class HexagonAsmPrinter : public AsmPrinter { - const HexagonSubtarget *Subtarget; - - public: - explicit HexagonAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer) { - Subtarget = &TM.getSubtarget<HexagonSubtarget>(); - } - - virtual const char *getPassName() const { - return "Hexagon Assembly Printer"; - } - - /// printInstruction - This method is automatically generated by tablegen - /// from the instruction set description. This method returns true if the - /// machine instruction was sufficiently described to print it, otherwise it - void printInstruction(const MachineInstr *MI, raw_ostream &O); - virtual void EmitInstruction(const MachineInstr *MI); - - void printOp(const MachineOperand &MO, raw_ostream &O); - - /// printRegister - Print register according to target requirements. 
- /// - void printRegister(const MachineOperand &MO, bool R0AsZero, - raw_ostream &O) { - unsigned RegNo = MO.getReg(); - assert(TargetRegisterInfo::isPhysicalRegister(RegNo) && "Not physreg??"); - O << getRegisterName(RegNo); - } - - void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &OS) { - const MachineOperand &MO = MI->getOperand(OpNo); - if (MO.isReg()) { - printRegister(MO, false, OS); - } else if (MO.isImm()) { - OS << MO.getImm(); - } else { - printOp(MO, OS); - } - } - - - bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const; - - bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &OS); - bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &OS); - - - void printHexagonImmOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) { - int value = MI->getOperand(OpNo).getImm(); - O << value; - } - - - void printHexagonNegImmOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) { - int value = MI->getOperand(OpNo).getImm(); - O << -value; - } - - void printHexagonNOneImmOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) const { - O << -1; - } - - void printHexagonMEMriOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) { - const MachineOperand &MO1 = MI->getOperand(OpNo); - const MachineOperand &MO2 = MI->getOperand(OpNo+1); - - O << getRegisterName(MO1.getReg()) - << " + #" - << (int) MO2.getImm(); - } - - - void printHexagonFrameIndexOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) { - const MachineOperand &MO1 = MI->getOperand(OpNo); - const MachineOperand &MO2 = MI->getOperand(OpNo+1); - - O << getRegisterName(MO1.getReg()) - << ", #" - << MO2.getImm(); - } - - void printBranchOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) { - // Branches can take an immediate operand. 
This is used by the branch - // selection pass to print $+8, an eight byte displacement from the PC. - if (MI->getOperand(OpNo).isImm()) { - O << "$+" << MI->getOperand(OpNo).getImm()*4; - } else { - printOp(MI->getOperand(OpNo), O); - } - } - - void printCallOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) { - } - - void printAbsAddrOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) { - } - - - void printSymbolHi(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - O << "#HI("; - if (MI->getOperand(OpNo).isImm()) { - printHexagonImmOperand(MI, OpNo, O); - } else { - printOp(MI->getOperand(OpNo), O); - } - O << ")"; - } - - void printSymbolLo(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - O << "#HI("; - if (MI->getOperand(OpNo).isImm()) { - printHexagonImmOperand(MI, OpNo, O); - } else { - printOp(MI->getOperand(OpNo), O); - } - O << ")"; - } - - void printPredicateOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O); - - void printAddrModeBasePlusOffset(const MachineInstr *MI, int OpNo, - raw_ostream &O); - - void printGlobalOperand(const MachineInstr *MI, int OpNo, raw_ostream &O); - void printJumpTable(const MachineInstr *MI, int OpNo, raw_ostream &O); - - void EmitAlignment(unsigned NumBits, const GlobalValue *GV = 0) const; - - static const char *getRegisterName(unsigned RegNo); - }; - -} // end of anonymous namespace - -// Include the auto-generated portion of the assembly writer. -#include "HexagonGenAsmWriter.inc" - - void HexagonAsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV) const { - - // For basic block level alignment, use falign. + // For basic block level alignment, use ".falign". 
if (!GV) { OutStreamer.EmitRawText(StringRef("\t.falign")); return; @@ -218,12 +72,19 @@ void HexagonAsmPrinter::EmitAlignment(unsigned NumBits, AsmPrinter::EmitAlignment(NumBits, GV); } -void HexagonAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) { +void HexagonAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + const MachineOperand &MO = MI->getOperand(OpNo); + switch (MO.getType()) { + default: + assert(0 && "<unknown operand type>"); + case MachineOperand::MO_Register: + O << HexagonInstPrinter::getRegisterName(MO.getReg()); + return; case MachineOperand::MO_Immediate: - dbgs() << "printOp() does not handle immediate values\n"; - abort(); - + O << MO.getImm(); + return; case MachineOperand::MO_MachineBasicBlock: O << *MO.getMBB()->getSymbol(); return; @@ -237,20 +98,14 @@ void HexagonAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) { case MachineOperand::MO_ExternalSymbol: O << *GetExternalSymbolSymbol(MO.getSymbolName()); return; - case MachineOperand::MO_GlobalAddress: { + case MachineOperand::MO_GlobalAddress: // Computing the address of a global symbol, not calling it. O << *Mang->getSymbol(MO.getGlobal()); printOffset(MO.getOffset(), O); return; } - - default: - O << "<unknown operand type: " << MO.getType() << ">"; - return; - } } - // // isBlockOnlyReachableByFallthrough - We need to override this since the // default AsmPrinter does not print labels for any basic block that @@ -273,7 +128,7 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { bool HexagonAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, - raw_ostream &OS) { + raw_ostream &OS) { // Does this asm operand have a single letter operand modifier? if (ExtraCode && ExtraCode[0]) { if (ExtraCode[1] != 0) return true; // Unknown modifier. 
@@ -341,154 +196,11 @@ void HexagonAsmPrinter::printPredicateOperand(const MachineInstr *MI, /// the current output stream. /// void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) { - SmallString<128> Str; - raw_svector_ostream O(Str); - - const MachineFunction* MF = MI->getParent()->getParent(); - const HexagonMachineFunctionInfo* MFI = - (const HexagonMachineFunctionInfo*) - MF->getInfo<HexagonMachineFunctionInfo>(); - + MCInst MCI; + HexagonLowerToMC(MI, MCI, *this); + OutStreamer.EmitInstruction(MCI); - // Print a brace for the beginning of the packet. - if (MFI->isStartPacket(MI)) { - O << "\t{" << '\n'; - } - - DEBUG( O << "// MI = " << *MI << '\n';); - - // Indent - O << "\t"; - - - if (MI->getOpcode() == Hexagon::ENDLOOP0) { - if (MFI->isEndPacket(MI) && MFI->isStartPacket(MI)) { - O << "\t{ nop }"; - } else { - O << "}"; - } - printInstruction(MI, O); - } else if (MI->getOpcode() == Hexagon::MPYI_rin) { - // Handle multipy with -ve constant on Hexagon: - // "$dst =- mpyi($src1, #$src2)" - printOperand(MI, 0, O); - O << " =- mpyi("; - printOperand(MI, 1, O); - O << ", #"; - printHexagonNegImmOperand(MI, 2, O); - O << ")"; - } else if (MI->getOpcode() == Hexagon::MEMw_ADDSUBi_indexed_MEM_V4) { - // - // Handle memw(Rs+u6:2) [+-]= #U5 - // - O << "\tmemw("; printHexagonMEMriOperand(MI, 0, O); O << ") "; - int addend = MI->getOperand(2).getImm(); - if (addend < 0) - O << "-= " << "#" << -addend << '\n'; - else - O << "+= " << "#" << addend << '\n'; - } else if (MI->getOpcode() == Hexagon::MEMw_ADDSUBi_MEM_V4) { - // - // Handle memw(Rs+u6:2) [+-]= #U5 - // - O << "\tmemw("; printHexagonMEMriOperand(MI, 0, O); O << ") "; - int addend = MI->getOperand(2).getImm(); - if (addend < 0) - O << "-= " << "#" << -addend << '\n'; - else - O << "+= " << "#" << addend << '\n'; - } else if (MI->getOpcode() == Hexagon::MEMh_ADDSUBi_indexed_MEM_V4) { - // - // Handle memh(Rs+u6:1) [+-]= #U5 - // - O << "\tmemh("; printHexagonMEMriOperand(MI, 0, O); O << ") "; - 
int addend = MI->getOperand(2).getImm(); - if (addend < 0) - O << "-= " << "#" << -addend << '\n'; - else - O << "+= " << "#" << addend << '\n'; - } else if (MI->getOpcode() == Hexagon::MEMh_ADDSUBi_MEM_V4) { - // - // Handle memh(Rs+u6:1) [+-]= #U5 - // - O << "\tmemh("; printHexagonMEMriOperand(MI, 0, O); O << ") "; - int addend = MI->getOperand(2).getImm(); - if (addend < 0) - O << "-= " << "#" << -addend << '\n'; - else - O << "+= " << "#" << addend << '\n'; - } else if (MI->getOpcode() == Hexagon::MEMb_ADDSUBi_indexed_MEM_V4) { - // - // Handle memb(Rs+u6:1) [+-]= #U5 - // - O << "\tmemb("; printHexagonMEMriOperand(MI, 0, O); O << ") "; - int addend = MI->getOperand(2).getImm(); - if (addend < 0) - O << "-= " << "#" << -addend << '\n'; - else - O << "+= " << "#" << addend << '\n'; - } else if (MI->getOpcode() == Hexagon::MEMb_ADDSUBi_MEM_V4) { - // - // Handle memb(Rs+u6:1) [+-]= #U5 - // - O << "\tmemb("; printHexagonMEMriOperand(MI, 0, O); O << ") "; - int addend = MI->getOperand(2).getImm(); - if (addend < 0) - O << "-= " << "#" << -addend << '\n'; - else - O << "+= " << "#" << addend << '\n'; - } else if (MI->getOpcode() == Hexagon::CMPbGTri_V4) { - // - // Handle Pd=cmpb.gt(Rs,#s8) - // - O << "\t"; - printRegister(MI->getOperand(0), false, O); - O << " = cmpb.gt("; - printRegister(MI->getOperand(1), false, O); - O << ", "; - int val = MI->getOperand(2).getImm() >> 24; - O << "#" << val << ")" << '\n'; - } else if (MI->getOpcode() == Hexagon::CMPhEQri_V4) { - // - // Handle Pd=cmph.eq(Rs,#8) - // - O << "\t"; - printRegister(MI->getOperand(0), false, O); - O << " = cmph.eq("; - printRegister(MI->getOperand(1), false, O); - O << ", "; - int val = MI->getOperand(2).getImm(); - assert((((0 <= val) && (val <= 127)) || - ((65408 <= val) && (val <= 65535))) && - "Not in correct range!"); - if (val >= 65408) val -= 65536; - O << "#" << val << ")" << '\n'; - } else if (MI->getOpcode() == Hexagon::CMPhGTri_V4) { - // - // Handle Pd=cmph.gt(Rs,#8) - // - O << "\t"; 
- printRegister(MI->getOperand(0), false, O); - O << " = cmph.gt("; - printRegister(MI->getOperand(1), false, O); - O << ", "; - int val = MI->getOperand(2).getImm() >> 16; - O << "#" << val << ")" << '\n'; - } else { - printInstruction(MI, O); - } - - // Print a brace for the end of the packet. - if (MFI->isEndPacket(MI) && MI->getOpcode() != Hexagon::ENDLOOP0) { - O << "\n\t}" << '\n'; - } - - if (AlignCalls && MI->getDesc().isCall()) { - O << "\n\t.falign" << "\n"; - } - - OutStreamer.EmitRawText(O.str()); return; } @@ -507,7 +219,7 @@ void HexagonAsmPrinter::printAddrModeBasePlusOffset(const MachineInstr *MI, const MachineOperand &MO1 = MI->getOperand(OpNo); const MachineOperand &MO2 = MI->getOperand(OpNo+1); - O << getRegisterName(MO1.getReg()) + O << HexagonInstPrinter::getRegisterName(MO1.getReg()) << " + #" << MO2.getImm(); } @@ -536,6 +248,31 @@ void HexagonAsmPrinter::printJumpTable(const MachineInstr *MI, int OpNo, O << *GetJTISymbol(MO.getIndex()); } +void HexagonAsmPrinter::printConstantPool(const MachineInstr *MI, int OpNo, + raw_ostream &O) { + const MachineOperand &MO = MI->getOperand(OpNo); + assert( (MO.getType() == MachineOperand::MO_ConstantPoolIndex) && + "Expecting constant pool index"); + + // Hexagon_TODO: Do we need name mangling? 
+ O << *GetCPISymbol(MO.getIndex()); +} + +static MCInstPrinter *createHexagonMCInstPrinter(const Target &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI) { + if (SyntaxVariant == 0) + return(new HexagonInstPrinter(MAI, MII, MRI)); + else + return NULL; +} + extern "C" void LLVMInitializeHexagonAsmPrinter() { RegisterAsmPrinter<HexagonAsmPrinter> X(TheHexagonTarget); + + TargetRegistry::RegisterMCInstPrinter(TheHexagonTarget, + createHexagonMCInstPrinter); } diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.h b/lib/Target/Hexagon/HexagonAsmPrinter.h new file mode 100755 index 0000000..bc2af63 --- /dev/null +++ b/lib/Target/Hexagon/HexagonAsmPrinter.h @@ -0,0 +1,165 @@ +//===-- HexagonAsmPrinter.h - Print machine code to an Hexagon .s file ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Hexagon Assembly printer class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef HEXAGONASMPRINTER_H +#define HEXAGONASMPRINTER_H + +#include "Hexagon.h" +#include "HexagonTargetMachine.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + class HexagonAsmPrinter : public AsmPrinter { + const HexagonSubtarget *Subtarget; + + public: + explicit HexagonAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) + : AsmPrinter(TM, Streamer) { + Subtarget = &TM.getSubtarget<HexagonSubtarget>(); + } + + virtual const char *getPassName() const { + return "Hexagon Assembly Printer"; + } + + bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const; + + virtual void EmitInstruction(const MachineInstr *MI); + virtual void EmitAlignment(unsigned NumBits, + const GlobalValue *GV = 0) const; + + void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O); + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &OS); + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &OS); + + /// printInstruction - This method is automatically generated by tablegen + /// from the instruction set description. This method returns true if the + /// machine instruction was sufficiently described to print it, otherwise it + /// returns false. + void printInstruction(const MachineInstr *MI, raw_ostream &O); + + // void printMachineInstruction(const MachineInstr *MI); + void printOp(const MachineOperand &MO, raw_ostream &O); + + /// printRegister - Print register according to target requirements. 
+ /// + void printRegister(const MachineOperand &MO, bool R0AsZero, + raw_ostream &O) { + unsigned RegNo = MO.getReg(); + assert(TargetRegisterInfo::isPhysicalRegister(RegNo) && "Not physreg??"); + O << getRegisterName(RegNo); + } + + void printImmOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + int value = MI->getOperand(OpNo).getImm(); + O << value; + } + + void printNegImmOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + int value = MI->getOperand(OpNo).getImm(); + O << -value; + } + + void printMEMriOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + const MachineOperand &MO1 = MI->getOperand(OpNo); + const MachineOperand &MO2 = MI->getOperand(OpNo+1); + + O << getRegisterName(MO1.getReg()) + << " + #" + << (int) MO2.getImm(); + } + + void printFrameIndexOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + const MachineOperand &MO1 = MI->getOperand(OpNo); + const MachineOperand &MO2 = MI->getOperand(OpNo+1); + + O << getRegisterName(MO1.getReg()) + << ", #" + << MO2.getImm(); + } + + void printBranchOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + // Branches can take an immediate operand. This is used by the branch + // selection pass to print $+8, an eight byte displacement from the PC. 
+ if (MI->getOperand(OpNo).isImm()) { + O << "$+" << MI->getOperand(OpNo).getImm()*4; + } else { + printOp(MI->getOperand(OpNo), O); + } + } + + void printCallOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + } + + void printAbsAddrOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + } + + void printSymbolHi(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { + O << "#HI("; + if (MI->getOperand(OpNo).isImm()) { + printImmOperand(MI, OpNo, O); + } + else { + printOp(MI->getOperand(OpNo), O); + } + O << ")"; + } + + void printSymbolLo(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { + O << "#HI("; + if (MI->getOperand(OpNo).isImm()) { + printImmOperand(MI, OpNo, O); + } + else { + printOp(MI->getOperand(OpNo), O); + } + O << ")"; + } + + void printPredicateOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O); + +#if 0 + void printModuleLevelGV(const GlobalVariable* GVar, raw_ostream &O); +#endif + + void printAddrModeBasePlusOffset(const MachineInstr *MI, int OpNo, + raw_ostream &O); + + void printGlobalOperand(const MachineInstr *MI, int OpNo, raw_ostream &O); + void printJumpTable(const MachineInstr *MI, int OpNo, raw_ostream &O); + void printConstantPool(const MachineInstr *MI, int OpNo, raw_ostream &O); + + static const char *getRegisterName(unsigned RegNo); + +#if 0 + void EmitStartOfAsmFile(Module &M); +#endif + }; + +} // end of llvm namespace + +#endif diff --git a/lib/Target/Hexagon/HexagonImmediates.td b/lib/Target/Hexagon/HexagonImmediates.td index 18589a2..e78bb79 100644 --- a/lib/Target/Hexagon/HexagonImmediates.td +++ b/lib/Target/Hexagon/HexagonImmediates.td @@ -10,211 +10,211 @@ // From IA64's InstrInfo file def s32Imm : Operand<i32> { // For now, we use a generic print function for all operands. 
- let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def s16Imm : Operand<i32> { - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def s12Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def s11Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def s11_0Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def s11_1Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def s11_2Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def s11_3Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def s10Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def s9Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def s8Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def s8Imm64 : Operand<i64> { // For now, we use a generic print function for all operands. 
- let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def s6Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def s4Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def s4_0Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def s4_1Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def s4_2Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def s4_3Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u64Imm : Operand<i64> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u32Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u16Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u16_0Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u16_1Imm : Operand<i32> { // For now, we use a generic print function for all operands. 
- let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u16_2Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u11_3Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u10Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u9Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u8Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u7Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u6Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u6_0Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u6_1Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u6_2Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u6_3Imm : Operand<i32> { // For now, we use a generic print function for all operands. 
- let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u5Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u4Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u3Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u2Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u1Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def n8Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def m6Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def nOneImm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonNOneImmOperand"; + let PrintMethod = "printNOneImmOperand"; } // @@ -494,7 +494,7 @@ def m6ImmPred : PatLeaf<(i32 imm), [{ //InN means negative integers in [-(2^N - 1), 0] def n8ImmPred : PatLeaf<(i32 imm), [{ - // n8ImmPred predicate - True if the immediate fits in a 8-bit unsigned + // n8ImmPred predicate - True if the immediate fits in a 8-bit signed // field. 
int64_t v = (int64_t)N->getSExtValue(); return (-255 <= v && v <= 0); diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index 3d7ace5..77b3663 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// +#include "Hexagon.h" #include "HexagonInstrInfo.h" #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" -#include "Hexagon.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/DFAPacketizer.h" diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td index f3c6622..b563ac3 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.td +++ b/lib/Target/Hexagon/HexagonInstrInfo.td @@ -40,24 +40,24 @@ def ADDRriU6_2 : ComplexPattern<i32, 2, "SelectADDRriU6_2", [frameindex], []>; // Address operands. def MEMrr : Operand<i32> { - let PrintMethod = "printHexagonMEMrrOperand"; + let PrintMethod = "printMEMrrOperand"; let MIOperandInfo = (ops IntRegs, IntRegs); } // Address operands def MEMri : Operand<i32> { - let PrintMethod = "printHexagonMEMriOperand"; + let PrintMethod = "printMEMriOperand"; let MIOperandInfo = (ops IntRegs, IntRegs); } def MEMri_s11_2 : Operand<i32>, ComplexPattern<i32, 2, "SelectMEMriS11_2", []> { - let PrintMethod = "printHexagonMEMriOperand"; + let PrintMethod = "printMEMriOperand"; let MIOperandInfo = (ops IntRegs, s11Imm); } def FrameIndex : Operand<i32> { - let PrintMethod = "printHexagonFrameIndexOperand"; + let PrintMethod = "printFrameIndexOperand"; let MIOperandInfo = (ops IntRegs, s11Imm); } diff --git a/lib/Target/Hexagon/HexagonMCInstLower.cpp b/lib/Target/Hexagon/HexagonMCInstLower.cpp new file mode 100644 index 0000000..fbb331b --- /dev/null +++ b/lib/Target/Hexagon/HexagonMCInstLower.cpp @@ -0,0 +1,93 @@ +//===- HexagonMCInstLower.cpp - Convert Hexagon MachineInstr to an MCInst 
-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains code to lower Hexagon MachineInstrs to their corresponding +// MCInst records. +// +//===----------------------------------------------------------------------===// + +#include "Hexagon.h" +#include "HexagonAsmPrinter.h" +#include "HexagonMachineFunctionInfo.h" +#include "llvm/Constants.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Target/Mangler.h" + +using namespace llvm; + +static MCOperand GetSymbolRef(const MachineOperand& MO, const MCSymbol* Symbol, + HexagonAsmPrinter& Printer) { + MCContext &MC = Printer.OutContext; + const MCExpr *ME; + + ME = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, MC); + + if (!MO.isJTI() && MO.getOffset()) + ME = MCBinaryExpr::CreateAdd(ME, MCConstantExpr::Create(MO.getOffset(), MC), + MC); + + return (MCOperand::CreateExpr(ME)); +} + +// Create an MCInst from a MachineInstr +void llvm::HexagonLowerToMC(const MachineInstr* MI, MCInst& MCI, + HexagonAsmPrinter& AP) { + MCI.setOpcode(MI->getOpcode()); + + for (unsigned i = 0, e = MI->getNumOperands(); i < e; i++) { + const MachineOperand &MO = MI->getOperand(i); + MCOperand MCO; + + switch (MO.getType()) { + default: + MI->dump(); + assert(0 && "unknown operand type"); + case MachineOperand::MO_Register: + // Ignore all implicit register operands. + if (MO.isImplicit()) continue; + MCO = MCOperand::CreateReg(MO.getReg()); + break; + case MachineOperand::MO_FPImmediate: { + APFloat Val = MO.getFPImm()->getValueAPF(); + // FP immediates are used only when setting GPRs, so they may be dealt + // with like regular immediates from this point on. 
+ MCO = MCOperand::CreateImm(*Val.bitcastToAPInt().getRawData()); + break; + } + case MachineOperand::MO_Immediate: + MCO = MCOperand::CreateImm(MO.getImm()); + break; + case MachineOperand::MO_MachineBasicBlock: + MCO = MCOperand::CreateExpr + (MCSymbolRefExpr::Create(MO.getMBB()->getSymbol(), + AP.OutContext)); + break; + case MachineOperand::MO_GlobalAddress: + MCO = GetSymbolRef(MO, AP.Mang->getSymbol(MO.getGlobal()), AP); + break; + case MachineOperand::MO_ExternalSymbol: + MCO = GetSymbolRef(MO, AP.GetExternalSymbolSymbol(MO.getSymbolName()), + AP); + break; + case MachineOperand::MO_JumpTableIndex: + MCO = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP); + break; + case MachineOperand::MO_ConstantPoolIndex: + MCO = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP); + break; + case MachineOperand::MO_BlockAddress: + MCO = GetSymbolRef(MO, AP.GetBlockAddressSymbol(MO.getBlockAddress()),AP); + break; + } + + MCI.addOperand(MCO); + } +} diff --git a/lib/Target/Hexagon/InstPrinter/CMakeLists.txt b/lib/Target/Hexagon/InstPrinter/CMakeLists.txt new file mode 100644 index 0000000..cb106a8 --- /dev/null +++ b/lib/Target/Hexagon/InstPrinter/CMakeLists.txt @@ -0,0 +1,5 @@ +add_llvm_library(LLVMHexagonAsmPrinter + HexagonInstPrinter.cpp + ) + +add_dependencies(LLVMHexagonAsmPrinter HexagonCommonTableGen) diff --git a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp new file mode 100644 index 0000000..ef36881 --- /dev/null +++ b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp @@ -0,0 +1,170 @@ +//===- HexagonInstPrinter.cpp - Convert Hexagon MCInst to assembly syntax -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints an Hexagon MCInst to a .s file. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "Hexagon.h" +#include "HexagonAsmPrinter.h" +#include "HexagonInstPrinter.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/raw_ostream.h" +#include <cstdio> + +using namespace llvm; + +#define GET_INSTRUCTION_NAME +#include "HexagonGenAsmWriter.inc" + +StringRef HexagonInstPrinter::getOpcodeName(unsigned Opcode) const { + return MII.getName(Opcode); +} + +StringRef HexagonInstPrinter::getRegName(unsigned RegNo) const { + return getRegisterName(RegNo); +} + +void HexagonInstPrinter::printInst(const MCInst *MI, raw_ostream &O, + StringRef Annot) { + const char packetPadding[] = " "; + const char startPacket = '{', + endPacket = '}'; + // TODO: add outer HW loop when it's supported too. + if (MI->getOpcode() == Hexagon::ENDLOOP0) { + MCInst Nop; + + O << packetPadding << startPacket << '\n'; + Nop.setOpcode(Hexagon::NOP); + printInstruction(&Nop, O); + O << packetPadding << endPacket; + } + + printInstruction(MI, O); + printAnnotation(O, Annot); +} + +void HexagonInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const { + const MCOperand& MO = MI->getOperand(OpNo); + + if (MO.isReg()) { + O << getRegisterName(MO.getReg()); + } else if(MO.isExpr()) { + O << *MO.getExpr(); + } else if(MO.isImm()) { + printImmOperand(MI, OpNo, O); + } else { + assert(false && "Unknown operand"); + } +} + +void HexagonInstPrinter::printImmOperand + (const MCInst *MI, unsigned OpNo, raw_ostream &O) const { + O << MI->getOperand(OpNo).getImm(); +} + +void HexagonInstPrinter::printExtOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const { + O << MI->getOperand(OpNo).getImm(); +} + +void HexagonInstPrinter::printUnsignedImmOperand + (const MCInst *MI, unsigned OpNo, raw_ostream &O) const { + O << 
MI->getOperand(OpNo).getImm(); +} + +void HexagonInstPrinter::printNegImmOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const { + O << -MI->getOperand(OpNo).getImm(); +} + +void HexagonInstPrinter::printNOneImmOperand + (const MCInst *MI, unsigned OpNo, raw_ostream &O) const { + O << -1; +} + +void HexagonInstPrinter::printMEMriOperand + (const MCInst *MI, unsigned OpNo, raw_ostream &O) const { + const MCOperand& MO0 = MI->getOperand(OpNo); + const MCOperand& MO1 = MI->getOperand(OpNo + 1); + + O << getRegisterName(MO0.getReg()); + O << " + #" << MO1.getImm(); +} + +void HexagonInstPrinter::printFrameIndexOperand + (const MCInst *MI, unsigned OpNo, raw_ostream &O) const { + const MCOperand& MO0 = MI->getOperand(OpNo); + const MCOperand& MO1 = MI->getOperand(OpNo + 1); + + O << getRegisterName(MO0.getReg()) << ", #" << MO1.getImm(); +} + +void HexagonInstPrinter::printGlobalOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const { + const MCOperand& MO = MI->getOperand(OpNo); + assert(MO.isExpr() && "Expecting expression"); + + printOperand(MI, OpNo, O); +} + +void HexagonInstPrinter::printJumpTable(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const { + const MCOperand& MO = MI->getOperand(OpNo); + assert(MO.isExpr() && "Expecting expression"); + + printOperand(MI, OpNo, O); +} + +void HexagonInstPrinter::printConstantPool(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const { + const MCOperand& MO = MI->getOperand(OpNo); + assert(MO.isExpr() && "Expecting expression"); + + printOperand(MI, OpNo, O); +} + +void HexagonInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const { + // Branches can take an immediate operand. This is used by the branch + // selection pass to print $+8, an eight byte displacement from the PC. 
+  // A bare string literal is always true, so it has to be combined with
+  // false for the assertion to fire (same form as used in printOperand).
+  assert(false && "Unknown branch operand.");
+}
+
+// Currently emits no output for the operand.
+void HexagonInstPrinter::printCallOperand(const MCInst *MI, unsigned OpNo,
+                                          raw_ostream &O) const {
+}
+
+// Currently emits no output for the operand.
+void HexagonInstPrinter::printAbsAddrOperand(const MCInst *MI, unsigned OpNo,
+                                             raw_ostream &O) const {
+}
+
+// Currently emits no output for the operand.
+void HexagonInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
+                                               raw_ostream &O) const {
+}
+
+// Prints operand OpNo wrapped as "#HI(...)" when hi is true and "#LO(...)"
+// otherwise. An immediate operand additionally gets a '#' prefix inside the
+// parentheses.
+void HexagonInstPrinter::printSymbol(const MCInst *MI, unsigned OpNo,
+                                     raw_ostream &O, bool hi) const {
+  const MCOperand& MO = MI->getOperand(OpNo);
+
+  O << '#' << (hi? "HI": "LO") << '(';
+  if (MO.isImm()) {
+    O << '#';
+    printOperand(MI, OpNo, O);
+  } else {
+    // The previous assert tested a string literal and could never fire.
+    // Check the operand kind instead, as printGlobalOperand and friends do.
+    assert(MO.isExpr() && "Unknown symbol operand");
+    printOperand(MI, OpNo, O);
+  }
+  O << ')';
+}
diff --git a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h
new file mode 100644
index 0000000..dad4334
--- /dev/null
+++ b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h
@@ -0,0 +1,73 @@
+//===-- HexagonInstPrinter.h - Convert Hexagon MCInst to assembly syntax --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an Hexagon MCInst to a .s file.
+// +//===----------------------------------------------------------------------===// + +#ifndef HEXAGONINSTPRINTER_H +#define HEXAGONINSTPRINTER_H + +#include "llvm/MC/MCInstPrinter.h" + +namespace llvm { + class HexagonInstPrinter : public MCInstPrinter { + public: + explicit HexagonInstPrinter(const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} + + virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); + virtual StringRef getOpcodeName(unsigned Opcode) const; + void printInstruction(const MCInst *MI, raw_ostream &O); + StringRef getRegName(unsigned RegNo) const; + static const char *getRegisterName(unsigned RegNo); + + void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; + void printImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; + void printExtOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; + void printUnsignedImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) + const; + void printNegImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) + const; + void printNOneImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) + const; + void printMEMriOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) + const; + void printFrameIndexOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) + const; + void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) + const; + void printCallOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) + const; + void printAbsAddrOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) + const; + void printPredicateOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) + const; + void printGlobalOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) + const; + void printJumpTable(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; + + void printConstantPool(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; + + void printSymbolHi(const MCInst *MI, 
unsigned OpNo, raw_ostream &O) const + { printSymbol(MI, OpNo, O, true); } + void printSymbolLo(const MCInst *MI, unsigned OpNo, raw_ostream &O) const + { printSymbol(MI, OpNo, O, false); } + + bool isConstExtended(const MCInst *MI) const; + protected: + void printSymbol(const MCInst *MI, unsigned OpNo, raw_ostream &O, bool hi) + const; + }; + +} // end namespace llvm + +#endif diff --git a/lib/Target/CBackend/TargetInfo/LLVMBuild.txt b/lib/Target/Hexagon/InstPrinter/LLVMBuild.txt index 1b47d8e..8678401 100644 --- a/lib/Target/CBackend/TargetInfo/LLVMBuild.txt +++ b/lib/Target/Hexagon/InstPrinter/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./lib/Target/CBackend/TargetInfo/LLVMBuild.txt -----------*- Conf -*--===; +;===- ./lib/Target/Hexagon/InstPrinter/LLVMBuild.txt -----------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; @@ -17,7 +17,7 @@ [component_0] type = Library -name = CBackendInfo -parent = CBackend -required_libraries = MC Support Target -add_to_library_groups = CBackend +name = HexagonAsmPrinter +parent = Hexagon +required_libraries = MC Support +add_to_library_groups = Hexagon diff --git a/lib/Target/CBackend/TargetInfo/Makefile b/lib/Target/Hexagon/InstPrinter/Makefile index d4d5e15..20331d8 100644 --- a/lib/Target/CBackend/TargetInfo/Makefile +++ b/lib/Target/Hexagon/InstPrinter/Makefile @@ -1,4 +1,4 @@ -##===- lib/Target/CBackend/TargetInfo/Makefile -------------*- Makefile -*-===## +##===- lib/Target/Hexagon/InstPrinter/Makefile ----------------------------===## # # The LLVM Compiler Infrastructure # @@ -7,9 +7,9 @@ # ##===----------------------------------------------------------------------===## LEVEL = ../../../.. -LIBRARYNAME = LLVMCBackendInfo +LIBRARYNAME = LLVMHexagonAsmPrinter -# Hack: we need to include 'main' target directory to grab private headers +# Hack: we need to include 'main' Hexagon target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
include $(LEVEL)/Makefile.common diff --git a/lib/Target/Hexagon/LLVMBuild.txt b/lib/Target/Hexagon/LLVMBuild.txt index 84ea6a0..c6d419a 100644 --- a/lib/Target/Hexagon/LLVMBuild.txt +++ b/lib/Target/Hexagon/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = TargetInfo MCTargetDesc +subdirectories = InstPrinter MCTargetDesc TargetInfo [component_0] type = TargetGroup @@ -28,5 +28,5 @@ has_asmprinter = 1 type = Library name = HexagonCodeGen parent = Hexagon -required_libraries = AsmPrinter CodeGen Core HexagonInfo SelectionDAG Support Target MC HexagonDesc +required_libraries = AsmPrinter CodeGen Core HexagonAsmPrinter HexagonDesc HexagonInfo MC SelectionDAG Support Target add_to_library_groups = Hexagon diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h index b18d23a..2238b1a 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h @@ -17,7 +17,6 @@ namespace llvm { class MCSubtargetInfo; class Target; -class StringRef; extern Target TheHexagonTarget; diff --git a/lib/Target/Hexagon/Makefile b/lib/Target/Hexagon/Makefile index 34bc68d..dc387c5 100644 --- a/lib/Target/Hexagon/Makefile +++ b/lib/Target/Hexagon/Makefile @@ -16,9 +16,8 @@ BUILT_SOURCES = HexagonGenRegisterInfo.inc \ HexagonGenAsmWriter.inc \ HexagonGenDAGISel.inc HexagonGenSubtargetInfo.inc \ HexagonGenCallingConv.inc \ - HexagonGenDFAPacketizer.inc \ - HexagonAsmPrinter.cpp + HexagonGenDFAPacketizer.inc -DIRS = TargetInfo MCTargetDesc +DIRS = InstPrinter TargetInfo MCTargetDesc include $(LEVEL)/Makefile.common diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt index 5a42ca5..8ec5673 100644 --- a/lib/Target/LLVMBuild.txt +++ b/lib/Target/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] 
-subdirectories = ARM CBackend CellSPU CppBackend Hexagon MBlaze MSP430 Mips PTX PowerPC Sparc X86 XCore +subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 Mips PTX PowerPC Sparc X86 XCore ; This is a special group whose required libraries are extended (by llvm-build) ; with the best execution engine (the native JIT, if available, or the diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp index 7105b2e..59a1ed9 100644 --- a/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp +++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp @@ -9,9 +9,6 @@ #include "MCTargetDesc/MBlazeBaseInfo.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/SmallVector.h" - #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp index c1b003b..38fb0e8 100644 --- a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp +++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp @@ -18,9 +18,7 @@ #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" using namespace llvm; diff --git a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h index 236583a..51ba7c3 100644 --- a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h +++ b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h @@ -21,15 +21,15 @@ namespace llvm { class MBlazeInstPrinter : public MCInstPrinter { public: - MBlazeInstPrinter(const MCAsmInfo &MAI, const MCRegisterInfo &MRI) - : MCInstPrinter(MAI, MRI) {} + MBlazeInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} virtual void printInst(const MCInst 
*MI, raw_ostream &O, StringRef Annot); // Autogenerated by tblgen. void printInstruction(const MCInst *MI, raw_ostream &O); static const char *getRegisterName(unsigned RegNo); - static const char *getInstructionName(unsigned Opcode); void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O, const char *Modifier = 0); diff --git a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp index c751dd8..55fffe3 100644 --- a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp +++ b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp @@ -38,7 +38,6 @@ #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/ADT/SmallString.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Target/MBlaze/MBlazeCallingConv.td b/lib/Target/MBlaze/MBlazeCallingConv.td index 4962573..00a4219 100644 --- a/lib/Target/MBlaze/MBlazeCallingConv.td +++ b/lib/Target/MBlaze/MBlazeCallingConv.td @@ -9,10 +9,6 @@ // This describes the calling conventions for MBlaze architecture. //===----------------------------------------------------------------------===// -/// CCIfSubtarget - Match if the current subtarget has a feature F. 
-class CCIfSubtarget<string F, CCAction A>: - CCIf<!strconcat("State.getTarget().getSubtarget<MBlazeSubtarget>().", F), A>; - //===----------------------------------------------------------------------===// // MBlaze ABI Calling Convention //===----------------------------------------------------------------------===// diff --git a/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp b/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp index 60a65bb..e3c7236 100644 --- a/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp +++ b/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp @@ -100,8 +100,8 @@ unsigned MBlazeELFWriterInfo::getAbsoluteLabelMachineRelTy() const { long int MBlazeELFWriterInfo::computeRelocation(unsigned SymOffset, unsigned RelOffset, unsigned RelTy) const { - if (RelTy == ELF::R_MICROBLAZE_32_PCREL || ELF::R_MICROBLAZE_64_PCREL) - return SymOffset - (RelOffset + 4); - - llvm_unreachable("computeRelocation unknown for this relocation type"); + assert((RelTy == ELF::R_MICROBLAZE_32_PCREL || + RelTy == ELF::R_MICROBLAZE_64_PCREL) && + "computeRelocation unknown for this relocation type"); + return SymOffset - (RelOffset + 4); } diff --git a/lib/Target/MBlaze/MBlazeELFWriterInfo.h b/lib/Target/MBlaze/MBlazeELFWriterInfo.h index 63bfc0d..a314eb7 100644 --- a/lib/Target/MBlaze/MBlazeELFWriterInfo.h +++ b/lib/Target/MBlaze/MBlazeELFWriterInfo.h @@ -17,6 +17,7 @@ #include "llvm/Target/TargetELFWriterInfo.h" namespace llvm { + class TargetMachine; class MBlazeELFWriterInfo : public TargetELFWriterInfo { public: diff --git a/lib/Target/MBlaze/MBlazeFrameLowering.cpp b/lib/Target/MBlaze/MBlazeFrameLowering.cpp index 6531064..d2f14a5 100644 --- a/lib/Target/MBlaze/MBlazeFrameLowering.cpp +++ b/lib/Target/MBlaze/MBlazeFrameLowering.cpp @@ -211,13 +211,13 @@ static void analyzeFrameIndexes(MachineFunction &MF) { static void interruptFrameLayout(MachineFunction &MF) { const Function *F = MF.getFunction(); - llvm::CallingConv::ID CallConv = F->getCallingConv(); + CallingConv::ID CallConv = 
F->getCallingConv(); // If this function is not using either the interrupt_handler // calling convention or the save_volatiles calling convention // then we don't need to do any additional frame layout. - if (CallConv != llvm::CallingConv::MBLAZE_INTR && - CallConv != llvm::CallingConv::MBLAZE_SVOL) + if (CallConv != CallingConv::MBLAZE_INTR && + CallConv != CallingConv::MBLAZE_SVOL) return; MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -228,7 +228,7 @@ static void interruptFrameLayout(MachineFunction &MF) { // Determine if the calling convention is the interrupt_handler // calling convention. Some pieces of the prologue and epilogue // only need to be emitted if we are lowering and interrupt handler. - bool isIntr = CallConv == llvm::CallingConv::MBLAZE_INTR; + bool isIntr = CallConv == CallingConv::MBLAZE_INTR; // Determine where to put prologue and epilogue additions MachineBasicBlock &MENT = MF.front(); @@ -347,8 +347,8 @@ void MBlazeFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock::iterator MBBI = MBB.begin(); DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); - llvm::CallingConv::ID CallConv = MF.getFunction()->getCallingConv(); - bool requiresRA = CallConv == llvm::CallingConv::MBLAZE_INTR; + CallingConv::ID CallConv = MF.getFunction()->getCallingConv(); + bool requiresRA = CallConv == CallingConv::MBLAZE_INTR; // Determine the correct frame layout determineFrameLayout(MF); @@ -393,8 +393,8 @@ void MBlazeFrameLowering::emitEpilogue(MachineFunction &MF, DebugLoc dl = MBBI->getDebugLoc(); - llvm::CallingConv::ID CallConv = MF.getFunction()->getCallingConv(); - bool requiresRA = CallConv == llvm::CallingConv::MBLAZE_INTR; + CallingConv::ID CallConv = MF.getFunction()->getCallingConv(); + bool requiresRA = CallConv == CallingConv::MBLAZE_INTR; // Get the FI's where RA and FP are saved. 
int FPOffset = MBlazeFI->getFPStackOffset(); @@ -431,8 +431,8 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { MachineFrameInfo *MFI = MF.getFrameInfo(); MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>(); - llvm::CallingConv::ID CallConv = MF.getFunction()->getCallingConv(); - bool requiresRA = CallConv == llvm::CallingConv::MBLAZE_INTR; + CallingConv::ID CallConv = MF.getFunction()->getCallingConv(); + bool requiresRA = CallConv == CallingConv::MBLAZE_INTR; if (MFI->adjustsStack() || requiresRA) { MBlazeFI->setRAStackOffset(0); diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp index 9ef6bb6..edfc335 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.cpp +++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp @@ -1046,10 +1046,10 @@ LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, // If this function is using the interrupt_handler calling convention // then use "rtid r14, 0" otherwise use "rtsd r15, 8" - unsigned Ret = (CallConv == llvm::CallingConv::MBLAZE_INTR) ? MBlazeISD::IRet - : MBlazeISD::Ret; - unsigned Reg = (CallConv == llvm::CallingConv::MBLAZE_INTR) ? MBlaze::R14 - : MBlaze::R15; + unsigned Ret = (CallConv == CallingConv::MBLAZE_INTR) ? MBlazeISD::IRet + : MBlazeISD::Ret; + unsigned Reg = (CallConv == CallingConv::MBLAZE_INTR) ? 
MBlaze::R14 + : MBlaze::R15; SDValue DReg = DAG.getRegister(Reg, MVT::i32); if (Flag.getNode()) diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h index 36bf655..977f9a6 100644 --- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h +++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h @@ -14,7 +14,6 @@ #ifndef MBLAZETARGETASMINFO_H #define MBLAZETARGETASMINFO_H -#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmInfo.h" namespace llvm { diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp index 5da0aa7..9a7549b 100644 --- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp +++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp @@ -95,10 +95,11 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, static MCInstPrinter *createMBlazeMCInstPrinter(const Target &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, + const MCInstrInfo &MII, const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) { if (SyntaxVariant == 0) - return new MBlazeInstPrinter(MAI, MRI); + return new MBlazeInstPrinter(MAI, MII, MRI); return 0; } diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h index 088d163..ae82c32 100644 --- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h +++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h @@ -25,7 +25,6 @@ class MCObjectWriter; class MCSubtargetInfo; class Target; class StringRef; -class formatted_raw_ostream; class raw_ostream; extern Target TheMBlazeTarget; diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h index 3fd7ce0..d32eb3a 100644 --- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h +++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h @@ -21,8 +21,9 @@ namespace llvm { class MSP430InstPrinter : public MCInstPrinter { public: - 
MSP430InstPrinter(const MCAsmInfo &MAI, const MCRegisterInfo &MRI) - : MCInstPrinter(MAI, MRI) {} + MSP430InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp index 5e5f3d8..2e328cb 100644 --- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp +++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "MSP430MCAsmInfo.h" +#include "llvm/ADT/StringRef.h" using namespace llvm; void MSP430MCAsmInfo::anchor() { } diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h index 690fc19..e5c2fc2 100644 --- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h +++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h @@ -14,10 +14,10 @@ #ifndef MSP430TARGETASMINFO_H #define MSP430TARGETASMINFO_H -#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmInfo.h" namespace llvm { + class StringRef; class Target; class MSP430MCAsmInfo : public MCAsmInfo { diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp index 8545055..c455f6b 100644 --- a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp +++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp @@ -61,10 +61,11 @@ static MCCodeGenInfo *createMSP430MCCodeGenInfo(StringRef TT, Reloc::Model RM, static MCInstPrinter *createMSP430MCInstPrinter(const Target &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, + const MCInstrInfo &MII, const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) { if (SyntaxVariant == 0) - return new MSP430InstPrinter(MAI, MRI); + return new MSP430InstPrinter(MAI, MII, MRI); return 0; } diff --git 
a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h index 35f2590..7f3505c 100644 --- a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h +++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h @@ -15,9 +15,7 @@ #define MSP430MCTARGETDESC_H namespace llvm { -class MCSubtargetInfo; class Target; -class StringRef; extern Target TheMSP430Target; diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp index 53ad155..786a0c5 100644 --- a/lib/Target/Mangler.cpp +++ b/lib/Target/Mangler.cpp @@ -22,12 +22,13 @@ #include "llvm/ADT/Twine.h" using namespace llvm; -static bool isAcceptableChar(char C, bool AllowPeriod) { +static bool isAcceptableChar(char C, bool AllowPeriod, bool AllowUTF8) { if ((C < 'a' || C > 'z') && (C < 'A' || C > 'Z') && (C < '0' || C > '9') && C != '_' && C != '$' && C != '@' && - !(AllowPeriod && C == '.')) + !(AllowPeriod && C == '.') && + !(AllowUTF8 && (C & 0x80))) return false; return true; } @@ -56,8 +57,9 @@ static bool NameNeedsEscaping(StringRef Str, const MCAsmInfo &MAI) { // If any of the characters in the string is an unacceptable character, force // quotes. 
bool AllowPeriod = MAI.doesAllowPeriodsInName(); + bool AllowUTF8 = MAI.doesAllowUTF8(); for (unsigned i = 0, e = Str.size(); i != e; ++i) - if (!isAcceptableChar(Str[i], AllowPeriod)) + if (!isAcceptableChar(Str[i], AllowPeriod, AllowUTF8)) return true; return false; } @@ -74,8 +76,9 @@ static void appendMangledName(SmallVectorImpl<char> &OutName, StringRef Str, } bool AllowPeriod = MAI.doesAllowPeriodsInName(); + bool AllowUTF8 = MAI.doesAllowUTF8(); for (unsigned i = 0, e = Str.size(); i != e; ++i) { - if (!isAcceptableChar(Str[i], AllowPeriod)) + if (!isAcceptableChar(Str[i], AllowPeriod, AllowUTF8)) MangleLetter(OutName, Str[i]); else OutName.push_back(Str[i]); diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index 13d17e4..0500c5d 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -2,12 +2,14 @@ set(LLVM_TARGET_DEFINITIONS Mips.td) tablegen(LLVM MipsGenRegisterInfo.inc -gen-register-info) tablegen(LLVM MipsGenInstrInfo.inc -gen-instr-info) +tablegen(LLVM MipsGenDisassemblerTables.inc -gen-disassembler) tablegen(LLVM MipsGenCodeEmitter.inc -gen-emitter) tablegen(LLVM MipsGenMCCodeEmitter.inc -gen-emitter -mc-emitter) tablegen(LLVM MipsGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM MipsGenDAGISel.inc -gen-dag-isel) tablegen(LLVM MipsGenCallingConv.inc -gen-callingconv) tablegen(LLVM MipsGenSubtargetInfo.inc -gen-subtarget) +tablegen(LLVM MipsGenEDInfo.inc -gen-enhanced-disassembly-info) add_public_tablegen_target(MipsCommonTableGen) add_llvm_target(MipsCodeGen @@ -32,6 +34,7 @@ add_llvm_target(MipsCodeGen ) add_subdirectory(InstPrinter) +add_subdirectory(Disassembler) add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) add_subdirectory(AsmParser) diff --git a/lib/Target/Mips/Disassembler/CMakeLists.txt b/lib/Target/Mips/Disassembler/CMakeLists.txt new file mode 100644 index 0000000..fe1dc75 --- /dev/null +++ b/lib/Target/Mips/Disassembler/CMakeLists.txt @@ -0,0 +1,15 @@ +include_directories( 
${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMMipsDisassembler + MipsDisassembler.cpp + ) + +# workaround for hanging compilation on MSVC9 and 10 +if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 ) +set_property( + SOURCE MipsDisassembler.cpp + PROPERTY COMPILE_FLAGS "/Od" + ) +endif() + +add_dependencies(LLVMMipsDisassembler MipsCommonTableGen) diff --git a/lib/Target/CBackend/LLVMBuild.txt b/lib/Target/Mips/Disassembler/LLVMBuild.txt index e64feb0..048ad0d 100644 --- a/lib/Target/CBackend/LLVMBuild.txt +++ b/lib/Target/Mips/Disassembler/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./lib/Target/CBackend/LLVMBuild.txt ----------------------*- Conf -*--===; +;===- ./lib/Target/Mips/Disassembler/LLVMBuild.txt --------------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; @@ -15,17 +15,9 @@ ; ;===------------------------------------------------------------------------===; -[common] -subdirectories = TargetInfo - [component_0] -type = TargetGroup -name = CBackend -parent = Target - -[component_1] type = Library -name = CBackendCodeGen -parent = CBackend -required_libraries = Analysis CBackendInfo CodeGen Core MC Scalar Support Target TransformUtils -add_to_library_groups = CBackend +name = MipsDisassembler +parent = Mips +required_libraries = MC Support MipsInfo +add_to_library_groups = Mips diff --git a/lib/Target/CBackend/Makefile b/lib/Target/Mips/Disassembler/Makefile index bac3474..a78feba 100644 --- a/lib/Target/CBackend/Makefile +++ b/lib/Target/Mips/Disassembler/Makefile @@ -1,4 +1,4 @@ -##===- lib/Target/CBackend/Makefile ------------------------*- Makefile -*-===## +##===- lib/Target/Mips/Disassembler/Makefile ----------------*- Makefile -*-===## # # The LLVM Compiler Infrastructure # @@ -7,10 +7,10 @@ # ##===----------------------------------------------------------------------===## -LEVEL = ../../.. -LIBRARYNAME = LLVMCBackendCodeGen -DIRS = TargetInfo +LEVEL = ../../../.. 
+LIBRARYNAME = LLVMMipsDisassembler -include $(LEVEL)/Makefile.common +# Hack: we need to include 'main' Mips target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. -CompileCommonOpts += -Wno-format +include $(LEVEL)/Makefile.common diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp new file mode 100644 index 0000000..78dbc06 --- /dev/null +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -0,0 +1,552 @@ +//===- MipsDisassembler.cpp - Disassembler for Mips -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is part of the Mips Disassembler. +// +//===----------------------------------------------------------------------===// + +#include "Mips.h" +#include "MipsSubtarget.h" +#include "llvm/MC/EDInstInfo.h" +#include "llvm/MC/MCDisassembler.h" +#include "llvm/Support/MemoryObject.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/MathExtras.h" + + +#include "MipsGenEDInfo.inc" + +using namespace llvm; + +typedef MCDisassembler::DecodeStatus DecodeStatus; + +/// MipsDisassembler - a disasembler class for Mips32. +class MipsDisassembler : public MCDisassembler { +public: + /// Constructor - Initializes the disassembler. + /// + MipsDisassembler(const MCSubtargetInfo &STI, bool bigEndian) : + MCDisassembler(STI), isBigEndian(bigEndian) { + } + + ~MipsDisassembler() { + } + + /// getInstruction - See MCDisassembler. 
+  DecodeStatus getInstruction(MCInst &instr,
+                              uint64_t &size,
+                              const MemoryObject &region,
+                              uint64_t address,
+                              raw_ostream &vStream,
+                              raw_ostream &cStream) const;
+
+  /// getEDInfo - See MCDisassembler.
+  const EDInstInfo *getEDInfo() const;
+
+private:
+  bool isBigEndian;
+};
+
+
+/// Mips64Disassembler - a disassembler class for Mips64.
+class Mips64Disassembler : public MCDisassembler {
+public:
+  /// Constructor - Initializes the disassembler.
+  ///
+  Mips64Disassembler(const MCSubtargetInfo &STI, bool bigEndian) :
+    MCDisassembler(STI), isBigEndian(bigEndian) {
+  }
+
+  ~Mips64Disassembler() {
+  }
+
+  /// getInstruction - See MCDisassembler.
+  DecodeStatus getInstruction(MCInst &instr,
+                              uint64_t &size,
+                              const MemoryObject &region,
+                              uint64_t address,
+                              raw_ostream &vStream,
+                              raw_ostream &cStream) const;
+
+  /// getEDInfo - See MCDisassembler.
+  const EDInstInfo *getEDInfo() const;
+
+private:
+  bool isBigEndian;
+};
+
+const EDInstInfo *MipsDisassembler::getEDInfo() const {
+  return instInfoMips;
+}
+
+const EDInstInfo *Mips64Disassembler::getEDInfo() const {
+  return instInfoMips;
+}
+
+// Decoder tables for Mips registers.
+// NOTE(review): presumably indexed by the register number taken from the
+// instruction encoding; the decoder functions are defined further down.
+static const unsigned CPURegsTable[] = {
+  Mips::ZERO, Mips::AT, Mips::V0, Mips::V1,
+  Mips::A0, Mips::A1, Mips::A2, Mips::A3,
+  Mips::T0, Mips::T1, Mips::T2, Mips::T3,
+  Mips::T4, Mips::T5, Mips::T6, Mips::T7,
+  Mips::S0, Mips::S1, Mips::S2, Mips::S3,
+  Mips::S4, Mips::S5, Mips::S6, Mips::S7,
+  Mips::T8, Mips::T9, Mips::K0, Mips::K1,
+  Mips::GP, Mips::SP, Mips::FP, Mips::RA
+};
+
+// Entry 19 used to repeat Mips::F18; every other slot holds the register
+// whose number equals its position, so slot 19 must be Mips::F19.
+static const unsigned FGR32RegsTable[] = {
+  Mips::F0, Mips::F1, Mips::F2, Mips::F3,
+  Mips::F4, Mips::F5, Mips::F6, Mips::F7,
+  Mips::F8, Mips::F9, Mips::F10, Mips::F11,
+  Mips::F12, Mips::F13, Mips::F14, Mips::F15,
+  Mips::F16, Mips::F17, Mips::F18, Mips::F19,
+  Mips::F20, Mips::F21, Mips::F22, Mips::F23,
+  Mips::F24, Mips::F25, Mips::F26, Mips::F27,
+  Mips::F28, Mips::F29, Mips::F30, Mips::F31
+};
+
+static const unsigned CPU64RegsTable[] = {
+  Mips::ZERO_64,
Mips::AT_64, Mips::V0_64, Mips::V1_64, + Mips::A0_64, Mips::A1_64, Mips::A2_64, Mips::A3_64, + Mips::T0_64, Mips::T1_64, Mips::T2_64, Mips::T3_64, + Mips::T4_64, Mips::T5_64, Mips::T6_64, Mips::T7_64, + Mips::S0_64, Mips::S1_64, Mips::S2_64, Mips::S3_64, + Mips::S4_64, Mips::S5_64, Mips::S6_64, Mips::S7_64, + Mips::T8_64, Mips::T9_64, Mips::K0_64, Mips::K1_64, + Mips::GP_64, Mips::SP_64, Mips::FP_64, Mips::RA_64 +}; + +static const unsigned FGR64RegsTable[] = { + Mips::D0_64, Mips::D1_64, Mips::D2_64, Mips::D3_64, + Mips::D4_64, Mips::D5_64, Mips::D6_64, Mips::D7_64, + Mips::D8_64, Mips::D9_64, Mips::D10_64, Mips::D11_64, + Mips::D12_64, Mips::D13_64, Mips::D14_64, Mips::D15_64, + Mips::D16_64, Mips::D17_64, Mips::D18_64, Mips::D19_64, + Mips::D20_64, Mips::D21_64, Mips::D22_64, Mips::D23_64, + Mips::D24_64, Mips::D25_64, Mips::D26_64, Mips::D27_64, + Mips::D28_64, Mips::D29_64, Mips::D30_64, Mips::D31_64 +}; + +static const unsigned AFGR64RegsTable[] = { + Mips::D0, Mips::D1, Mips::D2, Mips::D3, + Mips::D4, Mips::D5, Mips::D6, Mips::D7, + Mips::D8, Mips::D9, Mips::D10, Mips::D11, + Mips::D12, Mips::D13, Mips::D14, Mips::D15 +}; + +// Forward declare these because the autogenerated code will reference them. +// Definitions are further down. 
+static DecodeStatus DecodeCPU64RegsRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeCPURegsRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeFGR32RegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeCCRRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeHWRegsRegisterClass(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeBranchTarget(MCInst &Inst, + unsigned Offset, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeBC1(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + + +static DecodeStatus DecodeJumpTarget(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeMem(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeFMem(MCInst &Inst, unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeSimm16(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeCondCode(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeInsSize(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeExtSize(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void 
*Decoder); + +namespace llvm { +extern Target TheMipselTarget, TheMipsTarget, TheMips64Target, + TheMips64elTarget; +} + +static MCDisassembler *createMipsDisassembler( + const Target &T, + const MCSubtargetInfo &STI) { + return new MipsDisassembler(STI,true); +} + +static MCDisassembler *createMipselDisassembler( + const Target &T, + const MCSubtargetInfo &STI) { + return new MipsDisassembler(STI,false); +} + +static MCDisassembler *createMips64Disassembler( + const Target &T, + const MCSubtargetInfo &STI) { + return new Mips64Disassembler(STI,true); +} + +static MCDisassembler *createMips64elDisassembler( + const Target &T, + const MCSubtargetInfo &STI) { + return new Mips64Disassembler(STI, false); +} + +extern "C" void LLVMInitializeMipsDisassembler() { + // Register the disassembler. + TargetRegistry::RegisterMCDisassembler(TheMipsTarget, + createMipsDisassembler); + TargetRegistry::RegisterMCDisassembler(TheMipselTarget, + createMipselDisassembler); + TargetRegistry::RegisterMCDisassembler(TheMips64Target, + createMips64Disassembler); + TargetRegistry::RegisterMCDisassembler(TheMips64elTarget, + createMips64elDisassembler); +} + + +#include "MipsGenDisassemblerTables.inc" + + /// readInstruction - read four bytes from the MemoryObject + /// and return 32 bit word sorted according to the given endianess +static DecodeStatus readInstruction32(const MemoryObject ®ion, + uint64_t address, + uint64_t &size, + uint32_t &insn, + bool isBigEndian) { + uint8_t Bytes[4]; + + // We want to read exactly 4 Bytes of data. + if (region.readBytes(address, 4, (uint8_t*)Bytes, NULL) == -1) { + size = 0; + return MCDisassembler::Fail; + } + + if (isBigEndian) { + // Encoded as a big-endian 32-bit word in the stream. + insn = (Bytes[3] << 0) | + (Bytes[2] << 8) | + (Bytes[1] << 16) | + (Bytes[0] << 24); + } + else { + // Encoded as a small-endian 32-bit word in the stream. 
+ insn = (Bytes[0] << 0) | + (Bytes[1] << 8) | + (Bytes[2] << 16) | + (Bytes[3] << 24); + } + + return MCDisassembler::Success; +} + +DecodeStatus +MipsDisassembler::getInstruction(MCInst &instr, + uint64_t &Size, + const MemoryObject &Region, + uint64_t Address, + raw_ostream &vStream, + raw_ostream &cStream) const { + uint32_t Insn; + + DecodeStatus Result = readInstruction32(Region, Address, Size, + Insn, isBigEndian); + if (Result == MCDisassembler::Fail) + return MCDisassembler::Fail; + + // Calling the auto-generated decoder function. + Result = decodeMipsInstruction32(instr, Insn, Address, this, STI); + if (Result != MCDisassembler::Fail) { + Size = 4; + return Result; + } + + return MCDisassembler::Fail; +} + +DecodeStatus +Mips64Disassembler::getInstruction(MCInst &instr, + uint64_t &Size, + const MemoryObject &Region, + uint64_t Address, + raw_ostream &vStream, + raw_ostream &cStream) const { + uint32_t Insn; + + DecodeStatus Result = readInstruction32(Region, Address, Size, + Insn, isBigEndian); + if (Result == MCDisassembler::Fail) + return MCDisassembler::Fail; + + // Calling the auto-generated decoder function. 
+ Result = decodeMips64Instruction32(instr, Insn, Address, this, STI); + if (Result != MCDisassembler::Fail) { + Size = 4; + return Result; + } + // If we fail to decode in Mips64 decoder space we can try in Mips32 + Result = decodeMipsInstruction32(instr, Insn, Address, this, STI); + if (Result != MCDisassembler::Fail) { + Size = 4; + return Result; + } + + return MCDisassembler::Fail; +} + +static DecodeStatus DecodeCPU64RegsRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + + if (RegNo > 31) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateReg(CPU64RegsTable[RegNo])); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeCPURegsRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateReg(CPURegsTable[RegNo])); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateReg(FGR64RegsTable[RegNo])); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeFGR32RegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateReg(FGR32RegsTable[RegNo])); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeCCRRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + Inst.addOperand(MCOperand::CreateReg(RegNo)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeMem(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + int Offset = SignExtend32<16>(Insn & 0xffff); + int Reg = (int)fieldFromInstruction32(Insn, 16, 5); + int Base = (int)fieldFromInstruction32(Insn, 21, 5); + + 
if(Inst.getOpcode() == Mips::SC){ + Inst.addOperand(MCOperand::CreateReg(CPURegsTable[Reg])); + } + + Inst.addOperand(MCOperand::CreateReg(CPURegsTable[Reg])); + Inst.addOperand(MCOperand::CreateReg(CPURegsTable[Base])); + Inst.addOperand(MCOperand::CreateImm(Offset)); + + return MCDisassembler::Success; +} + +static DecodeStatus DecodeFMem(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + int Offset = SignExtend32<16>(Insn & 0xffff); + int Reg = (int)fieldFromInstruction32(Insn, 16, 5); + int Base = (int)fieldFromInstruction32(Insn, 21, 5); + + Inst.addOperand(MCOperand::CreateReg(FGR64RegsTable[Reg])); + Inst.addOperand(MCOperand::CreateReg(CPURegsTable[Base])); + Inst.addOperand(MCOperand::CreateImm(Offset)); + + return MCDisassembler::Success; +} + + +static DecodeStatus DecodeHWRegsRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + // Currently only hardware register 29 is supported. + if (RegNo != 29) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateReg(Mips::HWR29)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeCondCode(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + int CondCode = Insn & 0xf; + Inst.addOperand(MCOperand::CreateImm(CondCode)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateReg(AFGR64RegsTable[RegNo])); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + //Currently only hardware register 29 is supported + if (RegNo != 29) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateReg(Mips::HWR29)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeBranchTarget(MCInst 
&Inst, + unsigned Offset, + uint64_t Address, + const void *Decoder) { + unsigned BranchOffset = Offset & 0xffff; + BranchOffset = SignExtend32<18>(BranchOffset << 2) + 4; + Inst.addOperand(MCOperand::CreateImm(BranchOffset)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeBC1(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + unsigned BranchOffset = Insn & 0xffff; + BranchOffset = SignExtend32<18>(BranchOffset << 2) + 4; + Inst.addOperand(MCOperand::CreateImm(BranchOffset)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeJumpTarget(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + + unsigned JumpOffset = fieldFromInstruction32(Insn, 0, 26) << 2; + Inst.addOperand(MCOperand::CreateImm(JumpOffset)); + return MCDisassembler::Success; +} + + +static DecodeStatus DecodeSimm16(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + Inst.addOperand(MCOperand::CreateImm(SignExtend32<16>(Insn))); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeInsSize(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + // First we need to grab the pos(lsb) from MCInst. 
+ int Pos = Inst.getOperand(2).getImm(); + int Size = (int) Insn - Pos + 1; + Inst.addOperand(MCOperand::CreateImm(SignExtend32<16>(Size))); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeExtSize(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + int Size = (int) Insn + 1; + Inst.addOperand(MCOperand::CreateImm(SignExtend32<16>(Size))); + return MCDisassembler::Success; +} diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp index 2917a89..6886b17 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp @@ -16,12 +16,12 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -#define GET_INSTRUCTION_NAME #include "MipsGenAsmWriter.inc" const char* Mips::MipsFCCToString(Mips::CondCode CC) { @@ -62,10 +62,6 @@ const char* Mips::MipsFCCToString(Mips::CondCode CC) { llvm_unreachable("Impossible condition code!"); } -StringRef MipsInstPrinter::getOpcodeName(unsigned Opcode) const { - return getInstructionName(Opcode); -} - void MipsInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { OS << '$' << StringRef(getRegisterName(RegNo)).lower(); } diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h index acd761d..76b839b 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h @@ -77,15 +77,14 @@ class TargetMachine; class MipsInstPrinter : public MCInstPrinter { public: - MipsInstPrinter(const MCAsmInfo &MAI, const MCRegisterInfo &MRI) : - MCInstPrinter(MAI, MRI) {} + MipsInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} // 
Autogenerated by tblgen. void printInstruction(const MCInst *MI, raw_ostream &O); - static const char *getInstructionName(unsigned Opcode); static const char *getRegisterName(unsigned RegNo); - virtual StringRef getOpcodeName(unsigned Opcode) const; virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); diff --git a/lib/Target/Mips/LLVMBuild.txt b/lib/Target/Mips/LLVMBuild.txt index abbed8c..a95d6bc 100644 --- a/lib/Target/Mips/LLVMBuild.txt +++ b/lib/Target/Mips/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = AsmParser InstPrinter MCTargetDesc TargetInfo +subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo [component_0] type = TargetGroup @@ -24,6 +24,7 @@ name = Mips parent = Target has_asmparser = 1 has_asmprinter = 1 +has_disassembler = 1 has_jit = 1 [component_1] diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 9d5a2f1..9b4caf6 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -14,19 +14,13 @@ #include "MipsFixupKinds.h" #include "MCTargetDesc/MipsMCTargetDesc.h" -#include "llvm/ADT/Twine.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCELFObjectWriter.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCMachObjectWriter.h" +#include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCObjectWriter.h" -#include "llvm/MC/MCSectionELF.h" -#include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Object/MachOFormat.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -61,7 +55,7 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { case 
Mips::fixup_Mips_HI16: case Mips::fixup_Mips_GOT_Local: // Get the higher 16-bits. Also add 1 if bit 15 is 1. - Value = (Value >> 16) + ((Value & 0x8000) != 0); + Value = ((Value + 0x8000) >> 16) & 0xffff; break; } @@ -72,13 +66,15 @@ namespace { class MipsAsmBackend : public MCAsmBackend { Triple::OSType OSType; bool IsLittle; // Big or little endian + bool Is64Bit; // 32 or 64 bit words public: - MipsAsmBackend(const Target &T, Triple::OSType _OSType, bool _isLittle) : - MCAsmBackend(), OSType(_OSType), IsLittle(_isLittle) {} + MipsAsmBackend(const Target &T, Triple::OSType _OSType, + bool _isLittle, bool _is64Bit) + :MCAsmBackend(), OSType(_OSType), IsLittle(_isLittle), Is64Bit(_is64Bit) {} MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return createMipsELFObjectWriter(OS, OSType, IsLittle); + return createMipsELFObjectWriter(OS, OSType, IsLittle, Is64Bit); } /// ApplyFixup - Apply the \arg Value for given \arg Fixup into the provided @@ -120,7 +116,7 @@ public: } uint64_t Mask = ((uint64_t)(-1) >> (64 - getFixupKindInfo(Kind).TargetSize)); - CurVal = (CurVal & ~Mask) | ((CurVal + Value) & Mask); + CurVal |= Value & Mask; // Write out the fixed up bytes back to the code/data bits. 
for (unsigned i = 0; i != NumBytes; ++i) { @@ -212,17 +208,28 @@ public: bool writeNopData(uint64_t Count, MCObjectWriter *OW) const { return true; } -}; +}; // class MipsAsmBackend } // namespace // MCAsmBackend -MCAsmBackend *llvm::createMipsAsmBackendEL(const Target &T, StringRef TT) { +MCAsmBackend *llvm::createMipsAsmBackendEL32(const Target &T, StringRef TT) { return new MipsAsmBackend(T, Triple(TT).getOS(), - /*IsLittle*/true); + /*IsLittle*/true, /*Is64Bit*/false); } -MCAsmBackend *llvm::createMipsAsmBackendEB(const Target &T, StringRef TT) { +MCAsmBackend *llvm::createMipsAsmBackendEB32(const Target &T, StringRef TT) { return new MipsAsmBackend(T, Triple(TT).getOS(), - /*IsLittle*/false); + /*IsLittle*/false, /*Is64Bit*/false); } + +MCAsmBackend *llvm::createMipsAsmBackendEL64(const Target &T, StringRef TT) { + return new MipsAsmBackend(T, Triple(TT).getOS(), + /*IsLittle*/true, /*Is64Bit*/true); +} + +MCAsmBackend *llvm::createMipsAsmBackendEB64(const Target &T, StringRef TT) { + return new MipsAsmBackend(T, Triple(TT).getOS(), + /*IsLittle*/false, /*Is64Bit*/true); +} + diff --git a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h index 34e3a6e..fb1c5ce 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h +++ b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h @@ -14,7 +14,9 @@ #ifndef MIPSBASEINFO_H #define MIPSBASEINFO_H +#include "MipsFixupKinds.h" #include "MipsMCTargetDesc.h" +#include "llvm/MC/MCExpr.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/ErrorHandling.h" @@ -198,6 +200,34 @@ inline static unsigned getMipsRegisterNumbering(unsigned RegEnum) default: llvm_unreachable("Unknown register number!"); } } + +inline static std::pair<const MCSymbolRefExpr*, int64_t> +MipsGetSymAndOffset(const MCFixup &Fixup) { + MCFixupKind FixupKind = Fixup.getKind(); + + if ((FixupKind < FirstTargetFixupKind) || + (FixupKind >= MCFixupKind(Mips::LastTargetFixupKind))) + return std::make_pair((const 
MCSymbolRefExpr*)0, (int64_t)0); + + const MCExpr *Expr = Fixup.getValue(); + MCExpr::ExprKind Kind = Expr->getKind(); + + if (Kind == MCExpr::Binary) { + const MCBinaryExpr *BE = static_cast<const MCBinaryExpr*>(Expr); + const MCExpr *LHS = BE->getLHS(); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(BE->getRHS()); + + if ((LHS->getKind() != MCExpr::SymbolRef) || !CE) + return std::make_pair((const MCSymbolRefExpr*)0, (int64_t)0); + + return std::make_pair(cast<MCSymbolRefExpr>(LHS), CE->getValue()); + } + + if (Kind != MCExpr::SymbolRef) + return std::make_pair((const MCSymbolRefExpr*)0, (int64_t)0); + + return std::make_pair(cast<MCSymbolRefExpr>(Expr), 0); +} } #endif diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index 076a6a8..2091bec 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -7,20 +7,34 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/MipsBaseInfo.h" #include "MCTargetDesc/MipsFixupKinds.h" #include "MCTargetDesc/MipsMCTargetDesc.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" +#include <list> using namespace llvm; namespace { + struct RelEntry { + RelEntry(const ELFRelocationEntry &R, const MCSymbol *S, int64_t O) : + Reloc(R), Sym(S), Offset(O) {} + ELFRelocationEntry Reloc; + const MCSymbol *Sym; + int64_t Offset; + }; + + typedef std::list<RelEntry> RelLs; + typedef RelLs::iterator RelLsIter; + class MipsELFObjectWriter : public MCELFObjectTargetWriter { public: - MipsELFObjectWriter(uint8_t OSABI); + MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI); virtual ~MipsELFObjectWriter(); @@ -33,18 +47,28 @@ namespace { const MCFragment &F, const MCFixup &Fixup, bool IsPCRel) const; 
+ virtual void sortRelocs(const MCAssembler &Asm, + std::vector<ELFRelocationEntry> &Relocs); }; } -MipsELFObjectWriter::MipsELFObjectWriter(uint8_t OSABI) - : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI, ELF::EM_MIPS, +MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI) + : MCELFObjectTargetWriter(_is64Bit, OSABI, ELF::EM_MIPS, /*HasRelocationAddend*/ false) {} MipsELFObjectWriter::~MipsELFObjectWriter() {} -// FIXME: get the real EABI Version from the Triple. +// FIXME: get the real EABI Version from the Subtarget class. unsigned MipsELFObjectWriter::getEFlags() const { - return ELF::EF_MIPS_NOREORDER | ELF::EF_MIPS_ARCH_32R2; + + // FIXME: We can't tell if we are PIC (dynamic) or CPIC (static) + unsigned Flag = ELF::EF_MIPS_NOREORDER; + + if (is64Bit()) + Flag |= ELF::EF_MIPS_ARCH_64R2; + else + Flag |= ELF::EF_MIPS_ARCH_32R2; + return Flag; } const MCSymbol *MipsELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm, @@ -129,8 +153,97 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target, return Type; } -MCObjectWriter *llvm::createMipsELFObjectWriter(raw_ostream &OS, uint8_t OSABI, - bool IsLittleEndian) { - MCELFObjectTargetWriter *MOTW = new MipsELFObjectWriter(OSABI); +// Return true if R is either a GOT16 against a local symbol or HI16. 
+static bool NeedsMatchingLo(const MCAssembler &Asm, const RelEntry &R) { + if (!R.Sym) + return false; + + MCSymbolData &SD = Asm.getSymbolData(R.Sym->AliasedSymbol()); + + return ((R.Reloc.Type == ELF::R_MIPS_GOT16) && !SD.isExternal()) || + (R.Reloc.Type == ELF::R_MIPS_HI16); +} + +static bool HasMatchingLo(const MCAssembler &Asm, RelLsIter I, RelLsIter Last) { + if (I == Last) + return false; + + RelLsIter Hi = I++; + + return (I->Reloc.Type == ELF::R_MIPS_LO16) && (Hi->Sym == I->Sym) && + (Hi->Offset == I->Offset); +} + +static bool HasSameSymbol(const RelEntry &R0, const RelEntry &R1) { + return R0.Sym == R1.Sym; +} + +static int CompareOffset(const RelEntry &R0, const RelEntry &R1) { + return (R0.Offset > R1.Offset) ? 1 : ((R0.Offset == R1.Offset) ? 0 : -1); +} + +void MipsELFObjectWriter::sortRelocs(const MCAssembler &Asm, + std::vector<ELFRelocationEntry> &Relocs) { + // Call the defualt function first. Relocations are sorted in descending + // order of r_offset. + MCELFObjectTargetWriter::sortRelocs(Asm, Relocs); + + RelLs RelocLs; + std::vector<RelLsIter> Unmatched; + + // Fill RelocLs. Traverse Relocs backwards so that relocations in RelocLs + // are in ascending order of r_offset. + for (std::vector<ELFRelocationEntry>::reverse_iterator R = Relocs.rbegin(); + R != Relocs.rend(); ++R) { + std::pair<const MCSymbolRefExpr*, int64_t> P = + MipsGetSymAndOffset(*R->Fixup); + RelocLs.push_back(RelEntry(*R, P.first ? &P.first->getSymbol() : 0, + P.second)); + } + + // Get list of unmatched HI16 and GOT16. + for (RelLsIter R = RelocLs.begin(); R != RelocLs.end(); ++R) + if (NeedsMatchingLo(Asm, *R) && !HasMatchingLo(Asm, R, --RelocLs.end())) + Unmatched.push_back(R); + + // Insert unmatched HI16 and GOT16 immediately before their matching LO16. 
+ for (std::vector<RelLsIter>::iterator U = Unmatched.begin(); + U != Unmatched.end(); ++U) { + RelLsIter LoPos = RelocLs.end(), HiPos = *U; + bool MatchedLo = false; + + for (RelLsIter R = RelocLs.begin(); R != RelocLs.end(); ++R) { + if ((R->Reloc.Type == ELF::R_MIPS_LO16) && HasSameSymbol(*HiPos, *R) && + (CompareOffset(*R, *HiPos) >= 0) && + ((LoPos == RelocLs.end()) || ((CompareOffset(*R, *LoPos) < 0)) || + (!MatchedLo && !CompareOffset(*R, *LoPos)))) + LoPos = R; + + MatchedLo = NeedsMatchingLo(Asm, *R) && + HasMatchingLo(Asm, R, --RelocLs.end()); + } + + // If a matching LoPos was found, move HiPos and insert it before LoPos. + // Make the offsets of HiPos and LoPos match. + if (LoPos != RelocLs.end()) { + HiPos->Offset = LoPos->Offset; + RelocLs.insert(LoPos, *HiPos); + RelocLs.erase(HiPos); + } + } + + // Put the sorted list back in reverse order. + assert(Relocs.size() == RelocLs.size()); + unsigned I = RelocLs.size(); + + for (RelLsIter R = RelocLs.begin(); R != RelocLs.end(); ++R) + Relocs[--I] = R->Reloc; +} + +MCObjectWriter *llvm::createMipsELFObjectWriter(raw_ostream &OS, + uint8_t OSABI, + bool IsLittleEndian, + bool Is64Bit) { + MCELFObjectTargetWriter *MOTW = new MipsELFObjectWriter(Is64Bit, OSABI); return createELFObjectWriter(MOTW, OS, IsLittleEndian); } diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h index ef4c6e2..e1d8789 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h @@ -14,10 +14,10 @@ #ifndef MIPSTARGETASMINFO_H #define MIPSTARGETASMINFO_H -#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmInfo.h" namespace llvm { + class StringRef; class Target; class MipsMCAsmInfo : public MCAsmInfo { diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index 9ebb6d2..27954b1 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ 
b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -179,73 +179,71 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, } else if (MO.isFPImm()) { return static_cast<unsigned>(APFloat(MO.getFPImm()) .bitcastToAPInt().getHiBits(32).getLimitedValue()); - } else if (MO.isExpr()) { - const MCExpr *Expr = MO.getExpr(); - MCExpr::ExprKind Kind = Expr->getKind(); - unsigned Ret = 0; - - if (Kind == MCExpr::Binary) { - const MCBinaryExpr *BE = static_cast<const MCBinaryExpr*>(Expr); - Expr = BE->getLHS(); - Kind = Expr->getKind(); - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(BE->getRHS()); - assert((Kind == MCExpr::SymbolRef) && CE && - "Binary expression must be sym+const."); - Ret = CE->getValue(); - } + } + + // MO must be an Expr. + assert(MO.isExpr()); + + const MCExpr *Expr = MO.getExpr(); + MCExpr::ExprKind Kind = Expr->getKind(); - if (Kind == MCExpr::SymbolRef) { - Mips::Fixups FixupKind; - - switch(cast<MCSymbolRefExpr>(Expr)->getKind()) { - case MCSymbolRefExpr::VK_Mips_GPREL: - FixupKind = Mips::fixup_Mips_GPREL16; - break; - case MCSymbolRefExpr::VK_Mips_GOT_CALL: - FixupKind = Mips::fixup_Mips_CALL16; - break; - case MCSymbolRefExpr::VK_Mips_GOT16: - FixupKind = Mips::fixup_Mips_GOT_Global; - break; - case MCSymbolRefExpr::VK_Mips_GOT: - FixupKind = Mips::fixup_Mips_GOT_Local; - break; - case MCSymbolRefExpr::VK_Mips_ABS_HI: - FixupKind = Mips::fixup_Mips_HI16; - break; - case MCSymbolRefExpr::VK_Mips_ABS_LO: - FixupKind = Mips::fixup_Mips_LO16; - break; - case MCSymbolRefExpr::VK_Mips_TLSGD: - FixupKind = Mips::fixup_Mips_TLSGD; - break; - case MCSymbolRefExpr::VK_Mips_TLSLDM: - FixupKind = Mips::fixup_Mips_TLSLDM; - break; - case MCSymbolRefExpr::VK_Mips_DTPREL_HI: - FixupKind = Mips::fixup_Mips_DTPREL_HI; - break; - case MCSymbolRefExpr::VK_Mips_DTPREL_LO: - FixupKind = Mips::fixup_Mips_DTPREL_LO; - break; - case MCSymbolRefExpr::VK_Mips_GOTTPREL: - FixupKind = Mips::fixup_Mips_GOTTPREL; - break; - case 
MCSymbolRefExpr::VK_Mips_TPREL_HI: - FixupKind = Mips::fixup_Mips_TPREL_HI; - break; - case MCSymbolRefExpr::VK_Mips_TPREL_LO: - FixupKind = Mips::fixup_Mips_TPREL_LO; - break; - default: - return Ret; - } // switch - Fixups.push_back(MCFixup::Create(0, Expr, MCFixupKind(FixupKind))); - } // if SymbolRef - // All of the information is in the fixup. - return Ret; + if (Kind == MCExpr::Binary) { + Expr = static_cast<const MCBinaryExpr*>(Expr)->getLHS(); + Kind = Expr->getKind(); } - llvm_unreachable("Unable to encode MCOperand!"); + + assert (Kind == MCExpr::SymbolRef); + + Mips::Fixups FixupKind; + + switch(cast<MCSymbolRefExpr>(Expr)->getKind()) { + case MCSymbolRefExpr::VK_Mips_GPREL: + FixupKind = Mips::fixup_Mips_GPREL16; + break; + case MCSymbolRefExpr::VK_Mips_GOT_CALL: + FixupKind = Mips::fixup_Mips_CALL16; + break; + case MCSymbolRefExpr::VK_Mips_GOT16: + FixupKind = Mips::fixup_Mips_GOT_Global; + break; + case MCSymbolRefExpr::VK_Mips_GOT: + FixupKind = Mips::fixup_Mips_GOT_Local; + break; + case MCSymbolRefExpr::VK_Mips_ABS_HI: + FixupKind = Mips::fixup_Mips_HI16; + break; + case MCSymbolRefExpr::VK_Mips_ABS_LO: + FixupKind = Mips::fixup_Mips_LO16; + break; + case MCSymbolRefExpr::VK_Mips_TLSGD: + FixupKind = Mips::fixup_Mips_TLSGD; + break; + case MCSymbolRefExpr::VK_Mips_TLSLDM: + FixupKind = Mips::fixup_Mips_TLSLDM; + break; + case MCSymbolRefExpr::VK_Mips_DTPREL_HI: + FixupKind = Mips::fixup_Mips_DTPREL_HI; + break; + case MCSymbolRefExpr::VK_Mips_DTPREL_LO: + FixupKind = Mips::fixup_Mips_DTPREL_LO; + break; + case MCSymbolRefExpr::VK_Mips_GOTTPREL: + FixupKind = Mips::fixup_Mips_GOTTPREL; + break; + case MCSymbolRefExpr::VK_Mips_TPREL_HI: + FixupKind = Mips::fixup_Mips_TPREL_HI; + break; + case MCSymbolRefExpr::VK_Mips_TPREL_LO: + FixupKind = Mips::fixup_Mips_TPREL_LO; + break; + default: + break; + } // switch + + Fixups.push_back(MCFixup::Create(0, MO.getExpr(), MCFixupKind(FixupKind))); + + // All of the information is in the fixup. 
+ return 0; } /// getMemEncoding - Return binary encoding of memory related operand. diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp index 7652675..f634f08 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp @@ -34,6 +34,38 @@ using namespace llvm; +static std::string ParseMipsTriple(StringRef TT, StringRef CPU) { + std::string MipsArchFeature; + size_t DashPosition = 0; + StringRef TheTriple; + + // Let's see if there is a dash, like mips-unknown-linux. + DashPosition = TT.find('-'); + + if (DashPosition == StringRef::npos) { + // No dash, we check the string size. + TheTriple = TT.substr(0); + } else { + // We are only interested in substring before dash. + TheTriple = TT.substr(0,DashPosition); + } + + if (TheTriple == "mips" || TheTriple == "mipsel") { + if (CPU.empty() || CPU == "mips32") { + MipsArchFeature = "+mips32"; + } else if (CPU == "mips32r2") { + MipsArchFeature = "+mips32r2"; + } + } else { + if (CPU.empty() || CPU == "mips64") { + MipsArchFeature = "+mips64"; + } else if (CPU == "mips64r2") { + MipsArchFeature = "+mips64r2"; + } + } + return MipsArchFeature; +} + static MCInstrInfo *createMipsMCInstrInfo() { MCInstrInfo *X = new MCInstrInfo(); InitMipsMCInstrInfo(X); @@ -48,8 +80,15 @@ static MCRegisterInfo *createMipsMCRegisterInfo(StringRef TT) { static MCSubtargetInfo *createMipsMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) { + std::string ArchFS = ParseMipsTriple(TT,CPU); + if (!FS.empty()) { + if (!ArchFS.empty()) + ArchFS = ArchFS + "," + FS.str(); + else + ArchFS = FS; + } MCSubtargetInfo *X = new MCSubtargetInfo(); - InitMipsMCSubtargetInfo(X, TT, CPU, FS); + InitMipsMCSubtargetInfo(X, TT, CPU, ArchFS); return X; } @@ -67,7 +106,9 @@ static MCCodeGenInfo *createMipsMCCodeGenInfo(StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); - 
if (RM == Reloc::Default) + if (CM == CodeModel::JITDefault) + RM = Reloc::Static; + else if (RM == Reloc::Default) RM = Reloc::PIC_; X->InitMCCodeGenInfo(RM, CM, OL); return X; @@ -76,9 +117,10 @@ static MCCodeGenInfo *createMipsMCCodeGenInfo(StringRef TT, Reloc::Model RM, static MCInstPrinter *createMipsMCInstPrinter(const Target &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, + const MCInstrInfo &MII, const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) { - return new MipsInstPrinter(MAI, MRI); + return new MipsInstPrinter(MAI, MII, MRI); } static MCStreamer *createMCStreamer(const Target &T, StringRef TT, @@ -142,13 +184,13 @@ extern "C" void LLVMInitializeMipsTargetMC() { // Register the asm backend. TargetRegistry::RegisterMCAsmBackend(TheMipsTarget, - createMipsAsmBackendEB); + createMipsAsmBackendEB32); TargetRegistry::RegisterMCAsmBackend(TheMipselTarget, - createMipsAsmBackendEL); + createMipsAsmBackendEL32); TargetRegistry::RegisterMCAsmBackend(TheMips64Target, - createMipsAsmBackendEB); + createMipsAsmBackendEB64); TargetRegistry::RegisterMCAsmBackend(TheMips64elTarget, - createMipsAsmBackendEL); + createMipsAsmBackendEL64); // Register the MC subtarget info. 
TargetRegistry::RegisterMCSubtargetInfo(TheMipsTarget, diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h index 2e58f9d..547ccdd 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h @@ -39,12 +39,15 @@ MCCodeEmitter *createMipsMCCodeEmitterEL(const MCInstrInfo &MCII, const MCSubtargetInfo &STI, MCContext &Ctx); -MCAsmBackend *createMipsAsmBackendEB(const Target &T, StringRef TT); -MCAsmBackend *createMipsAsmBackendEL(const Target &T, StringRef TT); +MCAsmBackend *createMipsAsmBackendEB32(const Target &T, StringRef TT); +MCAsmBackend *createMipsAsmBackendEL32(const Target &T, StringRef TT); +MCAsmBackend *createMipsAsmBackendEB64(const Target &T, StringRef TT); +MCAsmBackend *createMipsAsmBackendEL64(const Target &T, StringRef TT); MCObjectWriter *createMipsELFObjectWriter(raw_ostream &OS, uint8_t OSABI, - bool IsLittleEndian); + bool IsLittleEndian, + bool Is64Bit); } // End llvm namespace // Defines symbolic names for Mips registers. 
This defines a mapping from diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile index 168635c..596f071 100644 --- a/lib/Target/Mips/Makefile +++ b/lib/Target/Mips/Makefile @@ -15,9 +15,9 @@ TARGET = Mips BUILT_SOURCES = MipsGenRegisterInfo.inc MipsGenInstrInfo.inc \ MipsGenAsmWriter.inc MipsGenCodeEmitter.inc \ MipsGenDAGISel.inc MipsGenCallingConv.inc \ - MipsGenSubtargetInfo.inc MipsGenMCCodeEmitter.inc - -DIRS = InstPrinter AsmParser TargetInfo MCTargetDesc + MipsGenSubtargetInfo.inc MipsGenMCCodeEmitter.inc \ + MipsGenEDInfo.inc MipsGenDisassemblerTables.inc +DIRS = InstPrinter Disassembler AsmParser TargetInfo MCTargetDesc include $(LEVEL)/Makefile.common diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 427e8d9..0382869 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -36,6 +36,7 @@ def immZExt6 : ImmLeaf<i32, [{return Imm == (Imm & 0x3f);}]>; //===----------------------------------------------------------------------===// // Shifts // 64-bit shift instructions. 
+let DecoderNamespace = "Mips64" in { class shift_rotate_imm64<bits<6> func, bits<5> isRotate, string instr_asm, SDNode OpNode>: shift_rotate_imm<func, isRotate, instr_asm, OpNode, immZExt6, shamt, @@ -49,16 +50,21 @@ class Div64<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>: multiclass Atomic2Ops64<PatFrag Op, string Opstr> { def #NAME# : Atomic2Ops<Op, Opstr, CPU64Regs, CPURegs>, Requires<[NotN64]>; - def _P8 : Atomic2Ops<Op, Opstr, CPU64Regs, CPU64Regs>, Requires<[IsN64]>; + def _P8 : Atomic2Ops<Op, Opstr, CPU64Regs, CPU64Regs>, Requires<[IsN64]> { + let isCodeGenOnly = 1; + } } multiclass AtomicCmpSwap64<PatFrag Op, string Width> { def #NAME# : AtomicCmpSwap<Op, Width, CPU64Regs, CPURegs>, Requires<[NotN64]>; def _P8 : AtomicCmpSwap<Op, Width, CPU64Regs, CPU64Regs>, - Requires<[IsN64]>; + Requires<[IsN64]> { + let isCodeGenOnly = 1; + } } - -let usesCustomInserter = 1, Predicates = [HasMips64] in { +} +let usesCustomInserter = 1, Predicates = [HasMips64], + DecoderNamespace = "Mips64" in { defm ATOMIC_LOAD_ADD_I64 : Atomic2Ops64<atomic_load_add_64, "load_add_64">; defm ATOMIC_LOAD_SUB_I64 : Atomic2Ops64<atomic_load_sub_64, "load_sub_64">; defm ATOMIC_LOAD_AND_I64 : Atomic2Ops64<atomic_load_and_64, "load_and_64">; @@ -72,7 +78,7 @@ let usesCustomInserter = 1, Predicates = [HasMips64] in { //===----------------------------------------------------------------------===// // Instruction definition //===----------------------------------------------------------------------===// - +let DecoderNamespace = "Mips64" in { /// Arithmetic Instructions (ALU Immediate) def DADDiu : ArithLogicI<0x19, "daddiu", add, simm16_64, immSExt16, CPU64Regs>; @@ -97,16 +103,17 @@ def NOR64 : LogicNOR<0x00, 0x27, "nor", CPU64Regs>; def DSLL : shift_rotate_imm64<0x38, 0x00, "dsll", shl>; def DSRL : shift_rotate_imm64<0x3a, 0x00, "dsrl", srl>; def DSRA : shift_rotate_imm64<0x3b, 0x00, "dsra", sra>; -def DSLLV : shift_rotate_reg<0x24, 0x00, "dsllv", shl, CPU64Regs>; -def 
DSRLV : shift_rotate_reg<0x26, 0x00, "dsrlv", srl, CPU64Regs>; -def DSRAV : shift_rotate_reg<0x27, 0x00, "dsrav", sra, CPU64Regs>; - +def DSLLV : shift_rotate_reg<0x14, 0x00, "dsllv", shl, CPU64Regs>; +def DSRLV : shift_rotate_reg<0x16, 0x00, "dsrlv", srl, CPU64Regs>; +def DSRAV : shift_rotate_reg<0x17, 0x00, "dsrav", sra, CPU64Regs>; +} // Rotate Instructions -let Predicates = [HasMips64r2] in { +let Predicates = [HasMips64r2], DecoderNamespace = "Mips64" in { def DROTR : shift_rotate_imm64<0x3a, 0x01, "drotr", rotr>; def DROTRV : shift_rotate_reg<0x16, 0x01, "drotrv", rotr, CPU64Regs>; } +let DecoderNamespace = "Mips64" in { /// Load and Store Instructions /// aligned defm LB64 : LoadM64<0x20, "lb", sextloadi8>; @@ -132,9 +139,13 @@ defm USD : StoreM64<0x3f, "usd", store_u, 1>; /// Load-linked, Store-conditional def LLD : LLBase<0x34, "lld", CPU64Regs, mem>, Requires<[NotN64]>; -def LLD_P8 : LLBase<0x34, "lld", CPU64Regs, mem64>, Requires<[IsN64]>; +def LLD_P8 : LLBase<0x34, "lld", CPU64Regs, mem64>, Requires<[IsN64]> { + let isCodeGenOnly = 1; +} def SCD : SCBase<0x3c, "scd", CPU64Regs, mem>, Requires<[NotN64]>; -def SCD_P8 : SCBase<0x3c, "scd", CPU64Regs, mem64>, Requires<[IsN64]>; +def SCD_P8 : SCBase<0x3c, "scd", CPU64Regs, mem64>, Requires<[IsN64]> { + let isCodeGenOnly = 1; +} /// Jump and Branch Instructions def JR64 : JumpFR<0x00, 0x08, "jr", CPU64Regs>; @@ -142,11 +153,13 @@ def BEQ64 : CBranch<0x04, "beq", seteq, CPU64Regs>; def BNE64 : CBranch<0x05, "bne", setne, CPU64Regs>; def BGEZ64 : CBranchZero<0x01, 1, "bgez", setge, CPU64Regs>; def BGTZ64 : CBranchZero<0x07, 0, "bgtz", setgt, CPU64Regs>; -def BLEZ64 : CBranchZero<0x07, 0, "blez", setle, CPU64Regs>; +def BLEZ64 : CBranchZero<0x06, 0, "blez", setle, CPU64Regs>; def BLTZ64 : CBranchZero<0x01, 0, "bltz", setlt, CPU64Regs>; - +} +let DecoderNamespace = "Mips64" in def JALR64 : JumpLinkReg<0x00, 0x09, "jalr", CPU64Regs>; +let DecoderNamespace = "Mips64" in { /// Multiply and Divide Instructions. 
def DMULT : Mult64<0x1c, "dmult", IIImul>; def DMULTu : Mult64<0x1d, "dmultu", IIImul>; @@ -171,11 +184,13 @@ def DSBH : SubwordSwap<0x24, 0x2, "dsbh", CPU64Regs>; def DSHD : SubwordSwap<0x24, 0x5, "dshd", CPU64Regs>; def LEA_ADDiu64 : EffectiveAddress<"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>; - -let Uses = [SP_64] in +} +let Uses = [SP_64], DecoderNamespace = "Mips64" in def DynAlloc64 : EffectiveAddress<"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>, - Requires<[IsN64]>; - + Requires<[IsN64]> { + let isCodeGenOnly = 1; +} +let DecoderNamespace = "Mips64" in { def RDHWR64 : ReadHardware<CPU64Regs, HWRegs64>; def DEXT : ExtBase<3, "dext", CPU64Regs>; @@ -183,12 +198,12 @@ def DINS : InsBase<7, "dins", CPU64Regs>; def DSLL64_32 : FR<0x3c, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt), "dsll\t$rd, $rt, 32", [], IIAlu>; - def SLL64_32 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt), "sll\t$rd, $rt, 0", [], IIAlu>; +let isCodeGenOnly = 1 in def SLL64_64 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPU64Regs:$rt), "sll\t$rd, $rt, 0", [], IIAlu>; - +} //===----------------------------------------------------------------------===// // Arbitrary patterns that map to one or more instructions //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index f2b842a..8206cfc 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -16,8 +16,6 @@ #include "MipsAsmPrinter.h" #include "Mips.h" #include "MipsInstrInfo.h" -#include "MipsMachineFunction.h" -#include "MipsMCInstLower.h" #include "InstPrinter/MipsInstPrinter.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "llvm/ADT/SmallString.h" @@ -45,24 +43,23 @@ using namespace llvm; -static bool isUnalignedLoadStore(unsigned Opc) { - return Opc == Mips::ULW || Opc == Mips::ULH || Opc == Mips::ULHu || - Opc == Mips::USW || Opc == Mips::USH || - Opc == Mips::ULW_P8 || Opc == 
Mips::ULH_P8 || Opc == Mips::ULHu_P8 || - Opc == Mips::USW_P8 || Opc == Mips::USH_P8 || - Opc == Mips::ULD || Opc == Mips::ULW64 || Opc == Mips::ULH64 || - Opc == Mips::ULHu64 || Opc == Mips::USD || Opc == Mips::USW64 || - Opc == Mips::USH64 || - Opc == Mips::ULD_P8 || Opc == Mips::ULW64_P8 || - Opc == Mips::ULH64_P8 || Opc == Mips::ULHu64_P8 || - Opc == Mips::USD_P8 || Opc == Mips::USW64_P8 || - Opc == Mips::USH64_P8; +void MipsAsmPrinter::EmitInstrWithMacroNoAT(const MachineInstr *MI) { + MCInst TmpInst; + + MCInstLowering.Lower(MI, TmpInst); + OutStreamer.EmitRawText(StringRef("\t.set\tmacro")); + if (MipsFI->getEmitNOAT()) + OutStreamer.EmitRawText(StringRef("\t.set\tat")); + OutStreamer.EmitInstruction(TmpInst); + if (MipsFI->getEmitNOAT()) + OutStreamer.EmitRawText(StringRef("\t.set\tnoat")); + OutStreamer.EmitRawText(StringRef("\t.set\tnomacro")); } -static bool isDirective(unsigned Opc) { - return Opc == Mips::MACRO || Opc == Mips::NOMACRO || - Opc == Mips::REORDER || Opc == Mips::NOREORDER || - Opc == Mips::ATMACRO || Opc == Mips::NOAT; +bool MipsAsmPrinter::runOnMachineFunction(MachineFunction &MF) { + MipsFI = MF.getInfo<MipsFunctionInfo>(); + AsmPrinter::runOnMachineFunction(MF); + return true; } void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { @@ -74,49 +71,70 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } - MipsMCInstLower MCInstLowering(Mang, *MF, *this); unsigned Opc = MI->getOpcode(); MCInst TmpInst0; SmallVector<MCInst, 4> MCInsts; - MCInstLowering.Lower(MI, TmpInst0); - - if (!OutStreamer.hasRawTextSupport() && isDirective(Opc)) - return; - // Enclose unaligned load or store with .macro & .nomacro directives. 
- if (isUnalignedLoadStore(Opc)) { + switch (Opc) { + case Mips::ULW: + case Mips::ULH: + case Mips::ULHu: + case Mips::USW: + case Mips::USH: + case Mips::ULW_P8: + case Mips::ULH_P8: + case Mips::ULHu_P8: + case Mips::USW_P8: + case Mips::USH_P8: + case Mips::ULD: + case Mips::ULW64: + case Mips::ULH64: + case Mips::ULHu64: + case Mips::USD: + case Mips::USW64: + case Mips::USH64: + case Mips::ULD_P8: + case Mips::ULW64_P8: + case Mips::ULH64_P8: + case Mips::ULHu64_P8: + case Mips::USD_P8: + case Mips::USW64_P8: + case Mips::USH64_P8: { if (OutStreamer.hasRawTextSupport()) { - MCInst Directive; - Directive.setOpcode(Mips::MACRO); - OutStreamer.EmitInstruction(Directive); - OutStreamer.EmitInstruction(TmpInst0); - Directive.setOpcode(Mips::NOMACRO); - OutStreamer.EmitInstruction(Directive); - } else { - MCInstLowering.LowerUnalignedLoadStore(MI, MCInsts); - for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin(); I - != MCInsts.end(); ++I) - OutStreamer.EmitInstruction(*I); + EmitInstrWithMacroNoAT(MI); + return; } + + MCInstLowering.LowerUnalignedLoadStore(MI, MCInsts); + for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin(); I + != MCInsts.end(); ++I) + OutStreamer.EmitInstruction(*I); + return; } + case Mips::CPRESTORE: { + const MachineOperand &MO = MI->getOperand(0); + assert(MO.isImm() && "CPRESTORE's operand must be an immediate."); + int64_t Offset = MO.getImm(); - if (!OutStreamer.hasRawTextSupport()) { - // Lower CPLOAD and CPRESTORE - if (Opc == Mips::CPLOAD) - MCInstLowering.LowerCPLOAD(MI, MCInsts); - else if (Opc == Mips::CPRESTORE) - MCInstLowering.LowerCPRESTORE(MI, MCInsts); + if (OutStreamer.hasRawTextSupport()) { + if (!isInt<16>(Offset)) { + EmitInstrWithMacroNoAT(MI); + return; + } + } else { + MCInstLowering.LowerCPRESTORE(Offset, MCInsts); - if (!MCInsts.empty()) { for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin(); I != MCInsts.end(); ++I) OutStreamer.EmitInstruction(*I); + return; } - } - if (Opc == Mips::SETGP01) { + break; 
+ } + case Mips::SETGP01: { MCInstLowering.LowerSETGP01(MI, MCInsts); for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin(); @@ -125,7 +143,11 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } + default: + break; + } + MCInstLowering.Lower(MI, TmpInst0); OutStreamer.EmitInstruction(TmpInst0); } @@ -269,13 +291,35 @@ void MipsAsmPrinter::EmitFunctionEntryLabel() { /// EmitFunctionBodyStart - Targets can override this to emit stuff before /// the first basic block in the function. void MipsAsmPrinter::EmitFunctionBodyStart() { + MCInstLowering.Initialize(Mang, &MF->getContext()); + emitFrameDirective(); + bool EmitCPLoad = (MF->getTarget().getRelocationModel() == Reloc::PIC_) && + Subtarget->isABI_O32() && MipsFI->globalBaseRegSet() && + MipsFI->globalBaseRegFixed(); + if (OutStreamer.hasRawTextSupport()) { SmallString<128> Str; raw_svector_ostream OS(Str); printSavedRegsBitmask(OS); OutStreamer.EmitRawText(OS.str()); + + OutStreamer.EmitRawText(StringRef("\t.set\tnoreorder")); + + // Emit .cpload directive if needed. + if (EmitCPLoad) + OutStreamer.EmitRawText(StringRef("\t.cpload\t$25")); + + OutStreamer.EmitRawText(StringRef("\t.set\tnomacro")); + if (MipsFI->getEmitNOAT()) + OutStreamer.EmitRawText(StringRef("\t.set\tnoat")); + } else if (EmitCPLoad) { + SmallVector<MCInst, 4> MCInsts; + MCInstLowering.LowerCPLOAD(MCInsts); + for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin(); + I != MCInsts.end(); ++I) + OutStreamer.EmitInstruction(*I); } } @@ -286,6 +330,9 @@ void MipsAsmPrinter::EmitFunctionBodyEnd() { // always be at the function end, and we can't emit and // break with BB logic. 
if (OutStreamer.hasRawTextSupport()) { + if (MipsFI->getEmitNOAT()) + OutStreamer.EmitRawText(StringRef("\t.set\tat")); + OutStreamer.EmitRawText(StringRef("\t.set\tmacro")); OutStreamer.EmitRawText(StringRef("\t.set\treorder")); OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName())); diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h index 473da7e..562bf9c 100644 --- a/lib/Target/Mips/MipsAsmPrinter.h +++ b/lib/Target/Mips/MipsAsmPrinter.h @@ -14,6 +14,8 @@ #ifndef MIPSASMPRINTER_H #define MIPSASMPRINTER_H +#include "MipsMachineFunction.h" +#include "MipsMCInstLower.h" #include "MipsSubtarget.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/Support/Compiler.h" @@ -28,12 +30,16 @@ class raw_ostream; class LLVM_LIBRARY_VISIBILITY MipsAsmPrinter : public AsmPrinter { + void EmitInstrWithMacroNoAT(const MachineInstr *MI); + public: const MipsSubtarget *Subtarget; + const MipsFunctionInfo *MipsFI; + MipsMCInstLower MCInstLowering; explicit MipsAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer) { + : AsmPrinter(TM, Streamer), MCInstLowering(*this) { Subtarget = &TM.getSubtarget<MipsSubtarget>(); } @@ -41,6 +47,8 @@ public: return "Mips Assembly Printer"; } + virtual bool runOnMachineFunction(MachineFunction &MF); + void EmitInstruction(const MachineInstr *MI); void printSavedRegsBitmask(raw_ostream &O); void printHex32(unsigned int Value, raw_ostream &O); diff --git a/lib/Target/Mips/MipsCondMov.td b/lib/Target/Mips/MipsCondMov.td index 075a3e8..da33680 100644 --- a/lib/Target/Mips/MipsCondMov.td +++ b/lib/Target/Mips/MipsCondMov.td @@ -95,45 +95,65 @@ multiclass MovnPats<RegisterClass CRC, RegisterClass DRC, Instruction MOVNInst, // Instantiation of instructions. 
def MOVZ_I_I : CondMovIntInt<CPURegs, CPURegs, 0x0a, "movz">; -let Predicates = [HasMips64] in { +let Predicates = [HasMips64],DecoderNamespace = "Mips64" in { def MOVZ_I_I64 : CondMovIntInt<CPURegs, CPU64Regs, 0x0a, "movz">; - def MOVZ_I64_I : CondMovIntInt<CPU64Regs, CPURegs, 0x0a, "movz">; - def MOVZ_I64_I64 : CondMovIntInt<CPU64Regs, CPU64Regs, 0x0a, "movz">; + def MOVZ_I64_I : CondMovIntInt<CPU64Regs, CPURegs, 0x0a, "movz"> { + let isCodeGenOnly = 1; + } + def MOVZ_I64_I64 : CondMovIntInt<CPU64Regs, CPU64Regs, 0x0a, "movz"> { + let isCodeGenOnly = 1; + } } def MOVN_I_I : CondMovIntInt<CPURegs, CPURegs, 0x0b, "movn">; -let Predicates = [HasMips64] in { +let Predicates = [HasMips64],DecoderNamespace = "Mips64" in { def MOVN_I_I64 : CondMovIntInt<CPURegs, CPU64Regs, 0x0b, "movn">; - def MOVN_I64_I : CondMovIntInt<CPU64Regs, CPURegs, 0x0b, "movn">; - def MOVN_I64_I64 : CondMovIntInt<CPU64Regs, CPU64Regs, 0x0b, "movn">; + def MOVN_I64_I : CondMovIntInt<CPU64Regs, CPURegs, 0x0b, "movn"> { + let isCodeGenOnly = 1; + } + def MOVN_I64_I64 : CondMovIntInt<CPU64Regs, CPU64Regs, 0x0b, "movn"> { + let isCodeGenOnly = 1; + } } def MOVZ_I_S : CondMovIntFP<CPURegs, FGR32, 16, 18, "movz.s">; def MOVZ_I64_S : CondMovIntFP<CPU64Regs, FGR32, 16, 18, "movz.s">, - Requires<[HasMips64]>; + Requires<[HasMips64]> { + let DecoderNamespace = "Mips64"; +} def MOVN_I_S : CondMovIntFP<CPURegs, FGR32, 16, 19, "movn.s">; def MOVN_I64_S : CondMovIntFP<CPU64Regs, FGR32, 16, 19, "movn.s">, - Requires<[HasMips64]>; + Requires<[HasMips64]> { + let DecoderNamespace = "Mips64"; +} let Predicates = [NotFP64bit] in { def MOVZ_I_D32 : CondMovIntFP<CPURegs, AFGR64, 17, 18, "movz.d">; def MOVN_I_D32 : CondMovIntFP<CPURegs, AFGR64, 17, 19, "movn.d">; } -let Predicates = [IsFP64bit] in { +let Predicates = [IsFP64bit],DecoderNamespace = "Mips64" in { def MOVZ_I_D64 : CondMovIntFP<CPURegs, FGR64, 17, 18, "movz.d">; - def MOVZ_I64_D64 : CondMovIntFP<CPU64Regs, FGR64, 17, 18, "movz.d">; + def MOVZ_I64_D64 : 
CondMovIntFP<CPU64Regs, FGR64, 17, 18, "movz.d"> { + let isCodeGenOnly = 1; + } def MOVN_I_D64 : CondMovIntFP<CPURegs, FGR64, 17, 19, "movn.d">; - def MOVN_I64_D64 : CondMovIntFP<CPU64Regs, FGR64, 17, 19, "movn.d">; + def MOVN_I64_D64 : CondMovIntFP<CPU64Regs, FGR64, 17, 19, "movn.d"> { + let isCodeGenOnly = 1; + } } def MOVT_I : CondMovFPInt<CPURegs, MipsCMovFP_T, 1, "movt">; def MOVT_I64 : CondMovFPInt<CPU64Regs, MipsCMovFP_T, 1, "movt">, - Requires<[HasMips64]>; + Requires<[HasMips64]> { + let DecoderNamespace = "Mips64"; +} def MOVF_I : CondMovFPInt<CPURegs, MipsCMovFP_F, 0, "movf">; def MOVF_I64 : CondMovFPInt<CPU64Regs, MipsCMovFP_F, 0, "movf">, - Requires<[HasMips64]>; + Requires<[HasMips64]> { + let DecoderNamespace = "Mips64"; +} def MOVT_S : CondMovFPFP<FGR32, MipsCMovFP_T, 16, 1, "movt.s">; def MOVF_S : CondMovFPFP<FGR32, MipsCMovFP_F, 16, 0, "movf.s">; @@ -142,7 +162,7 @@ let Predicates = [NotFP64bit] in { def MOVT_D32 : CondMovFPFP<AFGR64, MipsCMovFP_T, 17, 1, "movt.d">; def MOVF_D32 : CondMovFPFP<AFGR64, MipsCMovFP_F, 17, 0, "movf.d">; } -let Predicates = [IsFP64bit] in { +let Predicates = [IsFP64bit], DecoderNamespace = "Mips64" in { def MOVT_D64 : CondMovFPFP<FGR64, MipsCMovFP_T, 17, 1, "movt.d">; def MOVF_D64 : CondMovFPFP<FGR64, MipsCMovFP_F, 17, 0, "movf.d">; } diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp index ebfbb4a..f8ea3d0 100644 --- a/lib/Target/Mips/MipsFrameLowering.cpp +++ b/lib/Target/Mips/MipsFrameLowering.cpp @@ -108,9 +108,6 @@ static void expandLargeImm(unsigned Reg, int64_t Imm, bool IsN64, AnalyzeImm.Analyze(Imm, IsN64 ? 64 : 32, false /* LastInstrIsADDiu */); MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin(); - // FIXME: change this when mips goes MC". - BuildMI(MBB, II, DL, TII.get(Mips::NOAT)); - // The first instruction can be a LUi, which is different from other // instructions (ADDiu, ORI and SLL) in that it does not have a register // operand. 
@@ -127,7 +124,6 @@ static void expandLargeImm(unsigned Reg, int64_t Imm, bool IsN64, .addImm(SignExtend64<16>(Inst->ImmOpnd)); BuildMI(MBB, II, DL, TII.get(ADDu), Reg).addReg(Reg).addReg(ATReg); - BuildMI(MBB, II, DL, TII.get(Mips::ATMACRO)); } void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { @@ -159,18 +155,22 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { // Update stack size MFI->setStackSize(StackSize); - BuildMI(MBB, MBBI, dl, TII.get(Mips::NOREORDER)); - BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO)); - // Emit instructions that set the global base register if the target ABI is // O32. - if (isPIC && MipsFI->globalBaseRegSet() && STI.isABI_O32()) { - if (MipsFI->globalBaseRegFixed()) - BuildMI(MBB, llvm::prior(MBBI), dl, TII.get(Mips::CPLOAD)) - .addReg(RegInfo->getPICCallReg()); - else + if (isPIC && MipsFI->globalBaseRegSet() && STI.isABI_O32() && + !MipsFI->globalBaseRegFixed()) { // See MipsInstrInfo.td for explanation. - BuildMI(MBB, MBBI, dl, TII.get(Mips:: SETGP01), Mips::V0); + MachineBasicBlock *NewEntry = MF.CreateMachineBasicBlock(); + MF.insert(&MBB, NewEntry); + NewEntry->addSuccessor(&MBB); + + // Copy live in registers. + for (MachineBasicBlock::livein_iterator R = MBB.livein_begin(); + R != MBB.livein_end(); ++R) + NewEntry->addLiveIn(*R); + + BuildMI(*NewEntry, NewEntry->begin(), dl, TII.get(Mips:: SETGP01), + Mips::V0); } // No need to allocate space on the stack. @@ -183,8 +183,10 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { // Adjust stack. if (isInt<16>(-StackSize)) // addi sp, sp, (-stacksize) BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(SP).addImm(-StackSize); - else // Expand immediate that doesn't fit in 16-bit. + else { // Expand immediate that doesn't fit in 16-bit. 
+ MipsFI->setEmitNOAT(); expandLargeImm(SP, -StackSize, STI.isABI_N64(), TII, MBB, MBBI, dl); + } // emit ".cfi_def_cfa_offset StackSize" MCSymbol *AdjustSPLabel = MMI.getContext().CreateTempSymbol(); @@ -254,12 +256,8 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { // Restore GP from the saved stack location if (MipsFI->needGPSaveRestore()) { unsigned Offset = MFI->getObjectOffset(MipsFI->getGPFI()); - BuildMI(MBB, MBBI, dl, TII.get(Mips::CPRESTORE)).addImm(Offset); - - if (Offset >= 0x8000) { - BuildMI(MBB, llvm::prior(MBBI), dl, TII.get(Mips::MACRO)); - BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO)); - } + BuildMI(MBB, MBBI, dl, TII.get(Mips::CPRESTORE)).addImm(Offset) + .addReg(Mips::GP); } } diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 536879e..f0651c6 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -142,6 +142,7 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) { if (Subtarget.isABI_N64()) { MF.getRegInfo().addLiveIn(Mips::T9_64); + MBB.addLiveIn(Mips::T9_64); // lui $v0, %hi(%neg(%gp_rel(fname))) // daddu $v1, $v0, $t9 @@ -163,6 +164,7 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) { .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO); } else { MF.getRegInfo().addLiveIn(Mips::T9); + MBB.addLiveIn(Mips::T9); if (Subtarget.isABI_N32()) { // lui $v0, %hi(%neg(%gp_rel(fname))) diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index ecde5b6..6a23bc3 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -147,6 +147,11 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + if (!TM.Options.NoNaNsFPMath) { + setOperationAction(ISD::FABS, MVT::f32, Custom); + setOperationAction(ISD::FABS, MVT::f64, Custom); + } + if (HasMips64) { 
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); setOperationAction(ISD::BlockAddress, MVT::i64, Custom); @@ -208,6 +213,13 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::FEXP, MVT::f32, Expand); setOperationAction(ISD::FMA, MVT::f32, Expand); setOperationAction(ISD::FMA, MVT::f64, Expand); + setOperationAction(ISD::FREM, MVT::f32, Expand); + setOperationAction(ISD::FREM, MVT::f64, Expand); + + if (!TM.Options.NoNaNsFPMath) { + setOperationAction(ISD::FNEG, MVT::f32, Expand); + setOperationAction(ISD::FNEG, MVT::f64, Expand); + } setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand); @@ -732,6 +744,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); + case ISD::FABS: return LowerFABS(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG); case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG); @@ -1541,7 +1554,7 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, EVT ValTy = Op.getValueType(); bool HasGotOfst = (GV->hasInternalLinkage() || (GV->hasLocalLinkage() && !isa<Function>(GV))); - unsigned GotFlag = IsN64 ? + unsigned GotFlag = HasMips64 ? (HasGotOfst ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT_DISP) : (HasGotOfst ? MipsII::MO_GOT : MipsII::MO_GOT16); SDValue GA = DAG.getTargetGlobalAddress(GV, dl, ValTy, 0, GotFlag); @@ -1553,8 +1566,8 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, if (!HasGotOfst) return ResNode; SDValue GALo = DAG.getTargetGlobalAddress(GV, dl, ValTy, 0, - IsN64 ? MipsII::MO_GOT_OFST : - MipsII::MO_ABS_LO); + HasMips64 ? 
MipsII::MO_GOT_OFST : + MipsII::MO_ABS_LO); SDValue Lo = DAG.getNode(MipsISD::Lo, dl, ValTy, GALo); return DAG.getNode(ISD::ADD, dl, ValTy, ResNode, Lo); } @@ -1575,8 +1588,8 @@ SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op, } EVT ValTy = Op.getValueType(); - unsigned GOTFlag = IsN64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT; - unsigned OFSTFlag = IsN64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO; + unsigned GOTFlag = HasMips64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT; + unsigned OFSTFlag = HasMips64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO; SDValue BAGOTOffset = DAG.getBlockAddress(BA, ValTy, true, GOTFlag); BAGOTOffset = DAG.getNode(MipsISD::Wrapper, dl, ValTy, GetGlobalReg(DAG, ValTy), BAGOTOffset); @@ -1679,8 +1692,8 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) const HiPart = DAG.getNode(MipsISD::Hi, dl, PtrVT, JTI); JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MipsII::MO_ABS_LO); } else {// Emit Load from Global Pointer - unsigned GOTFlag = IsN64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT; - unsigned OfstFlag = IsN64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO; + unsigned GOTFlag = HasMips64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT; + unsigned OfstFlag = HasMips64 ? 
MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO; JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, GOTFlag); JTI = DAG.getNode(MipsISD::Wrapper, dl, PtrVT, GetGlobalReg(DAG, PtrVT), JTI); @@ -1712,7 +1725,7 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const // SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32); // ResNode = DAG.getNode(ISD::ADD, MVT::i32, GOT, GPRelNode); - if (getTargetMachine().getRelocationModel() != Reloc::PIC_) { + if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) { SDValue CPHi = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), N->getOffset(), MipsII::MO_ABS_HI); SDValue CPLo = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), @@ -1722,8 +1735,8 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo); } else { EVT ValTy = Op.getValueType(); - unsigned GOTFlag = IsN64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT; - unsigned OFSTFlag = IsN64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO; + unsigned GOTFlag = HasMips64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT; + unsigned OFSTFlag = HasMips64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO; SDValue CP = DAG.getTargetConstantPool(C, ValTy, N->getAlignment(), N->getOffset(), GOTFlag); CP = DAG.getNode(MipsISD::Wrapper, dl, ValTy, GetGlobalReg(DAG, ValTy), CP); @@ -1754,66 +1767,162 @@ SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachinePointerInfo(SV), false, false, 0); } -// Called if the size of integer registers is large enough to hold the whole -// floating point number. -static SDValue LowerFCOPYSIGNLargeIntReg(SDValue Op, SelectionDAG &DAG) { - // FIXME: Use ext/ins instructions if target architecture is Mips32r2. 
- EVT ValTy = Op.getValueType(); - EVT IntValTy = MVT::getIntegerVT(ValTy.getSizeInBits()); - uint64_t Mask = (uint64_t)1 << (ValTy.getSizeInBits() - 1); - DebugLoc dl = Op.getDebugLoc(); - SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, IntValTy, Op.getOperand(0)); - SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, IntValTy, Op.getOperand(1)); - SDValue And0 = DAG.getNode(ISD::AND, dl, IntValTy, Op0, - DAG.getConstant(Mask - 1, IntValTy)); - SDValue And1 = DAG.getNode(ISD::AND, dl, IntValTy, Op1, - DAG.getConstant(Mask, IntValTy)); - SDValue Result = DAG.getNode(ISD::OR, dl, IntValTy, And0, And1); - return DAG.getNode(ISD::BITCAST, dl, ValTy, Result); -} - -// Called if the size of integer registers is not large enough to hold the whole -// floating point number (e.g. f64 & 32-bit integer register). -static SDValue -LowerFCOPYSIGNSmallIntReg(SDValue Op, SelectionDAG &DAG, bool isLittle) { - // FIXME: - // Use ext/ins instructions if target architecture is Mips32r2. - // Eliminate redundant mfc1 and mtc1 instructions. - unsigned LoIdx = 0, HiIdx = 1; - - if (!isLittle) - std::swap(LoIdx, HiIdx); +static SDValue LowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG, bool HasR2) { + EVT TyX = Op.getOperand(0).getValueType(); + EVT TyY = Op.getOperand(1).getValueType(); + SDValue Const1 = DAG.getConstant(1, MVT::i32); + SDValue Const31 = DAG.getConstant(31, MVT::i32); + DebugLoc DL = Op.getDebugLoc(); + SDValue Res; + + // If operand is of type f64, extract the upper 32-bit. Otherwise, bitcast it + // to i32. + SDValue X = (TyX == MVT::f32) ? + DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op.getOperand(0)) : + DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0), + Const1); + SDValue Y = (TyY == MVT::f32) ? 
+ DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op.getOperand(1)) : + DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(1), + Const1); + + if (HasR2) { + // ext E, Y, 31, 1 ; extract bit31 of Y + // ins X, E, 31, 1 ; insert extracted bit at bit31 of X + SDValue E = DAG.getNode(MipsISD::Ext, DL, MVT::i32, Y, Const31, Const1); + Res = DAG.getNode(MipsISD::Ins, DL, MVT::i32, E, Const31, Const1, X); + } else { + // sll SllX, X, 1 + // srl SrlX, SllX, 1 + // srl SrlY, Y, 31 + // sll SllY, SrlX, 31 + // or Or, SrlX, SllY + SDValue SllX = DAG.getNode(ISD::SHL, DL, MVT::i32, X, Const1); + SDValue SrlX = DAG.getNode(ISD::SRL, DL, MVT::i32, SllX, Const1); + SDValue SrlY = DAG.getNode(ISD::SRL, DL, MVT::i32, Y, Const31); + SDValue SllY = DAG.getNode(ISD::SHL, DL, MVT::i32, SrlY, Const31); + Res = DAG.getNode(ISD::OR, DL, MVT::i32, SrlX, SllY); + } - DebugLoc dl = Op.getDebugLoc(); - SDValue Word0 = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32, - Op.getOperand(0), - DAG.getConstant(LoIdx, MVT::i32)); - SDValue Hi0 = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32, - Op.getOperand(0), DAG.getConstant(HiIdx, MVT::i32)); - SDValue Hi1 = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32, - Op.getOperand(1), DAG.getConstant(HiIdx, MVT::i32)); - SDValue And0 = DAG.getNode(ISD::AND, dl, MVT::i32, Hi0, - DAG.getConstant(0x7fffffff, MVT::i32)); - SDValue And1 = DAG.getNode(ISD::AND, dl, MVT::i32, Hi1, - DAG.getConstant(0x80000000, MVT::i32)); - SDValue Word1 = DAG.getNode(ISD::OR, dl, MVT::i32, And0, And1); + if (TyX == MVT::f32) + return DAG.getNode(ISD::BITCAST, DL, Op.getOperand(0).getValueType(), Res); - if (!isLittle) - std::swap(Word0, Word1); + SDValue LowX = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, + Op.getOperand(0), DAG.getConstant(0, MVT::i32)); + return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, LowX, Res); +} - return DAG.getNode(MipsISD::BuildPairF64, dl, MVT::f64, Word0, Word1); +static SDValue LowerFCOPYSIGN64(SDValue Op, 
SelectionDAG &DAG, bool HasR2) { + unsigned WidthX = Op.getOperand(0).getValueSizeInBits(); + unsigned WidthY = Op.getOperand(1).getValueSizeInBits(); + EVT TyX = MVT::getIntegerVT(WidthX), TyY = MVT::getIntegerVT(WidthY); + SDValue Const1 = DAG.getConstant(1, MVT::i32); + DebugLoc DL = Op.getDebugLoc(); + + // Bitcast to integer nodes. + SDValue X = DAG.getNode(ISD::BITCAST, DL, TyX, Op.getOperand(0)); + SDValue Y = DAG.getNode(ISD::BITCAST, DL, TyY, Op.getOperand(1)); + + if (HasR2) { + // ext E, Y, width(Y) - 1, 1 ; extract bit width(Y)-1 of Y + // ins X, E, width(X) - 1, 1 ; insert extracted bit at bit width(X)-1 of X + SDValue E = DAG.getNode(MipsISD::Ext, DL, TyY, Y, + DAG.getConstant(WidthY - 1, MVT::i32), Const1); + + if (WidthX > WidthY) + E = DAG.getNode(ISD::ZERO_EXTEND, DL, TyX, E); + else if (WidthY > WidthX) + E = DAG.getNode(ISD::TRUNCATE, DL, TyX, E); + + SDValue I = DAG.getNode(MipsISD::Ins, DL, TyX, E, + DAG.getConstant(WidthX - 1, MVT::i32), Const1, X); + return DAG.getNode(ISD::BITCAST, DL, Op.getOperand(0).getValueType(), I); + } + + // (d)sll SllX, X, 1 + // (d)srl SrlX, SllX, 1 + // (d)srl SrlY, Y, width(Y)-1 + // (d)sll SllY, SrlX, width(Y)-1 + // or Or, SrlX, SllY + SDValue SllX = DAG.getNode(ISD::SHL, DL, TyX, X, Const1); + SDValue SrlX = DAG.getNode(ISD::SRL, DL, TyX, SllX, Const1); + SDValue SrlY = DAG.getNode(ISD::SRL, DL, TyY, Y, + DAG.getConstant(WidthY - 1, MVT::i32)); + + if (WidthX > WidthY) + SrlY = DAG.getNode(ISD::ZERO_EXTEND, DL, TyX, SrlY); + else if (WidthY > WidthX) + SrlY = DAG.getNode(ISD::TRUNCATE, DL, TyX, SrlY); + + SDValue SllY = DAG.getNode(ISD::SHL, DL, TyX, SrlY, + DAG.getConstant(WidthX - 1, MVT::i32)); + SDValue Or = DAG.getNode(ISD::OR, DL, TyX, SrlX, SllY); + return DAG.getNode(ISD::BITCAST, DL, Op.getOperand(0).getValueType(), Or); } SDValue MipsTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { - EVT Ty = Op.getValueType(); + if (Subtarget->hasMips64()) + return LowerFCOPYSIGN64(Op, DAG, 
Subtarget->hasMips32r2()); - assert(Ty == MVT::f32 || Ty == MVT::f64); + return LowerFCOPYSIGN32(Op, DAG, Subtarget->hasMips32r2()); +} - if (Ty == MVT::f32 || HasMips64) - return LowerFCOPYSIGNLargeIntReg(Op, DAG); +static SDValue LowerFABS32(SDValue Op, SelectionDAG &DAG, bool HasR2) { + SDValue Res, Const1 = DAG.getConstant(1, MVT::i32); + DebugLoc DL = Op.getDebugLoc(); + + // If operand is of type f64, extract the upper 32-bit. Otherwise, bitcast it + // to i32. + SDValue X = (Op.getValueType() == MVT::f32) ? + DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op.getOperand(0)) : + DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0), + Const1); + + // Clear MSB. + if (HasR2) + Res = DAG.getNode(MipsISD::Ins, DL, MVT::i32, + DAG.getRegister(Mips::ZERO, MVT::i32), + DAG.getConstant(31, MVT::i32), Const1, X); + else { + SDValue SllX = DAG.getNode(ISD::SHL, DL, MVT::i32, X, Const1); + Res = DAG.getNode(ISD::SRL, DL, MVT::i32, SllX, Const1); + } + + if (Op.getValueType() == MVT::f32) + return DAG.getNode(ISD::BITCAST, DL, MVT::f32, Res); + + SDValue LowX = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, + Op.getOperand(0), DAG.getConstant(0, MVT::i32)); + return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, LowX, Res); +} + +static SDValue LowerFABS64(SDValue Op, SelectionDAG &DAG, bool HasR2) { + SDValue Res, Const1 = DAG.getConstant(1, MVT::i32); + DebugLoc DL = Op.getDebugLoc(); + + // Bitcast to integer node. + SDValue X = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Op.getOperand(0)); + + // Clear MSB. 
+ if (HasR2) + Res = DAG.getNode(MipsISD::Ins, DL, MVT::i64, + DAG.getRegister(Mips::ZERO_64, MVT::i64), + DAG.getConstant(63, MVT::i32), Const1, X); + else { + SDValue SllX = DAG.getNode(ISD::SHL, DL, MVT::i64, X, Const1); + Res = DAG.getNode(ISD::SRL, DL, MVT::i64, SllX, Const1); + } - return LowerFCOPYSIGNSmallIntReg(Op, DAG, Subtarget->isLittle()); + return DAG.getNode(ISD::BITCAST, DL, MVT::f64, Res); +} + +SDValue +MipsTargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const { + if (Subtarget->hasMips64() && (Op.getValueType() == MVT::f64)) + return LowerFABS64(Op, DAG, Subtarget->hasMips32r2()); + + return LowerFABS32(Op, DAG, Subtarget->hasMips32r2()); } SDValue MipsTargetLowering:: @@ -2545,7 +2654,8 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, static void ReadByValArg(MachineFunction &MF, SDValue Chain, DebugLoc dl, std::vector<SDValue>& OutChains, SelectionDAG &DAG, unsigned NumWords, SDValue FIN, - const CCValAssign &VA, const ISD::ArgFlagsTy& Flags) { + const CCValAssign &VA, const ISD::ArgFlagsTy& Flags, + const Argument *FuncArg) { unsigned LocMem = VA.getLocMemOffset(); unsigned FirstWord = LocMem / 4; @@ -2560,8 +2670,8 @@ static void ReadByValArg(MachineFunction &MF, SDValue Chain, DebugLoc dl, SDValue StorePtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIN, DAG.getConstant(i * 4, MVT::i32)); SDValue Store = DAG.getStore(Chain, dl, DAG.getRegister(Reg, MVT::i32), - StorePtr, MachinePointerInfo(), false, - false, 0); + StorePtr, MachinePointerInfo(FuncArg, i * 4), + false, false, 0); OutChains.push_back(Store); } } @@ -2573,7 +2683,7 @@ CopyMips64ByValRegs(MachineFunction &MF, SDValue Chain, DebugLoc dl, const CCValAssign &VA, const ISD::ArgFlagsTy& Flags, MachineFrameInfo *MFI, bool IsRegLoc, SmallVectorImpl<SDValue> &InVals, MipsFunctionInfo *MipsFI, - EVT PtrTy) { + EVT PtrTy, const Argument *FuncArg) { const uint16_t *Reg = Mips64IntRegs + 8; int FOOffset; // Frame object offset from virtual frame pointer. 
@@ -2597,8 +2707,8 @@ CopyMips64ByValRegs(MachineFunction &MF, SDValue Chain, DebugLoc dl, SDValue StorePtr = DAG.getNode(ISD::ADD, dl, PtrTy, FIN, DAG.getConstant(I * 8, PtrTy)); SDValue Store = DAG.getStore(Chain, dl, DAG.getRegister(VReg, MVT::i64), - StorePtr, MachinePointerInfo(), false, - false, 0); + StorePtr, MachinePointerInfo(FuncArg, I * 8), + false, false, 0); OutChains.push_back(Store); } @@ -2634,9 +2744,11 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, else CCInfo.AnalyzeFormalArguments(Ins, CC_Mips); + Function::const_arg_iterator FuncArg = + DAG.getMachineFunction().getFunction()->arg_begin(); int LastFI = 0;// MipsFI->LastInArgFI is 0 at the entry of this function. - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i, ++FuncArg) { CCValAssign &VA = ArgLocs[i]; EVT ValVT = VA.getValVT(); ISD::ArgFlagsTy Flags = Ins[i].Flags; @@ -2651,11 +2763,12 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, true); SDValue FIN = DAG.getFrameIndex(LastFI, getPointerTy()); InVals.push_back(FIN); - ReadByValArg(MF, Chain, dl, OutChains, DAG, NumWords, FIN, VA, Flags); + ReadByValArg(MF, Chain, dl, OutChains, DAG, NumWords, FIN, VA, Flags, + &*FuncArg); } else // N32/64 LastFI = CopyMips64ByValRegs(MF, Chain, dl, OutChains, DAG, VA, Flags, MFI, IsRegLoc, InVals, MipsFI, - getPointerTy()); + getPointerTy(), &*FuncArg); continue; } diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 66f45cd..c36f40f 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -131,6 +131,7 @@ namespace llvm { SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMEMBARRIER(SDValue Op, 
SelectionDAG& DAG) const; SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const; diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index fe5eaec..14d8f1e 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -47,17 +47,17 @@ def MipsExtractElementF64 : SDNode<"MipsISD::ExtractElementF64", SDT_MipsExtractElementF64>; // Operand for printing out a condition code. -let PrintMethod = "printFCCOperand" in +let PrintMethod = "printFCCOperand", DecoderMethod = "DecodeCondCode" in def condcode : Operand<i32>; //===----------------------------------------------------------------------===// // Feature predicates. //===----------------------------------------------------------------------===// -def IsFP64bit : Predicate<"Subtarget.isFP64bit()">; -def NotFP64bit : Predicate<"!Subtarget.isFP64bit()">; -def IsSingleFloat : Predicate<"Subtarget.isSingleFloat()">; -def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">; +def IsFP64bit : Predicate<"Subtarget.isFP64bit()">, AssemblerPredicate<"FeatureFP64Bit">; +def NotFP64bit : Predicate<"!Subtarget.isFP64bit()">, AssemblerPredicate<"!FeatureFP64Bit">; +def IsSingleFloat : Predicate<"Subtarget.isSingleFloat()">, AssemblerPredicate<"FeatureSingleFloat">; +def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">, AssemblerPredicate<"!FeatureSingleFloat">; // FP immediate patterns. def fpimm0 : PatLeaf<(fpimm), [{ @@ -83,6 +83,7 @@ def fpimm0neg : PatLeaf<(fpimm), [{ //===----------------------------------------------------------------------===// // FP load. 
+let DecoderMethod = "DecodeFMem" in { class FPLoad<bits<6> op, string opstr, RegisterClass RC, Operand MemOpnd>: FMem<op, (outs RC:$ft), (ins MemOpnd:$addr), !strconcat(opstr, "\t$ft, $addr"), [(set RC:$ft, (load_a addr:$addr))], @@ -93,7 +94,7 @@ class FPStore<bits<6> op, string opstr, RegisterClass RC, Operand MemOpnd>: FMem<op, (outs), (ins RC:$ft, MemOpnd:$addr), !strconcat(opstr, "\t$ft, $addr"), [(store_a RC:$ft, addr:$addr)], IIStore>; - +} // FP indexed load. class FPIdxLoad<bits<6> funct, string opstr, RegisterClass DRC, RegisterClass PRC, PatFrag FOp>: @@ -118,11 +119,13 @@ multiclass FFR1_W_M<bits<6> funct, string opstr> { def _D32 : FFR1<funct, 17, opstr, "w.d", FGR32, AFGR64>, Requires<[NotFP64bit]>; def _D64 : FFR1<funct, 17, opstr, "w.d", FGR32, FGR64>, - Requires<[IsFP64bit]>; + Requires<[IsFP64bit]> { + let DecoderNamespace = "Mips64"; + } } // Instructions that convert an FP value to 64-bit fixed point. -let Predicates = [IsFP64bit] in +let Predicates = [IsFP64bit], DecoderNamespace = "Mips64" in multiclass FFR1_L_M<bits<6> funct, string opstr> { def _S : FFR1<funct, 16, opstr, "l.s", FGR64, FGR32>; def _D64 : FFR1<funct, 17, opstr, "l.d", FGR64, FGR64>; @@ -134,7 +137,9 @@ multiclass FFR1P_M<bits<6> funct, string opstr, SDNode OpNode> { def _D32 : FFR1P<funct, 17, opstr, "d", AFGR64, AFGR64, OpNode>, Requires<[NotFP64bit]>; def _D64 : FFR1P<funct, 17, opstr, "d", FGR64, FGR64, OpNode>, - Requires<[IsFP64bit]>; + Requires<[IsFP64bit]> { + let DecoderNamespace = "Mips64"; + } } multiclass FFR2P_M<bits<6> funct, string opstr, SDNode OpNode, bit isComm = 0> { @@ -143,9 +148,11 @@ multiclass FFR2P_M<bits<6> funct, string opstr, SDNode OpNode, bit isComm = 0> { def _D32 : FFR2P<funct, 17, opstr, "d", AFGR64, OpNode>, Requires<[NotFP64bit]>; def _D64 : FFR2P<funct, 17, opstr, "d", FGR64, OpNode>, - Requires<[IsFP64bit]>; + Requires<[IsFP64bit]> { + let DecoderNamespace = "Mips64"; } } +} // FP madd/msub/nmadd/nmsub instruction classes. 
class FMADDSUB<bits<3> funct, bits<3> fmt, string opstr, string fmtstr, @@ -172,9 +179,11 @@ defm CEIL_L : FFR1_L_M<0xa, "ceil">; defm FLOOR_W : FFR1_W_M<0xf, "floor">; defm FLOOR_L : FFR1_L_M<0xb, "floor">; defm CVT_W : FFR1_W_M<0x24, "cvt">; -defm CVT_L : FFR1_L_M<0x25, "cvt">; +//defm CVT_L : FFR1_L_M<0x25, "cvt">; def CVT_S_W : FFR1<0x20, 20, "cvt", "s.w", FGR32, FGR32>; +def CVT_L_S : FFR1<0x25, 16, "cvt", "l.s", FGR64, FGR32>; +def CVT_L_D64: FFR1<0x25, 17, "cvt", "l.d", FGR64, FGR64>; let Predicates = [NotFP64bit] in { def CVT_S_D32 : FFR1<0x20, 17, "cvt", "s.d", FGR32, AFGR64>; @@ -182,7 +191,7 @@ let Predicates = [NotFP64bit] in { def CVT_D32_S : FFR1<0x21, 16, "cvt", "d.s", AFGR64, FGR32>; } -let Predicates = [IsFP64bit] in { +let Predicates = [IsFP64bit], DecoderNamespace = "Mips64" in { def CVT_S_D64 : FFR1<0x20, 17, "cvt", "s.d", FGR32, FGR64>; def CVT_S_L : FFR1<0x20, 21, "cvt", "s.l", FGR32, FGR64>; def CVT_D64_W : FFR1<0x21, 20, "cvt", "d.w", FGR64, FGR32>; @@ -190,8 +199,10 @@ let Predicates = [IsFP64bit] in { def CVT_D64_L : FFR1<0x21, 21, "cvt", "d.l", FGR64, FGR64>; } -defm FABS : FFR1P_M<0x5, "abs", fabs>; -defm FNEG : FFR1P_M<0x7, "neg", fneg>; +let Predicates = [NoNaNsFPMath] in { + defm FABS : FFR1P_M<0x5, "abs", fabs>; + defm FNEG : FFR1P_M<0x7, "neg", fneg>; +} defm FSQRT : FFR1P_M<0x4, "sqrt", fsqrt>; // The odd-numbered registers are only referenced when doing loads, @@ -233,14 +244,20 @@ def FMOV_S : FFR1<0x6, 16, "mov", "s", FGR32, FGR32>; def FMOV_D32 : FFR1<0x6, 17, "mov", "d", AFGR64, AFGR64>, Requires<[NotFP64bit]>; def FMOV_D64 : FFR1<0x6, 17, "mov", "d", FGR64, FGR64>, - Requires<[IsFP64bit]>; + Requires<[IsFP64bit]> { + let DecoderNamespace = "Mips64"; +} /// Floating Point Memory Instructions -let Predicates = [IsN64] in { +let Predicates = [IsN64], DecoderNamespace = "Mips64" in { def LWC1_P8 : FPLoad<0x31, "lwc1", FGR32, mem64>; def SWC1_P8 : FPStore<0x39, "swc1", FGR32, mem64>; - def LDC164_P8 : FPLoad<0x35, "ldc1", FGR64, 
mem64>; - def SDC164_P8 : FPStore<0x3d, "sdc1", FGR64, mem64>; + def LDC164_P8 : FPLoad<0x35, "ldc1", FGR64, mem64> { + let isCodeGenOnly =1; + } + def SDC164_P8 : FPStore<0x3d, "sdc1", FGR64, mem64> { + let isCodeGenOnly =1; + } } let Predicates = [NotN64] in { @@ -248,7 +265,7 @@ let Predicates = [NotN64] in { def SWC1 : FPStore<0x39, "swc1", FGR32, mem>; } -let Predicates = [NotN64, HasMips64] in { +let Predicates = [NotN64, HasMips64], DecoderNamespace = "Mips64" in { def LDC164 : FPLoad<0x35, "ldc1", FGR64, mem>; def SDC164 : FPStore<0x3d, "sdc1", FGR64, mem>; } @@ -271,13 +288,13 @@ let Predicates = [HasMips32r2, NotMips64] in { def SDXC1 : FPIdxStore<0x9, "sdxc1", AFGR64, CPURegs, store_a>; } -let Predicates = [HasMips64, NotN64] in { +let Predicates = [HasMips64, NotN64], DecoderNamespace="Mips64" in { def LDXC164 : FPIdxLoad<0x1, "ldxc1", FGR64, CPURegs, load_a>; def SDXC164 : FPIdxStore<0x9, "sdxc1", FGR64, CPURegs, store_a>; } // n64 -let Predicates = [IsN64] in { +let Predicates = [IsN64], isCodeGenOnly=1 in { def LWXC1_P8 : FPIdxLoad<0x0, "lwxc1", FGR32, CPU64Regs, load_a>; def LUXC1_P8 : FPIdxLoad<0x5, "luxc1", FGR32, CPU64Regs, load_u>; def LDXC164_P8 : FPIdxLoad<0x1, "ldxc1", FGR64, CPU64Regs, load_a>; @@ -312,12 +329,12 @@ let Predicates = [HasMips32r2, NotFP64bit, NoNaNsFPMath] in { def NMSUB_D32 : FNMADDSUB<0x7, 1, "nmsub", "d", fsub, AFGR64>; } -let Predicates = [HasMips32r2, IsFP64bit] in { +let Predicates = [HasMips32r2, IsFP64bit], isCodeGenOnly=1 in { def MADD_D64 : FMADDSUB<0x4, 1, "madd", "d", fadd, FGR64>; def MSUB_D64 : FMADDSUB<0x5, 1, "msub", "d", fsub, FGR64>; } -let Predicates = [HasMips32r2, IsFP64bit, NoNaNsFPMath] in { +let Predicates = [HasMips32r2, IsFP64bit, NoNaNsFPMath], isCodeGenOnly=1 in { def NMADD_D64 : FNMADDSUB<0x6, 1, "nmadd", "d", fadd, FGR64>; def NMSUB_D64 : FNMADDSUB<0x7, 1, "nmsub", "d", fsub, FGR64>; } @@ -340,9 +357,10 @@ let isBranch=1, isTerminator=1, hasDelaySlot=1, base=0x8, Uses=[FCR31] in let Inst{16} = 
tf; } +let DecoderMethod = "DecodeBC1" in { def BC1F : FBRANCH<0, 0, MIPS_BRANCH_F, "bc1f">; def BC1T : FBRANCH<0, 1, MIPS_BRANCH_T, "bc1t">; - +} //===----------------------------------------------------------------------===// // Floating Point Flag Conditions //===----------------------------------------------------------------------===// @@ -374,7 +392,9 @@ class FCMP<bits<5> fmt, RegisterClass RC, string typestr> : let Defs=[FCR31] in { def FCMP_S32 : FCMP<0x10, FGR32, "s">; def FCMP_D32 : FCMP<0x11, AFGR64, "d">, Requires<[NotFP64bit]>; - def FCMP_D64 : FCMP<0x11, FGR64, "d">, Requires<[IsFP64bit]>; + def FCMP_D64 : FCMP<0x11, FGR64, "d">, Requires<[IsFP64bit]> { + let DecoderNamespace = "Mips64"; + } } //===----------------------------------------------------------------------===// @@ -436,13 +456,13 @@ let Predicates = [IsFP64bit] in { // Patterns for unaligned floating point loads and stores. let Predicates = [HasMips32r2Or64, NotN64] in { - def : Pat<(f32 (load_u CPURegs:$addr)), (LUXC1 CPURegs:$addr, ZERO)>; + def : Pat<(f32 (load_u CPURegs:$addr)), (LUXC1 CPURegs:$addr, ZERO)>; def : Pat<(store_u FGR32:$src, CPURegs:$addr), (SUXC1 FGR32:$src, CPURegs:$addr, ZERO)>; } let Predicates = [IsN64] in { - def : Pat<(f32 (load_u CPU64Regs:$addr)), (LUXC1_P8 CPU64Regs:$addr, ZERO_64)>; + def : Pat<(f32 (load_u CPU64Regs:$addr)), (LUXC1_P8 CPU64Regs:$addr, ZERO_64)>; def : Pat<(store_u FGR32:$src, CPU64Regs:$addr), (SUXC1_P8 FGR32:$src, CPU64Regs:$addr, ZERO_64)>; } diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td index 4555303..841eba0 100644 --- a/lib/Target/Mips/MipsInstrFormats.td +++ b/lib/Target/Mips/MipsInstrFormats.td @@ -45,6 +45,8 @@ class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern, let Namespace = "Mips"; + let Size = 4; + bits<6> Opcode = 0; // Top 6 bits are the 'opcode' field @@ -64,6 +66,10 @@ class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern, // TSFlags layout should be kept 
in sync with MipsInstrInfo.h. let TSFlags{3-0} = FormBits; + + let DecoderNamespace = "Mips"; + + field bits<32> SoftFail = 0; } // Mips Pseudo Instructions Format diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index bc85fa6..873d2bd 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -121,21 +121,36 @@ def MipsIns : SDNode<"MipsISD::Ins", SDT_Ins>; //===----------------------------------------------------------------------===// // Mips Instruction Predicate Definitions. //===----------------------------------------------------------------------===// -def HasSEInReg : Predicate<"Subtarget.hasSEInReg()">; -def HasBitCount : Predicate<"Subtarget.hasBitCount()">; -def HasSwap : Predicate<"Subtarget.hasSwap()">; -def HasCondMov : Predicate<"Subtarget.hasCondMov()">; -def HasMips32 : Predicate<"Subtarget.hasMips32()">; -def HasMips32r2 : Predicate<"Subtarget.hasMips32r2()">; -def HasMips64 : Predicate<"Subtarget.hasMips64()">; -def HasMips32r2Or64 : Predicate<"Subtarget.hasMips32r2Or64()">; -def NotMips64 : Predicate<"!Subtarget.hasMips64()">; -def HasMips64r2 : Predicate<"Subtarget.hasMips64r2()">; -def IsN64 : Predicate<"Subtarget.isABI_N64()">; -def NotN64 : Predicate<"!Subtarget.isABI_N64()">; -def RelocStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">; -def RelocPIC : Predicate<"TM.getRelocationModel() == Reloc::PIC_">; -def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">; +def HasSEInReg : Predicate<"Subtarget.hasSEInReg()">, + AssemblerPredicate<"FeatureSEInReg">; +def HasBitCount : Predicate<"Subtarget.hasBitCount()">, + AssemblerPredicate<"FeatureBitCount">; +def HasSwap : Predicate<"Subtarget.hasSwap()">, + AssemblerPredicate<"FeatureSwap">; +def HasCondMov : Predicate<"Subtarget.hasCondMov()">, + AssemblerPredicate<"FeatureCondMov">; +def HasMips32 : Predicate<"Subtarget.hasMips32()">, + AssemblerPredicate<"FeatureMips32">; +def HasMips32r2 : 
Predicate<"Subtarget.hasMips32r2()">, + AssemblerPredicate<"FeatureMips32r2">; +def HasMips64 : Predicate<"Subtarget.hasMips64()">, + AssemblerPredicate<"FeatureMips64">; +def HasMips32r2Or64 : Predicate<"Subtarget.hasMips32r2Or64()">, + AssemblerPredicate<"FeatureMips32r2,FeatureMips64">; +def NotMips64 : Predicate<"!Subtarget.hasMips64()">, + AssemblerPredicate<"!FeatureMips64">; +def HasMips64r2 : Predicate<"Subtarget.hasMips64r2()">, + AssemblerPredicate<"FeatureMips64r2">; +def IsN64 : Predicate<"Subtarget.isABI_N64()">, + AssemblerPredicate<"FeatureN64">; +def NotN64 : Predicate<"!Subtarget.isABI_N64()">, + AssemblerPredicate<"!FeatureN64">; +def RelocStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">, + AssemblerPredicate<"FeatureMips32">; +def RelocPIC : Predicate<"TM.getRelocationModel() == Reloc::PIC_">, + AssemblerPredicate<"FeatureMips32">; +def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">, + AssemblerPredicate<"FeatureMips32">; //===----------------------------------------------------------------------===// // Mips Operand, Complex Patterns and Transformations Definitions. 
@@ -148,12 +163,15 @@ def jmptarget : Operand<OtherVT> { def brtarget : Operand<OtherVT> { let EncoderMethod = "getBranchTargetOpValue"; let OperandType = "OPERAND_PCREL"; + let DecoderMethod = "DecodeBranchTarget"; } def calltarget : Operand<iPTR> { let EncoderMethod = "getJumpTargetOpValue"; } def calltarget64: Operand<i64>; -def simm16 : Operand<i32>; +def simm16 : Operand<i32> { + let DecoderMethod= "DecodeSimm16"; +} def simm16_64 : Operand<i64>; def shamt : Operand<i32>; @@ -189,11 +207,13 @@ def mem_ea_64 : Operand<i64> { // size operand of ext instruction def size_ext : Operand<i32> { let EncoderMethod = "getSizeExtEncoding"; + let DecoderMethod = "DecodeExtSize"; } // size operand of ins instruction def size_ins : Operand<i32> { let EncoderMethod = "getSizeInsEncoding"; + let DecoderMethod = "DecodeInsSize"; } // Transformation Function - get the lower 16 bits. @@ -295,6 +315,7 @@ class ArithLogicR<bits<6> op, bits<6> func, string instr_asm, SDNode OpNode, [(set RC:$rd, (OpNode RC:$rs, RC:$rt))], itin> { let shamt = 0; let isCommutable = isComm; + let isReMaterializable = 1; } class ArithOverflowR<bits<6> op, bits<6> func, string instr_asm, @@ -310,7 +331,9 @@ class ArithLogicI<bits<6> op, string instr_asm, SDNode OpNode, Operand Od, PatLeaf imm_type, RegisterClass RC> : FI<op, (outs RC:$rt), (ins RC:$rs, Od:$imm16), !strconcat(instr_asm, "\t$rt, $rs, $imm16"), - [(set RC:$rt, (OpNode RC:$rs, imm_type:$imm16))], IIAlu>; + [(set RC:$rt, (OpNode RC:$rs, imm_type:$imm16))], IIAlu> { + let isReMaterializable = 1; +} class ArithOverflowI<bits<6> op, string instr_asm, SDNode OpNode, Operand Od, PatLeaf imm_type, RegisterClass RC> : @@ -365,6 +388,8 @@ class LoadUpper<bits<6> op, string instr_asm, RegisterClass RC, Operand Imm>: FI<op, (outs RC:$rt), (ins Imm:$imm16), !strconcat(instr_asm, "\t$rt, $imm16"), [], IIAlu> { let rs = 0; + let neverHasSideEffects = 1; + let isReMaterializable = 1; } class FMem<bits<6> op, dag outs, dag ins, string asmstr, list<dag> 
pattern, @@ -372,6 +397,7 @@ class FMem<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, bits<21> addr; let Inst{25-21} = addr{20-16}; let Inst{15-0} = addr{15-0}; + let DecoderMethod = "DecodeMem"; } // Memory Load/Store @@ -406,7 +432,10 @@ multiclass LoadM32<bits<6> op, string instr_asm, PatFrag OpNode, def #NAME# : LoadM<op, instr_asm, OpNode, CPURegs, mem, Pseudo>, Requires<[NotN64]>; def _P8 : LoadM<op, instr_asm, OpNode, CPURegs, mem64, Pseudo>, - Requires<[IsN64]>; + Requires<[IsN64]> { + let DecoderNamespace = "Mips64"; + let isCodeGenOnly = 1; + } } // 64-bit load. @@ -415,7 +444,10 @@ multiclass LoadM64<bits<6> op, string instr_asm, PatFrag OpNode, def #NAME# : LoadM<op, instr_asm, OpNode, CPU64Regs, mem, Pseudo>, Requires<[NotN64]>; def _P8 : LoadM<op, instr_asm, OpNode, CPU64Regs, mem64, Pseudo>, - Requires<[IsN64]>; + Requires<[IsN64]> { + let DecoderNamespace = "Mips64"; + let isCodeGenOnly = 1; + } } // 32-bit load. @@ -423,7 +455,10 @@ multiclass LoadUnAlign32<bits<6> op> { def #NAME# : LoadUnAlign<op, CPURegs, mem>, Requires<[NotN64]>; def _P8 : LoadUnAlign<op, CPURegs, mem64>, - Requires<[IsN64]>; + Requires<[IsN64]> { + let DecoderNamespace = "Mips64"; + let isCodeGenOnly = 1; + } } // 32-bit store. multiclass StoreM32<bits<6> op, string instr_asm, PatFrag OpNode, @@ -431,7 +466,10 @@ multiclass StoreM32<bits<6> op, string instr_asm, PatFrag OpNode, def #NAME# : StoreM<op, instr_asm, OpNode, CPURegs, mem, Pseudo>, Requires<[NotN64]>; def _P8 : StoreM<op, instr_asm, OpNode, CPURegs, mem64, Pseudo>, - Requires<[IsN64]>; + Requires<[IsN64]> { + let DecoderNamespace = "Mips64"; + let isCodeGenOnly = 1; + } } // 64-bit store. 
@@ -440,7 +478,10 @@ multiclass StoreM64<bits<6> op, string instr_asm, PatFrag OpNode, def #NAME# : StoreM<op, instr_asm, OpNode, CPU64Regs, mem, Pseudo>, Requires<[NotN64]>; def _P8 : StoreM<op, instr_asm, OpNode, CPU64Regs, mem64, Pseudo>, - Requires<[IsN64]>; + Requires<[IsN64]> { + let DecoderNamespace = "Mips64"; + let isCodeGenOnly = 1; + } } // 32-bit store. @@ -448,7 +489,10 @@ multiclass StoreUnAlign32<bits<6> op> { def #NAME# : StoreUnAlign<op, CPURegs, mem>, Requires<[NotN64]>; def _P8 : StoreUnAlign<op, CPURegs, mem64>, - Requires<[IsN64]>; + Requires<[IsN64]> { + let DecoderNamespace = "Mips64"; + let isCodeGenOnly = 1; + } } // Conditional Branch @@ -498,6 +542,7 @@ class JumpFJ<bits<6> op, string instr_asm>: let isBarrier=1; let hasDelaySlot = 1; let Predicates = [RelocStatic]; + let DecoderMethod = "DecodeJumpTarget"; } // Unconditional branch @@ -528,7 +573,9 @@ let isCall=1, hasDelaySlot=1 in { class JumpLink<bits<6> op, string instr_asm>: FJ<op, (outs), (ins calltarget:$target, variable_ops), !strconcat(instr_asm, "\t$target"), [(MipsJmpLink imm:$target)], - IIBranch>; + IIBranch> { + let DecoderMethod = "DecodeJumpTarget"; + } class JumpLinkReg<bits<6> op, bits<6> func, string instr_asm, RegisterClass RC>: @@ -555,6 +602,7 @@ class Mult<bits<6> func, string instr_asm, InstrItinClass itin, let shamt = 0; let isCommutable = 1; let Defs = DefRegs; + let neverHasSideEffects = 1; } class Mult32<bits<6> func, string instr_asm, InstrItinClass itin>: @@ -582,6 +630,7 @@ class MoveFromLOHI<bits<6> func, string instr_asm, RegisterClass RC, let rt = 0; let shamt = 0; let Uses = UseRegs; + let neverHasSideEffects = 1; } class MoveToLOHI<bits<6> func, string instr_asm, RegisterClass RC, @@ -592,6 +641,7 @@ class MoveToLOHI<bits<6> func, string instr_asm, RegisterClass RC, let rd = 0; let shamt = 0; let Defs = DefRegs; + let neverHasSideEffects = 1; } class EffectiveAddress<string instr_asm, RegisterClass RC, Operand Mem> : @@ -635,6 +685,7 @@ class 
SubwordSwap<bits<6> func, bits<5> sa, string instr_asm, RegisterClass RC>: let rs = 0; let shamt = sa; let Predicates = [HasSwap]; + let neverHasSideEffects = 1; } // Read Hardware @@ -680,7 +731,9 @@ class Atomic2Ops<PatFrag Op, string Opstr, RegisterClass DRC, multiclass Atomic2Ops32<PatFrag Op, string Opstr> { def #NAME# : Atomic2Ops<Op, Opstr, CPURegs, CPURegs>, Requires<[NotN64]>; - def _P8 : Atomic2Ops<Op, Opstr, CPURegs, CPU64Regs>, Requires<[IsN64]>; + def _P8 : Atomic2Ops<Op, Opstr, CPURegs, CPU64Regs>, Requires<[IsN64]> { + let DecoderNamespace = "Mips64"; + } } // Atomic Compare & Swap. @@ -692,7 +745,9 @@ class AtomicCmpSwap<PatFrag Op, string Width, RegisterClass DRC, multiclass AtomicCmpSwap32<PatFrag Op, string Width> { def #NAME# : AtomicCmpSwap<Op, Width, CPURegs, CPURegs>, Requires<[NotN64]>; - def _P8 : AtomicCmpSwap<Op, Width, CPURegs, CPU64Regs>, Requires<[IsN64]>; + def _P8 : AtomicCmpSwap<Op, Width, CPURegs, CPU64Regs>, Requires<[IsN64]> { + let DecoderNamespace = "Mips64"; + } } class LLBase<bits<6> Opc, string opstring, RegisterClass RC, Operand Mem> : @@ -722,24 +777,13 @@ def ADJCALLSTACKUP : MipsPseudo<(outs), (ins uimm16:$amt1, uimm16:$amt2), [(callseq_end timm:$amt1, timm:$amt2)]>; } -// Some assembly macros need to avoid pseudoinstructions and assembler -// automatic reodering, we should reorder ourselves. -def MACRO : MipsPseudo<(outs), (ins), ".set\tmacro", []>; -def REORDER : MipsPseudo<(outs), (ins), ".set\treorder", []>; -def NOMACRO : MipsPseudo<(outs), (ins), ".set\tnomacro", []>; -def NOREORDER : MipsPseudo<(outs), (ins), ".set\tnoreorder", []>; - -// These macros are inserted to prevent GAS from complaining -// when using the AT register. -def NOAT : MipsPseudo<(outs), (ins), ".set\tnoat", []>; -def ATMACRO : MipsPseudo<(outs), (ins), ".set\tat", []>; - // When handling PIC code the assembler needs .cpload and .cprestore // directives. 
If the real instructions corresponding these directives // are used, we have the same behavior, but get also a bunch of warnings // from the assembler. -def CPLOAD : MipsPseudo<(outs), (ins CPURegs:$picreg), ".cpload\t$picreg", []>; -def CPRESTORE : MipsPseudo<(outs), (ins i32imm:$loc), ".cprestore\t$loc", []>; +let neverHasSideEffects = 1 in +def CPRESTORE : MipsPseudo<(outs), (ins i32imm:$loc, CPURegs:$gp), + ".cprestore\t$loc", []>; // For O32 ABI & PIC & non-fixed global base register, the following instruction // seqeunce is emitted to set the global base register: @@ -757,7 +801,10 @@ def CPRESTORE : MipsPseudo<(outs), (ins i32imm:$loc), ".cprestore\t$loc", []>; // before or between instructions 0 and 1, which is a limitation imposed by // GNU linker. +let isTerminator = 1, isBarrier = 1 in def SETGP01 : MipsPseudo<(outs CPURegs:$dst), (ins), "", []>; + +let neverHasSideEffects = 1 in def SETGP2 : MipsPseudo<(outs CPURegs:$globalreg), (ins CPURegs:$picreg), "", []>; @@ -871,9 +918,14 @@ def SYNC : MipsInst<(outs), (ins i32imm:$stype), "sync $stype", /// Load-linked, Store-conditional def LL : LLBase<0x30, "ll", CPURegs, mem>, Requires<[NotN64]>; -def LL_P8 : LLBase<0x30, "ll", CPURegs, mem64>, Requires<[IsN64]>; +def LL_P8 : LLBase<0x30, "ll", CPURegs, mem64>, Requires<[IsN64]> { + let DecoderNamespace = "Mips64"; +} + def SC : SCBase<0x38, "sc", CPURegs, mem>, Requires<[NotN64]>; -def SC_P8 : SCBase<0x38, "sc", CPURegs, mem64>, Requires<[IsN64]>; +def SC_P8 : SCBase<0x38, "sc", CPURegs, mem64>, Requires<[IsN64]> { + let DecoderNamespace = "Mips64"; +} /// Jump and Branch Instructions def J : JumpFJ<0x02, "j">; @@ -891,7 +943,7 @@ def JALR : JumpLinkReg<0x00, 0x09, "jalr", CPURegs>; def BGEZAL : BranchLink<"bgezal", 0x11, CPURegs>; def BLTZAL : BranchLink<"bltzal", 0x10, CPURegs>; -let isReturn=1, isTerminator=1, hasDelaySlot=1, +let isReturn=1, isTerminator=1, hasDelaySlot=1, isCodeGenOnly=1, isBarrier=1, hasCtrlDep=1, rd=0, rt=0, shamt=0 in def RET : FR 
<0x00, 0x08, (outs), (ins CPURegs:$target), "jr\t$target", [(MipsRet CPURegs:$target)], IIBranch>; @@ -926,13 +978,17 @@ let addr=0 in // instructions. The same not happens for stack address copies, so an // add op with mem ComplexPattern is used and the stack address copy // can be matched. It's similar to Sparc LEA_ADDRi -def LEA_ADDiu : EffectiveAddress<"addiu\t$rt, $addr", CPURegs, mem_ea>; +def LEA_ADDiu : EffectiveAddress<"addiu\t$rt, $addr", CPURegs, mem_ea> { + let isCodeGenOnly = 1; +} // DynAlloc node points to dynamically allocated stack space. // $sp is added to the list of implicitly used registers to prevent dead code // elimination from removing instructions that modify $sp. let Uses = [SP] in -def DynAlloc : EffectiveAddress<"addiu\t$rt, $addr", CPURegs, mem_ea>; +def DynAlloc : EffectiveAddress<"addiu\t$rt, $addr", CPURegs, mem_ea> { + let isCodeGenOnly = 1; +} // MADD*/MSUB* def MADD : MArithR<0, "madd", MipsMAdd, 1>; diff --git a/lib/Target/Mips/MipsJITInfo.h b/lib/Target/Mips/MipsJITInfo.h index ad3c930..f4c4ae8 100644 --- a/lib/Target/Mips/MipsJITInfo.h +++ b/lib/Target/Mips/MipsJITInfo.h @@ -19,8 +19,6 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/Target/TargetJITInfo.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" namespace llvm { class MipsTargetMachine; diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp index 0d51298..1597b93 100644 --- a/lib/Target/Mips/MipsMCInstLower.cpp +++ b/lib/Target/Mips/MipsMCInstLower.cpp @@ -26,9 +26,13 @@ using namespace llvm; -MipsMCInstLower::MipsMCInstLower(Mangler *mang, const MachineFunction &mf, - MipsAsmPrinter &asmprinter) - : Ctx(mf.getContext()), Mang(mang), AsmPrinter(asmprinter) {} +MipsMCInstLower::MipsMCInstLower(MipsAsmPrinter &asmprinter) + : AsmPrinter(asmprinter) {} + +void MipsMCInstLower::Initialize(Mangler *M, MCContext* C) { + Mang = M; + Ctx = C; +} MCOperand 
MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, MachineOperandType MOTy, @@ -90,7 +94,7 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, llvm_unreachable("<unknown operand type>"); } - const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::Create(Symbol, Kind, Ctx); + const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::Create(Symbol, Kind, *Ctx); if (!Offset) return MCOperand::CreateExpr(MCSym); @@ -98,76 +102,68 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, // Assume offset is never negative. assert(Offset > 0); - const MCConstantExpr *OffsetExpr = MCConstantExpr::Create(Offset, Ctx); - const MCBinaryExpr *AddExpr = MCBinaryExpr::CreateAdd(MCSym, OffsetExpr, Ctx); + const MCConstantExpr *OffsetExpr = MCConstantExpr::Create(Offset, *Ctx); + const MCBinaryExpr *AddExpr = MCBinaryExpr::CreateAdd(MCSym, OffsetExpr, *Ctx); return MCOperand::CreateExpr(AddExpr); } +static void CreateMCInst(MCInst& Inst, unsigned Opc, const MCOperand& Opnd0, + const MCOperand& Opnd1, + const MCOperand& Opnd2 = MCOperand()) { + Inst.setOpcode(Opc); + Inst.addOperand(Opnd0); + Inst.addOperand(Opnd1); + if (Opnd2.isValid()) + Inst.addOperand(Opnd2); +} + // Lower ".cpload $reg" to // "lui $gp, %hi(_gp_disp)" // "addiu $gp, $gp, %lo(_gp_disp)" -// "addu $gp. 
$gp, $reg" -void MipsMCInstLower::LowerCPLOAD(const MachineInstr *MI, - SmallVector<MCInst, 4>& MCInsts) { - MCInst Lui, Addiu, Addu; +// "addu $gp, $gp, $t9" +void MipsMCInstLower::LowerCPLOAD(SmallVector<MCInst, 4>& MCInsts) { + MCOperand GPReg = MCOperand::CreateReg(Mips::GP); + MCOperand T9Reg = MCOperand::CreateReg(Mips::T9); StringRef SymName("_gp_disp"); - const MCSymbol *Symbol = Ctx.GetOrCreateSymbol(SymName); + const MCSymbol *Sym = Ctx->GetOrCreateSymbol(SymName); const MCSymbolRefExpr *MCSym; - // lui $gp, %hi(_gp_disp) - Lui.setOpcode(Mips::LUi); - Lui.addOperand(MCOperand::CreateReg(Mips::GP)); - MCSym = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_Mips_ABS_HI, Ctx); - Lui.addOperand(MCOperand::CreateExpr(MCSym)); - MCInsts.push_back(Lui); - - // addiu $gp, $gp, %lo(_gp_disp) - Addiu.setOpcode(Mips::ADDiu); - Addiu.addOperand(MCOperand::CreateReg(Mips::GP)); - Addiu.addOperand(MCOperand::CreateReg(Mips::GP)); - MCSym = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_Mips_ABS_LO, Ctx); - Addiu.addOperand(MCOperand::CreateExpr(MCSym)); - MCInsts.push_back(Addiu); - - // addu $gp. $gp, $reg - Addu.setOpcode(Mips::ADDu); - Addu.addOperand(MCOperand::CreateReg(Mips::GP)); - Addu.addOperand(MCOperand::CreateReg(Mips::GP)); - const MachineOperand &MO = MI->getOperand(0); - assert(MO.isReg() && "CPLOAD's operand must be a register."); - Addu.addOperand(MCOperand::CreateReg(MO.getReg())); - MCInsts.push_back(Addu); + MCSym = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_Mips_ABS_HI, *Ctx); + MCOperand SymHi = MCOperand::CreateExpr(MCSym); + MCSym = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_Mips_ABS_LO, *Ctx); + MCOperand SymLo = MCOperand::CreateExpr(MCSym); + + MCInsts.resize(3); + + CreateMCInst(MCInsts[0], Mips::LUi, GPReg, SymHi); + CreateMCInst(MCInsts[1], Mips::ADDiu, GPReg, GPReg, SymLo); + CreateMCInst(MCInsts[2], Mips::ADDu, GPReg, GPReg, T9Reg); } // Lower ".cprestore offset" to "sw $gp, offset($sp)". 
-void MipsMCInstLower::LowerCPRESTORE(const MachineInstr *MI, +void MipsMCInstLower::LowerCPRESTORE(int64_t Offset, SmallVector<MCInst, 4>& MCInsts) { - const MachineOperand &MO = MI->getOperand(0); - assert(MO.isImm() && "CPRESTORE's operand must be an immediate."); - unsigned Offset = MO.getImm(), Reg = Mips::SP; - MCInst Sw; + assert(isInt<32>(Offset) && (Offset >= 0) && + "Imm operand of .cprestore must be a non-negative 32-bit value."); - if (Offset >= 0x8000) { - unsigned Hi = (Offset >> 16) + ((Offset & 0x8000) != 0); + MCOperand SPReg = MCOperand::CreateReg(Mips::SP), BaseReg = SPReg; + MCOperand GPReg = MCOperand::CreateReg(Mips::GP); + + if (!isInt<16>(Offset)) { + unsigned Hi = ((Offset + 0x8000) >> 16) & 0xffff; Offset &= 0xffff; - Reg = Mips::AT; + MCOperand ATReg = MCOperand::CreateReg(Mips::AT); + BaseReg = ATReg; // lui at,hi // addu at,at,sp MCInsts.resize(2); - MCInsts[0].setOpcode(Mips::LUi); - MCInsts[0].addOperand(MCOperand::CreateReg(Mips::AT)); - MCInsts[0].addOperand(MCOperand::CreateImm(Hi)); - MCInsts[1].setOpcode(Mips::ADDu); - MCInsts[1].addOperand(MCOperand::CreateReg(Mips::AT)); - MCInsts[1].addOperand(MCOperand::CreateReg(Mips::AT)); - MCInsts[1].addOperand(MCOperand::CreateReg(Mips::SP)); + CreateMCInst(MCInsts[0], Mips::LUi, ATReg, MCOperand::CreateImm(Hi)); + CreateMCInst(MCInsts[1], Mips::ADDu, ATReg, ATReg, SPReg); } - Sw.setOpcode(Mips::SW); - Sw.addOperand(MCOperand::CreateReg(Mips::GP)); - Sw.addOperand(MCOperand::CreateReg(Reg)); - Sw.addOperand(MCOperand::CreateImm(Offset)); + MCInst Sw; + CreateMCInst(Sw, Mips::SW, GPReg, BaseReg, MCOperand::CreateImm(Offset)); MCInsts.push_back(Sw); } @@ -332,18 +328,16 @@ void MipsMCInstLower::LowerSETGP01(const MachineInstr *MI, assert(MO.isReg()); MCOperand RegOpnd = MCOperand::CreateReg(MO.getReg()); StringRef SymName("_gp_disp"); - const MCSymbol *Sym = Ctx.GetOrCreateSymbol(SymName); + const MCSymbol *Sym = Ctx->GetOrCreateSymbol(SymName); const MCSymbolRefExpr *MCSym; + MCSym = 
MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_Mips_ABS_HI, *Ctx); + MCOperand SymHi = MCOperand::CreateExpr(MCSym); + MCSym = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_Mips_ABS_LO, *Ctx); + MCOperand SymLo = MCOperand::CreateExpr(MCSym); + MCInsts.resize(2); - MCSym = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_Mips_ABS_HI, Ctx); - MCInsts[0].setOpcode(Mips::LUi); - MCInsts[0].addOperand(RegOpnd); - MCInsts[0].addOperand(MCOperand::CreateExpr(MCSym)); - MCSym = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_Mips_ABS_LO, Ctx); - MCInsts[1].setOpcode(Mips::ADDiu); - MCInsts[1].addOperand(RegOpnd); - MCInsts[1].addOperand(RegOpnd); - MCInsts[1].addOperand(MCOperand::CreateExpr(MCSym)); + CreateMCInst(MCInsts[0], Mips::LUi, RegOpnd, SymHi); + CreateMCInst(MCInsts[1], Mips::ADDiu, RegOpnd, RegOpnd, SymLo); } diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h index 20bb338..c1d007d 100644 --- a/lib/Target/Mips/MipsMCInstLower.h +++ b/lib/Target/Mips/MipsMCInstLower.h @@ -26,15 +26,15 @@ namespace llvm { // MCInst. 
class LLVM_LIBRARY_VISIBILITY MipsMCInstLower { typedef MachineOperand::MachineOperandType MachineOperandType; - MCContext &Ctx; + MCContext *Ctx; Mangler *Mang; MipsAsmPrinter &AsmPrinter; public: - MipsMCInstLower(Mangler *mang, const MachineFunction &MF, - MipsAsmPrinter &asmprinter); + MipsMCInstLower(MipsAsmPrinter &asmprinter); + void Initialize(Mangler *mang, MCContext* C); void Lower(const MachineInstr *MI, MCInst &OutMI) const; - void LowerCPLOAD(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts); - void LowerCPRESTORE(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts); + void LowerCPLOAD(SmallVector<MCInst, 4>& MCInsts); + void LowerCPRESTORE(int64_t Offset, SmallVector<MCInst, 4>& MCInsts); void LowerUnalignedLoadStore(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts); void LowerSETGP01(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts); diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h index abb5404..0fde55c 100644 --- a/lib/Target/Mips/MipsMachineFunction.h +++ b/lib/Target/Mips/MipsMachineFunction.h @@ -14,7 +14,6 @@ #ifndef MIPS_MACHINE_FUNCTION_INFO_H #define MIPS_MACHINE_FUNCTION_INFO_H -#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include <utility> @@ -50,12 +49,14 @@ class MipsFunctionInfo : public MachineFunctionInfo { mutable int DynAllocFI; // Frame index of dynamically allocated stack area. 
unsigned MaxCallFrameSize; + bool EmitNOAT; + public: MipsFunctionInfo(MachineFunction& MF) : MF(MF), SRetReturnReg(0), GlobalBaseReg(0), VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)), OutArgFIRange(std::make_pair(-1, 0)), GPFI(0), DynAllocFI(0), - MaxCallFrameSize(0) + MaxCallFrameSize(0), EmitNOAT(false) {} bool isInArgFI(int FI) const { @@ -100,6 +101,9 @@ public: unsigned getMaxCallFrameSize() const { return MaxCallFrameSize; } void setMaxCallFrameSize(unsigned S) { MaxCallFrameSize = S; } + + bool getEmitNOAT() const { return EmitNOAT; } + void setEmitNOAT() { EmitNOAT = true; } }; } // end of namespace llvm diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index 5cfda34..f30de44 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -62,7 +62,7 @@ getCalleeSavedRegs(const MachineFunction *MF) const return CSR_O32_SaveList; else if (Subtarget.isABI_N32()) return CSR_N32_SaveList; - + assert(Subtarget.isABI_N64()); return CSR_N64_SaveList; } @@ -125,9 +125,18 @@ getReservedRegs(const MachineFunction &MF) const { Reserved.set(Mips::GP_64); } + // Reserve hardware registers. + Reserved.set(Mips::HWR29); + Reserved.set(Mips::HWR29_64); + return Reserved; } +bool +MipsRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { + return true; +} + // This function eliminate ADJCALLSTACKDOWN, // ADJCALLSTACKUP pseudo instructions void MipsRegisterInfo:: @@ -223,8 +232,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, AnalyzeImm.Analyze(Offset, Size, true /* LastInstrIsADDiu */); MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin(); - // FIXME: change this when mips goes MC". 
- BuildMI(MBB, II, DL, TII.get(Mips::NOAT)); + MipsFI->setEmitNOAT(); // The first instruction can be a LUi, which is different from other // instructions (ADDiu, ORI and SLL) in that it does not have a register @@ -245,7 +253,6 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, FrameReg = ATReg; Offset = SignExtend64<16>(Inst->ImmOpnd); - BuildMI(MBB, ++II, MI.getDebugLoc(), TII.get(Mips::ATMACRO)); } MI.getOperand(i).ChangeToRegister(FrameReg, false); diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index 7037ca6..0716d29 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -47,6 +47,8 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo { BitVector getReservedRegs(const MachineFunction &MF) const; + virtual bool requiresRegisterScavenging(const MachineFunction &MF) const; + void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index d4a50ee..00347df 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -13,6 +13,7 @@ #include "MipsSubtarget.h" #include "Mips.h" +#include "MipsRegisterInfo.h" #include "llvm/Support/TargetRegistry.h" #define GET_SUBTARGETINFO_TARGET_DESC @@ -54,3 +55,14 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, if (TT.find("linux") == std::string::npos) IsLinux = false; } + +bool +MipsSubtarget::enablePostRAScheduler(CodeGenOpt::Level OptLevel, + TargetSubtargetInfo::AntiDepBreakMode& Mode, + RegClassVector& CriticalPathRCs) const { + Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL; + CriticalPathRCs.clear(); + CriticalPathRCs.push_back(hasMips64() ? 
+ &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass); + return OptLevel >= CodeGenOpt::Aggressive; +} diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index ba0bbac..7faf77b 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -89,6 +89,9 @@ protected: InstrItineraryData InstrItins; public: + virtual bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, + AntiDepBreakMode& Mode, + RegClassVector& CriticalPathRCs) const; /// Only O32 and EABI supported right now. bool isABI_EABI() const { return MipsABI == EABI; } diff --git a/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp b/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp index ec7e2a7..1830213 100644 --- a/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp +++ b/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp @@ -18,26 +18,24 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/ADT/APFloat.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -#define GET_INSTRUCTION_NAME #include "PTXGenAsmWriter.inc" PTXInstPrinter::PTXInstPrinter(const MCAsmInfo &MAI, + const MCInstrInfo &MII, const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) : - MCInstPrinter(MAI, MRI) { + MCInstPrinter(MAI, MII, MRI) { // Initialize the set of available features. 
setAvailableFeatures(STI.getFeatureBits()); } -StringRef PTXInstPrinter::getOpcodeName(unsigned Opcode) const { - return getInstructionName(Opcode); -} - void PTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { // Decode the register number into type and offset unsigned RegSpace = RegNo & 0x7; diff --git a/lib/Target/PTX/InstPrinter/PTXInstPrinter.h b/lib/Target/PTX/InstPrinter/PTXInstPrinter.h index eef6101..ea4d504 100644 --- a/lib/Target/PTX/InstPrinter/PTXInstPrinter.h +++ b/lib/Target/PTX/InstPrinter/PTXInstPrinter.h @@ -23,15 +23,12 @@ class MCOperand; class PTXInstPrinter : public MCInstPrinter { public: - PTXInstPrinter(const MCAsmInfo &MAI, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI); + PTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); - virtual StringRef getOpcodeName(unsigned Opcode) const; virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; - static const char *getInstructionName(unsigned Opcode); - // Autogenerated by tblgen. 
void printInstruction(const MCInst *MI, raw_ostream &O); static const char *getRegisterName(unsigned RegNo); diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp index 7671b11..08fb970 100644 --- a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp +++ b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp @@ -62,10 +62,11 @@ static MCCodeGenInfo *createPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM, static MCInstPrinter *createPTXMCInstPrinter(const Target &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, + const MCInstrInfo &MII, const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) { assert(SyntaxVariant == 0 && "We only have one syntax variant"); - return new PTXInstPrinter(MAI, MRI, STI); + return new PTXInstPrinter(MAI, MII, MRI, STI); } extern "C" void LLVMInitializePTXTargetMC() { diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h index 1003b0b..542638a 100644 --- a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h +++ b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h @@ -15,9 +15,7 @@ #define PTXMCTARGETDESC_H namespace llvm { -class MCSubtargetInfo; class Target; -class StringRef; extern Target ThePTX32Target; extern Target ThePTX64Target; diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp index db1c953..ef4455b 100644 --- a/lib/Target/PTX/PTXISelLowering.cpp +++ b/lib/Target/PTX/PTXISelLowering.cpp @@ -97,7 +97,8 @@ PTXTargetLowering::PTXTargetLowering(TargetMachine &TM) // customise setcc to use bitwise logic if possible - setOperationAction(ISD::SETCC, MVT::i1, Custom); + //setOperationAction(ISD::SETCC, MVT::i1, Custom); + setOperationAction(ISD::SETCC, MVT::i1, Legal); // customize translation of memory addresses @@ -156,18 +157,27 @@ SDValue PTXTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { SDValue Op1 = Op.getOperand(1); SDValue Op2 = Op.getOperand(2); DebugLoc dl = Op.getDebugLoc(); - 
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); + //ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); // Look for X == 0, X == 1, X != 0, or X != 1 // We can simplify these to bitwise logic - if (Op1.getOpcode() == ISD::Constant && - (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 || - cast<ConstantSDNode>(Op1)->isNullValue()) && - (CC == ISD::SETEQ || CC == ISD::SETNE)) { + //if (Op1.getOpcode() == ISD::Constant && + // (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 || + // cast<ConstantSDNode>(Op1)->isNullValue()) && + // (CC == ISD::SETEQ || CC == ISD::SETNE)) { + // + // return DAG.getNode(ISD::AND, dl, MVT::i1, Op0, Op1); + //} - return DAG.getNode(ISD::AND, dl, MVT::i1, Op0, Op1); - } + //ConstantSDNode* COp1 = cast<ConstantSDNode>(Op1); + //if(COp1 && COp1->getZExtValue() == 1) { + // if(CC == ISD::SETNE) { + // return DAG.getNode(PTX::XORripreds, dl, MVT::i1, Op0); + // } + //} + + llvm_unreachable("setcc was not matched by a pattern!"); return DAG.getNode(ISD::SETCC, dl, MVT::i1, Op0, Op1, Op2); } @@ -384,22 +394,22 @@ PTXTargetLowering::LowerCall(SDValue Chain, SDValue Callee, PTXMachineFunctionInfo *PTXMFI = MF.getInfo<PTXMachineFunctionInfo>(); PTXParamManager &PM = PTXMFI->getParamManager(); MachineFrameInfo *MFI = MF.getFrameInfo(); - + assert(getTargetMachine().getSubtarget<PTXSubtarget>().callsAreHandled() && "Calls are not handled for the target device"); // Identify the callee function const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal(); const Function *function = cast<Function>(GV); - + // allow non-device calls only for printf - bool isPrintf = function->getName() == "printf" || function->getName() == "puts"; - + bool isPrintf = function->getName() == "printf" || function->getName() == "puts"; + assert((isPrintf || function->getCallingConv() == CallingConv::PTX_Device) && "PTX function calls must be to PTX device functions"); - + unsigned outSize = isPrintf ? 
2 : Outs.size(); - + std::vector<SDValue> Ops; // The layout of the ops will be [Chain, #Ins, Ins, Callee, #Outs, Outs] Ops.resize(outSize + Ins.size() + 4); @@ -412,7 +422,7 @@ PTXTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // #Outs Ops[Ins.size()+3] = DAG.getTargetConstant(outSize, MVT::i32); - + if (isPrintf) { // first argument is the address of the global string variable in memory unsigned Param0 = PM.addLocalParam(getPointerTy().getSizeInBits()); @@ -421,29 +431,29 @@ PTXTargetLowering::LowerCall(SDValue Chain, SDValue Callee, Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain, ParamValue0, OutVals[0]); Ops[Ins.size()+4] = ParamValue0; - + // alignment is the maximum size of all the arguments unsigned alignment = 0; for (unsigned i = 1; i < OutVals.size(); ++i) { - alignment = std::max(alignment, + alignment = std::max(alignment, OutVals[i].getValueType().getSizeInBits()); } // size is the alignment multiplied by the number of arguments unsigned size = alignment * (OutVals.size() - 1); - + // second argument is the address of the stack object (unless no arguments) unsigned Param1 = PM.addLocalParam(getPointerTy().getSizeInBits()); SDValue ParamValue1 = DAG.getTargetExternalSymbol(PM.getParamName(Param1).c_str(), MVT::Other); Ops[Ins.size()+5] = ParamValue1; - + if (size > 0) { // create a local stack object to store the arguments unsigned StackObject = MFI->CreateStackObject(size / 8, alignment / 8, false); SDValue FrameIndex = DAG.getFrameIndex(StackObject, getPointerTy()); - + // store each of the arguments to the stack in turn for (unsigned int i = 1; i != OutVals.size(); i++) { SDValue FrameAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), FrameIndex, DAG.getTargetConstant((i - 1) * 8, getPointerTy())); @@ -475,7 +485,7 @@ PTXTargetLowering::LowerCall(SDValue Chain, SDValue Callee, Ops[i+Ins.size()+4] = ParamValue; } } - + std::vector<SDValue> InParams; // Generate list of .param variables to hold the return value(s). 
diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td index 818d444..bead428 100644 --- a/lib/Target/PTX/PTXInstrInfo.td +++ b/lib/Target/PTX/PTXInstrInfo.td @@ -808,6 +808,8 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in { let isBranch = 1, isTerminator = 1 in { // FIXME: The pattern part is blank because I cannot (or do not yet know // how to) use the first operand of PredicateOperand (a RegPred register) here + // When this is revisited, make sure to also look at LowerSETCC and try to + // fold it into negated predicates, if possible. def BRAdp : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", [/*(brcond pred:$_p, bb:$d)*/]>; @@ -1017,6 +1019,9 @@ def : Pat<(f64 (sint_to_fp RegI64:$a)), (CVTf64s64 RndDefault, RegI64:$a)>; def : Pat<(f64 (fextend RegF32:$a)), (CVTf64f32 RegF32:$a)>; def : Pat<(f64 (bitconvert RegI64:$a)), (MOVf64i64 RegI64:$a)>; +// setcc - predicate inversion for branch conditions +def : Pat<(i1 (setcc RegPred:$a, imm:$b, SETNE)), + (XORripreds RegPred:$a, imm:$b)>; ///===- Intrinsic Instructions --------------------------------------------===// include "PTXIntrinsicInstrInfo.td" diff --git a/lib/Target/PTX/PTXTargetMachine.cpp b/lib/Target/PTX/PTXTargetMachine.cpp index 40835d0..c55a658 100644 --- a/lib/Target/PTX/PTXTargetMachine.cpp +++ b/lib/Target/PTX/PTXTargetMachine.cpp @@ -17,7 +17,6 @@ #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/Verifier.h" #include "llvm/Assembly/PrintModulePass.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -153,10 +152,10 @@ bool PTXPassConfig::addPostRegAlloc() { /// Add passes that optimize machine instructions after register allocation. 
void PTXPassConfig::addMachineLateOptimization() { if (addPass(BranchFolderPassID) != &NoPassID) - printNoVerify("After BranchFolding"); + printAndVerify("After BranchFolding"); if (addPass(TailDuplicateID) != &NoPassID) - printNoVerify("After TailDuplicate"); + printAndVerify("After TailDuplicate"); } bool PTXPassConfig::addPreEmitPass() { diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index 000d6d4..61d23ce 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -17,16 +17,12 @@ #include "MCTargetDesc/PPCPredicates.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -#define GET_INSTRUCTION_NAME #include "PPCGenAsmWriter.inc" -StringRef PPCInstPrinter::getOpcodeName(unsigned Opcode) const { - return getInstructionName(Opcode); -} - void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { OS << getRegisterName(RegNo); } diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h index 21fc733..73fd534 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h @@ -24,9 +24,9 @@ class PPCInstPrinter : public MCInstPrinter { // 0 -> AIX, 1 -> Darwin. 
unsigned SyntaxVariant; public: - PPCInstPrinter(const MCAsmInfo &MAI, const MCRegisterInfo &MRI, - unsigned syntaxVariant) - : MCInstPrinter(MAI, MRI), SyntaxVariant(syntaxVariant) {} + PPCInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI, unsigned syntaxVariant) + : MCInstPrinter(MAI, MII, MRI), SyntaxVariant(syntaxVariant) {} bool isDarwinSyntax() const { return SyntaxVariant == 1; @@ -34,9 +34,6 @@ public: virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); - virtual StringRef getOpcodeName(unsigned Opcode) const; - - static const char *getInstructionName(unsigned Opcode); // Autogenerated by tblgen. void printInstruction(const MCInst *MI, raw_ostream &O); diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 9c6eefe..48de583 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -11,6 +11,7 @@ #include "MCTargetDesc/PPCFixupKinds.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCObjectWriter.h" diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 226fbfe..6568e82 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -108,9 +108,10 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, static MCInstPrinter *createPPCMCInstPrinter(const Target &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, + const MCInstrInfo &MII, const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) { - return new PPCInstPrinter(MAI, MRI, SyntaxVariant); + return new PPCInstPrinter(MAI, MII, MRI, SyntaxVariant); } extern 
"C" void LLVMInitializePowerPCTargetMC() { diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index 724374c..c554d39 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -34,6 +34,7 @@ def Directive750 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_750", "">; def Directive970 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_970", "">; def Directive32 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_32", "">; def Directive64 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_64", "">; +def DirectiveA2 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_A2", "">; def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true", "Enable 64-bit instructions">; @@ -87,6 +88,10 @@ def : Processor<"g5", G5Itineraries, [Directive970, FeatureAltivec, FeatureGPUL, FeatureFSqrt, FeatureSTFIWX, Feature64Bit /*, Feature64BitRegs */]>; +def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE, + FeatureFSqrt, FeatureSTFIWX, + Feature64Bit + /*, Feature64BitRegs */]>; def : Processor<"ppc", G3Itineraries, [Directive32]>; def : Processor<"ppc64", G5Itineraries, [Directive64, FeatureAltivec, diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 4abb469..fb7aa71 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -450,6 +450,7 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { "ppc7400", "ppc750", "ppc970", + "ppcA2", "ppc64" }; diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td index 9883c2e..b2b5364 100644 --- a/lib/Target/PowerPC/PPCCallingConv.td +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -12,10 +12,6 @@ // //===----------------------------------------------------------------------===// -/// CCIfSubtarget - Match if the current subtarget has a feature F. 
-class CCIfSubtarget<string F, CCAction A> - : CCIf<!strconcat("State.getTarget().getSubtarget<PPCSubtarget>().", F), A>; - //===----------------------------------------------------------------------===// // Return Value Calling Convention //===----------------------------------------------------------------------===// diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp index ae317af..6ed1fb9 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -22,17 +22,29 @@ using namespace llvm; //===----------------------------------------------------------------------===// -// PowerPC 440 Hazard Recognizer -void PPCHazardRecognizer440::EmitInstruction(SUnit *SU) { +// PowerPC Scoreboard Hazard Recognizer +void PPCScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) { const MCInstrDesc *MCID = DAG->getInstrDesc(SU); - if (!MCID) { + if (!MCID) // This is a PPC pseudo-instruction. return; - } ScoreboardHazardRecognizer::EmitInstruction(SU); } +ScheduleHazardRecognizer::HazardType +PPCScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { + return ScoreboardHazardRecognizer::getHazardType(SU, Stalls); +} + +void PPCScoreboardHazardRecognizer::AdvanceCycle() { + ScoreboardHazardRecognizer::AdvanceCycle(); +} + +void PPCScoreboardHazardRecognizer::Reset() { + ScoreboardHazardRecognizer::Reset(); +} + //===----------------------------------------------------------------------===// // PowerPC 970 Hazard Recognizer // @@ -61,7 +73,6 @@ void PPCHazardRecognizer440::EmitInstruction(SUnit *SU) { PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetInstrInfo &tii) : TII(tii) { - LastWasBL8_ELF = false; EndDispatchGroup(); } @@ -132,15 +143,6 @@ getHazardType(SUnit *SU, int Stalls) { return NoHazard; unsigned Opcode = MI->getOpcode(); - - // If the last instruction was a BL8_ELF, then the NOP must follow it - // directly (this is strong requirement from the 
linker due to the ELF ABI). - // We return only Hazard (and not NoopHazard) because if the NOP is necessary - // then it will already be in the instruction stream (it is not always - // necessary; tail calls, for example, do not need it). - if (LastWasBL8_ELF && Opcode != PPC::NOP) - return Hazard; - bool isFirst, isSingle, isCracked, isLoad, isStore; PPCII::PPC970_Unit InstrType = GetInstrType(Opcode, isFirst, isSingle, isCracked, @@ -199,8 +201,6 @@ void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) { return; unsigned Opcode = MI->getOpcode(); - LastWasBL8_ELF = (Opcode == PPC::BL8_ELF); - bool isFirst, isSingle, isCracked, isLoad, isStore; PPCII::PPC970_Unit InstrType = GetInstrType(Opcode, isFirst, isSingle, isCracked, @@ -240,7 +240,6 @@ void PPCHazardRecognizer970::AdvanceCycle() { } void PPCHazardRecognizer970::Reset() { - LastWasBL8_ELF = false; EndDispatchGroup(); } diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h index d80a385..55b45d0 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.h +++ b/lib/Target/PowerPC/PPCHazardRecognizers.h @@ -21,16 +21,19 @@ namespace llvm { -/// PPCHazardRecognizer440 - This class implements a scoreboard-based -/// hazard recognizer for the PPC 440 and friends. -class PPCHazardRecognizer440 : public ScoreboardHazardRecognizer { +/// PPCScoreboardHazardRecognizer - This class implements a scoreboard-based +/// hazard recognizer for generic PPC processors. 
+class PPCScoreboardHazardRecognizer : public ScoreboardHazardRecognizer { const ScheduleDAG *DAG; public: - PPCHazardRecognizer440(const InstrItineraryData *ItinData, + PPCScoreboardHazardRecognizer(const InstrItineraryData *ItinData, const ScheduleDAG *DAG_) : ScoreboardHazardRecognizer(ItinData, DAG_), DAG(DAG_) {} + virtual HazardType getHazardType(SUnit *SU, int Stalls); virtual void EmitInstruction(SUnit *SU); + virtual void AdvanceCycle(); + virtual void Reset(); }; /// PPCHazardRecognizer970 - This class defines a finite state automata that @@ -49,9 +52,6 @@ class PPCHazardRecognizer970 : public ScheduleHazardRecognizer { // HasCTRSet - If the CTR register is set in this group, disallow BCTRL. bool HasCTRSet; - // Was the last instruction issued a BL8_ELF - bool LastWasBL8_ELF; - // StoredPtr - Keep track of the address of any store. If we see a load from // the same address (or one that aliases it), disallow the store. We can have // up to four stores in one dispatch group, hence we track up to 4. 
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 6651d14..5a04888 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -377,8 +377,8 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) { DebugLoc dl = N->getDebugLoc(); APInt LKZ, LKO, RKZ, RKO; - CurDAG->ComputeMaskedBits(Op0, APInt::getAllOnesValue(32), LKZ, LKO); - CurDAG->ComputeMaskedBits(Op1, APInt::getAllOnesValue(32), RKZ, RKO); + CurDAG->ComputeMaskedBits(Op0, LKZ, LKO); + CurDAG->ComputeMaskedBits(Op1, RKZ, RKO); unsigned TargetMask = LKZ.getZExtValue(); unsigned InsertMask = RKZ.getZExtValue(); diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 85b5bc1..3b24951 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -226,11 +226,23 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); - // VAARG is custom lowered with the 32-bit SVR4 ABI. - if (TM.getSubtarget<PPCSubtarget>().isSVR4ABI() - && !TM.getSubtarget<PPCSubtarget>().isPPC64()) { - setOperationAction(ISD::VAARG, MVT::Other, Custom); - setOperationAction(ISD::VAARG, MVT::i64, Custom); + if (TM.getSubtarget<PPCSubtarget>().isSVR4ABI()) { + if (TM.getSubtarget<PPCSubtarget>().isPPC64()) { + // VAARG always uses double-word chunks, so promote anything smaller. 
+ setOperationAction(ISD::VAARG, MVT::i1, Promote); + AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64); + setOperationAction(ISD::VAARG, MVT::i8, Promote); + AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64); + setOperationAction(ISD::VAARG, MVT::i16, Promote); + AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64); + setOperationAction(ISD::VAARG, MVT::i32, Promote); + AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64); + setOperationAction(ISD::VAARG, MVT::Other, Expand); + } else { + // VAARG is custom lowered with the 32-bit SVR4 ABI. + setOperationAction(ISD::VAARG, MVT::Other, Custom); + setOperationAction(ISD::VAARG, MVT::i64, Custom); + } } else setOperationAction(ISD::VAARG, MVT::Other, Expand); @@ -377,6 +389,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); } + if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) + setOperationAction(ISD::PREFETCH, MVT::Other, Legal); + setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); @@ -431,7 +446,16 @@ unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const { // Darwin passes everything on 4 byte boundary. if (TM.getSubtarget<PPCSubtarget>().isDarwin()) return 4; - // FIXME SVR4 TBD + + // 16byte and wider vectors are passed on 16byte boundary. + if (VectorType *VTy = dyn_cast<VectorType>(Ty)) + if (VTy->getBitWidth() >= 128) + return 16; + + // The rest is 8 on PPC64 and 4 on PPC32 boundary. 
+ if (PPCSubTarget.isPPC64()) + return 8; + return 4; } @@ -460,6 +484,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32"; case PPCISD::STD_32: return "PPCISD::STD_32"; case PPCISD::CALL_SVR4: return "PPCISD::CALL_SVR4"; + case PPCISD::CALL_NOP_SVR4: return "PPCISD::CALL_NOP_SVR4"; case PPCISD::CALL_Darwin: return "PPCISD::CALL_Darwin"; case PPCISD::NOP: return "PPCISD::NOP"; case PPCISD::MTCTR: return "PPCISD::MTCTR"; @@ -835,14 +860,10 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, APInt LHSKnownZero, LHSKnownOne; APInt RHSKnownZero, RHSKnownOne; DAG.ComputeMaskedBits(N.getOperand(0), - APInt::getAllOnesValue(N.getOperand(0) - .getValueSizeInBits()), LHSKnownZero, LHSKnownOne); if (LHSKnownZero.getBoolValue()) { DAG.ComputeMaskedBits(N.getOperand(1), - APInt::getAllOnesValue(N.getOperand(1) - .getValueSizeInBits()), RHSKnownZero, RHSKnownOne); // If all of the bits are known zero on the LHS or RHS, the add won't // carry. @@ -897,10 +918,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, // (for better address arithmetic) if the LHS and RHS of the OR are // provably disjoint. APInt LHSKnownZero, LHSKnownOne; - DAG.ComputeMaskedBits(N.getOperand(0), - APInt::getAllOnesValue(N.getOperand(0) - .getValueSizeInBits()), - LHSKnownZero, LHSKnownOne); + DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne); if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { // If all of the bits are known zero on the LHS or RHS, the add won't @@ -1013,10 +1031,7 @@ bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp, // (for better address arithmetic) if the LHS and RHS of the OR are // provably disjoint. 
APInt LHSKnownZero, LHSKnownOne; - DAG.ComputeMaskedBits(N.getOperand(0), - APInt::getAllOnesValue(N.getOperand(0) - .getValueSizeInBits()), - LHSKnownZero, LHSKnownOne); + DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne); if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { // If all of the bits are known zero on the LHS or RHS, the add won't // carry. @@ -2801,9 +2816,6 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size()); } - Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); - InFlag = Chain.getValue(1); - // Add a NOP immediately after the branch instruction when using the 64-bit // SVR4 ABI. At link time, if caller and callee are in a different module and // thus have a different TOC, the call will be replaced with a call to a stub @@ -2812,8 +2824,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, // which restores the TOC of the caller from the TOC save slot of the current // stack frame. If caller and callee belong to the same module (and have the // same TOC), the NOP will remain unchanged. + + bool needsTOCRestore = false; if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) { - SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); if (CallOpc == PPCISD::BCTRL_SVR4) { // This is a call through a function pointer. // Restore the caller TOC from the save area into R2. @@ -2824,14 +2837,22 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, // since r2 is a reserved register (which prevents the register allocator // from allocating it), resulting in an additional register being // allocated and an unnecessary move instruction being generated. - Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag); - InFlag = Chain.getValue(1); - } else { + needsTOCRestore = true; + } else if (CallOpc == PPCISD::CALL_SVR4) { // Otherwise insert NOP. 
- InFlag = DAG.getNode(PPCISD::NOP, dl, MVT::Glue, InFlag); + CallOpc = PPCISD::CALL_NOP_SVR4; } } + Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); + InFlag = Chain.getValue(1); + + if (needsTOCRestore) { + SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); + Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag); + InFlag = Chain.getValue(1); + } + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), DAG.getIntPtrConstant(BytesCalleePops, true), InFlag); @@ -5486,12 +5507,11 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, //===----------------------------------------------------------------------===// void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth) const { - KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); + KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0); switch (Op.getOpcode()) { default: break; case PPCISD::LBRX: { @@ -5725,7 +5745,7 @@ bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,Type *Ty) const{ return (V > -(1 << 16) && V < (1 << 16)-1); } -bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const { +bool PPCTargetLowering::isLegalAddressImmediate(GlobalValue* GV) const { return false; } @@ -5818,3 +5838,12 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, return MVT::i32; } } + +Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const { + unsigned Directive = PPCSubTarget.getDarwinDirective(); + if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2) + return Sched::ILP; + + return TargetLowering::getSchedulingPreference(N); +} + diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 2e046c4..18eb072 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -95,7 +95,9 @@ namespace llvm { EXTSW_32, /// CALL - A direct 
function call. - CALL_Darwin, CALL_SVR4, + /// CALL_NOP_SVR4 is a call with the special NOP which follows 64-bit + /// SVR4 calls. + CALL_Darwin, CALL_SVR4, CALL_NOP_SVR4, /// NOP - Special NOP which follows 64-bit SVR4 calls. NOP, @@ -279,6 +281,7 @@ namespace llvm { bool SelectAddressRegImmShift(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const; + Sched::Preference getSchedulingPreference(SDNode *N) const; /// LowerOperation - Provide custom lowering hooks for some operations. /// @@ -293,7 +296,6 @@ namespace llvm { virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; virtual void computeMaskedBitsForTargetNode(const SDValue Op, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 78f3596..7f67a41 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -89,10 +89,22 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in { let Uses = [RM] in { def BL8_ELF : IForm<18, 0, 1, (outs), (ins calltarget:$func, variable_ops), - "bl $func", BrB, []>; // See Pat patterns below. + "bl $func", BrB, []>; // See Pat patterns below. 
+ + let isCodeGenOnly = 1 in + def BL8_NOP_ELF : IForm_and_DForm_4_zero<18, 0, 1, 24, + (outs), (ins calltarget:$func, variable_ops), + "bl $func\n\tnop", BrB, []>; + def BLA8_ELF : IForm<18, 1, 1, (outs), (ins aaddr:$func, variable_ops), "bla $func", BrB, [(PPCcall_SVR4 (i64 imm:$func))]>; + + let isCodeGenOnly = 1 in + def BLA8_NOP_ELF : IForm_and_DForm_4_zero<18, 1, 1, 24, + (outs), (ins aaddr:$func, variable_ops), + "bla $func\n\tnop", BrB, + [(PPCcall_nop_SVR4 (i64 imm:$func))]>; } let Uses = [X11, CTR8, RM] in { def BCTRL8_ELF : XLForm_2_ext<19, 528, 20, 0, 1, @@ -111,8 +123,14 @@ def : Pat<(PPCcall_Darwin (i64 texternalsym:$dst)), def : Pat<(PPCcall_SVR4 (i64 tglobaladdr:$dst)), (BL8_ELF tglobaladdr:$dst)>; +def : Pat<(PPCcall_nop_SVR4 (i64 tglobaladdr:$dst)), + (BL8_NOP_ELF tglobaladdr:$dst)>; + def : Pat<(PPCcall_SVR4 (i64 texternalsym:$dst)), (BL8_ELF texternalsym:$dst)>; +def : Pat<(PPCcall_nop_SVR4 (i64 texternalsym:$dst)), + (BL8_NOP_ELF texternalsym:$dst)>; + def : Pat<(PPCnop), (NOP)>; @@ -506,7 +524,7 @@ def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src), let mayLoad = 1 in def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp, ptr_rc:$rA), - "lhau $rD, $disp($rA)", LdStGeneral, + "lhau $rD, $disp($rA)", LdStLoad, []>, RegConstraint<"$rA = $ea_result">, NoEncode<"$ea_result">; // NO LWAU! @@ -516,38 +534,38 @@ def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp // Zero extending loads. 
let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LBZ8 : DForm_1<34, (outs G8RC:$rD), (ins memri:$src), - "lbz $rD, $src", LdStGeneral, + "lbz $rD, $src", LdStLoad, [(set G8RC:$rD, (zextloadi8 iaddr:$src))]>; def LHZ8 : DForm_1<40, (outs G8RC:$rD), (ins memri:$src), - "lhz $rD, $src", LdStGeneral, + "lhz $rD, $src", LdStLoad, [(set G8RC:$rD, (zextloadi16 iaddr:$src))]>; def LWZ8 : DForm_1<32, (outs G8RC:$rD), (ins memri:$src), - "lwz $rD, $src", LdStGeneral, + "lwz $rD, $src", LdStLoad, [(set G8RC:$rD, (zextloadi32 iaddr:$src))]>, isPPC64; def LBZX8 : XForm_1<31, 87, (outs G8RC:$rD), (ins memrr:$src), - "lbzx $rD, $src", LdStGeneral, + "lbzx $rD, $src", LdStLoad, [(set G8RC:$rD, (zextloadi8 xaddr:$src))]>; def LHZX8 : XForm_1<31, 279, (outs G8RC:$rD), (ins memrr:$src), - "lhzx $rD, $src", LdStGeneral, + "lhzx $rD, $src", LdStLoad, [(set G8RC:$rD, (zextloadi16 xaddr:$src))]>; def LWZX8 : XForm_1<31, 23, (outs G8RC:$rD), (ins memrr:$src), - "lwzx $rD, $src", LdStGeneral, + "lwzx $rD, $src", LdStLoad, [(set G8RC:$rD, (zextloadi32 xaddr:$src))]>; // Update forms. let mayLoad = 1 in { def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lbzu $rD, $addr", LdStGeneral, + "lbzu $rD, $addr", LdStLoad, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lhzu $rD, $addr", LdStGeneral, + "lhzu $rD, $addr", LdStLoad, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lwzu $rD, $addr", LdStGeneral, + "lwzu $rD, $addr", LdStLoad, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; } @@ -595,24 +613,24 @@ def : Pat<(PPCload xaddr:$src), let PPC970_Unit = 2 in { // Truncating stores. 
def STB8 : DForm_1<38, (outs), (ins G8RC:$rS, memri:$src), - "stb $rS, $src", LdStGeneral, + "stb $rS, $src", LdStStore, [(truncstorei8 G8RC:$rS, iaddr:$src)]>; def STH8 : DForm_1<44, (outs), (ins G8RC:$rS, memri:$src), - "sth $rS, $src", LdStGeneral, + "sth $rS, $src", LdStStore, [(truncstorei16 G8RC:$rS, iaddr:$src)]>; def STW8 : DForm_1<36, (outs), (ins G8RC:$rS, memri:$src), - "stw $rS, $src", LdStGeneral, + "stw $rS, $src", LdStStore, [(truncstorei32 G8RC:$rS, iaddr:$src)]>; def STBX8 : XForm_8<31, 215, (outs), (ins G8RC:$rS, memrr:$dst), - "stbx $rS, $dst", LdStGeneral, + "stbx $rS, $dst", LdStStore, [(truncstorei8 G8RC:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STHX8 : XForm_8<31, 407, (outs), (ins G8RC:$rS, memrr:$dst), - "sthx $rS, $dst", LdStGeneral, + "sthx $rS, $dst", LdStStore, [(truncstorei16 G8RC:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STWX8 : XForm_8<31, 151, (outs), (ins G8RC:$rS, memrr:$dst), - "stwx $rS, $dst", LdStGeneral, + "stwx $rS, $dst", LdStStore, [(truncstorei32 G8RC:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; // Normal 8-byte stores. 
@@ -629,14 +647,14 @@ let PPC970_Unit = 2 in { def STBU8 : DForm_1a<38, (outs ptr_rc:$ea_res), (ins G8RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stbu $rS, $ptroff($ptrreg)", LdStGeneral, + "stbu $rS, $ptroff($ptrreg)", LdStStore, [(set ptr_rc:$ea_res, (pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "sthu $rS, $ptroff($ptrreg)", LdStGeneral, + "sthu $rS, $ptroff($ptrreg)", LdStStore, [(set ptr_rc:$ea_res, (pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index 707fa41..6c0f3d3 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -188,85 +188,85 @@ class VX2_Int<bits<11> xo, string opc, Intrinsic IntID> def DSS : DSS_Form<822, (outs), (ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2), - "dss $STRM", LdStGeneral /*FIXME*/, []>; + "dss $STRM", LdStLoad /*FIXME*/, []>; def DSSALL : DSS_Form<822, (outs), (ins u5imm:$ONE, u5imm:$ZERO0,u5imm:$ZERO1,u5imm:$ZERO2), - "dssall", LdStGeneral /*FIXME*/, []>; + "dssall", LdStLoad /*FIXME*/, []>; def DST : DSS_Form<342, (outs), (ins u5imm:$ZERO, u5imm:$STRM, GPRC:$rA, GPRC:$rB), - "dst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; + "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DSTT : DSS_Form<342, (outs), (ins u5imm:$ONE, u5imm:$STRM, GPRC:$rA, GPRC:$rB), - "dstt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; + "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DSTST : DSS_Form<374, (outs), (ins u5imm:$ZERO, u5imm:$STRM, GPRC:$rA, GPRC:$rB), - "dstst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; + "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DSTSTT : DSS_Form<374, (outs), (ins u5imm:$ONE, u5imm:$STRM, GPRC:$rA, GPRC:$rB), - "dststt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; + "dststt 
$rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DST64 : DSS_Form<342, (outs), (ins u5imm:$ZERO, u5imm:$STRM, G8RC:$rA, GPRC:$rB), - "dst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; + "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DSTT64 : DSS_Form<342, (outs), (ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB), - "dstt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; + "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DSTST64 : DSS_Form<374, (outs), (ins u5imm:$ZERO, u5imm:$STRM, G8RC:$rA, GPRC:$rB), - "dstst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; + "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DSTSTT64 : DSS_Form<374, (outs), (ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB), - "dststt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; + "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def MFVSCR : VXForm_4<1540, (outs VRRC:$vD), (ins), - "mfvscr $vD", LdStGeneral, + "mfvscr $vD", LdStStore, [(set VRRC:$vD, (int_ppc_altivec_mfvscr))]>; def MTVSCR : VXForm_5<1604, (outs), (ins VRRC:$vB), - "mtvscr $vB", LdStGeneral, + "mtvscr $vB", LdStLoad, [(int_ppc_altivec_mtvscr VRRC:$vB)]>; let canFoldAsLoad = 1, PPC970_Unit = 2 in { // Loads. 
def LVEBX: XForm_1<31, 7, (outs VRRC:$vD), (ins memrr:$src), - "lvebx $vD, $src", LdStGeneral, + "lvebx $vD, $src", LdStLoad, [(set VRRC:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>; def LVEHX: XForm_1<31, 39, (outs VRRC:$vD), (ins memrr:$src), - "lvehx $vD, $src", LdStGeneral, + "lvehx $vD, $src", LdStLoad, [(set VRRC:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>; def LVEWX: XForm_1<31, 71, (outs VRRC:$vD), (ins memrr:$src), - "lvewx $vD, $src", LdStGeneral, + "lvewx $vD, $src", LdStLoad, [(set VRRC:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>; def LVX : XForm_1<31, 103, (outs VRRC:$vD), (ins memrr:$src), - "lvx $vD, $src", LdStGeneral, + "lvx $vD, $src", LdStLoad, [(set VRRC:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>; def LVXL : XForm_1<31, 359, (outs VRRC:$vD), (ins memrr:$src), - "lvxl $vD, $src", LdStGeneral, + "lvxl $vD, $src", LdStLoad, [(set VRRC:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>; } def LVSL : XForm_1<31, 6, (outs VRRC:$vD), (ins memrr:$src), - "lvsl $vD, $src", LdStGeneral, + "lvsl $vD, $src", LdStLoad, [(set VRRC:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>, PPC970_Unit_LSU; def LVSR : XForm_1<31, 38, (outs VRRC:$vD), (ins memrr:$src), - "lvsr $vD, $src", LdStGeneral, + "lvsr $vD, $src", LdStLoad, [(set VRRC:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>, PPC970_Unit_LSU; let PPC970_Unit = 2 in { // Stores. 
def STVEBX: XForm_8<31, 135, (outs), (ins VRRC:$rS, memrr:$dst), - "stvebx $rS, $dst", LdStGeneral, + "stvebx $rS, $dst", LdStStore, [(int_ppc_altivec_stvebx VRRC:$rS, xoaddr:$dst)]>; def STVEHX: XForm_8<31, 167, (outs), (ins VRRC:$rS, memrr:$dst), - "stvehx $rS, $dst", LdStGeneral, + "stvehx $rS, $dst", LdStStore, [(int_ppc_altivec_stvehx VRRC:$rS, xoaddr:$dst)]>; def STVEWX: XForm_8<31, 199, (outs), (ins VRRC:$rS, memrr:$dst), - "stvewx $rS, $dst", LdStGeneral, + "stvewx $rS, $dst", LdStStore, [(int_ppc_altivec_stvewx VRRC:$rS, xoaddr:$dst)]>; def STVX : XForm_8<31, 231, (outs), (ins VRRC:$rS, memrr:$dst), - "stvx $rS, $dst", LdStGeneral, + "stvx $rS, $dst", LdStStore, [(int_ppc_altivec_stvx VRRC:$rS, xoaddr:$dst)]>; def STVXL : XForm_8<31, 487, (outs), (ins VRRC:$rS, memrr:$dst), - "stvxl $rS, $dst", LdStGeneral, + "stvxl $rS, $dst", LdStStore, [(int_ppc_altivec_stvxl VRRC:$rS, xoaddr:$dst)]>; } diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index d332e2a..d8e4b2b 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -51,6 +51,36 @@ class PPC970_Unit_VALU { bits<3> PPC970_Unit = 5; } class PPC970_Unit_VPERM { bits<3> PPC970_Unit = 6; } class PPC970_Unit_BRU { bits<3> PPC970_Unit = 7; } +// Two joined instructions; used to emit two adjacent instructions as one. +// The itinerary from the first instruction is used for scheduling and +// classification. 
+class I2<bits<6> opcode1, bits<6> opcode2, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : Instruction { + field bits<64> Inst; + + bit PPC64 = 0; // Default value, override with isPPC64 + + let Namespace = "PPC"; + let Inst{0-5} = opcode1; + let Inst{32-37} = opcode2; + let OutOperandList = OOL; + let InOperandList = IOL; + let AsmString = asmstr; + let Itinerary = itin; + + bits<1> PPC970_First = 0; + bits<1> PPC970_Single = 0; + bits<1> PPC970_Cracked = 0; + bits<3> PPC970_Unit = 0; + + /// These fields correspond to the fields in PPCInstrInfo.h. Any changes to + /// these must be reflected there! See comments there for what these are. + let TSFlags{0} = PPC970_First; + let TSFlags{1} = PPC970_Single; + let TSFlags{2} = PPC970_Cracked; + let TSFlags{5-3} = PPC970_Unit; +} // 1.7.1 I-Form class IForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr, @@ -164,6 +194,35 @@ class DForm_4_zero<bits<6> opcode, dag OOL, dag IOL, string asmstr, let Addr = 0; } +class IForm_and_DForm_1<bits<6> opcode1, bit aa, bit lk, bits<6> opcode2, + dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I2<opcode1, opcode2, OOL, IOL, asmstr, itin> { + bits<5> A; + bits<21> Addr; + + let Pattern = pattern; + bits<24> LI; + + let Inst{6-29} = LI; + let Inst{30} = aa; + let Inst{31} = lk; + + let Inst{38-42} = A; + let Inst{43-47} = Addr{20-16}; // Base Reg + let Inst{48-63} = Addr{15-0}; // Displacement +} + +// This is used to emit BL8+NOP. 
+class IForm_and_DForm_4_zero<bits<6> opcode1, bit aa, bit lk, bits<6> opcode2, + dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : IForm_and_DForm_1<opcode1, aa, lk, opcode2, + OOL, IOL, asmstr, itin, pattern> { + let A = 0; + let Addr = 0; +} + class DForm_5<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin> : I<opcode, OOL, IOL, asmstr, itin> { diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 7a8ec40..b45ada9 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" @@ -49,9 +50,9 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer( const TargetMachine *TM, const ScheduleDAG *DAG) const { unsigned Directive = TM->getSubtarget<PPCSubtarget>().getDarwinDirective(); - if (Directive == PPC::DIR_440) { + if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2) { const InstrItineraryData *II = TM->getInstrItineraryData(); - return new PPCHazardRecognizer440(II, DAG); + return new PPCScoreboardHazardRecognizer(II, DAG); } return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG); @@ -65,14 +66,14 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer( unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective(); // Most subtargets use a PPC970 recognizer. 
- if (Directive != PPC::DIR_440) { + if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2) { const TargetInstrInfo *TII = TM.getInstrInfo(); assert(TII && "No InstrInfo?"); return new PPCHazardRecognizer970(*TII); } - return TargetInstrInfoImpl::CreateTargetPostRAHazardRecognizer(II, DAG); + return new PPCScoreboardHazardRecognizer(II, DAG); } unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { @@ -684,6 +685,9 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { case PPC::GC_LABEL: case PPC::DBG_VALUE: return 0; + case PPC::BL8_NOP_ELF: + case PPC::BLA8_NOP_ELF: + return 8; default: return 4; // PowerPC instructions are all 4 bytes } diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 939b71a..748486c 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -116,6 +116,9 @@ def PPCcall_Darwin : SDNode<"PPCISD::CALL_Darwin", SDT_PPCCall, def PPCcall_SVR4 : SDNode<"PPCISD::CALL_SVR4", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +def PPCcall_nop_SVR4 : SDNode<"PPCISD::CALL_NOP_SVR4", SDT_PPCCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInGlue, SDNPOutGlue]>; def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; @@ -542,6 +545,9 @@ def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst), "dcbzl $dst", LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>, PPC970_DGroup_Single; +def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 1)), + (DCBT xoaddr:$dst)>; + // Atomic operations let usesCustomInserter = 1 in { let Defs = [CR0] in { @@ -637,7 +643,7 @@ def STWCX : XForm_1<31, 150, (outs), (ins GPRC:$rS, memrr:$dst), isDOT; let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in -def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStGeneral, [(trap)]>; +def TRAP : XForm_24<31, 4, (outs), 
(ins), "trap", LdStLoad, [(trap)]>; //===----------------------------------------------------------------------===// // PPC32 Load Instructions. @@ -646,17 +652,17 @@ def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStGeneral, [(trap)]>; // Unindexed (r+i) Loads. let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LBZ : DForm_1<34, (outs GPRC:$rD), (ins memri:$src), - "lbz $rD, $src", LdStGeneral, + "lbz $rD, $src", LdStLoad, [(set GPRC:$rD, (zextloadi8 iaddr:$src))]>; def LHA : DForm_1<42, (outs GPRC:$rD), (ins memri:$src), "lha $rD, $src", LdStLHA, [(set GPRC:$rD, (sextloadi16 iaddr:$src))]>, PPC970_DGroup_Cracked; def LHZ : DForm_1<40, (outs GPRC:$rD), (ins memri:$src), - "lhz $rD, $src", LdStGeneral, + "lhz $rD, $src", LdStLoad, [(set GPRC:$rD, (zextloadi16 iaddr:$src))]>; def LWZ : DForm_1<32, (outs GPRC:$rD), (ins memri:$src), - "lwz $rD, $src", LdStGeneral, + "lwz $rD, $src", LdStLoad, [(set GPRC:$rD, (load iaddr:$src))]>; def LFS : DForm_1<48, (outs F4RC:$rD), (ins memri:$src), @@ -670,22 +676,22 @@ def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src), // Unindexed (r+i) Loads with Update (preinc). 
let mayLoad = 1 in { def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lbzu $rD, $addr", LdStGeneral, + "lbzu $rD, $addr", LdStLoad, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lhau $rD, $addr", LdStGeneral, + "lhau $rD, $addr", LdStLoad, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lhzu $rD, $addr", LdStGeneral, + "lhzu $rD, $addr", LdStLoad, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lwzu $rD, $addr", LdStGeneral, + "lwzu $rD, $addr", LdStLoad, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; @@ -705,25 +711,25 @@ def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), // let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LBZX : XForm_1<31, 87, (outs GPRC:$rD), (ins memrr:$src), - "lbzx $rD, $src", LdStGeneral, + "lbzx $rD, $src", LdStLoad, [(set GPRC:$rD, (zextloadi8 xaddr:$src))]>; def LHAX : XForm_1<31, 343, (outs GPRC:$rD), (ins memrr:$src), "lhax $rD, $src", LdStLHA, [(set GPRC:$rD, (sextloadi16 xaddr:$src))]>, PPC970_DGroup_Cracked; def LHZX : XForm_1<31, 279, (outs GPRC:$rD), (ins memrr:$src), - "lhzx $rD, $src", LdStGeneral, + "lhzx $rD, $src", LdStLoad, [(set GPRC:$rD, (zextloadi16 xaddr:$src))]>; def LWZX : XForm_1<31, 23, (outs GPRC:$rD), (ins memrr:$src), - "lwzx $rD, $src", LdStGeneral, + "lwzx $rD, $src", LdStLoad, [(set GPRC:$rD, (load xaddr:$src))]>; def LHBRX : XForm_1<31, 790, (outs GPRC:$rD), (ins memrr:$src), - "lhbrx $rD, $src", LdStGeneral, + "lhbrx $rD, $src", LdStLoad, [(set GPRC:$rD, (PPClbrx xoaddr:$src, i16))]>; def LWBRX : XForm_1<31, 534, (outs GPRC:$rD), (ins memrr:$src), - "lwbrx $rD, $src", LdStGeneral, + "lwbrx $rD, $src", LdStLoad, [(set 
GPRC:$rD, (PPClbrx xoaddr:$src, i32))]>; def LFSX : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src), @@ -741,13 +747,13 @@ def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src), // Unindexed (r+i) Stores. let PPC970_Unit = 2 in { def STB : DForm_1<38, (outs), (ins GPRC:$rS, memri:$src), - "stb $rS, $src", LdStGeneral, + "stb $rS, $src", LdStStore, [(truncstorei8 GPRC:$rS, iaddr:$src)]>; def STH : DForm_1<44, (outs), (ins GPRC:$rS, memri:$src), - "sth $rS, $src", LdStGeneral, + "sth $rS, $src", LdStStore, [(truncstorei16 GPRC:$rS, iaddr:$src)]>; def STW : DForm_1<36, (outs), (ins GPRC:$rS, memri:$src), - "stw $rS, $src", LdStGeneral, + "stw $rS, $src", LdStStore, [(store GPRC:$rS, iaddr:$src)]>; def STFS : DForm_1<52, (outs), (ins F4RC:$rS, memri:$dst), "stfs $rS, $dst", LdStUX, @@ -761,33 +767,33 @@ def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst), let PPC970_Unit = 2 in { def STBU : DForm_1a<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stbu $rS, $ptroff($ptrreg)", LdStGeneral, + "stbu $rS, $ptroff($ptrreg)", LdStStore, [(set ptr_rc:$ea_res, (pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STHU : DForm_1a<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "sthu $rS, $ptroff($ptrreg)", LdStGeneral, + "sthu $rS, $ptroff($ptrreg)", LdStStore, [(set ptr_rc:$ea_res, (pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STWU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stwu $rS, $ptroff($ptrreg)", LdStGeneral, + "stwu $rS, $ptroff($ptrreg)", LdStStore, [(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STFSU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F4RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stfsu $rS, 
$ptroff($ptrreg)", LdStGeneral, + "stfsu $rS, $ptroff($ptrreg)", LdStStore, [(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STFDU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stfdu $rS, $ptroff($ptrreg)", LdStGeneral, + "stfdu $rS, $ptroff($ptrreg)", LdStStore, [(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; @@ -798,29 +804,29 @@ def STFDU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS, // let PPC970_Unit = 2 in { def STBX : XForm_8<31, 215, (outs), (ins GPRC:$rS, memrr:$dst), - "stbx $rS, $dst", LdStGeneral, + "stbx $rS, $dst", LdStStore, [(truncstorei8 GPRC:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STHX : XForm_8<31, 407, (outs), (ins GPRC:$rS, memrr:$dst), - "sthx $rS, $dst", LdStGeneral, + "sthx $rS, $dst", LdStStore, [(truncstorei16 GPRC:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst), - "stwx $rS, $dst", LdStGeneral, + "stwx $rS, $dst", LdStStore, [(store GPRC:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; let mayStore = 1 in { def STWUX : XForm_8<31, 183, (outs), (ins GPRC:$rS, GPRC:$rA, GPRC:$rB), - "stwux $rS, $rA, $rB", LdStGeneral, + "stwux $rS, $rA, $rB", LdStStore, []>; } def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst), - "sthbrx $rS, $dst", LdStGeneral, + "sthbrx $rS, $dst", LdStStore, [(PPCstbrx GPRC:$rS, xoaddr:$dst, i16)]>, PPC970_DGroup_Cracked; def STWBRX: XForm_8<31, 662, (outs), (ins GPRC:$rS, memrr:$dst), - "stwbrx $rS, $dst", LdStGeneral, + "stwbrx $rS, $dst", LdStStore, [(PPCstbrx GPRC:$rS, xoaddr:$dst, i32)]>, PPC970_DGroup_Cracked; diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp index 4590f00..a6528c0 100644 --- a/lib/Target/PowerPC/PPCJITInfo.cpp +++ b/lib/Target/PowerPC/PPCJITInfo.cpp @@ -291,9 
+291,10 @@ void PPC64CompilationCallback() { } #endif -extern "C" void *PPCCompilationCallbackC(unsigned *StubCallAddrPlus4, - unsigned *OrigCallAddrPlus4, - bool is64Bit) { +extern "C" { +static void* LLVM_ATTRIBUTE_USED PPCCompilationCallbackC(unsigned *StubCallAddrPlus4, + unsigned *OrigCallAddrPlus4, + bool is64Bit) { // Adjust the pointer to the address of the call instruction in the stub // emitted by emitFunctionStub, rather than the instruction after it. unsigned *StubCallAddr = StubCallAddrPlus4 - 1; @@ -337,6 +338,7 @@ extern "C" void *PPCCompilationCallbackC(unsigned *StubCallAddrPlus4, // stack after we restore all regs. return Target; } +} diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 2976f01..ef13571 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -554,7 +554,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // clear can be encoded. This is extremely uncommon, because normally you // only "std" to a stack slot that is at least 4-byte aligned, but it can // happen in invalid code. - if (isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0)) { + if (OpC == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm + (isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) { if (isIXAddr) Offset >>= 2; // The actual encoded value has the low two bits zero. 
MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset); diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td index 4e37d0a..8c0a858 100644 --- a/lib/Target/PowerPC/PPCSchedule.td +++ b/lib/Target/PowerPC/PPCSchedule.td @@ -50,7 +50,8 @@ def BrMCRX : InstrItinClass; def LdStDCBA : InstrItinClass; def LdStDCBF : InstrItinClass; def LdStDCBI : InstrItinClass; -def LdStGeneral : InstrItinClass; +def LdStLoad : InstrItinClass; +def LdStStore : InstrItinClass; def LdStDSS : InstrItinClass; def LdStICBI : InstrItinClass; def LdStUX : InstrItinClass; @@ -107,6 +108,7 @@ include "PPCSchedule440.td" include "PPCScheduleG4.td" include "PPCScheduleG4Plus.td" include "PPCScheduleG5.td" +include "PPCScheduleA2.td" //===----------------------------------------------------------------------===// // Instruction to itinerary class map - When add new opcodes to the supported @@ -150,8 +152,8 @@ include "PPCScheduleG5.td" // dcbf LdStDCBF // dcbi LdStDCBI // dcbst LdStDCBF -// dcbt LdStGeneral -// dcbtst LdStGeneral +// dcbt LdStLoad +// dcbtst LdStLoad // dcbz LdStDCBF // divd IntDivD // divdu IntDivD @@ -160,9 +162,9 @@ include "PPCScheduleG5.td" // dss LdStDSS // dst LdStDSS // dstst LdStDSS -// eciwx LdStGeneral -// ecowx LdStGeneral -// eieio LdStGeneral +// eciwx LdStLoad +// ecowx LdStLoad +// eieio LdStLoad // eqv IntGeneral // extsb IntGeneral // extsh IntGeneral @@ -202,10 +204,10 @@ include "PPCScheduleG5.td" // fsubs FPGeneral // icbi LdStICBI // isync SprISYNC -// lbz LdStGeneral -// lbzu LdStGeneral +// lbz LdStLoad +// lbzu LdStLoad // lbzux LdStUX -// lbzx LdStGeneral +// lbzx LdStLoad // ld LdStLD // ldarx LdStLDARX // ldu LdStLD @@ -223,11 +225,11 @@ include "PPCScheduleG5.td" // lhau LdStLHA // lhaux LdStLHA // lhax LdStLHA -// lhbrx LdStGeneral -// lhz LdStGeneral -// lhzu LdStGeneral +// lhbrx LdStLoad +// lhz LdStLoad +// lhzu LdStLoad // lhzux LdStUX -// lhzx LdStGeneral +// lhzx LdStLoad // lmw LdStLMW // lswi LdStLMW // lswx 
LdStLMW @@ -242,11 +244,11 @@ include "PPCScheduleG5.td" // lwarx LdStLWARX // lwaux LdStLHA // lwax LdStLHA -// lwbrx LdStGeneral -// lwz LdStGeneral -// lwzu LdStGeneral +// lwbrx LdStLoad +// lwz LdStLoad +// lwzu LdStLoad // lwzux LdStUX -// lwzx LdStGeneral +// lwzx LdStLoad // mcrf BrMCR // mcrfs FPGeneral // mcrxr BrMCRX @@ -307,10 +309,10 @@ include "PPCScheduleG5.td" // srawi IntShift // srd IntRotateD // srw IntGeneral -// stb LdStGeneral -// stbu LdStGeneral -// stbux LdStGeneral -// stbx LdStGeneral +// stb LdStStore +// stbu LdStStore +// stbux LdStStore +// stbx LdStStore // std LdStSTD // stdcx. LdStSTDCX // stdu LdStSTD @@ -325,11 +327,11 @@ include "PPCScheduleG5.td" // stfsu LdStUX // stfsux LdStUX // stfsx LdStUX -// sth LdStGeneral -// sthbrx LdStGeneral -// sthu LdStGeneral -// sthux LdStGeneral -// sthx LdStGeneral +// sth LdStStore +// sthbrx LdStStore +// sthu LdStStore +// sthux LdStStore +// sthx LdStStore // stmw LdStLMW // stswi LdStLMW // stswx LdStLMW @@ -338,12 +340,12 @@ include "PPCScheduleG5.td" // stvewx LdStSTVEBX // stvx LdStSTVEBX // stvxl LdStSTVEBX -// stw LdStGeneral -// stwbrx LdStGeneral +// stw LdStStore +// stwbrx LdStStore // stwcx. 
LdStSTWCX -// stwu LdStGeneral -// stwux LdStGeneral -// stwx LdStGeneral +// stwu LdStStore +// stwux LdStStore +// stwx LdStStore // subf IntGeneral // subfc IntGeneral // subfe IntGeneral diff --git a/lib/Target/PowerPC/PPCSchedule440.td b/lib/Target/PowerPC/PPCSchedule440.td index 76f7465..419faea 100644 --- a/lib/Target/PowerPC/PPCSchedule440.td +++ b/lib/Target/PowerPC/PPCSchedule440.td @@ -270,15 +270,23 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<1, [LWB]>], [8, 5], [NoBypass, GPR_Bypass]>, - InstrItinData<LdStGeneral , [InstrStage<1, [IFTH1, IFTH2]>, + InstrItinData<LdStLoad , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1, DISS2]>, InstrStage<1, [LRACC]>, InstrStage<1, [AGEN]>, InstrStage<1, [CRD]>, InstrStage<2, [LWB]>], - [9, 5], // FIXME: should be [9, 5] for loads and - // [8, 5] for stores. + [9, 5], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStStore , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<2, [LWB]>], + [8, 5], [NoBypass, GPR_Bypass]>, InstrItinData<LdStICBI , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, @@ -345,6 +353,46 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<1, [LWB]>], [8, 5], [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTD , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<2, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTDCX , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1]>, + InstrStage<1, [IRACC], 0>, + InstrStage<4, [LWARX_Hold], 0>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<1, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTD , 
[InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<2, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTDCX , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1]>, + InstrStage<1, [IRACC], 0>, + InstrStage<4, [LWARX_Hold], 0>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<1, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, InstrItinData<LdStSTWCX , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1]>, diff --git a/lib/Target/PowerPC/PPCScheduleA2.td b/lib/Target/PowerPC/PPCScheduleA2.td new file mode 100644 index 0000000..857ba40 --- /dev/null +++ b/lib/Target/PowerPC/PPCScheduleA2.td @@ -0,0 +1,652 @@ +//===- PPCScheduleA2.td - PPC A2 Scheduling Definitions --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// Primary reference: +// A2 Processor User's Manual. +// IBM (as updated in) 2010. 
+ +//===----------------------------------------------------------------------===// +// Functional units on the PowerPC A2 chip sets +// +def IU0to3_0 : FuncUnit; // Fetch unit 1 to 4 slot 1 +def IU0to3_1 : FuncUnit; // Fetch unit 1 to 4 slot 2 +def IU0to3_2 : FuncUnit; // Fetch unit 1 to 4 slot 3 +def IU0to3_3 : FuncUnit; // Fetch unit 1 to 4 slot 4 +def IU4_0 : FuncUnit; // Instruction buffer slot 1 +def IU4_1 : FuncUnit; // Instruction buffer slot 2 +def IU4_2 : FuncUnit; // Instruction buffer slot 3 +def IU4_3 : FuncUnit; // Instruction buffer slot 4 +def IU4_4 : FuncUnit; // Instruction buffer slot 5 +def IU4_5 : FuncUnit; // Instruction buffer slot 6 +def IU4_6 : FuncUnit; // Instruction buffer slot 7 +def IU4_7 : FuncUnit; // Instruction buffer slot 8 +def IU5 : FuncUnit; // Dependency resolution +def IU6 : FuncUnit; // Instruction issue +def RF0 : FuncUnit; +def XRF1 : FuncUnit; +def XEX1 : FuncUnit; // Execution stage 1 for the XU pipeline +def XEX2 : FuncUnit; // Execution stage 2 for the XU pipeline +def XEX3 : FuncUnit; // Execution stage 3 for the XU pipeline +def XEX4 : FuncUnit; // Execution stage 4 for the XU pipeline +def XEX5 : FuncUnit; // Execution stage 5 for the XU pipeline +def XEX6 : FuncUnit; // Execution stage 6 for the XU pipeline +def FRF1 : FuncUnit; +def FEX1 : FuncUnit; // Execution stage 1 for the FU pipeline +def FEX2 : FuncUnit; // Execution stage 2 for the FU pipeline +def FEX3 : FuncUnit; // Execution stage 3 for the FU pipeline +def FEX4 : FuncUnit; // Execution stage 4 for the FU pipeline +def FEX5 : FuncUnit; // Execution stage 5 for the FU pipeline +def FEX6 : FuncUnit; // Execution stage 6 for the FU pipeline + +def CR_Bypass : Bypass; // The bypass for condition regs. +//def GPR_Bypass : Bypass; // The bypass for general-purpose regs. +//def FPR_Bypass : Bypass; // The bypass for floating-point regs. + +// +// This file defines the itinerary class data for the PPC A2 processor. 
+// +//===----------------------------------------------------------------------===// + + +def PPCA2Itineraries : ProcessorItineraries< + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3, + IU4_0, IU4_1, IU4_2, IU4_3, IU4_4, IU4_5, IU4_6, IU4_7, + IU5, IU6, RF0, XRF1, XEX1, XEX2, XEX3, XEX4, XEX5, XEX6, + FRF1, FEX1, FEX2, FEX3, FEX4, FEX5, FEX6], + [CR_Bypass, GPR_Bypass, FPR_Bypass], [ + InstrItinData<IntGeneral , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntCompare , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [CR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntDivW , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<38, [XEX6]>], + [53, 7, 7], + [NoBypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMFFS , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, 
InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMTFSB0 , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulHW , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7, 7], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulHWU , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7, 7], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulLI , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + 
[15, 7, 7], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntRotate , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntShift , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntTrapW , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<BrB , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [15, 7, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<BrCR , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, 
IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [CR_Bypass, CR_Bypass, CR_Bypass]>, + InstrItinData<BrMCR , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [CR_Bypass, CR_Bypass, CR_Bypass]>, + InstrItinData<BrMCRX , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [CR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStDCBA , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [13, 11], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStDCBF , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, 
[XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [13, 11], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStDCBI , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [13, 11], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStLoad , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStStore , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [13, 7], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStICBI , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStUX , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + 
IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7, 7], + [NoBypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<LdStLFD , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7, 7], + [FPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLFDU , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7, 7], + [FPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLHA , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStLMW , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, 
[XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStLWARX , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<13, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [26, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTD , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [13, 7], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStSTDCX , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<13, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [26, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTD , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [13, 7], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStSTDCX , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, 
IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<13, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [26, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTWCX , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<13, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [26, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSync , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<12, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>]>, + InstrItinData<SprISYNC , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>]>, + InstrItinData<SprMFSR , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [15, 7], + 
[GPR_Bypass, NoBypass]>, + InstrItinData<SprMTMSR , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [15, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprMTSR , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [15, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprTLBSYNC , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>]>, + InstrItinData<SprMFCR , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7], + [GPR_Bypass, CR_Bypass]>, + InstrItinData<SprMFMSR , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, 
[XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [15, 7], + [GPR_Bypass, NoBypass]>, + InstrItinData<SprMFSPR , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [15, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprMFTB , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>], + [29, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprMTSPR , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [15, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprMTSRIN , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>], + [29, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprRFI , [InstrStage<4, + 
[IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>], + [29, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprSC , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>], + [29, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<FPGeneral , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>, + InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>, + InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>, + InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>], + [15, 7, 7], + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPCompare , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>, + InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>, + InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>, + InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>], + [13, 7, 7], + [CR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPDivD , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<71, 
[FRF1], 0>, + InstrStage<71, [FEX1], 0>, + InstrStage<71, [FEX2], 0>, + InstrStage<71, [FEX3], 0>, + InstrStage<71, [FEX4], 0>, + InstrStage<71, [FEX5], 0>, + InstrStage<71, [FEX6]>], + [86, 7, 7], + [NoBypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPDivS , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<58, [FRF1], 0>, + InstrStage<58, [FEX1], 0>, + InstrStage<58, [FEX2], 0>, + InstrStage<58, [FEX3], 0>, + InstrStage<58, [FEX4], 0>, + InstrStage<58, [FEX5], 0>, + InstrStage<58, [FEX6]>], + [73, 7, 7], + [NoBypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPSqrt , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<68, [FRF1], 0>, + InstrStage<68, [FEX1], 0>, + InstrStage<68, [FEX2], 0>, + InstrStage<68, [FEX3], 0>, + InstrStage<68, [FEX4], 0>, + InstrStage<68, [FEX5], 0>, + InstrStage<68, [FEX6]>], + [86, 7], // FIXME: should be [86, 7] for double + // and [82, 7] for single. Likewise, + // the FEX? cycle count should be 68 + // for double and 64 for single. 
+ [NoBypass, FPR_Bypass]>, + InstrItinData<FPFused , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>, + InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>, + InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>, + InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>], + [15, 7, 7, 7], + [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPRes , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>, + InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>, + InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>, + InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>], + [15, 7], + [FPR_Bypass, FPR_Bypass]> +]>; diff --git a/lib/Target/PowerPC/PPCScheduleG3.td b/lib/Target/PowerPC/PPCScheduleG3.td index e7e5498..bc926f7 100644 --- a/lib/Target/PowerPC/PPCScheduleG3.td +++ b/lib/Target/PowerPC/PPCScheduleG3.td @@ -32,7 +32,8 @@ def G3Itineraries : ProcessorItineraries< InstrItinData<LdStDCBA , [InstrStage<2, [SLU]>]>, InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>, InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStGeneral , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStLoad , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStStore , [InstrStage<2, [SLU]>]>, InstrItinData<LdStICBI , [InstrStage<3, [SLU]>]>, InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>, diff --git a/lib/Target/PowerPC/PPCScheduleG4.td b/lib/Target/PowerPC/PPCScheduleG4.td index 87a3151..f7ec1e0 100644 --- a/lib/Target/PowerPC/PPCScheduleG4.td +++ b/lib/Target/PowerPC/PPCScheduleG4.td @@ -31,7 +31,8 @@ def G4Itineraries : ProcessorItineraries< InstrItinData<BrMCRX , [InstrStage<1, [SRU]>]>, InstrItinData<LdStDCBF , 
[InstrStage<2, [SLU]>]>, InstrItinData<LdStDCBI , [InstrStage<2, [SLU]>]>, - InstrItinData<LdStGeneral , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStLoad , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStStore , [InstrStage<2, [SLU]>]>, InstrItinData<LdStDSS , [InstrStage<2, [SLU]>]>, InstrItinData<LdStICBI , [InstrStage<2, [SLU]>]>, InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>, diff --git a/lib/Target/PowerPC/PPCScheduleG4Plus.td b/lib/Target/PowerPC/PPCScheduleG4Plus.td index f76557a..37ebfc5 100644 --- a/lib/Target/PowerPC/PPCScheduleG4Plus.td +++ b/lib/Target/PowerPC/PPCScheduleG4Plus.td @@ -34,7 +34,8 @@ def G4PlusItineraries : ProcessorItineraries< InstrItinData<BrMCRX , [InstrStage<2, [IU2]>]>, InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>, InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStGeneral , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStLoad , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStStore , [InstrStage<3, [SLU]>]>, InstrItinData<LdStDSS , [InstrStage<3, [SLU]>]>, InstrItinData<LdStICBI , [InstrStage<3, [IU2]>]>, InstrItinData<LdStUX , [InstrStage<3, [SLU]>]>, diff --git a/lib/Target/PowerPC/PPCScheduleG5.td b/lib/Target/PowerPC/PPCScheduleG5.td index bc0820b..d1e40ce 100644 --- a/lib/Target/PowerPC/PPCScheduleG5.td +++ b/lib/Target/PowerPC/PPCScheduleG5.td @@ -35,7 +35,8 @@ def G5Itineraries : ProcessorItineraries< InstrItinData<BrMCR , [InstrStage<2, [BPU]>]>, InstrItinData<BrMCRX , [InstrStage<3, [BPU]>]>, InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStGeneral , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStLoad , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStStore , [InstrStage<3, [SLU]>]>, InstrItinData<LdStDSS , [InstrStage<10, [SLU]>]>, InstrItinData<LdStICBI , [InstrStage<40, [SLU]>]>, InstrItinData<LdStUX , [InstrStage<4, [SLU]>]>, diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index c89fab3..f405b47 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ 
b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -146,10 +146,11 @@ bool PPCSubtarget::enablePostRAScheduler( CodeGenOpt::Level OptLevel, TargetSubtargetInfo::AntiDepBreakMode& Mode, RegClassVector& CriticalPathRCs) const { - if (DarwinDirective == PPC::DIR_440) - return false; + if (DarwinDirective == PPC::DIR_440 || DarwinDirective == PPC::DIR_A2) + Mode = TargetSubtargetInfo::ANTIDEP_ALL; + else + Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL; - Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL; CriticalPathRCs.clear(); if (isPPC64()) diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 69fe50b..a275029 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -40,6 +40,7 @@ namespace PPC { DIR_7400, DIR_750, DIR_970, + DIR_A2, DIR_64 }; } @@ -144,6 +145,8 @@ public: /// isDarwin - True if this is any darwin platform. bool isDarwin() const { return TargetTriple.isMacOSX(); } + /// isBGP - True if this is a BG/P platform. + bool isBGP() const { return TargetTriple.getVendor() == Triple::BGP; } bool isDarwinABI() const { return isDarwin(); } bool isSVR4ABI() const { return !isDarwin(); } diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index ba9c779..d113976 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -39,6 +39,10 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, FrameLowering(Subtarget), JITInfo(*this, is64Bit), TLInfo(*this), TSInfo(*this), InstrItins(Subtarget.getInstrItineraryData()) { + + // The binutils for the BG/P are too old for CFI. 
+ if (Subtarget.isBGP()) + setMCUseCFI(false); } void PPC32TargetMachine::anchor() { } diff --git a/lib/Target/README.txt b/lib/Target/README.txt index 1f69ffb..093255e 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -2,22 +2,6 @@ Target Independent Opportunities: //===---------------------------------------------------------------------===// -With the recent changes to make the implicit def/use set explicit in -machineinstrs, we should change the target descriptions for 'call' instructions -so that the .td files don't list all the call-clobbered registers as implicit -defs. Instead, these should be added by the code generator (e.g. on the dag). - -This has a number of uses: - -1. PPC32/64 and X86 32/64 can avoid having multiple copies of call instructions - for their different impdef sets. -2. Targets with multiple calling convs (e.g. x86) which have different clobber - sets don't need copies of call instructions. -3. 'Interprocedural register allocation' can be done to reduce the clobber sets - of calls. - -//===---------------------------------------------------------------------===// - We should recognized various "overflow detection" idioms and translate them into llvm.uadd.with.overflow and similar intrinsics. Here is a multiply idiom: @@ -961,6 +945,25 @@ optimized with "clang -emit-llvm-bc | opt -std-compile-opts". //===---------------------------------------------------------------------===// +int g(int x) { return (x - 10) < 0; } +Should combine to "x <= 9" (the sub has nsw). Currently not +optimized with "clang -emit-llvm-bc | opt -std-compile-opts". + +//===---------------------------------------------------------------------===// + +int g(int x) { return (x + 10) < 0; } +Should combine to "x < -10" (the add has nsw). Currently not +optimized with "clang -emit-llvm-bc | opt -std-compile-opts". 
+ +//===---------------------------------------------------------------------===// + +int f(int i, int j) { return i < j + 1; } +int g(int i, int j) { return j > i - 1; } +Should combine to "i <= j" (the add/sub has nsw). Currently not +optimized with "clang -emit-llvm-bc | opt -std-compile-opts". + +//===---------------------------------------------------------------------===// + This was noticed in the entryblock for grokdeclarator in 403.gcc: %tmp = icmp eq i32 %decl_context, 4 @@ -2358,3 +2361,8 @@ unsigned foo(unsigned x, unsigned y) { return x > y && x != 0; } should fold to x > y. //===---------------------------------------------------------------------===// + +int f(double x) { return __builtin_fabs(x) < 0.0; } +should fold to false. + +//===---------------------------------------------------------------------===// diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h index 616e1c5..f0e1354 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h @@ -14,10 +14,10 @@ #ifndef SPARCTARGETASMINFO_H #define SPARCTARGETASMINFO_H -#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmInfo.h" namespace llvm { + class StringRef; class Target; class SparcELFMCAsmInfo : public MCAsmInfo { diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h index 2fd9e3f..cba775a 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h @@ -15,9 +15,7 @@ #define SPARCMCTARGETDESC_H namespace llvm { -class MCSubtargetInfo; class Target; -class StringRef; extern Target TheSparcTarget; extern Target TheSparcV9Target; diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index ee12633..c3e6f16 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -832,22 +832,19 @@ const 
char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const { /// be zero. Op is expected to be a target specific node. Used by DAG /// combiner. void SparcTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth) const { APInt KnownZero2, KnownOne2; - KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); // Don't know anything. + KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0); switch (Op.getOpcode()) { default: break; case SPISD::SELECT_ICC: case SPISD::SELECT_FCC: - DAG.ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, - Depth+1); - DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, - Depth+1); + DAG.ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h index f483c96..cf43048 100644 --- a/lib/Target/Sparc/SparcISelLowering.h +++ b/lib/Target/Sparc/SparcISelLowering.h @@ -50,7 +50,6 @@ namespace llvm { /// in Mask are known to be either zero or one and return them in the /// KnownZero/KnownOne bitsets. virtual void computeMaskedBitsForTargetNode(const SDValue Op, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp index 3acb4dd..acb7476 100644 --- a/lib/Target/TargetData.cpp +++ b/lib/Target/TargetData.cpp @@ -373,7 +373,7 @@ unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType, // If the alignment is not a power of 2, round up to the next power of 2. // This happens for non-power-of-2 length vectors. 
if (Align & (Align-1)) - Align = llvm::NextPowerOf2(Align); + Align = NextPowerOf2(Align); return Align; } } diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp index 269958f..ec95ad4 100644 --- a/lib/Target/TargetLibraryInfo.cpp +++ b/lib/Target/TargetLibraryInfo.cpp @@ -56,7 +56,7 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "exp2f", "expm1", "expm1l", - "expl1f", + "expm1f", "fabs", "fabsl", "fabsf", @@ -95,6 +95,9 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "rint", "rintf", "rintl", + "round", + "roundf", + "roundl", "sin", "sinl", "sinf", @@ -155,6 +158,81 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T) { TLI.setUnavailable(LibFunc::siprintf); TLI.setUnavailable(LibFunc::fiprintf); } + + if (T.getOS() == Triple::Win32) { + // Win32 does not support long double + TLI.setUnavailable(LibFunc::acosl); + TLI.setUnavailable(LibFunc::asinl); + TLI.setUnavailable(LibFunc::atanl); + TLI.setUnavailable(LibFunc::atan2l); + TLI.setUnavailable(LibFunc::ceill); + TLI.setUnavailable(LibFunc::copysignl); + TLI.setUnavailable(LibFunc::cosl); + TLI.setUnavailable(LibFunc::coshl); + TLI.setUnavailable(LibFunc::expl); + TLI.setUnavailable(LibFunc::fabsf); // Win32 and Win64 both lack fabsf + TLI.setUnavailable(LibFunc::fabsl); + TLI.setUnavailable(LibFunc::floorl); + TLI.setUnavailable(LibFunc::fmodl); + TLI.setUnavailable(LibFunc::logl); + TLI.setUnavailable(LibFunc::powl); + TLI.setUnavailable(LibFunc::sinl); + TLI.setUnavailable(LibFunc::sinhl); + TLI.setUnavailable(LibFunc::sqrtl); + TLI.setUnavailable(LibFunc::tanl); + TLI.setUnavailable(LibFunc::tanhl); + + // Win32 only has C89 math + TLI.setUnavailable(LibFunc::exp2); + TLI.setUnavailable(LibFunc::exp2f); + TLI.setUnavailable(LibFunc::exp2l); + TLI.setUnavailable(LibFunc::expm1); + TLI.setUnavailable(LibFunc::expm1f); + TLI.setUnavailable(LibFunc::expm1l); + TLI.setUnavailable(LibFunc::log2); + 
TLI.setUnavailable(LibFunc::log2f); + TLI.setUnavailable(LibFunc::log2l); + TLI.setUnavailable(LibFunc::log1p); + TLI.setUnavailable(LibFunc::log1pf); + TLI.setUnavailable(LibFunc::log1pl); + TLI.setUnavailable(LibFunc::nearbyint); + TLI.setUnavailable(LibFunc::nearbyintf); + TLI.setUnavailable(LibFunc::nearbyintl); + TLI.setUnavailable(LibFunc::rint); + TLI.setUnavailable(LibFunc::rintf); + TLI.setUnavailable(LibFunc::rintl); + TLI.setUnavailable(LibFunc::round); + TLI.setUnavailable(LibFunc::roundf); + TLI.setUnavailable(LibFunc::roundl); + TLI.setUnavailable(LibFunc::trunc); + TLI.setUnavailable(LibFunc::truncf); + TLI.setUnavailable(LibFunc::truncl); + + // Win32 provides some C99 math with mangled names + TLI.setAvailableWithName(LibFunc::copysign, "_copysign"); + + if (T.getArch() == Triple::x86) { + // Win32 on x86 implements single-precision math functions as macros + TLI.setUnavailable(LibFunc::acosf); + TLI.setUnavailable(LibFunc::asinf); + TLI.setUnavailable(LibFunc::atanf); + TLI.setUnavailable(LibFunc::atan2f); + TLI.setUnavailable(LibFunc::ceilf); + TLI.setUnavailable(LibFunc::copysignf); + TLI.setUnavailable(LibFunc::cosf); + TLI.setUnavailable(LibFunc::coshf); + TLI.setUnavailable(LibFunc::expf); + TLI.setUnavailable(LibFunc::floorf); + TLI.setUnavailable(LibFunc::fmodf); + TLI.setUnavailable(LibFunc::logf); + TLI.setUnavailable(LibFunc::powf); + TLI.setUnavailable(LibFunc::sinf); + TLI.setUnavailable(LibFunc::sinhf); + TLI.setUnavailable(LibFunc::sqrtf); + TLI.setUnavailable(LibFunc::tanf); + TLI.setUnavailable(LibFunc::tanhf); + } + } } diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index 1589604..2570e0d 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -28,7 +28,6 @@ #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/SmallString.h" using namespace llvm; 
//===----------------------------------------------------------------------===// diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index b4969ca..b9b2526 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -11,9 +11,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/GlobalValue.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCCodeGenInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" using namespace llvm; @@ -74,6 +75,27 @@ CodeModel::Model TargetMachine::getCodeModel() const { return CodeGenInfo->getCodeModel(); } +TLSModel::Model TargetMachine::getTLSModel(const GlobalValue *GV) const { + bool isLocal = GV->hasLocalLinkage(); + bool isDeclaration = GV->isDeclaration(); + // FIXME: what should we do for protected and internal visibility? + // For variables, is internal different from hidden? + bool isHidden = GV->hasHiddenVisibility(); + + if (getRelocationModel() == Reloc::PIC_ && + !Options.PositionIndependentExecutable) { + if (isLocal || isHidden) + return TLSModel::LocalDynamic; + else + return TLSModel::GeneralDynamic; + } else { + if (!isDeclaration || isHidden) + return TLSModel::LocalExec; + else + return TLSModel::InitialExec; + } +} + /// getOptLevel - Returns the optimization level: None, Less, /// Default, or Aggressive. CodeGenOpt::Level TargetMachine::getOptLevel() const { diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp new file mode 100644 index 0000000..d6bba8b --- /dev/null +++ b/lib/Target/TargetMachineC.cpp @@ -0,0 +1,197 @@ +//===-- TargetMachine.cpp -------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements the LLVM-C part of TargetMachine.h +// +//===----------------------------------------------------------------------===// + +#include "llvm-c/Core.h" +#include "llvm-c/Target.h" +#include "llvm-c/TargetMachine.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include <cassert> +#include <cstdlib> +#include <cstring> + +using namespace llvm; + + + +LLVMTargetRef LLVMGetFirstTarget() { + const Target* target = &*TargetRegistry::begin(); + return wrap(target); +} +LLVMTargetRef LLVMGetNextTarget(LLVMTargetRef T) { + return wrap(unwrap(T)->getNext()); +} + +const char * LLVMGetTargetName(LLVMTargetRef T) { + return unwrap(T)->getName(); +} + +const char * LLVMGetTargetDescription(LLVMTargetRef T) { + return unwrap(T)->getShortDescription(); +} + +LLVMBool LLVMTargetHasJIT(LLVMTargetRef T) { + return unwrap(T)->hasJIT(); +} + +LLVMBool LLVMTargetHasTargetMachine(LLVMTargetRef T) { + return unwrap(T)->hasTargetMachine(); +} + +LLVMBool LLVMTargetHasAsmBackend(LLVMTargetRef T) { + return unwrap(T)->hasMCAsmBackend(); +} + +LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T, char* Triple, + char* CPU, char* Features, LLVMCodeGenOptLevel Level, LLVMRelocMode Reloc, + LLVMCodeModel CodeModel) { + Reloc::Model RM; + switch (Reloc){ + case LLVMRelocStatic: + RM = Reloc::Static; + break; + case LLVMRelocPIC: + RM = Reloc::PIC_; + break; + case LLVMRelocDynamicNoPic: + RM = Reloc::DynamicNoPIC; + break; + default: + RM = Reloc::Default; + break; + } + + CodeModel::Model CM; + switch (CodeModel) { + case LLVMCodeModelJITDefault: + CM = CodeModel::JITDefault; + break; + case LLVMCodeModelSmall: + CM = 
CodeModel::Small; + break; + case LLVMCodeModelKernel: + CM = CodeModel::Kernel; + break; + case LLVMCodeModelMedium: + CM = CodeModel::Medium; + break; + case LLVMCodeModelLarge: + CM = CodeModel::Large; + break; + default: + CM = CodeModel::Default; + break; + } + CodeGenOpt::Level OL; + + switch (Level) { + case LLVMCodeGenLevelNone: + OL = CodeGenOpt::None; + break; + case LLVMCodeGenLevelLess: + OL = CodeGenOpt::Less; + break; + case LLVMCodeGenLevelAggressive: + OL = CodeGenOpt::Aggressive; + break; + default: + OL = CodeGenOpt::Default; + break; + } + + TargetOptions opt; + return wrap(unwrap(T)->createTargetMachine(Triple, CPU, Features, opt, RM, + CM, OL)); +} + + +void LLVMDisposeTargetMachine(LLVMTargetMachineRef T) { + delete unwrap(T); +} + +LLVMTargetRef LLVMGetTargetMachineTarget(LLVMTargetMachineRef T) { + const Target* target = &(unwrap(T)->getTarget()); + return wrap(target); +} + +char* LLVMGetTargetMachineTriple(LLVMTargetMachineRef T) { + std::string StringRep = unwrap(T)->getTargetTriple(); + return strdup(StringRep.c_str()); +} + +char* LLVMGetTargetMachineCPU(LLVMTargetMachineRef T) { + std::string StringRep = unwrap(T)->getTargetCPU(); + return strdup(StringRep.c_str()); +} + +char* LLVMGetTargetMachineFeatureString(LLVMTargetMachineRef T) { + std::string StringRep = unwrap(T)->getTargetFeatureString(); + return strdup(StringRep.c_str()); +} + +LLVMTargetDataRef LLVMGetTargetMachineData(LLVMTargetMachineRef T) { + return wrap(unwrap(T)->getTargetData()); +} + +LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M, + char* Filename, LLVMCodeGenFileType codegen, char** ErrorMessage) { + TargetMachine* TM = unwrap(T); + Module* Mod = unwrap(M); + + PassManager pass; + + std::string error; + + const TargetData* td = TM->getTargetData(); + + if (!td) { + error = "No TargetData in TargetMachine"; + *ErrorMessage = strdup(error.c_str()); + return true; + } + pass.add(new TargetData(*td)); + + TargetMachine::CodeGenFileType 
ft; + switch (codegen) { + case LLVMAssemblyFile: + ft = TargetMachine::CGFT_AssemblyFile; + break; + default: + ft = TargetMachine::CGFT_ObjectFile; + break; + } + raw_fd_ostream dest(Filename, error, raw_fd_ostream::F_Binary); + formatted_raw_ostream destf(dest); + if (!error.empty()) { + *ErrorMessage = strdup(error.c_str()); + return true; + } + + if (TM->addPassesToEmitFile(pass, destf, ft)) { + error = "No TargetData in TargetMachine"; + *ErrorMessage = strdup(error.c_str()); + return true; + } + + pass.run(*Mod); + + destf.flush(); + dest.flush(); + return false; +} diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 9e88472..08c732c 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -17,7 +17,6 @@ #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringSwitch.h" @@ -951,20 +950,21 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) && (PatchedName.endswith("ss") || PatchedName.endswith("sd") || PatchedName.endswith("ps") || PatchedName.endswith("pd"))) { - bool IsVCMP = PatchedName.startswith("vcmp"); + bool IsVCMP = PatchedName[0] == 'v'; unsigned SSECCIdx = IsVCMP ? 
4 : 3; unsigned SSEComparisonCode = StringSwitch<unsigned>( PatchedName.slice(SSECCIdx, PatchedName.size() - 2)) - .Case("eq", 0) - .Case("lt", 1) - .Case("le", 2) - .Case("unord", 3) - .Case("neq", 4) - .Case("nlt", 5) - .Case("nle", 6) - .Case("ord", 7) - .Case("eq_uq", 8) - .Case("nge", 9) + .Case("eq", 0x00) + .Case("lt", 0x01) + .Case("le", 0x02) + .Case("unord", 0x03) + .Case("neq", 0x04) + .Case("nlt", 0x05) + .Case("nle", 0x06) + .Case("ord", 0x07) + /* AVX only from here */ + .Case("eq_uq", 0x08) + .Case("nge", 0x09) .Case("ngt", 0x0A) .Case("false", 0x0B) .Case("neq_oq", 0x0C) @@ -988,7 +988,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, .Case("gt_oq", 0x1E) .Case("true_us", 0x1F) .Default(~0U); - if (SSEComparisonCode != ~0U) { + if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) { ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode, getParser().getContext()); if (PatchedName.endswith("ss")) { diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 8278bde..b13a006 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -322,7 +322,12 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, OperandType type = (OperandType)operand.type; + bool isBranch = false; + uint64_t pcrel = 0; if (type == TYPE_RELv) { + isBranch = true; + pcrel = insn.startLocation + + insn.displacementOffset + insn.displacementSize; switch (insn.displacementSize) { default: break; @@ -373,8 +378,6 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, } } - bool isBranch = false; - uint64_t pcrel = 0; switch (type) { case TYPE_XMM128: mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4))); diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index fbd81d2..6020877 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c 
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -1527,6 +1527,9 @@ static int readOperands(struct InternalInstruction* insn) { if (insn->spec->operands[index].type == TYPE_IMM3 && insn->immediates[insn->numImmediatesConsumed - 1] > 7) return -1; + if (insn->spec->operands[index].type == TYPE_IMM5 && + insn->immediates[insn->numImmediatesConsumed - 1] > 31) + return -1; if (insn->spec->operands[index].type == TYPE_XMM128 || insn->spec->operands[index].type == TYPE_XMM256) sawRegImm = 1; diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h index d2e30f1..13e1136 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h @@ -273,6 +273,7 @@ struct ContextDecision { ENUM_ENTRY(TYPE_IMM32, "4-byte") \ ENUM_ENTRY(TYPE_IMM64, "8-byte") \ ENUM_ENTRY(TYPE_IMM3, "1-byte immediate operand between 0 and 7") \ + ENUM_ENTRY(TYPE_IMM5, "1-byte immediate operand between 0 and 31") \ ENUM_ENTRY(TYPE_RM8, "1-byte register or memory operand") \ ENUM_ENTRY(TYPE_RM16, "2-byte") \ ENUM_ENTRY(TYPE_RM32, "4-byte") \ diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index b7ccb4c..5118e4c 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -19,6 +19,8 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" @@ -26,7 +28,6 @@ using namespace llvm; // Include the auto-generated portion of the assembly writer. 
-#define GET_INSTRUCTION_NAME #define PRINT_ALIAS_INSTR #include "X86GenAsmWriter.inc" @@ -49,10 +50,6 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, EmitAnyX86InstComments(MI, *CommentStream, getRegisterName); } -StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const { - return getInstructionName(Opcode); -} - void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op, raw_ostream &O) { switch (MI->getOperand(Op).getImm()) { diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h index ff94301..2e00bff 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h @@ -22,12 +22,12 @@ class MCOperand; class X86ATTInstPrinter : public MCInstPrinter { public: - X86ATTInstPrinter(const MCAsmInfo &MAI, const MCRegisterInfo &MRI) - : MCInstPrinter(MAI, MRI) {} + X86ATTInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; virtual void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot); - virtual StringRef getOpcodeName(unsigned Opcode) const; // Autogenerated by tblgen, returns true if we successfully printed an // alias. @@ -36,7 +36,6 @@ public: // Autogenerated by tblgen. 
void printInstruction(const MCInst *MI, raw_ostream &OS); static const char *getRegisterName(unsigned RegNo); - static const char *getInstructionName(unsigned Opcode); void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS); void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &OS); diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp index 46a96d2..4ea662c 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp @@ -17,15 +17,13 @@ #include "X86InstComments.h" #include "MCTargetDesc/X86MCTargetDesc.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include <cctype> using namespace llvm; -// Include the auto-generated portion of the assembly writer. -#define GET_INSTRUCTION_NAME #include "X86GenAsmWriter1.inc" void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { @@ -43,9 +41,6 @@ void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, if (CommentStream) EmitAnyX86InstComments(MI, *CommentStream, getRegisterName); } -StringRef X86IntelInstPrinter::getOpcodeName(unsigned Opcode) const { - return getInstructionName(Opcode); -} void X86IntelInstPrinter::printSSECC(const MCInst *MI, unsigned Op, raw_ostream &O) { diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h index ea1d38a..4f5938d 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h @@ -23,17 +23,16 @@ class MCOperand; class X86IntelInstPrinter : public MCInstPrinter { public: - X86IntelInstPrinter(const MCAsmInfo &MAI, const MCRegisterInfo &MRI) - : MCInstPrinter(MAI, MRI) {} + X86IntelInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + 
const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; virtual void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot); - virtual StringRef getOpcodeName(unsigned Opcode) const; // Autogenerated by tblgen. void printInstruction(const MCInst *MI, raw_ostream &O); static const char *getRegisterName(unsigned RegNo); - static const char *getInstructionName(unsigned Opcode); void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &O); diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 3f770f7..32e40fe 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -9,7 +9,6 @@ #include "MCTargetDesc/X86BaseInfo.h" #include "MCTargetDesc/X86FixupKinds.h" -#include "llvm/ADT/Twine.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCELFObjectWriter.h" diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index 003a14a..afa545c 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -155,4 +155,7 @@ X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) { AssemblerDialect = AsmWriterFlavor; TextAlignFillValue = 0x90; + + // Exceptions handling + ExceptionsType = ExceptionHandling::DwarfCFI; } diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index efd18c7..3482363 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -474,12 +474,13 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, static MCInstPrinter *createX86MCInstPrinter(const Target &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, + const MCInstrInfo 
&MII, const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) { if (SyntaxVariant == 0) - return new X86ATTInstPrinter(MAI, MRI); + return new X86ATTInstPrinter(MAI, MII, MRI); if (SyntaxVariant == 1) - return new X86IntelInstPrinter(MAI, MRI); + return new X86IntelInstPrinter(MAI, MII, MRI); return 0; } diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index f9c1d35..6a8a4fd 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -2060,3 +2060,21 @@ Instead we could generate: The trick is to match "fetch_and_add(X, -C) == C". //===---------------------------------------------------------------------===// + +unsigned t(unsigned a, unsigned b) { + return a <= b ? 5 : -5; +} + +We generate: + movl $5, %ecx + cmpl %esi, %edi + movl $-5, %eax + cmovbel %ecx, %eax + +GCC: + cmpl %edi, %esi + sbbl %eax, %eax + andl $-10, %eax + addl $5, %eax + +//===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index 32c722a..a802333 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -169,6 +169,9 @@ void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) { void DecodeVPERM2X128Mask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { + if (Imm & 0x88) + return; // Not a shuffle + unsigned HalfSize = VT.getVectorNumElements()/2; unsigned FstHalfBegin = (Imm & 0x3) * HalfSize; unsigned SndHalfBegin = ((Imm >> 4) & 0x3) * HalfSize; diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index f1cedf3..7db7ccb 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -19,7 +19,6 @@ #include "X86MachineFunctionInfo.h" #include "X86TargetMachine.h" #include "InstPrinter/X86ATTInstPrinter.h" -#include "InstPrinter/X86IntelInstPrinter.h" #include "llvm/CallingConv.h" #include "llvm/DerivedTypes.h" #include 
"llvm/Module.h" @@ -265,8 +264,8 @@ void X86AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, void X86AsmPrinter::printSSECC(const MachineInstr *MI, unsigned Op, raw_ostream &O) { unsigned char value = MI->getOperand(Op).getImm(); - assert(value <= 7 && "Invalid ssecc argument!"); switch (value) { + default: llvm_unreachable("Invalid ssecc argument!"); case 0: O << "eq"; break; case 1: O << "lt"; break; case 2: O << "le"; break; diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 3d63b7e..69752c5 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -2179,7 +2179,7 @@ bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, namespace llvm { - llvm::FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) { + FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) { return new X86FastISel(funcInfo); } } diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 936df27..ed1707d 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -28,7 +28,6 @@ #include "X86InstrInfo.h" #include "llvm/InlineAsm.h" #include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -219,7 +218,7 @@ namespace { /// getSTReg - Return the X86::ST(i) register which contains the specified /// FP<RegNo> register. unsigned getSTReg(unsigned RegNo) const { - return StackTop - 1 - getSlot(RegNo) + llvm::X86::ST0; + return StackTop - 1 - getSlot(RegNo) + X86::ST0; } // pushReg - Push the specified FP<n> register onto the stack. 
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 9405c2f..8e2b1d6 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -36,7 +36,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" using namespace llvm; @@ -621,14 +620,14 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { // Handle X86-64 rip-relative addresses. We check this before checking direct // folding because RIP is preferable to non-RIP accesses. - if (Subtarget->is64Bit() && + if (Subtarget->is64Bit() && N.getOpcode() == X86ISD::WrapperRIP && // Under X86-64 non-small code model, GV (and friends) are 64-bits, so // they cannot be folded into immediate fields. // FIXME: This can be improved for kernel and other models? - (M == CodeModel::Small || M == CodeModel::Kernel) && - // Base and index reg must be 0 in order to use %rip as base and lowering - // must allow RIP. - !AM.hasBaseOrIndexReg() && N.getOpcode() == X86ISD::WrapperRIP) { + (M == CodeModel::Small || M == CodeModel::Kernel)) { + // Base and index reg must be 0 in order to use %rip as base. + if (AM.hasBaseOrIndexReg()) + return true; if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) { X86ISelAddressMode Backup = AM; AM.GV = G->getGlobal(); @@ -663,11 +662,12 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { } // Handle the case when globals fit in our immediate field: This is true for - // X86-32 always and X86-64 when in -static -mcmodel=small mode. In 64-bit - // mode, this results in a non-RIP-relative computation. + // X86-32 always and X86-64 when in -mcmodel=small mode. In 64-bit + // mode, this only applies to a non-RIP-relative computation. 
if (!Subtarget->is64Bit() || - ((M == CodeModel::Small || M == CodeModel::Kernel) && - TM.getRelocationModel() == Reloc::Static)) { + M == CodeModel::Small || M == CodeModel::Kernel) { + assert(N.getOpcode() != X86ISD::WrapperRIP && + "RIP-relative addressing already handled"); if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) { AM.GV = G->getGlobal(); AM.Disp += G->getOffset(); @@ -897,7 +897,7 @@ static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, APInt MaskedHighBits = APInt::getHighBitsSet(X.getValueSizeInBits(), MaskLZ); APInt KnownZero, KnownOne; - DAG.ComputeMaskedBits(X, MaskedHighBits, KnownZero, KnownOne); + DAG.ComputeMaskedBits(X, KnownZero, KnownOne); if (MaskedHighBits != KnownZero) return true; // We've identified a pattern that can be transformed into a single shift @@ -1848,6 +1848,96 @@ static bool HasNoSignedComparisonUses(SDNode *N) { return true; } +/// isLoadIncOrDecStore - Check whether or not the chain ending in StoreNode +/// is suitable for doing the {load; increment or decrement; store} to modify +/// transformation. +static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc, + SDValue StoredVal, SelectionDAG *CurDAG, + LoadSDNode* &LoadNode, SDValue &InputChain) { + + // is the value stored the result of a DEC or INC? + if (!(Opc == X86ISD::DEC || Opc == X86ISD::INC)) return false; + + // is the stored value result 0 of the load? + if (StoredVal.getResNo() != 0) return false; + + // are there other uses of the loaded value than the inc or dec? + if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false; + + // is the store non-extending and non-indexed? + if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal()) + return false; + + SDValue Load = StoredVal->getOperand(0); + // Is the stored value a non-extending and non-indexed load? + if (!ISD::isNormalLoad(Load.getNode())) return false; + + // Return LoadNode by reference. 
+ LoadNode = cast<LoadSDNode>(Load); + // is the size of the value one that we can handle? (i.e. 64, 32, 16, or 8) + EVT LdVT = LoadNode->getMemoryVT(); + if (LdVT != MVT::i64 && LdVT != MVT::i32 && LdVT != MVT::i16 && + LdVT != MVT::i8) + return false; + + // Is store the only read of the loaded value? + if (!Load.hasOneUse()) + return false; + + // Is the address of the store the same as the load? + if (LoadNode->getBasePtr() != StoreNode->getBasePtr() || + LoadNode->getOffset() != StoreNode->getOffset()) + return false; + + // Check if the chain is produced by the load or is a TokenFactor with + // the load output chain as an operand. Return InputChain by reference. + SDValue Chain = StoreNode->getChain(); + + bool ChainCheck = false; + if (Chain == Load.getValue(1)) { + ChainCheck = true; + InputChain = LoadNode->getChain(); + } else if (Chain.getOpcode() == ISD::TokenFactor) { + SmallVector<SDValue, 4> ChainOps; + for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) { + SDValue Op = Chain.getOperand(i); + if (Op == Load.getValue(1)) { + ChainCheck = true; + continue; + } + ChainOps.push_back(Op); + } + + if (ChainCheck) + // Make a new TokenFactor with all the other input chains except + // for the load. + InputChain = CurDAG->getNode(ISD::TokenFactor, Chain.getDebugLoc(), + MVT::Other, &ChainOps[0], ChainOps.size()); + } + if (!ChainCheck) + return false; + + return true; +} + +/// getFusedLdStOpcode - Get the appropriate X86 opcode for an in memory +/// increment or decrement. Opc should be X86ISD::DEC or X86ISD::INC. 
+static unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) { + if (Opc == X86ISD::DEC) { + if (LdVT == MVT::i64) return X86::DEC64m; + if (LdVT == MVT::i32) return X86::DEC32m; + if (LdVT == MVT::i16) return X86::DEC16m; + if (LdVT == MVT::i8) return X86::DEC8m; + } else { + assert(Opc == X86ISD::INC && "unrecognized opcode"); + if (LdVT == MVT::i64) return X86::INC64m; + if (LdVT == MVT::i32) return X86::INC32m; + if (LdVT == MVT::i16) return X86::INC16m; + if (LdVT == MVT::i8) return X86::INC8m; + } + llvm_unreachable("unrecognized size for LdVT"); +} + SDNode *X86DAGToDAGISel::Select(SDNode *Node) { EVT NVT = Node->getValueType(0); unsigned Opc, MOpc; @@ -2355,9 +2445,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { break; } case ISD::STORE: { + // Change a chain of {load; incr or dec; store} of the same value into + // a simple increment or decrement through memory of that value, if the + // uses of the modified value and its address are suitable. // The DEC64m tablegen pattern is currently not able to match the case where - // the EFLAGS on the original DEC are used. - // we'll need to improve tablegen to allow flags to be transferred from a + // the EFLAGS on the original DEC are used. (This also applies to + // {INC,DEC}X{64,32,16,8}.) + // We'll need to improve tablegen to allow flags to be transferred from a // node in the pattern to the result node. 
probably with a new keyword // for example, we have this // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", @@ -2367,44 +2461,17 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", // [(store (add (loadi64 addr:$dst), -1), addr:$dst), // (transferrable EFLAGS)]>; + StoreSDNode *StoreNode = cast<StoreSDNode>(Node); - SDValue Chain = StoreNode->getOperand(0); SDValue StoredVal = StoreNode->getOperand(1); - SDValue Address = StoreNode->getOperand(2); - SDValue Undef = StoreNode->getOperand(3); - - if (StoreNode->getMemOperand()->getSize() != 8 || - Undef->getOpcode() != ISD::UNDEF || - Chain->getOpcode() != ISD::LOAD || - StoredVal->getOpcode() != X86ISD::DEC || - StoredVal.getResNo() != 0 || - !StoredVal.getNode()->hasNUsesOfValue(1, 0) || - !Chain.getNode()->hasNUsesOfValue(1, 0) || - StoredVal->getOperand(0).getNode() != Chain.getNode()) - break; + unsigned Opc = StoredVal->getOpcode(); - //OPC_CheckPredicate, 1, // Predicate_nontemporalstore - if (StoreNode->isNonTemporal()) + LoadSDNode *LoadNode = 0; + SDValue InputChain; + if (!isLoadIncOrDecStore(StoreNode, Opc, StoredVal, CurDAG, + LoadNode, InputChain)) break; - LoadSDNode *LoadNode = cast<LoadSDNode>(Chain.getNode()); - if (LoadNode->getOperand(1) != Address || - LoadNode->getOperand(2) != Undef) - break; - - if (!ISD::isNormalLoad(LoadNode)) - break; - - if (!ISD::isNormalStore(StoreNode)) - break; - - // check load chain has only one use (from the store) - if (!Chain.hasOneUse()) - break; - - // Merge the input chains if they are not intra-pattern references. 
- SDValue InputChain = LoadNode->getOperand(0); - SDValue Base, Scale, Index, Disp, Segment; if (!SelectAddr(LoadNode, LoadNode->getBasePtr(), Base, Scale, Index, Disp, Segment)) @@ -2414,7 +2481,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { MemOp[0] = StoreNode->getMemOperand(); MemOp[1] = LoadNode->getMemOperand(); const SDValue Ops[] = { Base, Scale, Index, Disp, Segment, InputChain }; - MachineSDNode *Result = CurDAG->getMachineNode(X86::DEC64m, + EVT LdVT = LoadNode->getMemoryVT(); + unsigned newOpc = getFusedLdStOpcode(LdVT, Opc); + MachineSDNode *Result = CurDAG->getMachineNode(newOpc, Node->getDebugLoc(), MVT::i32, MVT::Other, Ops, array_lengthof(Ops)); @@ -2465,6 +2534,6 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, /// X86-specific DAG, ready for instruction scheduling. /// FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM, - llvm::CodeGenOpt::Level OptLevel) { + CodeGenOpt::Level OptLevel) { return new X86DAGToDAGISel(TM, OptLevel); } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 88f3829..04299f3 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1578,18 +1578,20 @@ X86TargetLowering::LowerReturn(SDValue Chain, MVT::Other, &RetOps[0], RetOps.size()); } -bool X86TargetLowering::isUsedByReturnOnly(SDNode *N) const { +bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { if (N->getNumValues() != 1) return false; if (!N->hasNUsesOfValue(1, 0)) return false; + SDValue TCChain = Chain; SDNode *Copy = *N->use_begin(); if (Copy->getOpcode() == ISD::CopyToReg) { // If the copy has a glue operand, we conservatively assume it isn't safe to // perform a tail call. 
if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) return false; + TCChain = Copy->getOperand(0); } else if (Copy->getOpcode() != ISD::FP_EXTEND) return false; @@ -1601,7 +1603,11 @@ bool X86TargetLowering::isUsedByReturnOnly(SDNode *N) const { HasRet = true; } - return HasRet; + if (!HasRet) + return false; + + Chain = TCChain; + return true; } EVT @@ -2929,6 +2935,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::PSHUFHW: case X86ISD::PSHUFLW: case X86ISD::VPERMILP: + case X86ISD::VPERMI: return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8)); } } @@ -3970,6 +3977,27 @@ unsigned X86::getInsertVINSERTF128Immediate(SDNode *N) { return Index / NumElemsPerChunk; } +/// getShuffleCLImmediate - Return the appropriate immediate to shuffle +/// the specified VECTOR_SHUFFLE mask with VPERMQ and VPERMPD instructions. +/// Handles 256-bit. +static unsigned getShuffleCLImmediate(ShuffleVectorSDNode *N) { + EVT VT = N->getValueType(0); + + unsigned NumElts = VT.getVectorNumElements(); + + assert((VT.is256BitVector() && NumElts == 4) && + "Unsupported vector type for VPERMQ/VPERMPD"); + + unsigned Mask = 0; + for (unsigned i = 0; i != NumElts; ++i) { + int Elt = N->getMaskElt(i); + if (Elt < 0) + continue; + Mask |= Elt << (i*2); + } + + return Mask; +} /// isZeroNode - Returns true if Elt is a constant zero or a floating point /// constant +0.0. 
bool X86::isZeroNode(SDValue Elt) { @@ -4402,6 +4430,7 @@ static bool getTargetShuffleMask(SDNode *N, EVT VT, case X86ISD::VPERM2X128: ImmN = N->getOperand(N->getNumOperands()-1); DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask); + if (Mask.empty()) return false; break; case X86ISD::MOVDDUP: case X86ISD::MOVLHPD: @@ -4852,41 +4881,42 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts, return SDValue(); } -/// isVectorBroadcast - Check if the node chain is suitable to be xformed to -/// a vbroadcast node. We support two patterns: -/// 1. A splat BUILD_VECTOR which uses a single scalar load. +/// LowerVectorBroadcast - Attempt to use the vbroadcast instruction +/// to generate a splat value for the following cases: +/// 1. A splat BUILD_VECTOR which uses a single scalar load, or a constant. /// 2. A splat shuffle which uses a scalar_to_vector node which comes from -/// a scalar load. -/// The scalar load node is returned when a pattern is found, +/// a scalar load, or a constant. +/// The VBROADCAST node is returned when a pattern is found, /// or SDValue() otherwise. -static SDValue isVectorBroadcast(SDValue &Op, const X86Subtarget *Subtarget) { +SDValue +X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const { if (!Subtarget->hasAVX()) return SDValue(); EVT VT = Op.getValueType(); - SDValue V = Op; - - if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST) - V = V.getOperand(0); + DebugLoc dl = Op.getDebugLoc(); - //A suspected load to be broadcasted. SDValue Ld; + bool ConstSplatVal; - switch (V.getOpcode()) { + switch (Op.getOpcode()) { default: // Unknown pattern found. return SDValue(); case ISD::BUILD_VECTOR: { // The BUILD_VECTOR node must be a splat. 
- if (!isSplatVector(V.getNode())) + if (!isSplatVector(Op.getNode())) return SDValue(); - Ld = V.getOperand(0); + Ld = Op.getOperand(0); + ConstSplatVal = (Ld.getOpcode() == ISD::Constant || + Ld.getOpcode() == ISD::ConstantFP); // The suspected load node has several users. Make sure that all // of its users are from the BUILD_VECTOR node. - if (!Ld->hasNUsesOfValue(VT.getVectorNumElements(), 0)) + // Constants may have multiple users. + if (!ConstSplatVal && !Ld->hasNUsesOfValue(VT.getVectorNumElements(), 0)) return SDValue(); break; } @@ -4904,15 +4934,50 @@ static SDValue isVectorBroadcast(SDValue &Op, const X86Subtarget *Subtarget) { return SDValue(); Ld = Sc.getOperand(0); + ConstSplatVal = (Ld.getOpcode() == ISD::Constant || + Ld.getOpcode() == ISD::ConstantFP); // The scalar_to_vector node and the suspected // load node must have exactly one user. - if (!Sc.hasOneUse() || !Ld.hasOneUse()) + // Constants may have multiple users. + if (!ConstSplatVal && (!Sc.hasOneUse() || !Ld.hasOneUse())) return SDValue(); break; } } + bool Is256 = VT.getSizeInBits() == 256; + bool Is128 = VT.getSizeInBits() == 128; + + // Handle the broadcasting a single constant scalar from the constant pool + // into a vector. On Sandybridge it is still better to load a constant vector + // from the constant pool and not to broadcast it from a scalar. 
+ if (ConstSplatVal && Subtarget->hasAVX2()) { + EVT CVT = Ld.getValueType(); + assert(!CVT.isVector() && "Must not broadcast a vector type"); + unsigned ScalarSize = CVT.getSizeInBits(); + + if ((Is256 && (ScalarSize == 32 || ScalarSize == 64)) || + (Is128 && (ScalarSize == 32))) { + + const Constant *C = 0; + if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld)) + C = CI->getConstantIntValue(); + else if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Ld)) + C = CF->getConstantFPValue(); + + assert(C && "Invalid constant type"); + + SDValue CP = DAG.getConstantPool(C, getPointerTy()); + unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment(); + Ld = DAG.getLoad(CVT, dl, DAG.getEntryNode(), CP, + MachinePointerInfo::getConstantPool(), + false, false, false, Alignment); + + return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); + } + } + // The scalar source must be a normal load. if (!ISD::isNormalLoad(Ld.getNode())) return SDValue(); @@ -4921,28 +4986,26 @@ static SDValue isVectorBroadcast(SDValue &Op, const X86Subtarget *Subtarget) { if (Ld->hasAnyUseOfValue(1)) return SDValue(); - bool Is256 = VT.getSizeInBits() == 256; - bool Is128 = VT.getSizeInBits() == 128; unsigned ScalarSize = Ld.getValueType().getSizeInBits(); // VBroadcast to YMM if (Is256 && (ScalarSize == 32 || ScalarSize == 64)) - return Ld; + return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); // VBroadcast to XMM if (Is128 && (ScalarSize == 32)) - return Ld; + return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); // The integer check is needed for the 64-bit into 128-bit so it doesn't match // double since there is vbroadcastsd xmm if (Subtarget->hasAVX2() && Ld.getValueType().isInteger()) { // VBroadcast to YMM if (Is256 && (ScalarSize == 8 || ScalarSize == 16)) - return Ld; + return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); // VBroadcast to XMM if (Is128 && (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64)) - return Ld; + return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); } 
// Unsupported broadcast. @@ -4977,9 +5040,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return getOnesVector(VT, Subtarget->hasAVX2(), DAG, dl); } - SDValue LD = isVectorBroadcast(Op, Subtarget); - if (LD.getNode()) - return DAG.getNode(X86ISD::VBROADCAST, dl, VT, LD); + SDValue Broadcast = LowerVectorBroadcast(Op, DAG); + if (Broadcast.getNode()) + return Broadcast; unsigned EVTBits = ExtVT.getSizeInBits(); @@ -5343,6 +5406,85 @@ X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { return LowerAVXCONCAT_VECTORS(Op, DAG); } +// Try to lower a shuffle node into a simple blend instruction. +static SDValue LowerVECTOR_SHUFFLEtoBlend(SDValue Op, + const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); + SDValue V1 = SVOp->getOperand(0); + SDValue V2 = SVOp->getOperand(1); + DebugLoc dl = SVOp->getDebugLoc(); + EVT VT = Op.getValueType(); + EVT InVT = V1.getValueType(); + int MaskSize = VT.getVectorNumElements(); + int InSize = InVT.getVectorNumElements(); + + if (!Subtarget->hasSSE41()) + return SDValue(); + + if (MaskSize != InSize) + return SDValue(); + + int ISDNo = 0; + MVT OpTy; + + switch (VT.getSimpleVT().SimpleTy) { + default: return SDValue(); + case MVT::v8i16: + ISDNo = X86ISD::BLENDPW; + OpTy = MVT::v8i16; + break; + case MVT::v4i32: + case MVT::v4f32: + ISDNo = X86ISD::BLENDPS; + OpTy = MVT::v4f32; + break; + case MVT::v2i64: + case MVT::v2f64: + ISDNo = X86ISD::BLENDPD; + OpTy = MVT::v2f64; + break; + case MVT::v8i32: + case MVT::v8f32: + if (!Subtarget->hasAVX()) + return SDValue(); + ISDNo = X86ISD::BLENDPS; + OpTy = MVT::v8f32; + break; + case MVT::v4i64: + case MVT::v4f64: + if (!Subtarget->hasAVX()) + return SDValue(); + ISDNo = X86ISD::BLENDPD; + OpTy = MVT::v4f64; + break; + case MVT::v16i16: + if (!Subtarget->hasAVX2()) + return SDValue(); + ISDNo = X86ISD::BLENDPW; + OpTy = MVT::v16i16; + break; + } + assert(ISDNo && "Invalid 
Op Number"); + + unsigned MaskVals = 0; + + for (int i = 0; i < MaskSize; ++i) { + int EltIdx = SVOp->getMaskElt(i); + if (EltIdx == i || EltIdx == -1) + MaskVals |= (1<<i); + else if (EltIdx == (i + MaskSize)) + continue; // Bit is set to zero; + else return SDValue(); + } + + V1 = DAG.getNode(ISD::BITCAST, dl, OpTy, V1); + V2 = DAG.getNode(ISD::BITCAST, dl, OpTy, V2); + SDValue Ret = DAG.getNode(ISDNo, dl, OpTy, V1, V2, + DAG.getConstant(MaskVals, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Ret); +} + // v8i16 shuffles - Prefer shuffles in the following order: // 1. [all] pshuflw, pshufhw, optional move // 2. [ssse3] 1 x pshufb @@ -5836,96 +5978,79 @@ LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { unsigned NumElems = VT.getVectorNumElements(); unsigned NumLaneElems = NumElems / 2; - int MinRange[2][2] = { { static_cast<int>(NumElems), - static_cast<int>(NumElems) }, - { static_cast<int>(NumElems), - static_cast<int>(NumElems) } }; - int MaxRange[2][2] = { { -1, -1 }, { -1, -1 } }; + DebugLoc dl = SVOp->getDebugLoc(); + MVT EltVT = VT.getVectorElementType().getSimpleVT(); + EVT NVT = MVT::getVectorVT(EltVT, NumLaneElems); + SDValue Shufs[2]; - // Collect used ranges for each source in each lane + SmallVector<int, 16> Mask; for (unsigned l = 0; l < 2; ++l) { - unsigned LaneStart = l*NumLaneElems; + // Build a shuffle mask for the output, discovering on the fly which + // input vectors to use as shuffle operands (recorded in InputUsed). + // If building a suitable shuffle vector proves too hard, then bail + // out with useBuildVector set. + int InputUsed[2] = { -1, -1 }; // Not yet discovered. + unsigned LaneStart = l * NumLaneElems; for (unsigned i = 0; i != NumLaneElems; ++i) { + // The mask element. This indexes into the input. int Idx = SVOp->getMaskElt(i+LaneStart); - if (Idx < 0) + if (Idx < 0) { + // the mask element does not index into any input vector. 
+ Mask.push_back(-1); continue; - - int Input = 0; - if (Idx >= (int)NumElems) { - Idx -= NumElems; - Input = 1; } - if (Idx > MaxRange[l][Input]) - MaxRange[l][Input] = Idx; - if (Idx < MinRange[l][Input]) - MinRange[l][Input] = Idx; - } - } + // The input vector this mask element indexes into. + int Input = Idx / NumLaneElems; - // Make sure each range is 128-bits - int ExtractIdx[2][2] = { { -1, -1 }, { -1, -1 } }; - for (unsigned l = 0; l < 2; ++l) { - for (unsigned Input = 0; Input < 2; ++Input) { - if (MinRange[l][Input] == (int)NumElems && MaxRange[l][Input] < 0) - continue; + // Turn the index into an offset from the start of the input vector. + Idx -= Input * NumLaneElems; - if (MinRange[l][Input] >= 0 && MaxRange[l][Input] < (int)NumLaneElems) - ExtractIdx[l][Input] = 0; - else if (MinRange[l][Input] >= (int)NumLaneElems && - MaxRange[l][Input] < (int)NumElems) - ExtractIdx[l][Input] = NumLaneElems; - else - return SDValue(); - } - } + // Find or create a shuffle vector operand to hold this input. + unsigned OpNo; + for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) { + if (InputUsed[OpNo] == Input) + // This input vector is already an operand. + break; + if (InputUsed[OpNo] < 0) { + // Create a new operand for this input vector. + InputUsed[OpNo] = Input; + break; + } + } - DebugLoc dl = SVOp->getDebugLoc(); - MVT EltVT = VT.getVectorElementType().getSimpleVT(); - EVT NVT = MVT::getVectorVT(EltVT, NumElems/2); + if (OpNo >= array_lengthof(InputUsed)) { + // More than two input vectors used! Give up. + return SDValue(); + } - SDValue Ops[2][2]; - for (unsigned l = 0; l < 2; ++l) { - for (unsigned Input = 0; Input < 2; ++Input) { - if (ExtractIdx[l][Input] >= 0) - Ops[l][Input] = Extract128BitVector(SVOp->getOperand(Input), - DAG.getConstant(ExtractIdx[l][Input], MVT::i32), - DAG, dl); - else - Ops[l][Input] = DAG.getUNDEF(NVT); + // Add the mask index for the new shuffle vector. 
+ Mask.push_back(Idx + OpNo * NumLaneElems); } - } - // Generate 128-bit shuffles - SmallVector<int, 16> Mask1, Mask2; - for (unsigned i = 0; i != NumLaneElems; ++i) { - int Elt = SVOp->getMaskElt(i); - if (Elt >= (int)NumElems) { - Elt %= NumLaneElems; - Elt += NumLaneElems; - } else if (Elt >= 0) { - Elt %= NumLaneElems; - } - Mask1.push_back(Elt); - } - for (unsigned i = NumLaneElems; i != NumElems; ++i) { - int Elt = SVOp->getMaskElt(i); - if (Elt >= (int)NumElems) { - Elt %= NumLaneElems; - Elt += NumLaneElems; - } else if (Elt >= 0) { - Elt %= NumLaneElems; + if (InputUsed[0] < 0) { + // No input vectors were used! The result is undefined. + Shufs[l] = DAG.getUNDEF(NVT); + } else { + SDValue Op0 = Extract128BitVector(SVOp->getOperand(InputUsed[0] / 2), + DAG.getConstant((InputUsed[0] % 2) * NumLaneElems, MVT::i32), + DAG, dl); + // If only one input was used, use an undefined vector for the other. + SDValue Op1 = (InputUsed[1] < 0) ? DAG.getUNDEF(NVT) : + Extract128BitVector(SVOp->getOperand(InputUsed[1] / 2), + DAG.getConstant((InputUsed[1] % 2) * NumLaneElems, MVT::i32), + DAG, dl); + // At least one input vector was used. Create a new shuffle vector. 
+ Shufs[l] = DAG.getVectorShuffle(NVT, dl, Op0, Op1, &Mask[0]); } - Mask2.push_back(Elt); - } - SDValue Shuf1 = DAG.getVectorShuffle(NVT, dl, Ops[0][0], Ops[0][1], &Mask1[0]); - SDValue Shuf2 = DAG.getVectorShuffle(NVT, dl, Ops[1][0], Ops[1][1], &Mask2[0]); + Mask.clear(); + } // Concatenate the result back - SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Shuf1, + SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Shufs[0], DAG.getConstant(0, MVT::i32), DAG, dl); - return Insert128BitVector(V, Shuf2, DAG.getConstant(NumElems/2, MVT::i32), + return Insert128BitVector(V, Shufs[1],DAG.getConstant(NumLaneElems, MVT::i32), DAG, dl); } @@ -6203,10 +6328,8 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) { getShuffleSHUFImmediate(SVOp), DAG); } -static -SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG, - const TargetLowering &TLI, - const X86Subtarget *Subtarget) { +SDValue +X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const { ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); @@ -6222,9 +6345,9 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG, int Size = VT.getSizeInBits(); // Use vbroadcast whenever the splat comes from a foldable load - SDValue LD = isVectorBroadcast(Op, Subtarget); - if (LD.getNode()) - return DAG.getNode(X86ISD::VBROADCAST, dl, VT, LD); + SDValue Broadcast = LowerVectorBroadcast(Op, DAG); + if (Broadcast.getNode()) + return Broadcast; // Handle splats by matching through known shuffle masks if ((Size == 128 && NumElem <= 4) || @@ -6309,7 +6432,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // Normalize the input vectors. Here splats, zeroed vectors, profitable // narrowing and commutation of operands should be handled. The actual code // doesn't include all of those, work in progress... 
- SDValue NewOp = NormalizeVectorShuffle(Op, DAG, *this, Subtarget); + SDValue NewOp = NormalizeVectorShuffle(Op, DAG); if (NewOp.getNode()) return NewOp; @@ -6524,6 +6647,27 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1, V2, getShuffleVPERM2X128Immediate(SVOp), DAG); + SDValue BlendOp = LowerVECTOR_SHUFFLEtoBlend(Op, Subtarget, DAG); + if (BlendOp.getNode()) + return BlendOp; + + if (V2IsUndef && HasAVX2 && (VT == MVT::v8i32 || VT == MVT::v8f32)) { + SmallVector<SDValue, 8> permclMask; + for (unsigned i = 0; i != 8; ++i) { + permclMask.push_back(DAG.getConstant((M[i]>=0) ? M[i] : 0, MVT::i32)); + } + SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, + &permclMask[0], 8); + // Bitcast is for VPERMPS since mask is v8i32 but node takes v8f32 + return DAG.getNode(X86ISD::VPERMV, dl, VT, + DAG.getNode(ISD::BITCAST, dl, VT, Mask), V1); + } + + if (V2IsUndef && HasAVX2 && (VT == MVT::v4i64 || VT == MVT::v4f64)) + return getTargetShuffleNode(X86ISD::VPERMI, dl, VT, V1, + getShuffleCLImmediate(SVOp), DAG); + + //===--------------------------------------------------------------------===// // Since no target specific shuffle was selected for this generic one, // lower it into other known shuffles. 
FIXME: this isn't true yet, but @@ -7182,8 +7326,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) GV = GA->resolveAliasedGlobal(false); - TLSModel::Model model - = getTLSModel(GV, getTargetMachine().getRelocationModel()); + TLSModel::Model model = getTargetMachine().getTLSModel(GV); switch (model) { case TLSModel::GeneralDynamic: @@ -8099,8 +8242,8 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, unsigned BitWidth = Op0.getValueSizeInBits(); unsigned AndBitWidth = And.getValueSizeInBits(); if (BitWidth > AndBitWidth) { - APInt Mask = APInt::getAllOnesValue(BitWidth), Zeros, Ones; - DAG.ComputeMaskedBits(Op0, Mask, Zeros, Ones); + APInt Zeros, Ones; + DAG.ComputeMaskedBits(Op0, Zeros, Ones); if (Zeros.countLeadingOnes() < BitWidth - AndBitWidth) return SDValue(); } @@ -9449,12 +9592,12 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const case Intrinsic::x86_avx2_vperm2i128: return DAG.getNode(X86ISD::VPERM2X128, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); - case Intrinsic::x86_avx_vpermil_ps: - case Intrinsic::x86_avx_vpermil_pd: - case Intrinsic::x86_avx_vpermil_ps_256: - case Intrinsic::x86_avx_vpermil_pd_256: - return DAG.getNode(X86ISD::VPERMILP, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_avx2_permd: + case Intrinsic::x86_avx2_permps: + // Operands intentionally swapped. Mask is last operand to intrinsic, + // but second operand for node/intruction. + return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(1)); // ptest and testp intrinsics. 
The intrinsic these come from are designed to // return an integer value, not just an instruction so lower it to the ptest @@ -10963,6 +11106,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::ANDNP: return "X86ISD::ANDNP"; case X86ISD::PSIGN: return "X86ISD::PSIGN"; case X86ISD::BLENDV: return "X86ISD::BLENDV"; + case X86ISD::BLENDPW: return "X86ISD::BLENDPW"; + case X86ISD::BLENDPS: return "X86ISD::BLENDPS"; + case X86ISD::BLENDPD: return "X86ISD::BLENDPD"; case X86ISD::HADD: return "X86ISD::HADD"; case X86ISD::HSUB: return "X86ISD::HSUB"; case X86ISD::FHADD: return "X86ISD::FHADD"; @@ -11035,6 +11181,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST"; case X86ISD::VPERMILP: return "X86ISD::VPERMILP"; case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128"; + case X86ISD::VPERMV: return "X86ISD::VPERMV"; + case X86ISD::VPERMI: return "X86ISD::VPERMI"; case X86ISD::PMULUDQ: return "X86ISD::PMULUDQ"; case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS"; case X86ISD::VAARG_64: return "X86ISD::VAARG_64"; @@ -11192,14 +11340,15 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, unsigned notOpc, unsigned EAXreg, const TargetRegisterClass *RC, - bool invSrc) const { + bool Invert) const { // For the atomic bitwise operator, we generate // thisMBB: // newMBB: // ld t1 = [bitinstr.addr] // op t2 = t1, [bitinstr.val] + // not t3 = t2 (if Invert) // mov EAX = t1 - // lcs dest = [bitinstr.addr], t2 [EAX is implicit] + // lcs dest = [bitinstr.addr], t3 [EAX is implicit] // bz newMBB // fallthrough -->nextMBB const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); @@ -11247,13 +11396,6 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, for (int i=0; i <= lastAddrIndx; ++i) (*MIB).addOperand(*argOpers[i]); - unsigned tt = F->getRegInfo().createVirtualRegister(RC); - if 
(invSrc) { - MIB = BuildMI(newMBB, dl, TII->get(notOpc), tt).addReg(t1); - } - else - tt = t1; - unsigned t2 = F->getRegInfo().createVirtualRegister(RC); assert((argOpers[valArgIndx]->isReg() || argOpers[valArgIndx]->isImm()) && @@ -11262,16 +11404,23 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, MIB = BuildMI(newMBB, dl, TII->get(regOpc), t2); else MIB = BuildMI(newMBB, dl, TII->get(immOpc), t2); - MIB.addReg(tt); + MIB.addReg(t1); (*MIB).addOperand(*argOpers[valArgIndx]); + unsigned t3 = F->getRegInfo().createVirtualRegister(RC); + if (Invert) { + MIB = BuildMI(newMBB, dl, TII->get(notOpc), t3).addReg(t2); + } + else + t3 = t2; + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), EAXreg); MIB.addReg(t1); MIB = BuildMI(newMBB, dl, TII->get(CXchgOpc)); for (int i=0; i <= lastAddrIndx; ++i) (*MIB).addOperand(*argOpers[i]); - MIB.addReg(t2); + MIB.addReg(t3); assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand"); (*MIB).setMemRefs(bInstr->memoperands_begin(), bInstr->memoperands_end()); @@ -11294,7 +11443,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, unsigned regOpcH, unsigned immOpcL, unsigned immOpcH, - bool invSrc) const { + bool Invert) const { // For the atomic bitwise operator, we generate // thisMBB (instructions are in pairs, except cmpxchg8b) // ld t1,t2 = [bitinstr.addr] @@ -11302,6 +11451,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, // out1, out2 = phi (thisMBB, t1/t2) (newMBB, t3/t4) // op t5, t6 <- out1, out2, [bitinstr.val] // (for SWAP, substitute: mov t5, t6 <- [bitinstr.val]) + // neg t7, t8 < t5, t6 (if Invert) // mov ECX, EBX <- t5, t6 // mov EAX, EDX <- t1, t2 // cmpxchg8b [bitinstr.addr] [EAX, EDX, EBX, ECX implicit] @@ -11385,16 +11535,9 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, .addReg(t2).addMBB(thisMBB).addReg(t4).addMBB(newMBB); // The subsequent operations should be using the 
destination registers of - //the PHI instructions. - if (invSrc) { - t1 = F->getRegInfo().createVirtualRegister(RC); - t2 = F->getRegInfo().createVirtualRegister(RC); - MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t1).addReg(dest1Oper.getReg()); - MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t2).addReg(dest2Oper.getReg()); - } else { - t1 = dest1Oper.getReg(); - t2 = dest2Oper.getReg(); - } + // the PHI instructions. + t1 = dest1Oper.getReg(); + t2 = dest2Oper.getReg(); int valArgIndx = lastAddrIndx + 1; assert((argOpers[valArgIndx]->isReg() || @@ -11421,15 +11564,26 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, MIB.addReg(t2); (*MIB).addOperand(*argOpers[valArgIndx + 1]); + unsigned t7, t8; + if (Invert) { + t7 = F->getRegInfo().createVirtualRegister(RC); + t8 = F->getRegInfo().createVirtualRegister(RC); + MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t7).addReg(t5); + MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t8).addReg(t6); + } else { + t7 = t5; + t8 = t6; + } + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX); MIB.addReg(t1); MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EDX); MIB.addReg(t2); MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EBX); - MIB.addReg(t5); + MIB.addReg(t7); MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::ECX); - MIB.addReg(t6); + MIB.addReg(t8); MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG8B)); for (int i=0; i <= lastAddrIndx; ++i) @@ -12620,11 +12774,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, //===----------------------------------------------------------------------===// void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth) const { + unsigned BitWidth = KnownZero.getBitWidth(); unsigned Opc = Op.getOpcode(); assert((Opc >= ISD::BUILTIN_OP_END || Opc == ISD::INTRINSIC_WO_CHAIN || @@ -12633,7 +12787,7 @@ 
void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, "Should use MaskedValueIsZero if you don't know whether Op" " is a target node!"); - KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); // Don't know anything. + KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything. switch (Opc) { default: break; case X86ISD::ADD: @@ -12652,8 +12806,7 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, break; // Fallthrough case X86ISD::SETCC: - KnownZero |= APInt::getHighBitsSet(Mask.getBitWidth(), - Mask.getBitWidth() - 1); + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); break; case ISD::INTRINSIC_WO_CHAIN: { unsigned IntId = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); @@ -12678,8 +12831,7 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, case Intrinsic::x86_sse2_pmovmskb_128: NumLoBits = 16; break; case Intrinsic::x86_avx2_pmovmskb: NumLoBits = 32; break; } - KnownZero = APInt::getHighBitsSet(Mask.getBitWidth(), - Mask.getBitWidth() - NumLoBits); + KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - NumLoBits); break; } } @@ -14000,13 +14152,14 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); // Validate that X, Y, and Mask are BIT_CONVERTS, and see through them. - if (Mask.getOpcode() != ISD::BITCAST || - X.getOpcode() != ISD::BITCAST || - Y.getOpcode() != ISD::BITCAST) - return SDValue(); - // Look through mask bitcast. - Mask = Mask.getOperand(0); + if (Mask.getOpcode() == ISD::BITCAST) + Mask = Mask.getOperand(0); + if (X.getOpcode() == ISD::BITCAST) + X = X.getOperand(0); + if (Y.getOpcode() == ISD::BITCAST) + Y = Y.getOperand(0); + EVT MaskVT = Mask.getValueType(); // Validate that the Mask operand is a vector sra node. @@ -14027,8 +14180,6 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, // Now we know we at least have a plendvb with the mask val. See if // we can form a psignb/w/d. 
// psign = x.type == y.type == mask.type && y = sub(0, x); - X = X.getOperand(0); - Y = Y.getOperand(0); if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X && ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) && X.getValueType() == MaskVT && Y.getValueType() == MaskVT) { diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 0327b1f..09116e8 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -175,9 +175,14 @@ namespace llvm { /// PSIGN - Copy integer sign. PSIGN, - /// BLEND family of opcodes + /// BLENDV - Blend where the selector is an XMM. BLENDV, + /// BLENDxx - Blend where the selector is an immediate. + BLENDPW, + BLENDPS, + BLENDPD, + /// HADD - Integer horizontal add. HADD, @@ -280,6 +285,8 @@ namespace llvm { UNPCKL, UNPCKH, VPERMILP, + VPERMV, + VPERMI, VPERM2X128, VBROADCAST, @@ -504,7 +511,6 @@ namespace llvm { /// in Mask are known to be either zero or one and return them in the /// KnownZero/KnownOne bitsets. 
virtual void computeMaskedBitsForTargetNode(const SDValue Op, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, @@ -781,6 +787,8 @@ namespace llvm { // Utility functions to help LowerVECTOR_SHUFFLE SDValue LowerVECTOR_SHUFFLEv8i16(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const; + SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const; virtual SDValue LowerFormalArguments(SDValue Chain, @@ -804,7 +812,7 @@ namespace llvm { const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; - virtual bool isUsedByReturnOnly(SDNode *N) const; + virtual bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const; virtual bool mayBeEmittedAsTailCall(CallInst *CI) const; @@ -849,7 +857,7 @@ namespace llvm { unsigned notOpc, unsigned EAXreg, const TargetRegisterClass *RC, - bool invSrc = false) const; + bool Invert = false) const; MachineBasicBlock *EmitAtomicBit6432WithCustomInserter( MachineInstr *BInstr, @@ -858,7 +866,7 @@ namespace llvm { unsigned regOpcH, unsigned immOpcL, unsigned immOpcH, - bool invSrc = false) const; + bool Invert = false) const; /// Utility function to emit atomic min and max. It takes the min/max /// instruction to expand, the associated basic block, and the associated diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index 7fa7499..0eee083 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -53,7 +53,7 @@ def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src", // This probably ought to be moved to a def : Pat<> if the // syntax can be accepted. 
[(set AL, (mul AL, GR8:$src)), - (implicit EFLAGS)]>; // AL,AH = AL*GR8 + (implicit EFLAGS)], IIC_MUL8>; // AL,AH = AL*GR8 let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src), @@ -97,31 +97,32 @@ def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src), let neverHasSideEffects = 1 in { let Defs = [AL,EFLAGS,AX], Uses = [AL] in -def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", []>; - // AL,AH = AL*GR8 +def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", [], + IIC_IMUL8>; // AL,AH = AL*GR8 let Defs = [AX,DX,EFLAGS], Uses = [AX] in -def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", []>, - OpSize; // AX,DX = AX*GR16 +def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", [], + IIC_IMUL16_RR>, OpSize; // AX,DX = AX*GR16 let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in -def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", []>; - // EAX,EDX = EAX*GR32 +def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", [], + IIC_IMUL32_RR>; // EAX,EDX = EAX*GR32 let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in -def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", []>; - // RAX,RDX = RAX*GR64 +def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", [], + IIC_IMUL64_RR>; // RAX,RDX = RAX*GR64 let mayLoad = 1 in { let Defs = [AL,EFLAGS,AX], Uses = [AL] in def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src), - "imul{b}\t$src", []>; // AL,AH = AL*[mem8] + "imul{b}\t$src", [], IIC_IMUL8>; // AL,AH = AL*[mem8] let Defs = [AX,DX,EFLAGS], Uses = [AX] in def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src), - "imul{w}\t$src", []>, OpSize; // AX,DX = AX*[mem16] + "imul{w}\t$src", [], IIC_IMUL16_MEM>, OpSize; + // AX,DX = AX*[mem16] let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src), - "imul{l}\t$src", []>; // EAX,EDX = EAX*[mem32] + 
"imul{l}\t$src", [], IIC_IMUL32_MEM>; // EAX,EDX = EAX*[mem32] let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src), - "imul{q}\t$src", []>; // RAX,RDX = RAX*[mem64] + "imul{q}\t$src", [], IIC_IMUL64>; // RAX,RDX = RAX*[mem64] } } // neverHasSideEffects @@ -639,10 +640,11 @@ class ITy<bits<8> opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins, // BinOpRR - Instructions like "add reg, reg, reg". class BinOpRR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, - dag outlist, list<dag> pattern, Format f = MRMDestReg> + dag outlist, list<dag> pattern, InstrItinClass itin, + Format f = MRMDestReg> : ITy<opcode, f, typeinfo, outlist, (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern>; + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>; // BinOpRR_R - Instructions like "add reg, reg, reg", where the pattern has // just a regclass (no eflags) as a result. @@ -650,7 +652,8 @@ class BinOpRR_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, SDNode opnode> : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), [(set typeinfo.RegClass:$dst, - (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))]>; + (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))], + IIC_BIN_NONMEM>; // BinOpRR_F - Instructions like "cmp reg, Reg", where the pattern has // just a EFLAGS as a result. @@ -659,7 +662,7 @@ class BinOpRR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, : BinOpRR<opcode, mnemonic, typeinfo, (outs), [(set EFLAGS, (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))], - f>; + IIC_BIN_NONMEM, f>; // BinOpRR_RF - Instructions like "add reg, reg, reg", where the pattern has // both a regclass and EFLAGS as a result. 
@@ -667,7 +670,8 @@ class BinOpRR_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, SDNode opnode> : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), [(set typeinfo.RegClass:$dst, EFLAGS, - (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))]>; + (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))], + IIC_BIN_NONMEM>; // BinOpRR_RFF - Instructions like "adc reg, reg, reg", where the pattern has // both a regclass and EFLAGS as a result, and has EFLAGS as input. @@ -676,14 +680,14 @@ class BinOpRR_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), [(set typeinfo.RegClass:$dst, EFLAGS, (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2, - EFLAGS))]>; + EFLAGS))], IIC_BIN_NONMEM>; // BinOpRR_Rev - Instructions like "add reg, reg, reg" (reversed encoding). class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo> : ITy<opcode, MRMSrcReg, typeinfo, (outs typeinfo.RegClass:$dst), (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2), - mnemonic, "{$src2, $dst|$dst, $src2}", []> { + mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM> { // The disassembler should know about this, but not the asmparser. let isCodeGenOnly = 1; } @@ -692,7 +696,7 @@ class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo> class BinOpRR_F_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo> : ITy<opcode, MRMSrcReg, typeinfo, (outs), (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", []> { + mnemonic, "{$src2, $src1|$src1, $src2}", [], IIC_BIN_NONMEM> { // The disassembler should know about this, but not the asmparser. 
let isCodeGenOnly = 1; } @@ -702,7 +706,7 @@ class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, dag outlist, list<dag> pattern> : ITy<opcode, MRMSrcMem, typeinfo, outlist, (ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_MEM>; + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>; // BinOpRM_R - Instructions like "add reg, reg, [mem]". class BinOpRM_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, @@ -738,7 +742,7 @@ class BinOpRI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, Format f, dag outlist, list<dag> pattern> : ITy<opcode, f, typeinfo, outlist, (ins typeinfo.RegClass:$src1, typeinfo.ImmOperand:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern> { + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> { let ImmT = typeinfo.ImmEncoding; } @@ -762,7 +766,6 @@ class BinOpRI_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, : BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst), [(set typeinfo.RegClass:$dst, EFLAGS, (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>; - // BinOpRI_RFF - Instructions like "adc reg, reg, imm". class BinOpRI_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, SDNode opnode, Format f> @@ -776,7 +779,7 @@ class BinOpRI8<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, Format f, dag outlist, list<dag> pattern> : ITy<opcode, f, typeinfo, outlist, (ins typeinfo.RegClass:$src1, typeinfo.Imm8Operand:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern> { + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> { let ImmT = Imm8; // Always 8-bit immediate. } @@ -853,7 +856,6 @@ class BinOpMI_RMW<string mnemonic, X86TypeInfo typeinfo, [(store (opnode (typeinfo.VT (load addr:$dst)), typeinfo.ImmOperator:$src), addr:$dst), (implicit EFLAGS)]>; - // BinOpMI_RMW_FF - Instructions like "adc [mem], imm". 
class BinOpMI_RMW_FF<string mnemonic, X86TypeInfo typeinfo, SDNode opnode, Format f> @@ -1219,12 +1221,12 @@ let neverHasSideEffects = 1 in { let isCommutable = 1 in def rr : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src), !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"), - []>, T8XD, VEX_4V; + [], IIC_MUL8>, T8XD, VEX_4V; let mayLoad = 1 in def rm : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src), !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"), - []>, T8XD, VEX_4V; + [], IIC_MUL8>, T8XD, VEX_4V; } } diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 42a5014..6f9e849 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -301,34 +301,67 @@ def : Pat<(sub GR64:$op, (i64 (X86setcc_c X86_COND_B, EFLAGS))), // String Pseudo Instructions // let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in { -def REP_MOVSB : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}", - [(X86rep_movs i8)], IIC_REP_MOVS>, REP; -def REP_MOVSW : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}", - [(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize; -def REP_MOVSD : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}", - [(X86rep_movs i32)], IIC_REP_MOVS>, REP; +def REP_MOVSB_32 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}", + [(X86rep_movs i8)], IIC_REP_MOVS>, REP, + Requires<[In32BitMode]>; +def REP_MOVSW_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}", + [(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize, + Requires<[In32BitMode]>; +def REP_MOVSD_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}", + [(X86rep_movs i32)], IIC_REP_MOVS>, REP, + Requires<[In32BitMode]>; } -let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in -def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}", - [(X86rep_movs i64)], IIC_REP_MOVS>, REP; - +let Defs = [RCX,RDI,RSI], Uses = 
[RCX,RDI,RSI], isCodeGenOnly = 1 in { +def REP_MOVSB_64 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}", + [(X86rep_movs i8)], IIC_REP_MOVS>, REP, + Requires<[In64BitMode]>; +def REP_MOVSW_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}", + [(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize, + Requires<[In64BitMode]>; +def REP_MOVSD_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}", + [(X86rep_movs i32)], IIC_REP_MOVS>, REP, + Requires<[In64BitMode]>; +def REP_MOVSQ_64 : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}", + [(X86rep_movs i64)], IIC_REP_MOVS>, REP, + Requires<[In64BitMode]>; +} // FIXME: Should use "(X86rep_stos AL)" as the pattern. -let Defs = [ECX,EDI], Uses = [AL,ECX,EDI], isCodeGenOnly = 1 in -def REP_STOSB : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}", - [(X86rep_stos i8)], IIC_REP_STOS>, REP; -let Defs = [ECX,EDI], Uses = [AX,ECX,EDI], isCodeGenOnly = 1 in -def REP_STOSW : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}", - [(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize; -let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI], isCodeGenOnly = 1 in -def REP_STOSD : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}", - [(X86rep_stos i32)], IIC_REP_STOS>, REP; - -let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI], isCodeGenOnly = 1 in -def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}", - [(X86rep_stos i64)], IIC_REP_STOS>, REP; +let Defs = [ECX,EDI], isCodeGenOnly = 1 in { + let Uses = [AL,ECX,EDI] in + def REP_STOSB_32 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}", + [(X86rep_stos i8)], IIC_REP_STOS>, REP, + Requires<[In32BitMode]>; + let Uses = [AX,ECX,EDI] in + def REP_STOSW_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}", + [(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize, + Requires<[In32BitMode]>; + let Uses = [EAX,ECX,EDI] in + def REP_STOSD_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}", + [(X86rep_stos i32)], IIC_REP_STOS>, REP, + 
Requires<[In32BitMode]>; +} +let Defs = [RCX,RDI], isCodeGenOnly = 1 in { + let Uses = [AL,RCX,RDI] in + def REP_STOSB_64 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}", + [(X86rep_stos i8)], IIC_REP_STOS>, REP, + Requires<[In64BitMode]>; + let Uses = [AX,RCX,RDI] in + def REP_STOSW_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}", + [(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize, + Requires<[In64BitMode]>; + let Uses = [RAX,RCX,RDI] in + def REP_STOSD_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}", + [(X86rep_stos i32)], IIC_REP_STOS>, REP, + Requires<[In64BitMode]>; + + let Uses = [RAX,RCX,RDI] in + def REP_STOSQ_64 : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}", + [(X86rep_stos i64)], IIC_REP_STOS>, REP, + Requires<[In64BitMode]>; +} //===----------------------------------------------------------------------===// // Thread Local Storage Instructions @@ -1134,12 +1167,10 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1))) return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue()); - unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits(); - APInt Mask = APInt::getAllOnesValue(BitWidth); APInt KnownZero0, KnownOne0; - CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0); + CurDAG->ComputeMaskedBits(N->getOperand(0), KnownZero0, KnownOne0, 0); APInt KnownZero1, KnownOne1; - CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0); + CurDAG->ComputeMaskedBits(N->getOperand(1), KnownZero1, KnownOne1, 0); return (~KnownZero0 & ~KnownZero1) == 0; }]>; diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td index ba86098..bf11fde 100644 --- a/lib/Target/X86/X86InstrControl.td +++ b/lib/Target/X86/X86InstrControl.td @@ -21,20 +21,25 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, def RET : I <0xC3, RawFrm, (outs), (ins 
variable_ops), "ret", [(X86retflag 0)], IIC_RET>; + def RETW : I <0xC3, RawFrm, (outs), (ins variable_ops), + "ret{w}", + [], IIC_RET>, OpSize; def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops), "ret\t$amt", [(X86retflag timm:$amt)], IIC_RET_IMM>; def RETIW : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops), - "retw\t$amt", + "ret{w}\t$amt", [], IIC_RET_IMM>, OpSize; def LRETL : I <0xCB, RawFrm, (outs), (ins), - "lretl", [], IIC_RET>; + "{l}ret{l|f}", [], IIC_RET>; + def LRETW : I <0xCB, RawFrm, (outs), (ins), + "{l}ret{w|f}", [], IIC_RET>, OpSize; def LRETQ : RI <0xCB, RawFrm, (outs), (ins), - "lretq", [], IIC_RET>; + "{l}ret{q|f}", [], IIC_RET>; def LRETI : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt), - "lret\t$amt", [], IIC_RET>; + "{l}ret{l|f}\t$amt", [], IIC_RET>; def LRETIW : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt), - "lretw\t$amt", [], IIC_RET>, OpSize; + "{l}ret{w|f}\t$amt", [], IIC_RET>, OpSize; } // Unconditional branches. diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index ae3ed1b..35801e4 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -126,6 +126,8 @@ def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisInt<3>]>; def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>; +def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, +SDTCisSameAs<1,2>, SDTCisVT<3, i32>]>; def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>; @@ -153,11 +155,17 @@ def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>; def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>; def X86VPermilp : SDNode<"X86ISD::VPERMILP", SDTShuff2OpI>; +def X86VPermv : SDNode<"X86ISD::VPERMV", SDTShuff2Op>; +def X86VPermi : SDNode<"X86ISD::VPERMI", SDTShuff2OpI>; def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>; def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>; +def 
X86Blendpw : SDNode<"X86ISD::BLENDPW", SDTBlend>; +def X86Blendps : SDNode<"X86ISD::BLENDPS", SDTBlend>; +def X86Blendpd : SDNode<"X86ISD::BLENDPD", SDTBlend>; + //===----------------------------------------------------------------------===// // SSE Complex Patterns //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 307c96b..b12c1db 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -1049,9 +1049,9 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VPCMPGTWYrr, X86::VPCMPGTWYrm, TB_ALIGN_32 }, { X86::VPERM2I128rr, X86::VPERM2I128rm, TB_ALIGN_32 }, { X86::VPERMDYrr, X86::VPERMDYrm, TB_ALIGN_32 }, - { X86::VPERMPDYrr, X86::VPERMPDYrm, TB_ALIGN_32 }, + { X86::VPERMPDYri, X86::VPERMPDYmi, TB_ALIGN_32 }, { X86::VPERMPSYrr, X86::VPERMPSYrm, TB_ALIGN_32 }, - { X86::VPERMQYrr, X86::VPERMQYrm, TB_ALIGN_32 }, + { X86::VPERMQYri, X86::VPERMQYmi, TB_ALIGN_32 }, { X86::VPHADDDYrr, X86::VPHADDDYrm, TB_ALIGN_32 }, { X86::VPHADDSWrr256, X86::VPHADDSWrm256, TB_ALIGN_32 }, { X86::VPHADDWYrr, X86::VPHADDWYrm, TB_ALIGN_32 }, diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index dd7cf50..6a25312 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -374,6 +374,11 @@ def SSECC : Operand<i8> { let OperandType = "OPERAND_IMMEDIATE"; } +def AVXCC : Operand<i8> { + let PrintMethod = "printSSECC"; + let OperandType = "OPERAND_IMMEDIATE"; +} + class ImmSExtAsmOperandClass : AsmOperandClass { let SuperClasses = [ImmAsmOperand]; let RenderMethod = "addImmOperands"; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index df42627..65e3c1e 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2162,15 +2162,15 @@ def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))), // sse12_cmp_scalar - sse 1 & 2 compare scalar instructions multiclass 
sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop, - SDNode OpNode, ValueType VT, PatFrag ld_frag, - string asm, string asm_alt, + Operand CC, SDNode OpNode, ValueType VT, + PatFrag ld_frag, string asm, string asm_alt, OpndItins itins> { def rr : SIi8<0xC2, MRMSrcReg, - (outs RC:$dst), (ins RC:$src1, RC:$src2, SSECC:$cc), asm, + (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm, [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))], itins.rr>; def rm : SIi8<0xC2, MRMSrcMem, - (outs RC:$dst), (ins RC:$src1, x86memop:$src2, SSECC:$cc), asm, + (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm, [(set RC:$dst, (OpNode (VT RC:$src1), (ld_frag addr:$src2), imm:$cc))], itins.rm>; @@ -2187,57 +2187,57 @@ multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop, } } -defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmpss, f32, loadf32, +defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, AVXCC, X86cmpss, f32, loadf32, "cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", SSE_ALU_F32S>, XS, VEX_4V, VEX_LIG; -defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmpsd, f64, loadf64, +defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, AVXCC, X86cmpsd, f64, loadf64, "cmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", SSE_ALU_F32S>, // same latency as 32 bit compare XD, VEX_4V, VEX_LIG; let Constraints = "$src1 = $dst" in { - defm CMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmpss, f32, loadf32, + defm CMPSS : sse12_cmp_scalar<FR32, f32mem, SSECC, X86cmpss, f32, loadf32, "cmp${cc}ss\t{$src2, $dst|$dst, $src2}", "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}", SSE_ALU_F32S>, XS; - defm CMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmpsd, f64, loadf64, + defm CMPSD : sse12_cmp_scalar<FR64, f64mem, SSECC, X86cmpsd, f64, loadf64, "cmp${cc}sd\t{$src2, $dst|$dst, $src2}", "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}", SSE_ALU_F32S>, // same latency as 32 
bit compare XD; } -multiclass sse12_cmp_scalar_int<RegisterClass RC, X86MemOperand x86memop, +multiclass sse12_cmp_scalar_int<X86MemOperand x86memop, Operand CC, Intrinsic Int, string asm, OpndItins itins> { def rr : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src, SSECC:$cc), asm, + (ins VR128:$src1, VR128:$src, CC:$cc), asm, [(set VR128:$dst, (Int VR128:$src1, VR128:$src, imm:$cc))], itins.rr>; def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, x86memop:$src, SSECC:$cc), asm, + (ins VR128:$src1, x86memop:$src, CC:$cc), asm, [(set VR128:$dst, (Int VR128:$src1, (load addr:$src), imm:$cc))], itins.rm>; } // Aliases to match intrinsics which expect XMM operand(s). -defm Int_VCMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss, +defm Int_VCMPSS : sse12_cmp_scalar_int<f32mem, AVXCC, int_x86_sse_cmp_ss, "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}", SSE_ALU_F32S>, XS, VEX_4V; -defm Int_VCMPSD : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd, +defm Int_VCMPSD : sse12_cmp_scalar_int<f64mem, AVXCC, int_x86_sse2_cmp_sd, "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}", SSE_ALU_F32S>, // same latency as f32 XD, VEX_4V; let Constraints = "$src1 = $dst" in { - defm Int_CMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss, + defm Int_CMPSS : sse12_cmp_scalar_int<f32mem, SSECC, int_x86_sse_cmp_ss, "cmp${cc}ss\t{$src, $dst|$dst, $src}", SSE_ALU_F32S>, XS; - defm Int_CMPSD : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd, + defm Int_CMPSD : sse12_cmp_scalar_int<f64mem, SSECC, int_x86_sse2_cmp_sd, "cmp${cc}sd\t{$src, $dst|$dst, $src}", SSE_ALU_F32S>, // same latency as f32 XD; @@ -2308,50 +2308,50 @@ let Defs = [EFLAGS] in { // sse12_cmp_packed - sse 1 & 2 compare packed instructions multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop, - Intrinsic Int, string asm, string asm_alt, - Domain d> { - let isAsmParserOnly = 1 in { - def rri : PIi8<0xC2, MRMSrcReg, - (outs RC:$dst), 
(ins RC:$src1, RC:$src2, SSECC:$cc), asm, - [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))], - IIC_SSE_CMPP_RR, d>; - def rmi : PIi8<0xC2, MRMSrcMem, - (outs RC:$dst), (ins RC:$src1, x86memop:$src2, SSECC:$cc), asm, - [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))], - IIC_SSE_CMPP_RM, d>; - } + Operand CC, Intrinsic Int, string asm, + string asm_alt, Domain d> { + def rri : PIi8<0xC2, MRMSrcReg, + (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm, + [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))], + IIC_SSE_CMPP_RR, d>; + def rmi : PIi8<0xC2, MRMSrcMem, + (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm, + [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))], + IIC_SSE_CMPP_RM, d>; // Accept explicit immediate argument form instead of comparison code. - def rri_alt : PIi8<0xC2, MRMSrcReg, - (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc), - asm_alt, [], IIC_SSE_CMPP_RR, d>; - def rmi_alt : PIi8<0xC2, MRMSrcMem, - (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc), - asm_alt, [], IIC_SSE_CMPP_RM, d>; + let neverHasSideEffects = 1 in { + def rri_alt : PIi8<0xC2, MRMSrcReg, + (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc), + asm_alt, [], IIC_SSE_CMPP_RR, d>; + def rmi_alt : PIi8<0xC2, MRMSrcMem, + (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc), + asm_alt, [], IIC_SSE_CMPP_RM, d>; + } } -defm VCMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps, +defm VCMPPS : sse12_cmp_packed<VR128, f128mem, AVXCC, int_x86_sse_cmp_ps, "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}", "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", SSEPackedSingle>, TB, VEX_4V; -defm VCMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd, +defm VCMPPD : sse12_cmp_packed<VR128, f128mem, AVXCC, int_x86_sse2_cmp_pd, "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}", "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", SSEPackedDouble>, TB, OpSize, VEX_4V; -defm VCMPPSY : sse12_cmp_packed<VR256, 
f256mem, int_x86_avx_cmp_ps_256, +defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_ps_256, "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}", "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", SSEPackedSingle>, TB, VEX_4V; -defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_pd_256, +defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_pd_256, "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}", "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", SSEPackedDouble>, TB, OpSize, VEX_4V; let Constraints = "$src1 = $dst" in { - defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps, + defm CMPPS : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse_cmp_ps, "cmp${cc}ps\t{$src2, $dst|$dst, $src2}", "cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}", SSEPackedSingle>, TB; - defm CMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd, + defm CMPPD : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse2_cmp_pd, "cmp${cc}pd\t{$src2, $dst|$dst, $src2}", "cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}", SSEPackedDouble>, TB, OpSize; @@ -6331,11 +6331,11 @@ def : Pat<(f64 (ftrunc FR64:$src)), let Defs = [EFLAGS], Predicates = [HasAVX] in { def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "vptest\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>, + [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>, OpSize, VEX; def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), "vptest\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS,(X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>, + [(set EFLAGS,(X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>, OpSize, VEX; def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2), @@ -6351,11 +6351,11 @@ def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2), let Defs = [EFLAGS] in { def PTESTrr : SS48I<0x17, MRMSrcReg, 
(outs), (ins VR128:$src1, VR128:$src2), "ptest\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>, + [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>, OpSize; def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), "ptest\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>, + [(set EFLAGS, (X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>, OpSize; } @@ -6735,12 +6735,32 @@ let Predicates = [HasAVX] in { def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1), (v4f64 VR256:$src2))), (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>; + + def : Pat<(v8f32 (X86Blendps (v8f32 VR256:$src1), (v8f32 VR256:$src2), + (imm:$mask))), + (VBLENDPSYrri VR256:$src2, VR256:$src1, imm:$mask)>; + def : Pat<(v4f64 (X86Blendpd (v4f64 VR256:$src1), (v4f64 VR256:$src2), + (imm:$mask))), + (VBLENDPDYrri VR256:$src2, VR256:$src1, imm:$mask)>; + + def : Pat<(v8i16 (X86Blendpw (v8i16 VR128:$src1), (v8i16 VR128:$src2), + (imm:$mask))), + (VPBLENDWrri VR128:$src2, VR128:$src1, imm:$mask)>; + def : Pat<(v4f32 (X86Blendps (v4f32 VR128:$src1), (v4f32 VR128:$src2), + (imm:$mask))), + (VBLENDPSrri VR128:$src2, VR128:$src1, imm:$mask)>; + def : Pat<(v2f64 (X86Blendpd (v2f64 VR128:$src1), (v2f64 VR128:$src2), + (imm:$mask))), + (VBLENDPDrri VR128:$src2, VR128:$src1, imm:$mask)>; } let Predicates = [HasAVX2] in { def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1), (v32i8 VR256:$src2))), (VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>; + def : Pat<(v16i16 (X86Blendpw (v16i16 VR256:$src1), (v16i16 VR256:$src2), + (imm:$mask))), + (VPBLENDWYrri VR256:$src2, VR256:$src1, imm:$mask)>; } /// SS41I_ternary_int - SSE 4.1 ternary operator @@ -6789,6 +6809,17 @@ let Predicates = [HasSSE41] in { def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1), (v2f64 VR128:$src2))), (BLENDVPDrr0 VR128:$src2, VR128:$src1)>; + + def : Pat<(v8i16 (X86Blendpw 
(v8i16 VR128:$src1), (v8i16 VR128:$src2), + (imm:$mask))), + (PBLENDWrri VR128:$src2, VR128:$src1, imm:$mask)>; + def : Pat<(v4f32 (X86Blendps (v4f32 VR128:$src1), (v4f32 VR128:$src2), + (imm:$mask))), + (BLENDPSrri VR128:$src2, VR128:$src1, imm:$mask)>; + def : Pat<(v2f64 (X86Blendpd (v2f64 VR128:$src1), (v2f64 VR128:$src2), + (imm:$mask))), + (BLENDPDrri VR128:$src2, VR128:$src1, imm:$mask)>; + } let Predicates = [HasAVX] in @@ -7294,6 +7325,46 @@ def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst), []>, VEX_4V; } +let Predicates = [HasAVX] in { +def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; + +def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2), + (i32 imm)), + (VINSERTF128rm VR256:$src1, addr:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2), + (i32 imm)), + (VINSERTF128rm VR256:$src1, addr:$src2, + (INSERT_get_vinsertf128_imm 
VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2), + (i32 imm)), + (VINSERTF128rm VR256:$src1, addr:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +} + //===----------------------------------------------------------------------===// // VEXTRACTF128 - Extract packed floating-point values // @@ -7664,45 +7735,47 @@ def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), // multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag, - Intrinsic Int> { + ValueType OpVT> { def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, (Int VR256:$src1, VR256:$src2))]>, VEX_4V; + [(set VR256:$dst, + (OpVT (X86VPermv VR256:$src1, VR256:$src2)))]>, VEX_4V; def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, (Int VR256:$src1, - (bitconvert (mem_frag addr:$src2))))]>, + [(set VR256:$dst, + (OpVT (X86VPermv VR256:$src1, + (bitconvert (mem_frag addr:$src2)))))]>, VEX_4V; } -defm VPERMD : avx2_perm<0x36, "vpermd", memopv4i64, int_x86_avx2_permd>; +defm VPERMD : avx2_perm<0x36, "vpermd", memopv4i64, v8i32>; let ExeDomain = SSEPackedSingle in -defm VPERMPS : avx2_perm<0x16, "vpermps", memopv8f32, int_x86_avx2_permps>; +defm VPERMPS : avx2_perm<0x16, "vpermps", memopv8f32, v8f32>; multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag, - Intrinsic Int> { - def Yrr : AVX2AIi8<opc, MRMSrcReg, (outs VR256:$dst), + ValueType OpVT> { + def Yri : AVX2AIi8<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, i8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, (Int VR256:$src1, imm:$src2))]>, VEX; - def Yrm : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst), + [(set VR256:$dst, + (OpVT (X86VPermi VR256:$src1, (i8 imm:$src2))))]>, VEX; + def 
Ymi : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src1, i8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, (Int (mem_frag addr:$src1), imm:$src2))]>, - VEX; + [(set VR256:$dst, + (OpVT (X86VPermi (mem_frag addr:$src1), + (i8 imm:$src2))))]>, VEX; } -defm VPERMQ : avx2_perm_imm<0x00, "vpermq", memopv4i64, int_x86_avx2_permq>, - VEX_W; +defm VPERMQ : avx2_perm_imm<0x00, "vpermq", memopv4i64, v4i64>, VEX_W; let ExeDomain = SSEPackedDouble in -defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, int_x86_avx2_permpd>, - VEX_W; +defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, v4f64>, VEX_W; //===----------------------------------------------------------------------===// // VPERM2I128 - Permute Floating-Point Values in 128-bit chunks @@ -7743,18 +7816,17 @@ def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)), //===----------------------------------------------------------------------===// // VINSERTI128 - Insert packed integer values // +let neverHasSideEffects = 1 in { def VINSERTI128rr : AVX2AIi8<0x38, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR128:$src2, i8imm:$src3), "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - [(set VR256:$dst, - (int_x86_avx2_vinserti128 VR256:$src1, VR128:$src2, imm:$src3))]>, + []>, VEX_4V; def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i128mem:$src2, i8imm:$src3), "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - [(set VR256:$dst, - (int_x86_avx2_vinserti128 VR256:$src1, (memopv2i64 addr:$src2), - imm:$src3))]>, VEX_4V; + []>, VEX_4V; +} let Predicates = [HasAVX2], AddedComplexity = 1 in { def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2), @@ -7775,47 +7847,6 @@ def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2), (INSERT_get_vinsertf128_imm VR256:$ins))>; } -// AVX1 patterns -let Predicates = 
[HasAVX] in { -def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2), - (i32 imm)), - (VINSERTF128rr VR256:$src1, VR128:$src2, - (INSERT_get_vinsertf128_imm VR256:$ins))>; -def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2), - (i32 imm)), - (VINSERTF128rr VR256:$src1, VR128:$src2, - (INSERT_get_vinsertf128_imm VR256:$ins))>; -def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2), - (i32 imm)), - (VINSERTF128rr VR256:$src1, VR128:$src2, - (INSERT_get_vinsertf128_imm VR256:$ins))>; -def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2), - (i32 imm)), - (VINSERTF128rr VR256:$src1, VR128:$src2, - (INSERT_get_vinsertf128_imm VR256:$ins))>; -def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2), - (i32 imm)), - (VINSERTF128rr VR256:$src1, VR128:$src2, - (INSERT_get_vinsertf128_imm VR256:$ins))>; -def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2), - (i32 imm)), - (VINSERTF128rr VR256:$src1, VR128:$src2, - (INSERT_get_vinsertf128_imm VR256:$ins))>; - -def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2), - (i32 imm)), - (VINSERTF128rm VR256:$src1, addr:$src2, - (INSERT_get_vinsertf128_imm VR256:$ins))>; -def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2), - (i32 imm)), - (VINSERTF128rm VR256:$src1, addr:$src2, - (INSERT_get_vinsertf128_imm VR256:$ins))>; -def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2), - (i32 imm)), - (VINSERTF128rm VR256:$src1, addr:$src2, - (INSERT_get_vinsertf128_imm VR256:$ins))>; -} - //===----------------------------------------------------------------------===// // VEXTRACTI128 - Extract packed integer values // @@ -7830,7 +7861,7 @@ def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs), (ins i128mem:$dst, VR256:$src1, i8imm:$src2), "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX; -let Predicates = [HasAVX2] 
in { +let Predicates = [HasAVX2], AddedComplexity = 1 in { def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), (v2i64 (VEXTRACTI128rr (v4i64 VR256:$src1), diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 3eb9441..ed1a409 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -21,7 +21,6 @@ #include "llvm/Support/Host.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/ADT/SmallVector.h" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR @@ -425,7 +424,9 @@ bool X86Subtarget::enablePostRAScheduler( CodeGenOpt::Level OptLevel, TargetSubtargetInfo::AntiDepBreakMode& Mode, RegClassVector& CriticalPathRCs) const { - Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL; + //TODO: change back to ANTIDEP_CRITICAL when the + // X86 subtarget properly sets up post RA liveness. + Mode = TargetSubtargetInfo::ANTIDEP_NONE; CriticalPathRCs.clear(); return PostRAScheduler && OptLevel >= CodeGenOpt::Default; } diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp index c0d2a9c..718f35e 100644 --- a/lib/Target/X86/X86TargetObjectFile.cpp +++ b/lib/Target/X86/X86TargetObjectFile.cpp @@ -14,7 +14,6 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/Target/Mangler.h" -#include "llvm/ADT/SmallString.h" #include "llvm/Support/Dwarf.h" using namespace llvm; using namespace dwarf; diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp index 64f1a8e..1cfdbda 100644 --- a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp +++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "XCoreMCAsmInfo.h" +#include "llvm/ADT/StringRef.h" using namespace llvm; void XCoreMCAsmInfo::anchor() { } diff --git 
a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h index 24e170a..0767775 100644 --- a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h +++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h @@ -14,10 +14,10 @@ #ifndef XCORETARGETASMINFO_H #define XCORETARGETASMINFO_H -#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmInfo.h" namespace llvm { + class StringRef; class Target; class XCoreMCAsmInfo : public MCAsmInfo { diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h index 3cfc376..a255adb 100644 --- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h +++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h @@ -15,9 +15,7 @@ #define XCOREMCTARGETDESC_H namespace llvm { -class MCSubtargetInfo; class Target; -class StringRef; extern Target TheXCoreTarget; diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 593cebc..fdf2b78 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -1363,8 +1363,8 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, APInt KnownZero, KnownOne; APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), VT.getSizeInBits() - 1); - DAG.ComputeMaskedBits(N2, Mask, KnownZero, KnownOne); - if (KnownZero == Mask) { + DAG.ComputeMaskedBits(N2, KnownZero, KnownOne); + if ((KnownZero & Mask) == Mask) { SDValue Carry = DAG.getConstant(0, VT); SDValue Result = DAG.getNode(ISD::ADD, dl, VT, N0, N2); SDValue Ops [] = { Carry, Result }; @@ -1386,8 +1386,8 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, APInt KnownZero, KnownOne; APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), VT.getSizeInBits() - 1); - DAG.ComputeMaskedBits(N2, Mask, KnownZero, KnownOne); - if (KnownZero == Mask) { + DAG.ComputeMaskedBits(N2, KnownZero, KnownOne); + if ((KnownZero & Mask) == Mask) { SDValue Borrow = N2; SDValue Result = DAG.getNode(ISD::SUB, dl, VT, 
DAG.getConstant(0, VT), N2); @@ -1402,8 +1402,8 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, APInt KnownZero, KnownOne; APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), VT.getSizeInBits() - 1); - DAG.ComputeMaskedBits(N2, Mask, KnownZero, KnownOne); - if (KnownZero == Mask) { + DAG.ComputeMaskedBits(N2, KnownZero, KnownOne); + if ((KnownZero & Mask) == Mask) { SDValue Borrow = DAG.getConstant(0, VT); SDValue Result = DAG.getNode(ISD::SUB, dl, VT, N0, N2); SDValue Ops [] = { Borrow, Result }; @@ -1521,21 +1521,19 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, } void XCoreTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth) const { - KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); + KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0); switch (Op.getOpcode()) { default: break; case XCoreISD::LADD: case XCoreISD::LSUB: if (Op.getResNo() == 0) { // Top bits of carry / borrow are clear. - KnownZero = APInt::getHighBitsSet(Mask.getBitWidth(), - Mask.getBitWidth() - 1); - KnownZero &= Mask; + KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(), + KnownZero.getBitWidth() - 1); } break; } diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index 5cd3e67..0b63ecd 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -160,7 +160,6 @@ namespace llvm { virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; virtual void computeMaskedBitsForTargetNode(const SDValue Op, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, |