Diffstat (limited to 'lib/Target')
-rw-r--r--  lib/Target/ARM/ARM.td | 2
-rw-r--r--  lib/Target/ARM/ARMAsmPrinter.cpp | 28
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 293
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.h | 12
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.cpp | 12
-rw-r--r--  lib/Target/ARM/ARMCallingConv.td | 31
-rw-r--r--  lib/Target/ARM/ARMCodeEmitter.cpp | 100
-rw-r--r--  lib/Target/ARM/ARMExpandPseudoInsts.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMFastISel.cpp | 164
-rw-r--r--  lib/Target/ARM/ARMFrameLowering.cpp | 10
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 137
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 367
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h | 15
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 78
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td | 34
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td | 168
-rw-r--r--  lib/Target/ARM/ARMInstrVFP.td | 31
-rw-r--r--  lib/Target/ARM/ARMJITInfo.cpp | 6
-rw-r--r--  lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 6
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.td | 8
-rw-r--r--  lib/Target/ARM/ARMScheduleA8.td | 1
-rw-r--r--  lib/Target/ARM/ARMScheduleA9.td | 1
-rw-r--r--  lib/Target/ARM/ARMSubtarget.cpp | 13
-rw-r--r--  lib/Target/ARM/ARMSubtarget.h | 5
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 116
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 861
-rw-r--r--  lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 50
-rw-r--r--  lib/Target/ARM/InstPrinter/ARMInstPrinter.h | 1
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h | 106
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 90
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp | 11
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.cpp | 42
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.h | 5
-rw-r--r--  lib/Target/CppBackend/CPPBackend.cpp | 4
-rw-r--r--  lib/Target/Hexagon/HexagonHardwareLoops.cpp | 5
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.td | 12
-rw-r--r--  lib/Target/Hexagon/HexagonSubtarget.cpp | 33
-rw-r--r--  lib/Target/Mangler.cpp | 3
-rw-r--r--  lib/Target/Mips/AsmParser/CMakeLists.txt | 1
-rw-r--r--  lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 68
-rw-r--r--  lib/Target/Mips/CMakeLists.txt | 8
-rw-r--r--  lib/Target/Mips/Disassembler/MipsDisassembler.cpp | 20
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp | 15
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp | 17
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h | 6
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 6
-rw-r--r--  lib/Target/Mips/Makefile | 4
-rw-r--r--  lib/Target/Mips/Mips.td | 17
-rw-r--r--  lib/Target/Mips/Mips16FrameLowering.cpp | 87
-rw-r--r--  lib/Target/Mips/Mips16FrameLowering.h | 43
-rw-r--r--  lib/Target/Mips/Mips16InstrInfo.cpp | 132
-rw-r--r--  lib/Target/Mips/Mips16InstrInfo.h | 76
-rw-r--r--  lib/Target/Mips/Mips16InstrInfo.td | 294
-rw-r--r--  lib/Target/Mips/Mips16RegisterInfo.cpp | 111
-rw-r--r--  lib/Target/Mips/Mips16RegisterInfo.h | 37
-rw-r--r--  lib/Target/Mips/Mips64InstrInfo.td | 23
-rw-r--r--  lib/Target/Mips/MipsCallingConv.td | 12
-rw-r--r--  lib/Target/Mips/MipsELFWriterInfo.cpp | 92
-rw-r--r--  lib/Target/Mips/MipsELFWriterInfo.h | 59
-rw-r--r--  lib/Target/Mips/MipsFrameLowering.cpp | 224
-rw-r--r--  lib/Target/Mips/MipsFrameLowering.h | 25
-rw-r--r--  lib/Target/Mips/MipsISelDAGToDAG.cpp | 68
-rw-r--r--  lib/Target/Mips/MipsISelLowering.cpp | 169
-rw-r--r--  lib/Target/Mips/MipsISelLowering.h | 1
-rw-r--r--  lib/Target/Mips/MipsInstrFPU.td | 56
-rw-r--r--  lib/Target/Mips/MipsInstrFormats.td | 36
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.cpp | 309
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.h | 98
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.td | 52
-rw-r--r--  lib/Target/Mips/MipsJITInfo.cpp | 47
-rw-r--r--  lib/Target/Mips/MipsLongBranch.cpp | 2
-rw-r--r--  lib/Target/Mips/MipsMachineFunction.h | 33
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.cpp | 74
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.h | 13
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.td | 3
-rw-r--r--  lib/Target/Mips/MipsSEFrameLowering.cpp | 210
-rw-r--r--  lib/Target/Mips/MipsSEFrameLowering.h | 44
-rw-r--r--  lib/Target/Mips/MipsSEInstrInfo.cpp | 320
-rw-r--r--  lib/Target/Mips/MipsSEInstrInfo.h | 86
-rw-r--r--  lib/Target/Mips/MipsSERegisterInfo.cpp | 138
-rw-r--r--  lib/Target/Mips/MipsSERegisterInfo.h | 39
-rw-r--r--  lib/Target/Mips/MipsSubtarget.h | 4
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.cpp | 31
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.h | 129
-rw-r--r--  lib/Target/PowerPC/PPCCTRLoops.cpp | 5
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 6
-rw-r--r--  lib/Target/PowerPC/PPCInstr64Bit.td | 9
-rw-r--r--  lib/Target/PowerPC/TargetInfo/Makefile | 2
-rw-r--r--  lib/Target/README.txt | 5
-rw-r--r--  lib/Target/Sparc/SparcRegisterInfo.cpp | 3
-rw-r--r--  lib/Target/TargetLibraryInfo.cpp | 105
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp | 59
-rw-r--r--  lib/Target/X86/Disassembler/X86Disassembler.cpp | 9
-rw-r--r--  lib/Target/X86/Disassembler/X86Disassembler.h | 10
-rw-r--r--  lib/Target/X86/Disassembler/X86DisassemblerDecoder.c | 16
-rw-r--r--  lib/Target/X86/Disassembler/X86DisassemblerDecoder.h | 71
-rw-r--r--  lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h | 25
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp | 7
-rw-r--r--  lib/Target/X86/X86.h | 2
-rw-r--r--  lib/Target/X86/X86.td | 8
-rw-r--r--  lib/Target/X86/X86AsmPrinter.h | 8
-rw-r--r--  lib/Target/X86/X86COFFMachineModuleInfo.cpp | 1
-rw-r--r--  lib/Target/X86/X86COFFMachineModuleInfo.h | 4
-rw-r--r--  lib/Target/X86/X86FastISel.cpp | 82
-rw-r--r--  lib/Target/X86/X86FloatingPoint.cpp | 18
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp | 192
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 1460
-rw-r--r--  lib/Target/X86/X86ISelLowering.h | 39
-rw-r--r--  lib/Target/X86/X86InstrArithmetic.td | 2
-rw-r--r--  lib/Target/X86/X86InstrExtension.td | 8
-rw-r--r--  lib/Target/X86/X86InstrFMA.td | 364
-rw-r--r--  lib/Target/X86/X86InstrFormats.td | 12
-rw-r--r--  lib/Target/X86/X86InstrFragmentsSIMD.td | 45
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 460
-rw-r--r--  lib/Target/X86/X86InstrInfo.h | 12
-rw-r--r--  lib/Target/X86/X86InstrMMX.td | 18
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 934
-rw-r--r--  lib/Target/X86/X86JITInfo.cpp | 17
-rw-r--r--  lib/Target/X86/X86JITInfo.h | 2
-rw-r--r--  lib/Target/X86/X86MCInstLower.cpp | 62
-rw-r--r--  lib/Target/X86/X86MCInstLower.h | 6
-rw-r--r--  lib/Target/X86/X86MachineFunctionInfo.h | 6
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp | 8
-rw-r--r--  lib/Target/X86/X86RegisterInfo.td | 7
-rw-r--r--  lib/Target/X86/X86Relocations.h | 2
-rw-r--r--  lib/Target/X86/X86SelectionDAGInfo.cpp | 2
-rw-r--r--  lib/Target/X86/X86Subtarget.cpp | 39
-rw-r--r--  lib/Target/X86/X86Subtarget.h | 4
-rw-r--r--  lib/Target/X86/X86VZeroUpper.cpp | 2
-rw-r--r--  lib/Target/XCore/XCoreFrameLowering.cpp | 5
-rw-r--r--  lib/Target/XCore/XCoreFrameLowering.h | 2
131 files changed, 6355 insertions(+), 4012 deletions(-)
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index cd3c0e0..69e2346 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -224,7 +224,7 @@ def : ProcNoItin<"cortex-m3", [HasV7Ops,
def : ProcNoItin<"cortex-m4", [HasV7Ops,
FeatureThumb2, FeatureNoARM, FeatureDB,
FeatureHWDiv, FeatureDSPThumb2,
- FeatureT2XtPk, FeatureVFP2,
+ FeatureT2XtPk, FeatureVFP4,
FeatureVFPOnlySP, FeatureMClass]>;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 9a1ce06..e9e2803 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -529,10 +529,24 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
return false;
}
- // These modifiers are not yet supported.
+ // This modifier is not yet supported.
case 'h': // A range of VFP/NEON registers suitable for VLD1/VST1.
- case 'H': // The highest-numbered register of a pair.
return true;
+ case 'H': { // The highest-numbered register of a pair.
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ if (!MO.isReg())
+ return true;
+ const TargetRegisterClass &RC = ARM::GPRRegClass;
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+
+ unsigned RegIdx = TRI->getEncodingValue(MO.getReg());
+ RegIdx |= 1; // The odd register is also the higher-numbered one of a pair.
+
+ unsigned Reg = RC.getRegister(RegIdx);
+ O << ARMInstPrinter::getRegisterName(Reg);
+ return false;
+ }
}
}
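
The 'H' case above exploits the fact that GPR pairs are even/odd aligned, so the higher-numbered member can be computed by setting bit 0 of the encoding. A minimal sketch of that arithmetic (the helper name is hypothetical, not part of the patch):

// Sketch: even/odd GPR pair arithmetic behind the 'H' modifier.
unsigned highestOfPair(unsigned RegEncoding) {
  // For a pair such as (R2, R3), OR-ing in bit 0 maps either member
  // to the odd, higher-numbered register: 2 -> 3, and 3 stays 3.
  return RegEncoding | 1;
}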
@@ -1136,8 +1150,14 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
assert(SrcReg == ARM::SP &&
"Only stack pointer as a source reg is supported");
for (unsigned i = StartOp, NumOps = MI->getNumOperands() - NumOffset;
- i != NumOps; ++i)
- RegList.push_back(MI->getOperand(i).getReg());
+ i != NumOps; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ // Actually, there should never be any impdef stuff here. Skip it
+ // temporarily to work around PR11902.
+ if (MO.isImplicit())
+ continue;
+ RegList.push_back(MO.getReg());
+ }
break;
case ARM::STR_PRE_IMM:
case ARM::STR_PRE_REG:
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 714238a..29033e5 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -795,8 +795,28 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
} else
llvm_unreachable("Unknown reg class!");
break;
+ case 24:
+ if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
+ // Use aligned spills if the stack can be realigned.
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo))
+ .addFrameIndex(FI).addImm(16)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO));
+ } else {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ .addFrameIndex(FI))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+ AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
case 32:
- if (ARM::QQPRRegClass.hasSubClassEq(RC)) {
+ if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
// FIXME: It's possible to only store part of the QQ register if the
// spilled def has a sub-register index.
@@ -868,6 +888,8 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
}
break;
case ARM::VST1q64:
+ case ARM::VST1d64TPseudo:
+ case ARM::VST1d64QPseudo:
if (MI->getOperand(0).isFI() &&
MI->getOperand(2).getSubReg() == 0) {
FrameIndex = MI->getOperand(0).getIndex();
@@ -942,8 +964,28 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
} else
llvm_unreachable("Unknown reg class!");
break;
- case 32:
- if (ARM::QQPRRegClass.hasSubClassEq(RC)) {
+ case 24:
+ if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
+ .addFrameIndex(FI).addImm(16)
+ .addMemOperand(MMO));
+ } else {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
+ .addFrameIndex(FI)
+ .addMemOperand(MMO));
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
+ if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+ MIB.addReg(DestReg, RegState::ImplicitDefine);
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 32:
+ if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
.addFrameIndex(FI).addImm(16)
@@ -1016,6 +1058,8 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
}
break;
case ARM::VLD1q64:
+ case ARM::VLD1d64TPseudo:
+ case ARM::VLD1d64QPseudo:
if (MI->getOperand(1).isFI() &&
MI->getOperand(0).getSubReg() == 0) {
FrameIndex = MI->getOperand(1).getIndex();
@@ -1524,6 +1568,139 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
}
+/// Identify instructions that can be folded into a MOVCC instruction, and
+/// return the corresponding opcode for the predicated pseudo-instruction.
+static unsigned canFoldIntoMOVCC(unsigned Reg, MachineInstr *&MI,
+ const MachineRegisterInfo &MRI) {
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return 0;
+ if (!MRI.hasOneNonDBGUse(Reg))
+ return 0;
+ MI = MRI.getVRegDef(Reg);
+ if (!MI)
+ return 0;
+ // Check if MI has any non-dead defs or physreg uses. This also detects
+ // predicated instructions which will be reading CPSR.
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ // Reject frame index operands, PEI can't handle the predicated pseudos.
+ if (MO.isFI() || MO.isCPI() || MO.isJTI())
+ return 0;
+ if (!MO.isReg())
+ continue;
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ return 0;
+ if (MO.isDef() && !MO.isDead())
+ return 0;
+ }
+ switch (MI->getOpcode()) {
+ default: return 0;
+ case ARM::ANDri: return ARM::ANDCCri;
+ case ARM::ANDrr: return ARM::ANDCCrr;
+ case ARM::ANDrsi: return ARM::ANDCCrsi;
+ case ARM::ANDrsr: return ARM::ANDCCrsr;
+ case ARM::t2ANDri: return ARM::t2ANDCCri;
+ case ARM::t2ANDrr: return ARM::t2ANDCCrr;
+ case ARM::t2ANDrs: return ARM::t2ANDCCrs;
+ case ARM::EORri: return ARM::EORCCri;
+ case ARM::EORrr: return ARM::EORCCrr;
+ case ARM::EORrsi: return ARM::EORCCrsi;
+ case ARM::EORrsr: return ARM::EORCCrsr;
+ case ARM::t2EORri: return ARM::t2EORCCri;
+ case ARM::t2EORrr: return ARM::t2EORCCrr;
+ case ARM::t2EORrs: return ARM::t2EORCCrs;
+ case ARM::ORRri: return ARM::ORRCCri;
+ case ARM::ORRrr: return ARM::ORRCCrr;
+ case ARM::ORRrsi: return ARM::ORRCCrsi;
+ case ARM::ORRrsr: return ARM::ORRCCrsr;
+ case ARM::t2ORRri: return ARM::t2ORRCCri;
+ case ARM::t2ORRrr: return ARM::t2ORRCCrr;
+ case ARM::t2ORRrs: return ARM::t2ORRCCrs;
+
+ // ARM ADD/SUB
+ case ARM::ADDri: return ARM::ADDCCri;
+ case ARM::ADDrr: return ARM::ADDCCrr;
+ case ARM::ADDrsi: return ARM::ADDCCrsi;
+ case ARM::ADDrsr: return ARM::ADDCCrsr;
+ case ARM::SUBri: return ARM::SUBCCri;
+ case ARM::SUBrr: return ARM::SUBCCrr;
+ case ARM::SUBrsi: return ARM::SUBCCrsi;
+ case ARM::SUBrsr: return ARM::SUBCCrsr;
+
+ // Thumb2 ADD/SUB
+ case ARM::t2ADDri: return ARM::t2ADDCCri;
+ case ARM::t2ADDri12: return ARM::t2ADDCCri12;
+ case ARM::t2ADDrr: return ARM::t2ADDCCrr;
+ case ARM::t2ADDrs: return ARM::t2ADDCCrs;
+ case ARM::t2SUBri: return ARM::t2SUBCCri;
+ case ARM::t2SUBri12: return ARM::t2SUBCCri12;
+ case ARM::t2SUBrr: return ARM::t2SUBCCrr;
+ case ARM::t2SUBrs: return ARM::t2SUBCCrs;
+ }
+}
+
+bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI,
+ SmallVectorImpl<MachineOperand> &Cond,
+ unsigned &TrueOp, unsigned &FalseOp,
+ bool &Optimizable) const {
+ assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
+ "Unknown select instruction");
+ // MOVCC operands:
+ // 0: Def.
+ // 1: True use.
+ // 2: False use.
+ // 3: Condition code.
+ // 4: CPSR use.
+ TrueOp = 1;
+ FalseOp = 2;
+ Cond.push_back(MI->getOperand(3));
+ Cond.push_back(MI->getOperand(4));
+ // We can always fold a def.
+ Optimizable = true;
+ return false;
+}
+
+MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
+ bool PreferFalse) const {
+ assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
+ "Unknown select instruction");
+ const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ MachineInstr *DefMI = 0;
+ unsigned Opc = canFoldIntoMOVCC(MI->getOperand(2).getReg(), DefMI, MRI);
+ bool Invert = !Opc;
+ if (!Opc)
+ Opc = canFoldIntoMOVCC(MI->getOperand(1).getReg(), DefMI, MRI);
+ if (!Opc)
+ return 0;
+
+ // Create a new predicated version of DefMI.
+ // Rfalse is the first use.
+ MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ get(Opc), MI->getOperand(0).getReg())
+ .addOperand(MI->getOperand(Invert ? 2 : 1));
+
+ // Copy all the DefMI operands, excluding its (null) predicate.
+ const MCInstrDesc &DefDesc = DefMI->getDesc();
+ for (unsigned i = 1, e = DefDesc.getNumOperands();
+ i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
+ NewMI.addOperand(DefMI->getOperand(i));
+
+ unsigned CondCode = MI->getOperand(3).getImm();
+ if (Invert)
+ NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
+ else
+ NewMI.addImm(CondCode);
+ NewMI.addOperand(MI->getOperand(4));
+
+ // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
+ if (NewMI->hasOptionalDef())
+ AddDefaultCC(NewMI);
+
+ // The caller will erase MI, but not DefMI.
+ DefMI->eraseFromParent();
+ return NewMI;
+}
+
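Semantically, the analyzeSelect/optimizeSelect pair above folds a single-use defining instruction into the select, so the operation happens only under the select's condition. A rough C++ analogy, a sketch rather than compiler code:

// Before: the add always executes, then MOVCCr picks a value.
int beforeFold(bool pred, int x, int b) {
  int a = x + 1;         // DefMI: an ADDri with one non-debug use
  return pred ? a : b;   // MOVCCr
}
// After: a single predicated ADDCCri; the add runs only when pred holds.
int afterFold(bool pred, int x, int b) {
  int r = b;             // start from the false operand
  if (pred) r = x + 1;   // predicated add conditionally overwrites r
  return r;
}
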
/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
/// instruction is encoded with an 'S' bit is determined by the optional CPSR
/// def operand.
@@ -3180,11 +3357,18 @@ enum ARMExeDomain {
//
std::pair<uint16_t, uint16_t>
ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
- // VMOVD is a VFP instruction, but can be changed to NEON if it isn't
- // predicated.
+ // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
+ // if they are not predicated.
if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
+ // Cortex-A9 is particularly picky about mixing the two and wants these
+ // converted.
+ if (Subtarget.isCortexA9() && !isPredicated(MI) &&
+ (MI->getOpcode() == ARM::VMOVRS ||
+ MI->getOpcode() == ARM::VMOVSR))
+ return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
+
// No other instructions can be swizzled, so just determine their domain.
unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask;
@@ -3204,22 +3388,95 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
void
ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
- // We only know how to change VMOVD into VORR.
- assert(MI->getOpcode() == ARM::VMOVD && "Can only swizzle VMOVD");
- if (Domain != ExeNEON)
- return;
+ unsigned DstReg, SrcReg, DReg;
+ unsigned Lane;
+ MachineInstrBuilder MIB(MI);
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ bool isKill;
+ switch (MI->getOpcode()) {
+ default:
+ llvm_unreachable("cannot handle opcode!");
+ break;
+ case ARM::VMOVD:
+ if (Domain != ExeNEON)
+ break;
- // Zap the predicate operands.
- assert(!isPredicated(MI) && "Cannot predicate a VORRd");
- MI->RemoveOperand(3);
- MI->RemoveOperand(2);
+ // Zap the predicate operands.
+ assert(!isPredicated(MI) && "Cannot predicate a VORRd");
+ MI->RemoveOperand(3);
+ MI->RemoveOperand(2);
- // Change to a VORRd which requires two identical use operands.
- MI->setDesc(get(ARM::VORRd));
+ // Change to a VORRd which requires two identical use operands.
+ MI->setDesc(get(ARM::VORRd));
+
+ // Add the extra source operand and new predicates.
+ // This will go before any implicit ops.
+ AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1)));
+ break;
+ case ARM::VMOVRS:
+ if (Domain != ExeNEON)
+ break;
+ assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
+
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+
+ DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_0, &ARM::DPRRegClass);
+ Lane = 0;
+ if (DReg == ARM::NoRegister) {
+ DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_1, &ARM::DPRRegClass);
+ Lane = 1;
+ assert(DReg && "S-register with no D super-register?");
+ }
+
+ MI->RemoveOperand(3);
+ MI->RemoveOperand(2);
+ MI->RemoveOperand(1);
+
+ MI->setDesc(get(ARM::VGETLNi32));
+ MIB.addReg(DReg);
+ MIB.addImm(Lane);
+
+ MIB->getOperand(1).setIsUndef();
+ MIB.addReg(SrcReg, RegState::Implicit);
+
+ AddDefaultPred(MIB);
+ break;
+ case ARM::VMOVSR:
+ if (Domain != ExeNEON)
+ break;
+ assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
+
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_0, &ARM::DPRRegClass);
+ Lane = 0;
+ if (DReg == ARM::NoRegister) {
+ DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_1, &ARM::DPRRegClass);
+ Lane = 1;
+ assert(DReg && "S-register with no D super-register?");
+ }
+ isKill = MI->getOperand(0).isKill();
+
+ MI->RemoveOperand(3);
+ MI->RemoveOperand(2);
+ MI->RemoveOperand(1);
+ MI->RemoveOperand(0);
+
+ MI->setDesc(get(ARM::VSETLNi32));
+ MIB.addReg(DReg, RegState::Define);
+ MIB.addReg(DReg, RegState::Undef);
+ MIB.addReg(SrcReg);
+ MIB.addImm(Lane);
+
+ if (isKill)
+ MIB->addRegisterKilled(DstReg, TRI, true);
+ MIB->addRegisterDefined(DstReg, TRI);
+
+ AddDefaultPred(MIB);
+ break;
+ }
- // Add the extra source operand and new predicates.
- // This will go before any implicit ops.
- AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1)));
}
bool ARMBaseInstrInfo::hasNOP() const {
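
The VMOVRS/VMOVSR rewrites above rely on the VFP register file layout in which each S register aliases one lane of a D register: S(2k) is lane 0 of D(k) and S(2k+1) is lane 1. A sketch of that index mapping, using plain indices rather than LLVM register numbers:

#include <utility>

// Sketch: map an S-register index to its covering D register and lane.
std::pair<unsigned, unsigned> sRegToDLane(unsigned SRegIdx) {
  return { SRegIdx / 2, SRegIdx % 2 };  // e.g. S5 -> lane 1 of D2
}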
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 1a10a4a..92e5ee8 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -202,6 +202,13 @@ public:
unsigned SrcReg2, int CmpMask, int CmpValue,
const MachineRegisterInfo *MRI) const;
+ virtual bool analyzeSelect(const MachineInstr *MI,
+ SmallVectorImpl<MachineOperand> &Cond,
+ unsigned &TrueOp, unsigned &FalseOp,
+ bool &Optimizable) const;
+
+ virtual MachineInstr *optimizeSelect(MachineInstr *MI, bool) const;
+
/// FoldImmediate - 'Reg' is known to be defined by a move immediate
/// instruction, try to fold the immediate into the use instruction.
virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
@@ -352,6 +359,11 @@ ARMCC::CondCodes getInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
int getMatchingCondBranchOpcode(int Opc);
+/// Determine if MI can be folded into an ARM MOVCC instruction, and return the
+/// opcode of the SSA instruction representing the conditional MI.
+unsigned canFoldARMInstrIntoMOVCC(unsigned Reg,
+ MachineInstr *&MI,
+ const MachineRegisterInfo &MRI);
/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether
/// the instruction is encoded with an 'S' bit is determined by the optional
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 231bd26..9deb96e 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -62,8 +62,20 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
const uint16_t*
ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ bool ghcCall = false;
+
+ if (MF) {
+ const Function *F = MF->getFunction();
+ ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false);
+ }
+
+ if (ghcCall) {
+ return CSR_GHC_SaveList;
+ }
+ else {
return (STI.isTargetIOS() && !STI.isAAPCS_ABI())
? CSR_iOS_SaveList : CSR_AAPCS_SaveList;
+ }
}
const uint32_t*
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index b9a2512..bda1517 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -79,6 +79,25 @@ def RetFastCC_ARM_APCS : CallingConv<[
CCDelegateTo<RetCC_ARM_APCS>
]>;
+//===----------------------------------------------------------------------===//
+// ARM APCS Calling Convention for GHC
+//===----------------------------------------------------------------------===//
+
+def CC_ARM_APCS_GHC : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>,
+ CCIfType<[f64], CCAssignToReg<[D8, D9, D10, D11]>>,
+ CCIfType<[f32], CCAssignToReg<[S16, S17, S18, S19, S20, S21, S22, S23]>>,
+
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, SpLim
+ CCIfType<[i32], CCAssignToReg<[R4, R5, R6, R7, R8, R9, R10, R11]>>
+]>;
//===----------------------------------------------------------------------===//
// ARM AAPCS (EABI) Calling Convention, common parts
@@ -113,6 +132,9 @@ def RetCC_ARM_AAPCS_Common : CallingConv<[
//===----------------------------------------------------------------------===//
def CC_ARM_AAPCS : CallingConv<[
+ // Handles byval parameters.
+ CCIfByVal<CCPassByVal<4, 4>>,
+
// Handle all vector types as either f64 or v2f64.
CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
@@ -138,6 +160,9 @@ def RetCC_ARM_AAPCS : CallingConv<[
//===----------------------------------------------------------------------===//
def CC_ARM_AAPCS_VFP : CallingConv<[
+ // Handles byval parameters.
+ CCIfByVal<CCPassByVal<4, 4>>,
+
// Handle all vector types as either f64 or v2f64.
CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
@@ -171,3 +196,9 @@ def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4,
// iOS ABI deviates from ARM standard ABI. R9 is not a callee-saved register.
// Also save R7-R4 first to match the stack frame fixed spill areas.
def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>;
+
+// The GHC set of callee-saved regs is empty, as all of those regs are
+// used for passing STG regs around.
+// 'add' is a workaround for not being able to compile an empty list:
+// def CSR_GHC : CalleeSavedRegs<()>;
+def CSR_GHC : CalleeSavedRegs<(add)>;
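
Taken together, CC_ARM_APCS_GHC and the empty CSR_GHC pin every STG register to a fixed ARM register across calls, which is why GHC-convention code needs no prologue/epilogue and can only make tail calls. The i32 mapping implied by the CCAssignToReg list above, restated as a sketch (the enum is illustrative, not LLVM code):

// Sketch of the STG-to-ARM pinning for i32 values in CC_ARM_APCS_GHC.
enum GHCRegPin {
  STG_Base  = 4,   // Base  -> R4
  STG_Sp    = 5,   // Sp    -> R5
  STG_Hp    = 6,   // Hp    -> R6
  STG_R1    = 7,   // R1    -> R7
  STG_R2    = 8,   // R2    -> R8
  STG_R3    = 9,   // R3    -> R9
  STG_R4    = 10,  // R4    -> R10
  STG_SpLim = 11   // SpLim -> R11
};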
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index af260a5..132b81f 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -264,7 +264,7 @@ namespace {
emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry);
return 0;
}
- unsigned Reg = getARMRegisterNumbering(MO.getReg());
+ unsigned Reg = II->getRegisterInfo().getEncodingValue(MO.getReg());
int32_t Imm12 = MO1.getImm();
uint32_t Binary;
Binary = Imm12 & 0xfff;
@@ -314,18 +314,24 @@ namespace {
// {7-0} = imm8
uint32_t Binary = 0;
const MachineOperand &MO = MI.getOperand(Op);
- uint32_t Reg = getMachineOpValue(MI, MO);
- Binary |= (Reg << 9);
-
- // If there is a non-zero immediate offset, encode it.
- if (MO.isReg()) {
- const MachineOperand &MO1 = MI.getOperand(Op + 1);
- if (uint32_t ImmOffs = ARM_AM::getAM5Offset(MO1.getImm())) {
- if (ARM_AM::getAM5Op(MO1.getImm()) == ARM_AM::add)
- Binary |= 1 << 8;
- Binary |= ImmOffs & 0xff;
- return Binary;
- }
+ const MachineOperand &MO1 = MI.getOperand(Op + 1);
+ if (!MO.isReg()) {
+ emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry);
+ return 0;
+ }
+ unsigned Reg = II->getRegisterInfo().getEncodingValue(MO.getReg());
+ int32_t Imm12 = MO1.getImm();
+
+ // Special value for #-0
+ if (Imm12 == INT32_MIN)
+ Imm12 = 0;
+
+ // The immediate is always encoded as positive; the 'U' bit selects
+ // add vs. sub.
+ bool isAdd = true;
+ if (Imm12 < 0) {
+ Imm12 = -Imm12;
+ isAdd = false;
}
// If immediate offset is omitted, default to +0.
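
The sign handling above follows the usual ARM addressing-mode convention: the encoded imm12 is always non-negative, and a separate 'U' bit says whether the offset is added to or subtracted from the base. A small worked sketch; the U-bit position shown is the classic word load/store one and should be treated as illustrative:

#include <cstdint>

// Sketch: split a signed offset into (imm12, U) as the code above does.
uint32_t encodeOffsetWithUBit(int32_t Offs) {
  bool IsAdd = true;
  if (Offs == INT32_MIN)   // the patch's sentinel for #-0
    Offs = 0;
  if (Offs < 0) { Offs = -Offs; IsAdd = false; }
  return (uint32_t(Offs) & 0xfff) | (IsAdd ? (1u << 23) : 0);
}
// e.g. -4 encodes as imm12 = 4 with U clear; +4 as imm12 = 4 with U set.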
@@ -367,6 +373,12 @@ namespace {
void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const;
void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc,
intptr_t JTBase = 0) const;
+ unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) const;
+ unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) const;
+ unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) const;
+ unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) const;
+ unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) const;
+ unsigned encodeNEONRm(const MachineInstr &MI, unsigned OpIdx) const;
};
}
@@ -455,7 +467,7 @@ unsigned ARMCodeEmitter::getMovi32Value(const MachineInstr &MI,
unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI,
const MachineOperand &MO) const {
if (MO.isReg())
- return getARMRegisterNumbering(MO.getReg());
+ return II->getRegisterInfo().getEncodingValue(MO.getReg());
else if (MO.isImm())
return static_cast<unsigned>(MO.getImm());
else if (MO.isFPImm())
@@ -816,7 +828,7 @@ void ARMCodeEmitter::emitLEApcrelInstruction(const MachineInstr &MI) {
Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
// Encode Rn which is PC.
- Binary |= getARMRegisterNumbering(ARM::PC) << ARMII::RegRnShift;
+ Binary |= II->getRegisterInfo().getEncodingValue(ARM::PC) << ARMII::RegRnShift;
// Encode the displacement which is a so_imm.
// Set bit I(25) to identify this is the immediate form of <shifter_op>
@@ -844,7 +856,7 @@ void ARMCodeEmitter::emitLEApcrelJTInstruction(const MachineInstr &MI) {
Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
// Encode Rn which is PC.
- Binary |= getARMRegisterNumbering(ARM::PC) << ARMII::RegRnShift;
+ Binary |= II->getRegisterInfo().getEncodingValue(ARM::PC) << ARMII::RegRnShift;
// Encode the displacement.
Binary |= 1 << ARMII::I_BitShift;
@@ -1045,7 +1057,7 @@ unsigned ARMCodeEmitter::getMachineSoRegOpValue(const MachineInstr &MI,
if (Rs) {
// Encode Rs bit[11:8].
assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0);
- return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift);
+ return Binary | (II->getRegisterInfo().getEncodingValue(Rs) << ARMII::RegRsShift);
}
// Encode shift_imm bit[11:7].
@@ -1101,7 +1113,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
else if (ImplicitRd)
// Special handling for implicit use (e.g. PC).
- Binary |= (getARMRegisterNumbering(ImplicitRd) << ARMII::RegRdShift);
+ Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRd) << ARMII::RegRdShift);
if (MCID.Opcode == ARM::MOVi16) {
// Get immediate from MI.
@@ -1151,7 +1163,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
if (!isUnary) {
if (ImplicitRn)
// Special handling for implicit use (e.g. PC).
- Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift);
+ Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift);
else {
Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRnShift;
++OpIdx;
@@ -1168,7 +1180,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
if (MO.isReg()) {
// Encode register Rm.
- emitWordLE(Binary | getARMRegisterNumbering(MO.getReg()));
+ emitWordLE(Binary | II->getRegisterInfo().getEncodingValue(MO.getReg()));
return;
}
@@ -1217,14 +1229,14 @@ void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI,
// Set first operand
if (ImplicitRd)
// Special handling for implicit use (e.g. PC).
- Binary |= (getARMRegisterNumbering(ImplicitRd) << ARMII::RegRdShift);
+ Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRd) << ARMII::RegRdShift);
else
Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
// Set second operand
if (ImplicitRn)
// Special handling for implicit use (e.g. PC).
- Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift);
+ Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift);
else
Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
@@ -1251,7 +1263,7 @@ void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI,
Binary |= 1 << ARMII::I_BitShift;
assert(TargetRegisterInfo::isPhysicalRegister(MO2.getReg()));
// Set bit[3:0] to the corresponding Rm register
- Binary |= getARMRegisterNumbering(MO2.getReg());
+ Binary |= II->getRegisterInfo().getEncodingValue(MO2.getReg());
// If this instr is in scaled register offset/index instruction, set
// shift_immed(bit[11:7]) and shift(bit[6:5]) fields.
@@ -1295,7 +1307,7 @@ void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI,
// Set second operand
if (ImplicitRn)
// Special handling for implicit use (e.g. PC).
- Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift);
+ Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift);
else
Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
@@ -1314,7 +1326,7 @@ void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI,
// If this instr is in register offset/index encoding, set bit[3:0]
// to the corresponding Rm register.
if (MO2.getReg()) {
- Binary |= getARMRegisterNumbering(MO2.getReg());
+ Binary |= II->getRegisterInfo().getEncodingValue(MO2.getReg());
emitWordLE(Binary);
return;
}
@@ -1385,7 +1397,7 @@ void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) {
const MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || MO.isImplicit())
break;
- unsigned RegNum = getARMRegisterNumbering(MO.getReg());
+ unsigned RegNum = II->getRegisterInfo().getEncodingValue(MO.getReg());
assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
RegNum < 16);
Binary |= 0x1 << RegNum;
@@ -1632,7 +1644,7 @@ void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) {
if (MCID.Opcode == ARM::BX_RET || MCID.Opcode == ARM::MOVPCLR)
// The return register is LR.
- Binary |= getARMRegisterNumbering(ARM::LR);
+ Binary |= II->getRegisterInfo().getEncodingValue(ARM::LR);
else
// otherwise, set the return register
Binary |= getMachineOpValue(MI, 0);
@@ -1640,11 +1652,12 @@ void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) {
emitWordLE(Binary);
}
-static unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) {
+unsigned ARMCodeEmitter::encodeVFPRd(const MachineInstr &MI,
+ unsigned OpIdx) const {
unsigned RegD = MI.getOperand(OpIdx).getReg();
unsigned Binary = 0;
bool isSPVFP = ARM::SPRRegClass.contains(RegD);
- RegD = getARMRegisterNumbering(RegD);
+ RegD = II->getRegisterInfo().getEncodingValue(RegD);
if (!isSPVFP) {
Binary |= (RegD & 0x0F) << ARMII::RegRdShift;
Binary |= ((RegD & 0x10) >> 4) << ARMII::D_BitShift;
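
encodeVFPRd above also shows how a five-bit double-precision register number is split across the instruction: four bits land in the Rd field and the fifth becomes the separate 'D' bit. A sketch of the split, with the field positions left symbolic:

// Sketch: split a 5-bit D-register encoding into (Rd[3:0], D bit).
void splitVFPRd(unsigned Enc, unsigned &RdField, unsigned &DBit) {
  RdField = Enc & 0x0F;         // low four bits -> Rd[3:0]
  DBit    = (Enc & 0x10) >> 4;  // fifth bit -> the 'D' bit
}
// e.g. D17 (encoding 0b10001) -> Rd field 0b0001, D bit 1.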
@@ -1655,11 +1668,12 @@ static unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) {
return Binary;
}
-static unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) {
+unsigned ARMCodeEmitter::encodeVFPRn(const MachineInstr &MI,
+ unsigned OpIdx) const {
unsigned RegN = MI.getOperand(OpIdx).getReg();
unsigned Binary = 0;
bool isSPVFP = ARM::SPRRegClass.contains(RegN);
- RegN = getARMRegisterNumbering(RegN);
+ RegN = II->getRegisterInfo().getEncodingValue(RegN);
if (!isSPVFP) {
Binary |= (RegN & 0x0F) << ARMII::RegRnShift;
Binary |= ((RegN & 0x10) >> 4) << ARMII::N_BitShift;
@@ -1670,11 +1684,12 @@ static unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) {
return Binary;
}
-static unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) {
+unsigned ARMCodeEmitter::encodeVFPRm(const MachineInstr &MI,
+ unsigned OpIdx) const {
unsigned RegM = MI.getOperand(OpIdx).getReg();
unsigned Binary = 0;
bool isSPVFP = ARM::SPRRegClass.contains(RegM);
- RegM = getARMRegisterNumbering(RegM);
+ RegM = II->getRegisterInfo().getEncodingValue(RegM);
if (!isSPVFP) {
Binary |= (RegM & 0x0F);
Binary |= ((RegM & 0x10) >> 4) << ARMII::M_BitShift;
@@ -1885,28 +1900,31 @@ void ARMCodeEmitter::emitMiscInstruction(const MachineInstr &MI) {
emitWordLE(Binary);
}
-static unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) {
+unsigned ARMCodeEmitter::encodeNEONRd(const MachineInstr &MI,
+ unsigned OpIdx) const {
unsigned RegD = MI.getOperand(OpIdx).getReg();
unsigned Binary = 0;
- RegD = getARMRegisterNumbering(RegD);
+ RegD = II->getRegisterInfo().getEncodingValue(RegD);
Binary |= (RegD & 0xf) << ARMII::RegRdShift;
Binary |= ((RegD >> 4) & 1) << ARMII::D_BitShift;
return Binary;
}
-static unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) {
+unsigned ARMCodeEmitter::encodeNEONRn(const MachineInstr &MI,
+ unsigned OpIdx) const {
unsigned RegN = MI.getOperand(OpIdx).getReg();
unsigned Binary = 0;
- RegN = getARMRegisterNumbering(RegN);
+ RegN = II->getRegisterInfo().getEncodingValue(RegN);
Binary |= (RegN & 0xf) << ARMII::RegRnShift;
Binary |= ((RegN >> 4) & 1) << ARMII::N_BitShift;
return Binary;
}
-static unsigned encodeNEONRm(const MachineInstr &MI, unsigned OpIdx) {
+unsigned ARMCodeEmitter::encodeNEONRm(const MachineInstr &MI,
+ unsigned OpIdx) const {
unsigned RegM = MI.getOperand(OpIdx).getReg();
unsigned Binary = 0;
- RegM = getARMRegisterNumbering(RegM);
+ RegM = II->getRegisterInfo().getEncodingValue(RegM);
Binary |= (RegM & 0xf);
Binary |= ((RegM >> 4) & 1) << ARMII::M_BitShift;
return Binary;
@@ -1940,7 +1958,7 @@ void ARMCodeEmitter::emitNEONLaneInstruction(const MachineInstr &MI) {
Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift;
unsigned RegT = MI.getOperand(RegTOpIdx).getReg();
- RegT = getARMRegisterNumbering(RegT);
+ RegT = II->getRegisterInfo().getEncodingValue(RegT);
Binary |= (RegT << ARMII::RegRdShift);
Binary |= encodeNEONRn(MI, RegNOpIdx);
@@ -1969,7 +1987,7 @@ void ARMCodeEmitter::emitNEONDupInstruction(const MachineInstr &MI) {
Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift;
unsigned RegT = MI.getOperand(1).getReg();
- RegT = getARMRegisterNumbering(RegT);
+ RegT = II->getRegisterInfo().getEncodingValue(RegT);
Binary |= (RegT << ARMII::RegRdShift);
Binary |= encodeNEONRn(MI, 0);
emitWordLE(Binary);
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index a242b13..15bb32e 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -1009,7 +1009,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
unsigned OpIdx = 0;
unsigned SrcReg = MI.getOperand(1).getReg();
- unsigned Lane = getARMRegisterNumbering(SrcReg) & 1;
+ unsigned Lane = TRI->getEncodingValue(SrcReg) & 1;
unsigned DReg = TRI->getMatchingSuperReg(SrcReg,
Lane & 1 ? ARM::ssub_1 : ARM::ssub_0,
&ARM::DPR_VFP2RegClass);
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index b96395f..5a5ca1b 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -87,8 +87,9 @@ class ARMFastISel : public FastISel {
LLVMContext *Context;
public:
- explicit ARMFastISel(FunctionLoweringInfo &funcInfo)
- : FastISel(funcInfo),
+ explicit ARMFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo)
+ : FastISel(funcInfo, libInfo),
TM(funcInfo.MF->getTarget()),
TII(*TM.getInstrInfo()),
TLI(*TM.getTargetLowering()) {
@@ -99,51 +100,53 @@ class ARMFastISel : public FastISel {
}
// Code from FastISel.cpp.
- virtual unsigned FastEmitInst_(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC);
- virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill);
- virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- unsigned Op1, bool Op1IsKill);
- virtual unsigned FastEmitInst_rrr(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- unsigned Op1, bool Op1IsKill,
- unsigned Op2, bool Op2IsKill);
- virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- uint64_t Imm);
- virtual unsigned FastEmitInst_rf(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- const ConstantFP *FPImm);
- virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- unsigned Op1, bool Op1IsKill,
- uint64_t Imm);
- virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- uint64_t Imm);
- virtual unsigned FastEmitInst_ii(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- uint64_t Imm1, uint64_t Imm2);
-
- virtual unsigned FastEmitInst_extractsubreg(MVT RetVT,
- unsigned Op0, bool Op0IsKill,
- uint32_t Idx);
+ private:
+ unsigned FastEmitInst_(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC);
+ unsigned FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill);
+ unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill);
+ unsigned FastEmitInst_rrr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ unsigned Op2, bool Op2IsKill);
+ unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm);
+ unsigned FastEmitInst_rf(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ const ConstantFP *FPImm);
+ unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ uint64_t Imm);
+ unsigned FastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm);
+ unsigned FastEmitInst_ii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm1, uint64_t Imm2);
+
+ unsigned FastEmitInst_extractsubreg(MVT RetVT,
+ unsigned Op0, bool Op0IsKill,
+ uint32_t Idx);
// Backend specific FastISel code.
+ private:
virtual bool TargetSelectInstruction(const Instruction *I);
virtual unsigned TargetMaterializeConstant(const Constant *C);
virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
const LoadInst *LI);
-
+ private:
#include "ARMGenFastISel.inc"
// Instruction selection routines.
@@ -167,6 +170,7 @@ class ARMFastISel : public FastISel {
bool SelectRet(const Instruction *I);
bool SelectTrunc(const Instruction *I);
bool SelectIntExt(const Instruction *I);
+ bool SelectShift(const Instruction *I, ARM_AM::ShiftOpc ShiftTy);
// Utility routines.
private:
@@ -1819,9 +1823,12 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
default:
llvm_unreachable("Unsupported calling convention");
case CallingConv::Fast:
- // Ignore fastcc. Silence compiler warnings.
- (void)RetFastCC_ARM_APCS;
- (void)FastCC_ARM_APCS;
+ if (Subtarget->hasVFP2() && !isVarArg) {
+ if (!Subtarget->isAAPCS_ABI())
+ return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
+ // For AAPCS ABI targets, just use the VFP variant of the calling convention.
+ return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
+ }
// Fallthrough
case CallingConv::C:
// Use target triple & subtarget features to do actual dispatch.
@@ -1842,6 +1849,11 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
case CallingConv::ARM_APCS:
return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+ case CallingConv::GHC:
+ if (Return)
+ llvm_unreachable("Can't return in GHC call convention");
+ else
+ return CC_ARM_APCS_GHC;
}
}
@@ -2608,6 +2620,61 @@ bool ARMFastISel::SelectIntExt(const Instruction *I) {
return true;
}
+bool ARMFastISel::SelectShift(const Instruction *I,
+ ARM_AM::ShiftOpc ShiftTy) {
+ // Thumb2 mode is handled by the target-independent selector or by
+ // SelectionDAG ISel.
+ if (isThumb2)
+ return false;
+
+ // Only handle i32 now.
+ EVT DestVT = TLI.getValueType(I->getType(), true);
+ if (DestVT != MVT::i32)
+ return false;
+
+ unsigned Opc = ARM::MOVsr;
+ unsigned ShiftImm;
+ Value *Src2Value = I->getOperand(1);
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Src2Value)) {
+ ShiftImm = CI->getZExtValue();
+
+ // Fall back to SelectionDAG isel if the shift amount is zero or is
+ // not less than the width of the value type.
+ if (ShiftImm == 0 || ShiftImm >= 32)
+ return false;
+
+ Opc = ARM::MOVsi;
+ }
+
+ Value *Src1Value = I->getOperand(0);
+ unsigned Reg1 = getRegForValue(Src1Value);
+ if (Reg1 == 0) return false;
+
+ unsigned Reg2;
+ if (Opc == ARM::MOVsr) {
+ Reg2 = getRegForValue(Src2Value);
+ if (Reg2 == 0) return false;
+ }
+
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ if (ResultReg == 0) return false;
+
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addReg(Reg1);
+
+ if (Opc == ARM::MOVsi)
+ MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, ShiftImm));
+ else if (Opc == ARM::MOVsr) {
+ MIB.addReg(Reg2);
+ MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, 0));
+ }
+
+ AddOptionalDefs(MIB);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
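The opcode choice in SelectShift above reduces to: a constant shift amount in (0, 32) selects the shift-by-immediate form (MOVsi), a non-constant amount selects the shift-by-register form (MOVsr), and anything else bails out to SelectionDAG. Restated as a standalone sketch (the enum and helper are hypothetical):

// Sketch of SelectShift's dispatch on the shift-amount operand.
enum ShiftForm { BailToDAG, ByImmediate /* MOVsi */, ByRegister /* MOVsr */ };
ShiftForm pickShiftForm(bool AmtIsConst, unsigned Amt) {
  if (AmtIsConst)
    return (Amt == 0 || Amt >= 32) ? BailToDAG : ByImmediate;
  return ByRegister;
}
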
// TODO: SoftFP support.
bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
@@ -2668,6 +2735,12 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
case Instruction::ZExt:
case Instruction::SExt:
return SelectIntExt(I);
+ case Instruction::Shl:
+ return SelectShift(I, ARM_AM::lsl);
+ case Instruction::LShr:
+ return SelectShift(I, ARM_AM::lsr);
+ case Instruction::AShr:
+ return SelectShift(I, ARM_AM::asr);
default: break;
}
return false;
@@ -2720,14 +2793,15 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
}
namespace llvm {
- FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) {
+ FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo) {
// Completely untested on non-iOS.
const TargetMachine &TM = funcInfo.MF->getTarget();
// Darwin and thumb1 only for now.
const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
if (Subtarget->isTargetIOS() && !Subtarget->isThumb1Only())
- return new ARMFastISel(funcInfo);
+ return new ARMFastISel(funcInfo, libInfo);
return 0;
}
}
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 2629496..aee72d2 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -15,6 +15,8 @@
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Function.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -151,6 +153,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
int FramePtrSpillFI = 0;
int D8SpillFI = 0;
+ // All calls are tail calls in the GHC calling convention, and functions have no prologue/epilogue.
+ if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
+ return;
+
// Allocate the vararg register save area. This is not counted in NumBytes.
if (VARegSaveSize)
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize,
@@ -354,6 +360,10 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
int NumBytes = (int)MFI->getStackSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ // All calls are tail calls in the GHC calling convention, and functions have no prologue/epilogue.
+ if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
+ return;
+
if (!AFI->hasStackFrame()) {
if (NumBytes != 0)
emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 1953192..c6f9d15 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -47,11 +47,6 @@ CheckVMLxHazard("check-vmlx-hazard", cl::Hidden,
cl::desc("Check fp vmla / vmls hazard at isel time"),
cl::init(true));
-static cl::opt<bool>
-DisableARMIntABS("disable-arm-int-abs", cl::Hidden,
- cl::desc("Enable / disable ARM integer abs transform"),
- cl::init(false));
-
//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
@@ -244,7 +239,6 @@ private:
/// SelectCMOVOp - Select CMOV instructions for ARM.
SDNode *SelectCMOVOp(SDNode *N);
- SDNode *SelectConditionalOp(SDNode *N);
SDNode *SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
ARMCC::CondCodes CCVal, SDValue CCR,
SDValue InFlag);
@@ -2368,115 +2362,6 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5);
}
-SDNode *ARMDAGToDAGISel::SelectConditionalOp(SDNode *N) {
- SDValue FalseVal = N->getOperand(0);
- SDValue TrueVal = N->getOperand(1);
- ARMCC::CondCodes CCVal =
- (ARMCC::CondCodes)cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
- SDValue CCR = N->getOperand(3);
- assert(CCR.getOpcode() == ISD::Register);
- SDValue InFlag = N->getOperand(4);
- SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
- SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
-
- if (Subtarget->isThumb()) {
- SDValue CPTmp0;
- SDValue CPTmp1;
- if (SelectT2ShifterOperandReg(TrueVal, CPTmp0, CPTmp1)) {
- unsigned Opc;
- switch (N->getOpcode()) {
- default: llvm_unreachable("Unexpected node");
- case ARMISD::CAND: Opc = ARM::t2ANDCCrs; break;
- case ARMISD::COR: Opc = ARM::t2ORRCCrs; break;
- case ARMISD::CXOR: Opc = ARM::t2EORCCrs; break;
- }
- SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CC, CCR, Reg0, InFlag };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7);
- }
-
- ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
- if (T) {
- unsigned TrueImm = T->getZExtValue();
- if (is_t2_so_imm(TrueImm)) {
- unsigned Opc;
- switch (N->getOpcode()) {
- default: llvm_unreachable("Unexpected node");
- case ARMISD::CAND: Opc = ARM::t2ANDCCri; break;
- case ARMISD::COR: Opc = ARM::t2ORRCCri; break;
- case ARMISD::CXOR: Opc = ARM::t2EORCCri; break;
- }
- SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
- SDValue Ops[] = { FalseVal, True, CC, CCR, Reg0, InFlag };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6);
- }
- }
-
- unsigned Opc;
- switch (N->getOpcode()) {
- default: llvm_unreachable("Unexpected node");
- case ARMISD::CAND: Opc = ARM::t2ANDCCrr; break;
- case ARMISD::COR: Opc = ARM::t2ORRCCrr; break;
- case ARMISD::CXOR: Opc = ARM::t2EORCCrr; break;
- }
- SDValue Ops[] = { FalseVal, TrueVal, CC, CCR, Reg0, InFlag };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6);
- }
-
- SDValue CPTmp0;
- SDValue CPTmp1;
- SDValue CPTmp2;
- if (SelectImmShifterOperand(TrueVal, CPTmp0, CPTmp2)) {
- unsigned Opc;
- switch (N->getOpcode()) {
- default: llvm_unreachable("Unexpected node");
- case ARMISD::CAND: Opc = ARM::ANDCCrsi; break;
- case ARMISD::COR: Opc = ARM::ORRCCrsi; break;
- case ARMISD::CXOR: Opc = ARM::EORCCrsi; break;
- }
- SDValue Ops[] = { FalseVal, CPTmp0, CPTmp2, CC, CCR, Reg0, InFlag };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7);
- }
-
- if (SelectRegShifterOperand(TrueVal, CPTmp0, CPTmp1, CPTmp2)) {
- unsigned Opc;
- switch (N->getOpcode()) {
- default: llvm_unreachable("Unexpected node");
- case ARMISD::CAND: Opc = ARM::ANDCCrsr; break;
- case ARMISD::COR: Opc = ARM::ORRCCrsr; break;
- case ARMISD::CXOR: Opc = ARM::EORCCrsr; break;
- }
- SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, Reg0, InFlag };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 8);
- }
-
- ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
- if (T) {
- unsigned TrueImm = T->getZExtValue();
- if (is_so_imm(TrueImm)) {
- unsigned Opc;
- switch (N->getOpcode()) {
- default: llvm_unreachable("Unexpected node");
- case ARMISD::CAND: Opc = ARM::ANDCCri; break;
- case ARMISD::COR: Opc = ARM::ORRCCri; break;
- case ARMISD::CXOR: Opc = ARM::EORCCri; break;
- }
- SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
- SDValue Ops[] = { FalseVal, True, CC, CCR, Reg0, InFlag };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6);
- }
- }
-
- unsigned Opc;
- switch (N->getOpcode()) {
- default: llvm_unreachable("Unexpected node");
- case ARMISD::CAND: Opc = ARM::ANDCCrr; break;
- case ARMISD::COR: Opc = ARM::ORRCCrr; break;
- case ARMISD::CXOR: Opc = ARM::EORCCrr; break;
- }
- SDValue Ops[] = { FalseVal, TrueVal, CC, CCR, Reg0, InFlag };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6);
-}
-
/// Target-specific DAG combining for ISD::XOR.
/// Target-independent combining lowers SELECT_CC nodes of the form
/// select_cc setg[ge] X, 0, X, -X
@@ -2492,14 +2377,10 @@ SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){
SDValue XORSrc1 = N->getOperand(1);
EVT VT = N->getValueType(0);
- if (DisableARMIntABS)
- return NULL;
-
if (Subtarget->isThumb1Only())
return NULL;
- if (XORSrc0.getOpcode() != ISD::ADD ||
- XORSrc1.getOpcode() != ISD::SRA)
+ if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
return NULL;
SDValue ADDSrc0 = XORSrc0.getOperand(0);
@@ -2510,16 +2391,10 @@ SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){
EVT XType = SRASrc0.getValueType();
unsigned Size = XType.getSizeInBits() - 1;
- if (ADDSrc1 == XORSrc1 &&
- ADDSrc0 == SRASrc0 &&
- XType.isInteger() &&
- SRAConstant != NULL &&
+ if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
+ XType.isInteger() && SRAConstant != NULL &&
Size == SRAConstant->getZExtValue()) {
-
- unsigned Opcode = ARM::ABS;
- if (Subtarget->isThumb2())
- Opcode = ARM::t2ABS;
-
+ unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
}
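
The shape SelectABSOp matches is the classic branch-free integer absolute value: arithmetic-shift the sign across the word, add, then xor. In plain C++ for 32-bit values, a worked sketch of the matched DAG rather than the selector itself (assumes arithmetic right shift of negative ints, as on ARM):

// abs(x) without branches: the xor(add(x, sra(x, 31)), sra(x, 31)) shape.
int absViaXor(int x) {
  int m = x >> 31;       // SRA: all ones if x < 0, else zero
  return (x + m) ^ m;    // e.g. x = -5: m = -1, (-5 + -1) ^ -1 = 5
}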
@@ -2814,10 +2689,6 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::CMOV:
return SelectCMOVOp(N);
- case ARMISD::CAND:
- case ARMISD::COR:
- case ARMISD::CXOR:
- return SelectConditionalOp(N);
case ARMISD::VZIP: {
unsigned Opc = 0;
EVT VT = N->getValueType(0);
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 04370c0..df4039b 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -90,75 +90,70 @@ static const uint16_t GPRArgRegs[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
};
-void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
- EVT PromotedBitwiseVT) {
+void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
+ MVT PromotedBitwiseVT) {
if (VT != PromotedLdStVT) {
- setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
- AddPromotedToType (ISD::LOAD, VT.getSimpleVT(),
- PromotedLdStVT.getSimpleVT());
+ setOperationAction(ISD::LOAD, VT, Promote);
+ AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
- setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
- AddPromotedToType (ISD::STORE, VT.getSimpleVT(),
- PromotedLdStVT.getSimpleVT());
+ setOperationAction(ISD::STORE, VT, Promote);
+ AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
}
- EVT ElemTy = VT.getVectorElementType();
+ MVT ElemTy = VT.getVectorElementType();
if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
- setOperationAction(ISD::SETCC, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
if (ElemTy == MVT::i32) {
- setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::SINT_TO_FP, VT, Custom);
+ setOperationAction(ISD::UINT_TO_FP, VT, Custom);
+ setOperationAction(ISD::FP_TO_SINT, VT, Custom);
+ setOperationAction(ISD::FP_TO_UINT, VT, Custom);
} else {
- setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand);
- }
- setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Legal);
- setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::SINT_TO_FP, VT, Expand);
+ setOperationAction(ISD::UINT_TO_FP, VT, Expand);
+ setOperationAction(ISD::FP_TO_SINT, VT, Expand);
+ setOperationAction(ISD::FP_TO_UINT, VT, Expand);
+ }
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
+ setOperationAction(ISD::SELECT, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
if (VT.isInteger()) {
- setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::SHL, VT, Custom);
+ setOperationAction(ISD::SRA, VT, Custom);
+ setOperationAction(ISD::SRL, VT, Custom);
}
// Promote all bit-wise operations.
if (VT.isInteger() && VT != PromotedBitwiseVT) {
- setOperationAction(ISD::AND, VT.getSimpleVT(), Promote);
- AddPromotedToType (ISD::AND, VT.getSimpleVT(),
- PromotedBitwiseVT.getSimpleVT());
- setOperationAction(ISD::OR, VT.getSimpleVT(), Promote);
- AddPromotedToType (ISD::OR, VT.getSimpleVT(),
- PromotedBitwiseVT.getSimpleVT());
- setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote);
- AddPromotedToType (ISD::XOR, VT.getSimpleVT(),
- PromotedBitwiseVT.getSimpleVT());
+ setOperationAction(ISD::AND, VT, Promote);
+ AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
+ setOperationAction(ISD::OR, VT, Promote);
+ AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
+ setOperationAction(ISD::XOR, VT, Promote);
+ AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
}
// Neon does not support vector divide/remainder operations.
- setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::FDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+ setOperationAction(ISD::FREM, VT, Expand);
}
-void ARMTargetLowering::addDRTypeForNEON(EVT VT) {
+void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
addRegisterClass(VT, &ARM::DPRRegClass);
addTypeForNEON(VT, MVT::f64, MVT::v2i32);
}
-void ARMTargetLowering::addQRTypeForNEON(EVT VT) {
+void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
addRegisterClass(VT, &ARM::QPRRegClass);
addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}
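
A standalone sketch of why the EVT -> MVT tightening above helps (Wide and Simple are hypothetical stand-ins, not LLVM classes): EVT can describe extended types that have no machine equivalent, so every use inside the old callee paid a checked getSimpleVT() conversion, whereas taking MVT performs that conversion once, at the call boundary.

    #include <cassert>

    struct Simple { int Id; };            // stand-in for MVT: always a machine type
    struct Wide {                         // stand-in for EVT: may be an extended type
      int Id;
      bool IsSimple;
      Simple getSimple() const { assert(IsSimple); return Simple{Id}; }
    };

    void setAction(Simple) {}             // stand-in for setOperationAction

    void oldStyle(Wide VT)   { setAction(VT.getSimple()); }  // converts at every use
    void newStyle(Simple VT) { setAction(VT); }              // caller converted once

    int main() {
      Wide W{3, true};
      oldStyle(W);
      newStyle(W.getSimple());
    }
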
@@ -903,9 +898,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
case ARMISD::CMOV: return "ARMISD::CMOV";
- case ARMISD::CAND: return "ARMISD::CAND";
- case ARMISD::COR: return "ARMISD::COR";
- case ARMISD::CXOR: return "ARMISD::CXOR";
case ARMISD::RBIT: return "ARMISD::RBIT";
@@ -1041,8 +1033,9 @@ const TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
// Create a fast isel object.
FastISel *
-ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
- return ARM::createFastISel(funcInfo);
+ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo) const {
+ return ARM::createFastISel(funcInfo, libInfo);
}
/// getMaximalGlobalOffset - Returns the maximal possible offset which can
@@ -1171,6 +1164,8 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
case CallingConv::ARM_APCS:
return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
+ case CallingConv::GHC:
+ return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
}
}
@@ -4271,6 +4266,10 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
// Record this extraction against the appropriate vector if possible...
SDValue SourceVec = V.getOperand(0);
+ // If the element number isn't a constant, we can't effectively
+ // analyze what's going on.
+ if (!isa<ConstantSDNode>(V.getOperand(1)))
+ return SDValue();
unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
bool FoundSource = false;
for (unsigned j = 0; j < SourceVecs.size(); ++j) {
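
The added isa<ConstantSDNode> check bails out before the cast<> on the next line can assert on a non-constant element number. A standalone analogue of that guard pattern (dynamic_cast standing in for LLVM's isa<>/cast<>, names hypothetical):

    #include <cassert>

    struct Node { virtual ~Node() {} };
    struct ConstantNode : Node { int Val = 7; };
    struct OtherNode : Node {};

    // cast<> asserts on a type mismatch, so code that may see a non-constant
    // operand must test first; on failure we give up, as ReconstructShuffle
    // now does by returning SDValue().
    int extractIndexOrBail(const Node *N) {
      if (const ConstantNode *C = dynamic_cast<const ConstantNode *>(N))
        return C->Val;
      return -1;
    }

    int main() {
      ConstantNode C;
      OtherNode O;
      assert(extractIndexOrBail(&C) == 7);
      assert(extractIndexOrBail(&O) == -1);
    }
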
@@ -6152,13 +6151,12 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
}
// Add the jump table entries as successors to the MBB.
- MachineBasicBlock *PrevMBB = 0;
+ SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs;
for (std::vector<MachineBasicBlock*>::iterator
I = LPadList.begin(), E = LPadList.end(); I != E; ++I) {
MachineBasicBlock *CurMBB = *I;
- if (PrevMBB != CurMBB)
+ if (SeenMBBs.insert(CurMBB))
DispContBB->addSuccessor(CurMBB);
- PrevMBB = CurMBB;
}
// N.B. the order the invoke BBs are processed in doesn't matter here.
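
The old PrevMBB check only suppressed duplicates that were adjacent in LPadList; the SmallPtrSet suppresses them all, so a landing pad reached from non-consecutive entries is no longer added as a successor twice. A standalone model of the difference (std::set and plain ints standing in for SmallPtrSet and basic blocks):

    #include <cassert>
    #include <set>
    #include <vector>

    int main() {
      std::vector<int> LPadList = {1, 2, 1, 3};       // hypothetical block IDs
      std::vector<int> OldSucc, NewSucc;

      int Prev = -1;                                  // old: adjacent-only check
      for (int MBB : LPadList) {
        if (Prev != MBB) OldSucc.push_back(MBB);
        Prev = MBB;
      }
      assert(OldSucc.size() == 4);                    // block 1 added twice

      std::set<int> Seen;                             // new: set-based check
      for (int MBB : LPadList)
        if (Seen.insert(MBB).second) NewSucc.push_back(MBB);
      assert(NewSucc.size() == 3);                    // each block once
    }
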
@@ -6971,62 +6969,137 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
// ARM Optimization Hooks
//===----------------------------------------------------------------------===//
+// Helper function that checks if N is a null or all ones constant.
+static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
+ if (!C)
+ return false;
+ return AllOnes ? C->isAllOnesValue() : C->isNullValue();
+}
+
+// Return true if N is conditionally 0 or all ones.
+// Detects these expressions where cc is an i1 value:
+//
+// (select cc, 0, y)  [AllOnes=0]
+// (select cc, y, 0)  [AllOnes=0]
+// (zext cc)          [AllOnes=0]
+// (sext cc)          [AllOnes=0/1]
+// (select cc, -1, y) [AllOnes=1]
+// (select cc, y, -1) [AllOnes=1]
+//
+// Invert is set when N is the null/all-ones constant for CC == false.
+// OtherOp is set to the alternative value of N.
+static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes,
+ SDValue &CC, bool &Invert,
+ SDValue &OtherOp,
+ SelectionDAG &DAG) {
+ switch (N->getOpcode()) {
+ default: return false;
+ case ISD::SELECT: {
+ CC = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ if (isZeroOrAllOnes(N1, AllOnes)) {
+ Invert = false;
+ OtherOp = N2;
+ return true;
+ }
+ if (isZeroOrAllOnes(N2, AllOnes)) {
+ Invert = true;
+ OtherOp = N1;
+ return true;
+ }
+ return false;
+ }
+ case ISD::ZERO_EXTEND:
+ // (zext cc) can never be the all ones value.
+ if (AllOnes)
+ return false;
+ // Fall through.
+ case ISD::SIGN_EXTEND: {
+ EVT VT = N->getValueType(0);
+ CC = N->getOperand(0);
+ if (CC.getValueType() != MVT::i1)
+ return false;
+ Invert = !AllOnes;
+ if (AllOnes)
+ // When looking for an AllOnes constant, N is an sext, and the 'other'
+ // value is 0.
+ OtherOp = DAG.getConstant(0, VT);
+ else if (N->getOpcode() == ISD::ZERO_EXTEND)
+ // When looking for a 0 constant, N can be zext or sext.
+ OtherOp = DAG.getConstant(1, VT);
+ else
+ OtherOp = DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT);
+ return true;
+ }
+ }
+}
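
A quick standalone check of the zext/sext cases this function recognizes: as an integer, (zext i1 cc) is 1 or 0 and (sext i1 cc) is -1 or 0, which is what makes the OtherOp constants chosen above correct and justifies the folds listed in the next comment block:

    #include <cassert>

    int main() {
      for (int cc = 0; cc <= 1; ++cc) {
        int zext = cc ? 1 : 0;    // (zext i1 cc) as an integer
        int sext = cc ? -1 : 0;   // (sext i1 cc) as an integer
        for (int x = -3; x <= 3; ++x) {
          assert(x + zext == (cc ? x + 1 : x));   // (add (zext cc), x)
          assert(x + sext == (cc ? x - 1 : x));   // (add (sext cc), x)
        }
      }
    }
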
+
+// Combine a constant select operand into its use:
+//
+// (add (select cc, 0, c), x) -> (select cc, x, (add x, c))
+// (sub x, (select cc, 0, c)) -> (select cc, x, (sub x, c))
+// (and (select cc, -1, c), x) -> (select cc, x, (and x, c)) [AllOnes=1]
+// (or (select cc, 0, c), x) -> (select cc, x, (or x, c))
+// (xor (select cc, 0, c), x) -> (select cc, x, (xor x, c))
+//
+// The transform is rejected if the select doesn't have a constant operand that
+// is null, or all ones when AllOnes is set.
+//
+// Also recognize sext/zext from i1:
+//
+// (add (zext cc), x) -> (select cc, (add x, 1), x)
+// (add (sext cc), x) -> (select cc, (add x, -1), x)
+//
+// These transformations eventually create predicated instructions.
+//
+// @param N The node to transform.
+// @param Slct The N operand that is a select.
+// @param OtherOp The other N operand (x above).
+// @param DCI Context.
+// @param AllOnes Require the select constant to be all ones instead of null.
+// @returns The new node, or SDValue() on failure.
static
SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ bool AllOnes = false) {
SelectionDAG &DAG = DCI.DAG;
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = N->getValueType(0);
- unsigned Opc = N->getOpcode();
- bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC;
- SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1);
- SDValue RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2);
- ISD::CondCode CC = ISD::SETCC_INVALID;
-
- if (isSlctCC) {
- CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get();
- } else {
- SDValue CCOp = Slct.getOperand(0);
- if (CCOp.getOpcode() == ISD::SETCC)
- CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get();
- }
-
- bool DoXform = false;
- bool InvCC = false;
- assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) &&
- "Bad input!");
-
- if (LHS.getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(LHS)->isNullValue()) {
- DoXform = true;
- } else if (CC != ISD::SETCC_INVALID &&
- RHS.getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(RHS)->isNullValue()) {
- std::swap(LHS, RHS);
- SDValue Op0 = Slct.getOperand(0);
- EVT OpVT = isSlctCC ? Op0.getValueType() :
- Op0.getOperand(0).getValueType();
- bool isInt = OpVT.isInteger();
- CC = ISD::getSetCCInverse(CC, isInt);
-
- if (!TLI.isCondCodeLegal(CC, OpVT))
- return SDValue(); // Inverse operator isn't legal.
-
- DoXform = true;
- InvCC = true;
- }
-
- if (DoXform) {
- SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS);
- if (isSlctCC)
- return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result,
- Slct.getOperand(0), Slct.getOperand(1), CC);
- SDValue CCOp = Slct.getOperand(0);
- if (InvCC)
- CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(),
- CCOp.getOperand(0), CCOp.getOperand(1), CC);
- return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
- CCOp, OtherOp, Result);
+ SDValue NonConstantVal;
+ SDValue CCOp;
+ bool SwapSelectOps;
+ if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,
+ NonConstantVal, DAG))
+ return SDValue();
+
+ // Slct is now known to be the desired identity constant when CC is true.
+ SDValue TrueVal = OtherOp;
+ SDValue FalseVal = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT,
+ OtherOp, NonConstantVal);
+ // Unless SwapSelectOps says CC should be false.
+ if (SwapSelectOps)
+ std::swap(TrueVal, FalseVal);
+
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
+ CCOp, TrueVal, FalseVal);
+}
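
In scalar terms, the rewrite performed here says: when the select contributes the operation's identity element (0 for add), the whole expression collapses to a select between x and x OP c. A standalone equivalence check for the add case named in the comment above:

    #include <cassert>

    int before(bool cc, int c, int x) { return (cc ? 0 : c) + x; } // (add (select cc, 0, c), x)
    int after(bool cc, int c, int x)  { return cc ? x : (x + c); } // (select cc, x, (add x, c))

    int main() {
      for (int cc = 0; cc <= 1; ++cc)
        for (int c = -2; c <= 2; ++c)
          for (int x = -2; x <= 2; ++x)
            assert(before(cc, c, x) == after(cc, c, x));
    }
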
+
+// Attempt combineSelectAndUse on each operand of a commutative operator N.
+static
+SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ if (N0.getNode()->hasOneUse()) {
+ SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes);
+ if (Result.getNode())
+ return Result;
+ }
+ if (N1.getNode()->hasOneUse()) {
+ SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes);
+ if (Result.getNode())
+ return Result;
}
return SDValue();
}
@@ -7134,7 +7207,7 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
return Result;
// fold (add (select cc, 0, c), x) -> (select cc, x, (add x, c))
- if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
+ if (N0.getNode()->hasOneUse()) {
SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
if (Result.getNode()) return Result;
}
@@ -7166,7 +7239,7 @@ static SDValue PerformSUBCombine(SDNode *N,
SDValue N1 = N->getOperand(1);
// fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub x, c))
- if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
+ if (N1.getNode()->hasOneUse()) {
SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
if (Result.getNode()) return Result;
}
@@ -7294,49 +7367,6 @@ static SDValue PerformMULCombine(SDNode *N,
return SDValue();
}
-static bool isCMOVWithZeroOrAllOnesLHS(SDValue N, bool AllOnes) {
- if (N.getOpcode() != ARMISD::CMOV || !N.getNode()->hasOneUse())
- return false;
-
- SDValue FalseVal = N.getOperand(0);
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(FalseVal);
- if (!C)
- return false;
- if (AllOnes)
- return C->isAllOnesValue();
- return C->isNullValue();
-}
-
-/// formConditionalOp - Combine an operation with a conditional move operand
-/// to form a conditional op. e.g. (or x, (cmov 0, y, cond)) => (or.cond x, y)
-/// (and x, (cmov -1, y, cond)) => (and.cond, x, y)
-static SDValue formConditionalOp(SDNode *N, SelectionDAG &DAG,
- bool Commutable) {
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
-
- bool isAND = N->getOpcode() == ISD::AND;
- bool isCand = isCMOVWithZeroOrAllOnesLHS(N1, isAND);
- if (!isCand && Commutable) {
- isCand = isCMOVWithZeroOrAllOnesLHS(N0, isAND);
- if (isCand)
- std::swap(N0, N1);
- }
- if (!isCand)
- return SDValue();
-
- unsigned Opc = 0;
- switch (N->getOpcode()) {
- default: llvm_unreachable("Unexpected node");
- case ISD::AND: Opc = ARMISD::CAND; break;
- case ISD::OR: Opc = ARMISD::COR; break;
- case ISD::XOR: Opc = ARMISD::CXOR; break;
- }
- return DAG.getNode(Opc, N->getDebugLoc(), N->getValueType(0), N0,
- N1.getOperand(1), N1.getOperand(2), N1.getOperand(3),
- N1.getOperand(4));
-}
-
static SDValue PerformANDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
@@ -7371,10 +7401,10 @@ static SDValue PerformANDCombine(SDNode *N,
}
if (!Subtarget->isThumb1Only()) {
- // (and x, (cmov -1, y, cond)) => (and.cond x, y)
- SDValue CAND = formConditionalOp(N, DAG, true);
- if (CAND.getNode())
- return CAND;
+ // fold (and (select cc, -1, c), x) -> (select cc, x, (and x, c))
+ SDValue Result = combineSelectAndUseCommutative(N, true, DCI);
+ if (Result.getNode())
+ return Result;
}
return SDValue();
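
For AND the identity element is all ones rather than zero, which is why PerformANDCombine passes AllOnes=true. A standalone check of that variant:

    #include <cassert>

    int main() {
      for (int cc = 0; cc <= 1; ++cc)
        for (unsigned c = 0; c < 8; ++c)
          for (unsigned x = 0; x < 8; ++x)
            // (and (select cc, -1, c), x) == (select cc, x, (and x, c))
            assert(((cc ? ~0u : c) & x) == (cc ? x : (x & c)));
    }
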
@@ -7414,14 +7444,17 @@ static SDValue PerformORCombine(SDNode *N,
}
if (!Subtarget->isThumb1Only()) {
- // (or x, (cmov 0, y, cond)) => (or.cond x, y)
- SDValue COR = formConditionalOp(N, DAG, true);
- if (COR.getNode())
- return COR;
+ // fold (or (select cc, 0, c), x) -> (select cc, x, (or x, c))
+ SDValue Result = combineSelectAndUseCommutative(N, false, DCI);
+ if (Result.getNode())
+ return Result;
}
+ // The code below optimizes (or (and X, Y), Z).
+ // The AND operand needs to have a single user to make these optimizations
+ // profitable.
SDValue N0 = N->getOperand(0);
- if (N0.getOpcode() != ISD::AND)
+ if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
return SDValue();
SDValue N1 = N->getOperand(1);
@@ -7578,10 +7611,10 @@ static SDValue PerformXORCombine(SDNode *N,
return SDValue();
if (!Subtarget->isThumb1Only()) {
- // (xor x, (cmov 0, y, cond)) => (xor.cond x, y)
- SDValue CXOR = formConditionalOp(N, DAG, true);
- if (CXOR.getNode())
- return CXOR;
+ // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor x, c))
+ SDValue Result = combineSelectAndUseCommutative(N, false, DCI);
+ if (Result.getNode())
+ return Result;
}
return SDValue();
@@ -8802,6 +8835,8 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
case MVT::i16:
case MVT::i32:
return true;
+ case MVT::f64:
+ return Subtarget->hasNEON();
// FIXME: VLD1 etc with standard alignment is legal.
}
}
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 7ad48b9..13b83de 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -63,9 +63,6 @@ namespace llvm {
FMSTAT, // ARM fmstat instruction.
CMOV, // ARM conditional move instructions.
- CAND, // ARM conditional and instructions.
- COR, // ARM conditional or instructions.
- CXOR, // ARM conditional xor instructions.
BCC_i64,
@@ -361,7 +358,8 @@ namespace llvm {
/// createFastISel - This method returns a target specific FastISel object,
/// or null if the target does not support "fast" ISel.
- virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const;
+ virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo) const;
Sched::Preference getSchedulingPreference(SDNode *N) const;
@@ -393,9 +391,9 @@ namespace llvm {
///
unsigned ARMPCLabelIndex;
- void addTypeForNEON(EVT VT, EVT PromotedLdStVT, EVT PromotedBitwiseVT);
- void addDRTypeForNEON(EVT VT);
- void addQRTypeForNEON(EVT VT);
+ void addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT);
+ void addDRTypeForNEON(MVT VT);
+ void addQRTypeForNEON(MVT VT);
typedef SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPassVector;
void PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
@@ -544,7 +542,8 @@ namespace llvm {
namespace ARM {
- FastISel *createFastISel(FunctionLoweringInfo &funcInfo);
+ FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo);
}
}
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 1b8fc3f..992aba5 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -242,6 +242,9 @@ def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion =="
def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4() || "
"Subtarget->isTargetDarwin()">;
+def IsLE : Predicate<"TLI.isLittleEndian()">;
+def IsBE : Predicate<"TLI.isBigEndian()">;
+
//===----------------------------------------------------------------------===//
// ARM Flag Definitions.
@@ -416,8 +419,11 @@ def pclabel : Operand<i32> {
}
// ADR instruction labels.
+def AdrLabelAsmOperand : AsmOperandClass { let Name = "AdrLabel"; }
def adrlabel : Operand<i32> {
let EncoderMethod = "getAdrLabelOpValue";
+ let ParserMatchClass = AdrLabelAsmOperand;
+ let PrintMethod = "printAdrLabelOperand";
}
def neon_vcvt_imm32 : Operand<i32> {
@@ -968,7 +974,7 @@ include "ARMInstrFormats.td"
let TwoOperandAliasConstraint = "$Rn = $Rd" in
multiclass AsI1_bin_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, string baseOpc, bit Commutable = 0> {
+ PatFrag opnode, bit Commutable = 0> {
// The register-immediate version is re-materializable. This is useful
// in particular for taking the address of a local.
let isReMaterializable = 1 in {
@@ -1037,7 +1043,7 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc,
let TwoOperandAliasConstraint = "$Rn = $Rd" in
multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, string baseOpc, bit Commutable = 0> {
+ PatFrag opnode, bit Commutable = 0> {
// The register-immediate version is re-materializable. This is useful
// in particular for taking the address of a local.
let isReMaterializable = 1 in {
@@ -1285,7 +1291,7 @@ class AI_exta_rrot_np<bits<8> opcod, string opc>
/// AI1_adde_sube_irs - Define instructions and patterns for adde and sube.
let TwoOperandAliasConstraint = "$Rn = $Rd" in
multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
- string baseOpc, bit Commutable = 0> {
+ bit Commutable = 0> {
let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in {
def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
@@ -1351,8 +1357,7 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
/// AI1_rsc_irs - Define instructions and patterns for rsc
let TwoOperandAliasConstraint = "$Rn = $Rd" in
-multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode,
- string baseOpc> {
+multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode> {
let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in {
def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
@@ -2816,9 +2821,6 @@ def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr,
let Inst{15-12} = Rd;
}
-def : ARMInstAlias<"movs${p} $Rd, $Rm",
- (MOVr GPR:$Rd, GPR:$Rm, pred:$p, CPSR)>;
-
// A version for the smaller set of tail call registers.
let neverHasSideEffects = 1 in
def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm,
@@ -3029,10 +3031,10 @@ def UBFX : I<(outs GPR:$Rd),
defm ADD : AsI1_bin_irs<0b0100, "add",
IIC_iALUi, IIC_iALUr, IIC_iALUsr,
- BinOpFrag<(add node:$LHS, node:$RHS)>, "ADD", 1>;
+ BinOpFrag<(add node:$LHS, node:$RHS)>, 1>;
defm SUB : AsI1_bin_irs<0b0010, "sub",
IIC_iALUi, IIC_iALUr, IIC_iALUsr,
- BinOpFrag<(sub node:$LHS, node:$RHS)>, "SUB">;
+ BinOpFrag<(sub node:$LHS, node:$RHS)>>;
// ADD and SUB with 's' bit set.
//
@@ -3050,15 +3052,13 @@ defm SUBS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr,
BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
defm ADC : AI1_adde_sube_irs<0b0101, "adc",
- BinOpWithFlagFrag<(ARMadde node:$LHS, node:$RHS, node:$FLAG)>,
- "ADC", 1>;
+ BinOpWithFlagFrag<(ARMadde node:$LHS, node:$RHS, node:$FLAG)>, 1>;
defm SBC : AI1_adde_sube_irs<0b0110, "sbc",
- BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>,
- "SBC">;
+ BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>>;
-defm RSB : AsI1_rbin_irs <0b0011, "rsb",
- IIC_iALUi, IIC_iALUr, IIC_iALUsr,
- BinOpFrag<(sub node:$LHS, node:$RHS)>, "RSB">;
+defm RSB : AsI1_rbin_irs<0b0011, "rsb",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(sub node:$LHS, node:$RHS)>>;
// FIXME: Eliminate them if we can write def : Pat patterns which define
// CPSR and the implicit def of CPSR is not needed.
@@ -3066,8 +3066,7 @@ defm RSBS : AsI1_rbin_s_is<IIC_iALUi, IIC_iALUr, IIC_iALUsr,
BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
defm RSC : AI1_rsc_irs<0b0111, "rsc",
- BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>,
- "RSC">;
+ BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>>;
// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
// The assume-no-carry-in form uses the negation of the input since add/sub
@@ -3276,16 +3275,16 @@ def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm:$pos),
defm AND : AsI1_bin_irs<0b0000, "and",
IIC_iBITi, IIC_iBITr, IIC_iBITsr,
- BinOpFrag<(and node:$LHS, node:$RHS)>, "AND", 1>;
+ BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
defm ORR : AsI1_bin_irs<0b1100, "orr",
IIC_iBITi, IIC_iBITr, IIC_iBITsr,
- BinOpFrag<(or node:$LHS, node:$RHS)>, "ORR", 1>;
+ BinOpFrag<(or node:$LHS, node:$RHS)>, 1>;
defm EOR : AsI1_bin_irs<0b0001, "eor",
IIC_iBITi, IIC_iBITr, IIC_iBITsr,
- BinOpFrag<(xor node:$LHS, node:$RHS)>, "EOR", 1>;
+ BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>;
defm BIC : AsI1_bin_irs<0b1110, "bic",
IIC_iBITi, IIC_iBITr, IIC_iBITsr,
- BinOpFrag<(and node:$LHS, (not node:$RHS))>, "BIC">;
+ BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
// FIXME: bf_inv_mask_imm should be two operands, the lsb and the msb, just
// like in the actual instruction encoding. The complexity of mapping the mask
@@ -3940,7 +3939,7 @@ def BCCZi64 : PseudoInst<(outs),
// a two-value operand where a dag node expects two operands. :(
let neverHasSideEffects = 1 in {
-let isCommutable = 1 in
+let isCommutable = 1, isSelect = 1 in
def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p),
4, IIC_iCMOVr,
[/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
@@ -3993,25 +3992,29 @@ multiclass AsI1_bincc_irs<Instruction iri, Instruction irr, Instruction irsi,
InstrItinClass iii, InstrItinClass iir,
InstrItinClass iis> {
def ri : ARMPseudoExpand<(outs GPR:$Rd),
- (ins GPR:$Rn, so_imm:$imm, pred:$p, cc_out:$s),
+ (ins GPR:$Rfalse, GPR:$Rn, so_imm:$imm,
+ pred:$p, cc_out:$s),
4, iii, [],
(iri GPR:$Rd, GPR:$Rn, so_imm:$imm, pred:$p, cc_out:$s)>,
- RegConstraint<"$Rn = $Rd">;
+ RegConstraint<"$Rfalse = $Rd">;
def rr : ARMPseudoExpand<(outs GPR:$Rd),
- (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ (ins GPR:$Rfalse, GPR:$Rn, GPR:$Rm,
+ pred:$p, cc_out:$s),
4, iir, [],
(irr GPR:$Rd, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
- RegConstraint<"$Rn = $Rd">;
+ RegConstraint<"$Rfalse = $Rd">;
def rsi : ARMPseudoExpand<(outs GPR:$Rd),
- (ins GPR:$Rn, so_reg_imm:$shift, pred:$p, cc_out:$s),
+ (ins GPR:$Rfalse, GPR:$Rn, so_reg_imm:$shift,
+ pred:$p, cc_out:$s),
4, iis, [],
(irsi GPR:$Rd, GPR:$Rn, so_reg_imm:$shift, pred:$p, cc_out:$s)>,
- RegConstraint<"$Rn = $Rd">;
+ RegConstraint<"$Rfalse = $Rd">;
def rsr : ARMPseudoExpand<(outs GPRnopc:$Rd),
- (ins GPRnopc:$Rn, so_reg_reg:$shift, pred:$p, cc_out:$s),
+ (ins GPRnopc:$Rfalse, GPRnopc:$Rn, so_reg_reg:$shift,
+ pred:$p, cc_out:$s),
4, iis, [],
(irsr GPR:$Rd, GPR:$Rn, so_reg_reg:$shift, pred:$p, cc_out:$s)>,
- RegConstraint<"$Rn = $Rd">;
+ RegConstraint<"$Rfalse = $Rd">;
}
defm ANDCC : AsI1_bincc_irs<ANDri, ANDrr, ANDrsi, ANDrsr,
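
The pseudos above gain an explicit $Rfalse operand tied to $Rd: when the predicate is false, a predicated instruction leaves $Rd untouched, so the allocator must know which value $Rd holds in that case, and it need not be $Rn. A standalone model of the semantics (hypothetical, registers as plain integers):

    #include <cassert>

    // Rd = pred ? (Rn & Imm) : Rfalse  --  RegConstraint<"$Rfalse = $Rd"> ties
    // the false value, not the first source, to the destination.
    unsigned andcc(bool pred, unsigned Rfalse, unsigned Rn, unsigned Imm) {
      unsigned Rd = Rfalse;        // Rd starts as the tied false value
      if (pred) Rd = Rn & Imm;     // the predicated AND overwrites it when true
      return Rd;
    }

    int main() {
      assert(andcc(true, 9, 0xff, 0x0f) == 0x0f);
      assert(andcc(false, 9, 0xff, 0x0f) == 9);
    }
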
@@ -4020,6 +4023,10 @@ defm ORRCC : AsI1_bincc_irs<ORRri, ORRrr, ORRrsi, ORRrsr,
IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
defm EORCC : AsI1_bincc_irs<EORri, EORrr, EORrsi, EORrsr,
IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
+defm ADDCC : AsI1_bincc_irs<ADDri, ADDrr, ADDrsi, ADDrsr,
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
+defm SUBCC : AsI1_bincc_irs<SUBri, SUBrr, SUBrsi, SUBrsr,
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
} // neverHasSideEffects
@@ -4068,11 +4075,8 @@ def ISB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
// Pseudo instruction that combines movs + predicated rsbmi
// to implement integer ABS
-let usesCustomInserter = 1, Defs = [CPSR] in {
-def ABS : ARMPseudoInst<
- (outs GPR:$dst), (ins GPR:$src),
- 8, NoItinerary, []>;
-}
+let usesCustomInserter = 1, Defs = [CPSR] in
+def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>;
let usesCustomInserter = 1 in {
let Defs = [CPSR] in {
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 3134088..048d340 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -398,6 +398,27 @@ def VecListFourQWordIndexed : Operand<i32> {
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
+def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() == 2;
+}]>;
+def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() == 2;
+}]>;
+def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() == 1;
+}]>;
+def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() == 1;
+}]>;
+def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() < 4;
+}]>;
+def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() < 4;
+}]>;
//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
@@ -2238,6 +2259,19 @@ def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
+// Use vld1/vst1 for unaligned f64 load / store
+def : Pat<(f64 (hword_alignedload addrmode6:$addr)),
+ (VLD1d16 addrmode6:$addr)>, Requires<[IsLE]>;
+def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr),
+ (VST1d16 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
+def : Pat<(f64 (byte_alignedload addrmode6:$addr)),
+ (VLD1d8 addrmode6:$addr)>, Requires<[IsLE]>;
+def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr),
+ (VST1d8 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
+def : Pat<(f64 (non_word_alignedload addrmode6:$addr)),
+ (VLD1d64 addrmode6:$addr)>, Requires<[IsBE]>;
+def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr),
+ (VST1d64 addrmode6:$addr, DPR:$value)>, Requires<[IsBE]>;
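
These patterns give f64 accesses with weak alignment guarantees a direct lowering on little-endian NEON targets: a 2-byte-aligned load can select vld1.16 and a 1-byte-aligned one vld1.8, rather than being expanded. A minimal sketch of such an access at the C++ level (not from the patch; how a given compiler lowers it depends on what alignment it can prove):

    #include <cstring>

    // An f64 load through a pointer with no alignment guarantee; memcpy is
    // the portable way to express it, and the provable alignment determines
    // which of the PatFrags above applies.
    double loadUnalignedF64(const void *p) {
      double d;
      std::memcpy(&d, p, sizeof d);
      return d;
    }

    int main() {
      unsigned char buf[sizeof(double) + 1] = {};
      return loadUnalignedF64(buf + 1) == 0.0 ? 0 : 1;
    }
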
//===----------------------------------------------------------------------===//
// NEON pattern fragments
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index d83530a..8ecf009 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -172,6 +172,7 @@ def t2ldr_pcrel_imm12 : Operand<i32> {
// ADR instruction labels.
def t2adrlabel : Operand<i32> {
let EncoderMethod = "getT2AdrLabelOpValue";
+ let PrintMethod = "printAdrLabelOperand";
}
@@ -529,7 +530,7 @@ class T2MulLong<bits<3> opc22_20, bits<4> opc7_4,
/// changed to modify CPSR.
multiclass T2I_bin_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, string baseOpc, bit Commutable = 0,
+ PatFrag opnode, bit Commutable = 0,
string wide = ""> {
// shifted imm
def ri : T2sTwoRegImm<
@@ -565,15 +566,15 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc,
// Assembly aliases for optional destination operand when it's the same
// as the source operand.
def : t2InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn,
+ (!cast<Instruction>(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn,
t2_so_imm:$imm, pred:$p,
cc_out:$s)>;
def : t2InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $Rm"),
- (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
+ (!cast<Instruction>(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn,
rGPR:$Rm, pred:$p,
cc_out:$s)>;
def : t2InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rdn, rGPR:$Rdn,
+ (!cast<Instruction>(NAME#"rs") rGPR:$Rdn, rGPR:$Rdn,
t2_so_reg:$shift, pred:$p,
cc_out:$s)>;
}
@@ -582,36 +583,30 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc,
// the ".w" suffix to indicate that they are wide.
multiclass T2I_bin_w_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, string baseOpc, bit Commutable = 0> :
- T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, baseOpc, Commutable, ".w"> {
+ PatFrag opnode, bit Commutable = 0> :
+ T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, Commutable, ".w"> {
// Assembler aliases w/ the ".w" suffix.
def : t2InstAlias<!strconcat(opc, "${s}${p}.w", " $Rd, $Rn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rd, rGPR:$Rn,
- t2_so_imm:$imm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"ri") rGPR:$Rd, rGPR:$Rn, t2_so_imm:$imm, pred:$p,
+ cc_out:$s)>;
// Assembler aliases w/o the ".w" suffix.
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $Rm"),
- (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rd, rGPR:$Rn,
- rGPR:$Rm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"rr") rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rd, rGPR:$Rn,
- t2_so_reg:$shift, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"rs") rGPR:$Rd, rGPR:$Rn, t2_so_reg:$shift,
+ pred:$p, cc_out:$s)>;
// and with the optional destination operand, too.
def : t2InstAlias<!strconcat(opc, "${s}${p}.w", " $Rdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn,
- t2_so_imm:$imm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn, t2_so_imm:$imm,
+ pred:$p, cc_out:$s)>;
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $Rm"),
- (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
- rGPR:$Rm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rdn, rGPR:$Rdn,
- t2_so_reg:$shift, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"rs") rGPR:$Rdn, rGPR:$Rdn, t2_so_reg:$shift,
+ pred:$p, cc_out:$s)>;
}
/// T2I_rbin_is - Same as T2I_bin_irs except the order of operands are
@@ -762,6 +757,33 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
let Inst{24} = 1;
let Inst{23-21} = op23_21;
}
+
+ // Predicated versions.
+ def CCri : t2PseudoExpand<(outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rfalse, GPRnopc:$Rn, t2_so_imm:$imm,
+ pred:$p, cc_out:$s), 4, IIC_iALUi, [],
+ (!cast<Instruction>(NAME#ri) GPRnopc:$Rd,
+ GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rfalse = $Rd">;
+ def CCri12 : t2PseudoExpand<(outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rfalse, GPR:$Rn, imm0_4095:$imm,
+ pred:$p),
+ 4, IIC_iALUi, [],
+ (!cast<Instruction>(NAME#ri12) GPRnopc:$Rd,
+ GPR:$Rn, imm0_4095:$imm, pred:$p)>,
+ RegConstraint<"$Rfalse = $Rd">;
+ def CCrr : t2PseudoExpand<(outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rfalse, GPRnopc:$Rn, rGPR:$Rm,
+ pred:$p, cc_out:$s), 4, IIC_iALUr, [],
+ (!cast<Instruction>(NAME#rr) GPRnopc:$Rd,
+ GPRnopc:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rfalse = $Rd">;
+ def CCrs : t2PseudoExpand<(outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rfalse, GPRnopc:$Rn, t2_so_reg:$Rm,
+ pred:$p, cc_out:$s), 4, IIC_iALUsi, [],
+ (!cast<Instruction>(NAME#rs) GPRnopc:$Rd,
+ GPRnopc:$Rn, t2_so_reg:$Rm, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rfalse = $Rd">;
}
/// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns
@@ -808,8 +830,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
/// T2I_sh_ir - Defines a set of (op reg, {so_imm|r}) patterns for a shift /
// rotate operation that produces a value.
-multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode,
- string baseOpc> {
+multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode> {
// 5-bit imm
def ri : T2sTwoRegShiftImm<
(outs rGPR:$Rd), (ins rGPR:$Rm, ty:$imm), IIC_iMOVsi,
@@ -834,33 +855,27 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode,
// Optional destination register
def : t2InstAlias<!strconcat(opc, "${s}${p}", ".w $Rdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn,
- ty:$imm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn, ty:$imm, pred:$p,
+ cc_out:$s)>;
def : t2InstAlias<!strconcat(opc, "${s}${p}", ".w $Rdn, $Rm"),
- (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
- rGPR:$Rm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
// Assembler aliases w/o the ".w" suffix.
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rd, rGPR:$Rn,
- ty:$imm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"ri") rGPR:$Rd, rGPR:$Rn, ty:$imm, pred:$p,
+ cc_out:$s)>;
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $Rm"),
- (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rd, rGPR:$Rn,
- rGPR:$Rm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"rr") rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
// and with the optional destination operand, too.
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn,
- ty:$imm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn, ty:$imm, pred:$p,
+ cc_out:$s)>;
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $Rm"),
- (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
- rGPR:$Rm, pred:$p,
- cc_out:$s)>;
+ (!cast<Instruction>(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
}
/// T2I_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
@@ -868,7 +883,7 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode,
/// a explicit result, only implicitly set CPSR.
multiclass T2I_cmp_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, string baseOpc> {
+ PatFrag opnode> {
let isCompare = 1, Defs = [CPSR] in {
// shifted imm
def ri : T2OneRegCmpImm<
@@ -913,12 +928,9 @@ let isCompare = 1, Defs = [CPSR] in {
// No alias here for 'rr' version as not all instantiations of this
// multiclass want one (CMP in particular, does not).
def : t2InstAlias<!strconcat(opc, "${p}", " $Rn, $imm"),
- (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPRnopc:$Rn,
- t2_so_imm:$imm, pred:$p)>;
+ (!cast<Instruction>(NAME#"ri") GPRnopc:$Rn, t2_so_imm:$imm, pred:$p)>;
def : t2InstAlias<!strconcat(opc, "${p}", " $Rn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rs")) GPRnopc:$Rn,
- t2_so_reg:$shift,
- pred:$p)>;
+ (!cast<Instruction>(NAME#"rs") GPRnopc:$Rn, t2_so_reg:$shift, pred:$p)>;
}
/// T2I_ld - Defines a set of (op r, {imm12|imm8|so_reg}) load patterns.
@@ -2152,13 +2164,13 @@ def : T2Pat<(int_arm_usat GPR:$a, imm:$pos), (t2USAT imm:$pos, GPR:$a, 0)>;
//
defm t2LSL : T2I_sh_ir<0b00, "lsl", imm0_31,
- BinOpFrag<(shl node:$LHS, node:$RHS)>, "t2LSL">;
+ BinOpFrag<(shl node:$LHS, node:$RHS)>>;
defm t2LSR : T2I_sh_ir<0b01, "lsr", imm_sr,
- BinOpFrag<(srl node:$LHS, node:$RHS)>, "t2LSR">;
+ BinOpFrag<(srl node:$LHS, node:$RHS)>>;
defm t2ASR : T2I_sh_ir<0b10, "asr", imm_sr,
- BinOpFrag<(sra node:$LHS, node:$RHS)>, "t2ASR">;
+ BinOpFrag<(sra node:$LHS, node:$RHS)>>;
defm t2ROR : T2I_sh_ir<0b11, "ror", imm0_31,
- BinOpFrag<(rotr node:$LHS, node:$RHS)>, "t2ROR">;
+ BinOpFrag<(rotr node:$LHS, node:$RHS)>>;
// (rotr x, (and y, 0x...1f)) ==> (ROR x, y)
def : T2Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)),
@@ -2214,18 +2226,17 @@ def t2MOVsra_flag : T2TwoRegShiftImm<
defm t2AND : T2I_bin_w_irs<0b0000, "and",
IIC_iBITi, IIC_iBITr, IIC_iBITsi,
- BinOpFrag<(and node:$LHS, node:$RHS)>, "t2AND", 1>;
+ BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
defm t2ORR : T2I_bin_w_irs<0b0010, "orr",
IIC_iBITi, IIC_iBITr, IIC_iBITsi,
- BinOpFrag<(or node:$LHS, node:$RHS)>, "t2ORR", 1>;
+ BinOpFrag<(or node:$LHS, node:$RHS)>, 1>;
defm t2EOR : T2I_bin_w_irs<0b0100, "eor",
IIC_iBITi, IIC_iBITr, IIC_iBITsi,
- BinOpFrag<(xor node:$LHS, node:$RHS)>, "t2EOR", 1>;
+ BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>;
defm t2BIC : T2I_bin_w_irs<0b0001, "bic",
IIC_iBITi, IIC_iBITr, IIC_iBITsi,
- BinOpFrag<(and node:$LHS, (not node:$RHS))>,
- "t2BIC">;
+ BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
class T2BitFI<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
@@ -2305,8 +2316,7 @@ let Constraints = "$src = $Rd" in {
defm t2ORN : T2I_bin_irs<0b0011, "orn",
IIC_iBITi, IIC_iBITr, IIC_iBITsi,
- BinOpFrag<(or node:$LHS, (not node:$RHS))>,
- "t2ORN", 0, "">;
+ BinOpFrag<(or node:$LHS, (not node:$RHS))>, 0, "">;
/// T2I_un_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
/// unary operation that produces a value. These are predicable and can be
@@ -2878,7 +2888,7 @@ def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000),
//
defm t2CMP : T2I_cmp_irs<0b1101, "cmp",
IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi,
- BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>, "t2CMP">;
+ BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
def : T2Pat<(ARMcmpZ GPRnopc:$lhs, t2_so_imm:$imm),
(t2CMPri GPRnopc:$lhs, t2_so_imm:$imm)>;
@@ -2932,13 +2942,10 @@ let isCompare = 1, Defs = [CPSR] in {
// Assembler aliases w/o the ".w" suffix.
// No alias here for 'rr' version as not all instantiations of this multiclass
// want one (CMP in particular, does not).
-def : t2InstAlias<!strconcat("cmn", "${p}", " $Rn, $imm"),
- (!cast<Instruction>(!strconcat("t2CMN", "ri")) GPRnopc:$Rn,
- t2_so_imm:$imm, pred:$p)>;
-def : t2InstAlias<!strconcat("cmn", "${p}", " $Rn, $shift"),
- (!cast<Instruction>(!strconcat("t2CMNz", "rs")) GPRnopc:$Rn,
- t2_so_reg:$shift,
- pred:$p)>;
+def : t2InstAlias<"cmn${p} $Rn, $imm",
+ (t2CMNri GPRnopc:$Rn, t2_so_imm:$imm, pred:$p)>;
+def : t2InstAlias<"cmn${p} $Rn, $shift",
+ (t2CMNzrs GPRnopc:$Rn, t2_so_reg:$shift, pred:$p)>;
def : T2Pat<(ARMcmp GPR:$src, t2_so_imm_neg:$imm),
(t2CMNri GPR:$src, t2_so_imm_neg:$imm)>;
@@ -2948,19 +2955,17 @@ def : T2Pat<(ARMcmpZ GPRnopc:$src, t2_so_imm_neg:$imm),
defm t2TST : T2I_cmp_irs<0b0000, "tst",
IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi,
- BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>,
- "t2TST">;
+ BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>>;
defm t2TEQ : T2I_cmp_irs<0b0100, "teq",
IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi,
- BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>,
- "t2TEQ">;
+ BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>>;
// Conditional moves
// FIXME: should be able to write a pattern for ARMcmov, but can't use
// a two-value operand where a dag node expects two operands. :(
let neverHasSideEffects = 1 in {
-let isCommutable = 1 in
+let isCommutable = 1, isSelect = 1 in
def t2MOVCCr : t2PseudoInst<(outs rGPR:$Rd),
(ins rGPR:$false, rGPR:$Rm, pred:$p),
4, IIC_iCMOVr,
@@ -3048,22 +3053,25 @@ multiclass T2I_bincc_irs<Instruction iri, Instruction irr, Instruction irs,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis> {
// shifted imm
def ri : t2PseudoExpand<(outs rGPR:$Rd),
- (ins rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s),
+ (ins rGPR:$Rfalse, rGPR:$Rn, t2_so_imm:$imm,
+ pred:$p, cc_out:$s),
4, iii, [],
(iri rGPR:$Rd, rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>,
- RegConstraint<"$Rn = $Rd">;
+ RegConstraint<"$Rfalse = $Rd">;
// register
def rr : t2PseudoExpand<(outs rGPR:$Rd),
- (ins rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s),
+ (ins rGPR:$Rfalse, rGPR:$Rn, rGPR:$Rm,
+ pred:$p, cc_out:$s),
4, iir, [],
(irr rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>,
- RegConstraint<"$Rn = $Rd">;
+ RegConstraint<"$Rfalse = $Rd">;
// shifted register
def rs : t2PseudoExpand<(outs rGPR:$Rd),
- (ins rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s),
+ (ins rGPR:$Rfalse, rGPR:$Rn, t2_so_reg:$ShiftedRm,
+ pred:$p, cc_out:$s),
4, iis, [],
(irs rGPR:$Rd, rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>,
- RegConstraint<"$Rn = $Rd">;
+ RegConstraint<"$Rfalse = $Rd">;
} // T2I_bincc_irs
defm t2ANDCC : T2I_bincc_irs<t2ANDri, t2ANDrr, t2ANDrs,
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 23c132e..7d6692f 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -61,6 +61,15 @@ def vfp_f64imm : Operand<f64>,
let ParserMatchClass = FPImmOperand;
}
+def alignedload32 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 4;
+}]>;
+
+def alignedstore32 : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 4;
+}]>;
+
// The VCVT to/from fixed-point instructions encode the 'fbits' operand
// (the number of fixed bits) differently than it appears in the assembly
// source. It's encoded as "Size - fbits" where Size is the size of the
@@ -86,7 +95,7 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in {
def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr),
IIC_fpLoad64, "vldr", "\t$Dd, $addr",
- [(set DPR:$Dd, (f64 (load addrmode5:$addr)))]>;
+ [(set DPR:$Dd, (f64 (alignedload32 addrmode5:$addr)))]>;
def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
IIC_fpLoad32, "vldr", "\t$Sd, $addr",
@@ -100,7 +109,7 @@ def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr),
IIC_fpStore64, "vstr", "\t$Dd, $addr",
- [(store (f64 DPR:$Dd), addrmode5:$addr)]>;
+ [(alignedstore32 (f64 DPR:$Dd), addrmode5:$addr)]>;
def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr),
IIC_fpStore32, "vstr", "\t$Sd, $addr",
@@ -433,25 +442,25 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
// Between half-precision and single-precision. For disassembly only.
// FIXME: Verify encoding after integrated assembler is working.
-def VCVTBSH: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
+def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>;
-def : ARMPat<(f32_to_f16 SPR:$a),
- (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
-
-def VCVTBHS: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
+def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>;
-def : ARMPat<(f16_to_f32 GPR:$a),
- (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
+def : Pat<(f32_to_f16 SPR:$a),
+ (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
+
+def : Pat<(f16_to_f32 GPR:$a),
+ (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
-def VCVTTSH: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
+def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>;
-def VCVTTHS: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
+def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>;
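
The renames above make the def names follow the conversion direction: VCVTBHS is the half-to-single widening spelled vcvtb.f32.f16 (destination type first in the mnemonic), so the f16_to_f32/f32_to_f16 patterns now attach to the right instructions. A standalone sketch of the widening VCVTBHS performs, for normal inputs only (denormals, infinities and NaNs would need extra cases):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Widen an IEEE binary16 value to binary32, normal numbers only:
    // re-bias the exponent (15 -> 127) and left-align the fraction.
    float halfToFloat(uint16_t h) {
      uint32_t sign = (h >> 15) & 1;
      uint32_t exp  = (h >> 10) & 0x1f;
      uint32_t frac = h & 0x3ff;
      uint32_t bits = (sign << 31) | ((exp - 15 + 127) << 23) | (frac << 13);
      float f;
      std::memcpy(&f, &bits, sizeof f);
      return f;
    }

    int main() {
      assert(halfToFloat(0x3C00) == 1.0f);   // 1.0 in binary16
      assert(halfToFloat(0xC000) == -2.0f);  // -2.0 in binary16
    }
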
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
index c5db211..357fc3f 100644
--- a/lib/Target/ARM/ARMJITInfo.cpp
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -291,9 +291,9 @@ void ARMJITInfo::relocate(void *Function, MachineRelocation *MR,
if (MR->getRelocationType() == ARM::reloc_arm_vfp_cp_entry)
ResultPtr = ResultPtr >> 2;
*((intptr_t*)RelocPos) |= ResultPtr;
- // Set register Rn to PC.
- *((intptr_t*)RelocPos) |=
- getARMRegisterNumbering(ARM::PC) << ARMII::RegRnShift;
+ // Set register Rn to PC (which is register 15 on all architectures).
+ // FIXME: This avoids the need for register info in the JIT class.
+ *((intptr_t*)RelocPos) |= 15 << ARMII::RegRnShift;
break;
}
case ARM::reloc_arm_so_imm_cp_entry: {
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index cb1b2a2..897ceb6 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -456,8 +456,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
DebugLoc dl = Loc->getDebugLoc();
const MachineOperand &PMO = Loc->getOperand(0);
unsigned PReg = PMO.getReg();
- unsigned PRegNum = PMO.isUndef() ? UINT_MAX
- : getARMRegisterNumbering(PReg);
+ unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
unsigned Count = 1;
unsigned Limit = ~0U;
@@ -483,8 +482,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
int NewOffset = MemOps[i].Offset;
const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
unsigned Reg = MO.getReg();
- unsigned RegNum = MO.isUndef() ? UINT_MAX
- : getARMRegisterNumbering(Reg);
+ unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
// Register numbers must be in ascending order. For VFP / NEON load and
// store multiples, the registers must also be consecutive and within the
// limit on the number of registers per instruction.
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index 3857647..6f974fd 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -12,16 +12,16 @@
//===----------------------------------------------------------------------===//
// Registers are identified with 4-bit ID numbers.
-class ARMReg<bits<4> num, string n, list<Register> subregs = []> : Register<n> {
- field bits<4> Num;
+class ARMReg<bits<16> Enc, string n, list<Register> subregs = []> : Register<n> {
+ let HWEncoding = Enc;
let Namespace = "ARM";
let SubRegs = subregs;
// All bits of ARM registers with sub-registers are covered by sub-registers.
let CoveredBySubRegs = 1;
}
-class ARMFReg<bits<6> num, string n> : Register<n> {
- field bits<6> Num;
+class ARMFReg<bits<16> Enc, string n> : Register<n> {
+ let HWEncoding = Enc;
let Namespace = "ARM";
}
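
Moving the encoding into the generic Register::HWEncoding field is what lets the asm parser, JIT and load/store optimizer elsewhere in this patch replace the ARM-local getARMRegisterNumbering() with the target-independent getEncodingValue(). A standalone model of that migration (names and values illustrative):

    #include <cassert>
    #include <cstdint>

    enum Reg { R0, R1, PC, NumRegs };

    // Old style: a hand-written mapping helper (cf. getARMRegisterNumbering).
    unsigned oldEncoding(Reg R) {
      switch (R) {
      case R0: return 0;
      case R1: return 1;
      case PC: return 15;
      default: return ~0u;
      }
    }

    // New style: a tablegen'd per-register field, queried generically
    // (cf. getEncodingValue reading the HWEncoding set above).
    const uint16_t HWEncoding[NumRegs] = {0, 1, 15};
    unsigned getEncodingValue(Reg R) { return HWEncoding[R]; }

    int main() {
      for (int R = 0; R < NumRegs; ++R)
        assert(oldEncoding(Reg(R)) == getEncodingValue(Reg(R)));
    }
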
diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td
index 56197d4..2c63825 100644
--- a/lib/Target/ARM/ARMScheduleA8.td
+++ b/lib/Target/ARM/ARMScheduleA8.td
@@ -1069,6 +1069,7 @@ def CortexA8Model : SchedMachineModel {
let LoadLatency = 2; // Optimistic load latency assuming bypass.
// This is overriden by OperandCycles if the
// Itineraries are queried instead.
+ let MispredictPenalty = 13; // Based on estimate of pipeline depth.
let Itineraries = CortexA8Itineraries;
}
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index 738974e..7bc590f 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -1886,6 +1886,7 @@ def CortexA9Model : SchedMachineModel {
let LoadLatency = 2; // Optimistic load latency assuming bypass.
// This is overridden by OperandCycles if the
// Itineraries are queried instead.
+ let MispredictPenalty = 8; // Based on estimate of pipeline depth.
let Itineraries = CortexA9Itineraries;
}
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index e067a9f..89e29ad 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -97,6 +97,9 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
if (!HasV6T2Ops && hasThumb2())
HasV4TOps = HasV5TOps = HasV5TEOps = HasV6Ops = HasV6T2Ops = true;
+ // Keep a pointer to static instruction cost data for the specified CPU.
+ SchedModel = getSchedModelForCPU(CPUString);
+
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUString);
@@ -179,15 +182,7 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
}
unsigned ARMSubtarget::getMispredictionPenalty() const {
- // If we have a reasonable estimate of the pipeline depth, then we can
- // estimate the penalty of a misprediction based on that.
- if (isCortexA8())
- return 13;
- else if (isCortexA9())
- return 8;
-
- // Otherwise, just return a sensible default.
- return 10;
+ return SchedModel->MispredictPenalty;
}
bool ARMSubtarget::enablePostRAScheduler(
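
With MispredictPenalty recorded in each SchedMachineModel (the A8/A9 hunks above), the isCortexA8()/isCortexA9() chain becomes a field read off static per-CPU data. A standalone model of the lookup, using the values from this patch (structure hypothetical):

    #include <cassert>
    #include <cstring>

    struct SchedModel { const char *CPU; unsigned MispredictPenalty; };

    const SchedModel Models[] = {
      {"cortex-a8", 13},
      {"cortex-a9", 8},
      {"generic",   10},   // the old fallthrough default
    };

    // cf. getSchedModelForCPU caching a pointer in the subtarget constructor.
    const SchedModel *getSchedModelForCPU(const char *CPU) {
      for (const SchedModel &M : Models)
        if (std::strcmp(M.CPU, CPU) == 0)
          return &M;
      return &Models[2];
    }

    int main() {
      assert(getSchedModelForCPU("cortex-a8")->MispredictPenalty == 13);
      assert(getSchedModelForCPU("unknown")->MispredictPenalty == 10);
    }
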
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index e72b06f..b394061 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -74,7 +74,7 @@ protected:
/// HasThumb2 - True if Thumb2 instructions are supported.
bool HasThumb2;
- /// IsMClass - True if the subtarget belongs to the 'M' profile of CPUs -
+ /// IsMClass - True if the subtarget belongs to the 'M' profile of CPUs -
/// v6m, v7m for example.
bool IsMClass;
@@ -155,6 +155,9 @@ protected:
/// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple;
+ /// SchedModel - Processor specific instruction costs.
+ const MCSchedModel *SchedModel;
+
/// Selected instruction itineraries (one entry per itinerary class.)
InstrItineraryData InstrItins;
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 4497720..3a5957b 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -796,6 +796,13 @@ public:
int64_t Value = CE->getValue();
return Value > 0 && Value <= 32;
}
+ bool isAdrLabel() const {
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, but it can't fit
+ // into shift immediate encoding, we reject it.
+ if (isImm() && !isa<MCConstantExpr>(getImm())) return true;
+ else return (isARMSOImm() || isARMSOImmNeg());
+ }
bool isARMSOImm() const {
if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
@@ -1033,7 +1040,8 @@ public:
// Immediate offset a multiple of 4 in range [-1020, 1020].
if (!Memory.OffsetImm) return true;
int64_t Val = Memory.OffsetImm->getValue();
- return Val >= -1020 && Val <= 1020 && (Val & 3) == 0;
+ // Special case: #-0 is represented as INT32_MIN.
+ return (Val >= -1020 && Val <= 1020 && (Val & 3) == 0) || Val == INT32_MIN;
}
bool isMemImm0_1020s4Offset() const {
if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
@@ -1644,6 +1652,22 @@ public:
Inst.addOperand(MCOperand::CreateImm(Imm));
}
+ void addAdrLabelOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ assert(isImm() && "Not an immediate!");
+
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup.
+ if (!isa<MCConstantExpr>(getImm())) {
+ Inst.addOperand(MCOperand::CreateExpr(getImm()));
+ return;
+ }
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ int Val = CE->getValue();
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ }
+
void addAlignedMemoryOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
@@ -2884,7 +2908,7 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (!RC->contains(EndReg))
return Error(EndLoc, "invalid register in register list");
// Ranges must go from low to high.
- if (getARMRegisterNumbering(Reg) > getARMRegisterNumbering(EndReg))
+ if (MRI->getEncodingValue(Reg) > MRI->getEncodingValue(EndReg))
return Error(EndLoc, "bad range in register list");
// Add all the registers in the range to the register list.
@@ -2911,13 +2935,13 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (!RC->contains(Reg))
return Error(RegLoc, "invalid register in register list");
// List must be monotonically increasing.
- if (getARMRegisterNumbering(Reg) < getARMRegisterNumbering(OldReg)) {
+ if (MRI->getEncodingValue(Reg) < MRI->getEncodingValue(OldReg)) {
if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
Warning(RegLoc, "register list not in ascending order");
else
return Error(RegLoc, "register list not in ascending order");
}
- if (getARMRegisterNumbering(Reg) == getARMRegisterNumbering(OldReg)) {
+ if (MRI->getEncodingValue(Reg) == MRI->getEncodingValue(OldReg)) {
Warning(RegLoc, "duplicated register (" + RegTok.getString() +
") in register list");
continue;
@@ -3256,29 +3280,59 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
- if (!Tok.is(AsmToken::Identifier))
- return MatchOperand_NoMatch;
- StringRef OptStr = Tok.getString();
-
- unsigned Opt = StringSwitch<unsigned>(OptStr.slice(0, OptStr.size()).lower())
- .Case("sy", ARM_MB::SY)
- .Case("st", ARM_MB::ST)
- .Case("sh", ARM_MB::ISH)
- .Case("ish", ARM_MB::ISH)
- .Case("shst", ARM_MB::ISHST)
- .Case("ishst", ARM_MB::ISHST)
- .Case("nsh", ARM_MB::NSH)
- .Case("un", ARM_MB::NSH)
- .Case("nshst", ARM_MB::NSHST)
- .Case("unst", ARM_MB::NSHST)
- .Case("osh", ARM_MB::OSH)
- .Case("oshst", ARM_MB::OSHST)
- .Default(~0U);
+ unsigned Opt;
+
+ if (Tok.is(AsmToken::Identifier)) {
+ StringRef OptStr = Tok.getString();
+
+ Opt = StringSwitch<unsigned>(OptStr.slice(0, OptStr.size()).lower())
+ .Case("sy", ARM_MB::SY)
+ .Case("st", ARM_MB::ST)
+ .Case("sh", ARM_MB::ISH)
+ .Case("ish", ARM_MB::ISH)
+ .Case("shst", ARM_MB::ISHST)
+ .Case("ishst", ARM_MB::ISHST)
+ .Case("nsh", ARM_MB::NSH)
+ .Case("un", ARM_MB::NSH)
+ .Case("nshst", ARM_MB::NSHST)
+ .Case("unst", ARM_MB::NSHST)
+ .Case("osh", ARM_MB::OSH)
+ .Case("oshst", ARM_MB::OSHST)
+ .Default(~0U);
- if (Opt == ~0U)
- return MatchOperand_NoMatch;
+ if (Opt == ~0U)
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat identifier token.
+ } else if (Tok.is(AsmToken::Hash) ||
+ Tok.is(AsmToken::Dollar) ||
+ Tok.is(AsmToken::Integer)) {
+ if (Parser.getTok().isNot(AsmToken::Integer))
+ Parser.Lex(); // Eat the '#'.
+ SMLoc Loc = Parser.getTok().getLoc();
+
+ const MCExpr *MemBarrierID;
+ if (getParser().ParseExpression(MemBarrierID)) {
+ Error(Loc, "illegal expression");
+ return MatchOperand_ParseFail;
+ }
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(MemBarrierID);
+ if (!CE) {
+ Error(Loc, "constant expression expected");
+ return MatchOperand_ParseFail;
+ }
+
+ int Val = CE->getValue();
+ if (Val & ~0xf) {
+ Error(Loc, "immediate value out of range");
+ return MatchOperand_ParseFail;
+ }
+
+ Opt = ARM_MB::RESERVED_0 + Val;
+ } else
+ return MatchOperand_ParseFail;
- Parser.Lex(); // Eat identifier token.
Operands.push_back(ARMOperand::CreateMemBarrierOpt((ARM_MB::MemBOpt)Opt, S));
return MatchOperand_Success;
}
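
The new branch accepts barrier options written as a raw 4-bit immediate as well as by name; since SY encodes as 0xF in the architecture, `dmb #0xf` should assemble identically to `dmb sy` (my reading of the encodings, not stated in the patch). A standalone check that the `Val & ~0xf` test accepts exactly 0..15:

    #include <cassert>

    int main() {
      for (int Val = -4; Val <= 20; ++Val) {
        bool accepted = (Val & ~0xf) == 0;       // the parser's range test
        assert(accepted == (Val >= 0 && Val <= 15));
      }
    }
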
@@ -5250,8 +5304,8 @@ validateInstruction(MCInst &Inst,
case ARM::LDRD_POST:
case ARM::LDREXD: {
// Rt2 must be Rt + 1.
- unsigned Rt = getARMRegisterNumbering(Inst.getOperand(0).getReg());
- unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(1).getReg());
+ unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg());
+ unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg());
if (Rt2 != Rt + 1)
return Error(Operands[3]->getStartLoc(),
"destination operands must be sequential");
@@ -5259,8 +5313,8 @@ validateInstruction(MCInst &Inst,
}
case ARM::STRD: {
// Rt2 must be Rt + 1.
- unsigned Rt = getARMRegisterNumbering(Inst.getOperand(0).getReg());
- unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(1).getReg());
+ unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg());
+ unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg());
if (Rt2 != Rt + 1)
return Error(Operands[3]->getStartLoc(),
"source operands must be sequential");
@@ -5270,8 +5324,8 @@ validateInstruction(MCInst &Inst,
case ARM::STRD_POST:
case ARM::STREXD: {
// Rt2 must be Rt + 1.
- unsigned Rt = getARMRegisterNumbering(Inst.getOperand(1).getReg());
- unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(2).getReg());
+ unsigned Rt = MRI->getEncodingValue(Inst.getOperand(1).getReg());
+ unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(2).getReg());
if (Rt2 != Rt + 1)
return Error(Operands[3]->getStartLoc(),
"source operands must be sequential");
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 47cca2a..c90751d 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -18,10 +18,12 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LEB128.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>
@@ -383,7 +385,6 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
#include "ARMGenDisassemblerTables.inc"
-#include "ARMGenInstrInfo.inc"
#include "ARMGenEDInfo.inc"
static MCDisassembler *createARMDisassembler(const Target &T, const MCSubtargetInfo &STI) {
@@ -427,7 +428,8 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
(bytes[0] << 0);
// Calling the auto-generated decoder function.
- DecodeStatus result = decodeARMInstruction32(MI, insn, Address, this, STI);
+ DecodeStatus result = decodeInstruction(DecoderTableARM32, MI, insn,
+ Address, this, STI);
if (result != MCDisassembler::Fail) {
Size = 4;
return result;
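From here down, every per-table generated entry point (decodeARMInstruction32, decodeVFPInstruction32, and so on) becomes a call to a single generated decodeInstruction with the decoder table passed as data; MCFixedLenDisassembler.h, added to the includes above, carries the supporting declarations. Reconstructed from the call sites (a sketch, not a verbatim header), the shape of the change is:

// Before: one generated function per decoder table.
//   DecodeStatus decodeARMInstruction32(MCInst &MI, uint32_t Insn,
//                                       uint64_t Address, const void *D,
//                                       const MCSubtargetInfo &STI);
// After: one generic entry point; the table is the first argument.
//   template <typename InsnType>
//   DecodeStatus decodeInstruction(const uint8_t Table[], MCInst &MI,
//                                  InsnType Insn, uint64_t Address,
//                                  const void *D, const MCSubtargetInfo &STI);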
@@ -436,14 +438,15 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
// VFP and NEON instructions, similarly, are shared between ARM
// and Thumb modes.
MI.clear();
- result = decodeVFPInstruction32(MI, insn, Address, this, STI);
+ result = decodeInstruction(DecoderTableVFP32, MI, insn, Address, this, STI);
if (result != MCDisassembler::Fail) {
Size = 4;
return result;
}
MI.clear();
- result = decodeNEONDataInstruction32(MI, insn, Address, this, STI);
+ result = decodeInstruction(DecoderTableNEONData32, MI, insn, Address,
+ this, STI);
if (result != MCDisassembler::Fail) {
Size = 4;
// Add a fake predicate operand, because we share these instruction
@@ -454,7 +457,8 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
MI.clear();
- result = decodeNEONLoadStoreInstruction32(MI, insn, Address, this, STI);
+ result = decodeInstruction(DecoderTableNEONLoadStore32, MI, insn, Address,
+ this, STI);
if (result != MCDisassembler::Fail) {
Size = 4;
// Add a fake predicate operand, because we share these instruction
@@ -465,7 +469,8 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
MI.clear();
- result = decodeNEONDupInstruction32(MI, insn, Address, this, STI);
+ result = decodeInstruction(DecoderTableNEONDup32, MI, insn, Address,
+ this, STI);
if (result != MCDisassembler::Fail) {
Size = 4;
// Add a fake predicate operand, because we share these instruction
@@ -765,7 +770,8 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
uint16_t insn16 = (bytes[1] << 8) | bytes[0];
- DecodeStatus result = decodeThumbInstruction16(MI, insn16, Address, this, STI);
+ DecodeStatus result = decodeInstruction(DecoderTableThumb16, MI, insn16,
+ Address, this, STI);
if (result != MCDisassembler::Fail) {
Size = 2;
Check(result, AddThumbPredicate(MI));
@@ -773,7 +779,8 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
MI.clear();
- result = decodeThumbSBitInstruction16(MI, insn16, Address, this, STI);
+ result = decodeInstruction(DecoderTableThumbSBit16, MI, insn16,
+ Address, this, STI);
if (result) {
Size = 2;
bool InITBlock = ITBlock.instrInITBlock();
@@ -783,7 +790,8 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
MI.clear();
- result = decodeThumb2Instruction16(MI, insn16, Address, this, STI);
+ result = decodeInstruction(DecoderTableThumb216, MI, insn16,
+ Address, this, STI);
if (result != MCDisassembler::Fail) {
Size = 2;
@@ -818,7 +826,8 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
(bytes[1] << 24) |
(bytes[0] << 16);
MI.clear();
- result = decodeThumbInstruction32(MI, insn32, Address, this, STI);
+ result = decodeInstruction(DecoderTableThumb32, MI, insn32, Address,
+ this, STI);
if (result != MCDisassembler::Fail) {
Size = 4;
bool InITBlock = ITBlock.instrInITBlock();
@@ -828,7 +837,8 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
MI.clear();
- result = decodeThumb2Instruction32(MI, insn32, Address, this, STI);
+ result = decodeInstruction(DecoderTableThumb232, MI, insn32, Address,
+ this, STI);
if (result != MCDisassembler::Fail) {
Size = 4;
Check(result, AddThumbPredicate(MI));
@@ -836,7 +846,7 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
MI.clear();
- result = decodeVFPInstruction32(MI, insn32, Address, this, STI);
+ result = decodeInstruction(DecoderTableVFP32, MI, insn32, Address, this, STI);
if (result != MCDisassembler::Fail) {
Size = 4;
UpdateThumbVFPPredicate(MI);
@@ -844,19 +854,21 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
MI.clear();
- result = decodeNEONDupInstruction32(MI, insn32, Address, this, STI);
+ result = decodeInstruction(DecoderTableNEONDup32, MI, insn32, Address,
+ this, STI);
if (result != MCDisassembler::Fail) {
Size = 4;
Check(result, AddThumbPredicate(MI));
return result;
}
- if (fieldFromInstruction32(insn32, 24, 8) == 0xF9) {
+ if (fieldFromInstruction(insn32, 24, 8) == 0xF9) {
MI.clear();
uint32_t NEONLdStInsn = insn32;
NEONLdStInsn &= 0xF0FFFFFF;
NEONLdStInsn |= 0x04000000;
- result = decodeNEONLoadStoreInstruction32(MI, NEONLdStInsn, Address, this, STI);
+ result = decodeInstruction(DecoderTableNEONLoadStore32, MI, NEONLdStInsn,
+ Address, this, STI);
if (result != MCDisassembler::Fail) {
Size = 4;
Check(result, AddThumbPredicate(MI));
@@ -864,13 +876,14 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
}
- if (fieldFromInstruction32(insn32, 24, 4) == 0xF) {
+ if (fieldFromInstruction(insn32, 24, 4) == 0xF) {
MI.clear();
uint32_t NEONDataInsn = insn32;
NEONDataInsn &= 0xF0FFFFFF; // Clear bits 27-24
NEONDataInsn |= (NEONDataInsn & 0x10000000) >> 4; // Move bit 28 to bit 24
NEONDataInsn |= 0x12000000; // Set bits 28 and 25
- result = decodeNEONDataInstruction32(MI, NEONDataInsn, Address, this, STI);
+ result = decodeInstruction(DecoderTableNEONData32, MI, NEONDataInsn,
+ Address, this, STI);
if (result != MCDisassembler::Fail) {
Size = 4;
Check(result, AddThumbPredicate(MI));
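The two fixups above reuse the ARM-mode NEON tables for Thumb2 by rewriting the top bits of the encoding first. As standalone bit arithmetic (a sketch using exactly the masks from the hunks):

#include <cstdint>

// Thumb2 NEON load/store: rewrite the 0xF9 prefix to the ARM-mode form.
uint32_t thumb2ToARMNeonLdSt(uint32_t Insn) {
  return (Insn & 0xF0FFFFFF) | 0x04000000;
}

// Thumb2 NEON data-processing: clear bits 27-24, copy bit 28 down to
// bit 24, then set bits 28 and 25.
uint32_t thumb2ToARMNeonData(uint32_t Insn) {
  uint32_t R = Insn & 0xF0FFFFFF;
  R |= (R & 0x10000000) >> 4;
  R |= 0x12000000;
  return R;
}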
@@ -1117,9 +1130,9 @@ static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rm = fieldFromInstruction32(Val, 0, 4);
- unsigned type = fieldFromInstruction32(Val, 5, 2);
- unsigned imm = fieldFromInstruction32(Val, 7, 5);
+ unsigned Rm = fieldFromInstruction(Val, 0, 4);
+ unsigned type = fieldFromInstruction(Val, 5, 2);
+ unsigned imm = fieldFromInstruction(Val, 7, 5);
// Register-immediate
if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
@@ -1154,9 +1167,9 @@ static DecodeStatus DecodeSORegRegOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rm = fieldFromInstruction32(Val, 0, 4);
- unsigned type = fieldFromInstruction32(Val, 5, 2);
- unsigned Rs = fieldFromInstruction32(Val, 8, 4);
+ unsigned Rm = fieldFromInstruction(Val, 0, 4);
+ unsigned type = fieldFromInstruction(Val, 5, 2);
+ unsigned Rs = fieldFromInstruction(Val, 8, 4);
// Register-register
if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder)))
@@ -1224,8 +1237,8 @@ static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Vd = fieldFromInstruction32(Val, 8, 5);
- unsigned regs = fieldFromInstruction32(Val, 0, 8);
+ unsigned Vd = fieldFromInstruction(Val, 8, 5);
+ unsigned regs = fieldFromInstruction(Val, 0, 8);
if (!Check(S, DecodeSPRRegisterClass(Inst, Vd, Address, Decoder)))
return MCDisassembler::Fail;
@@ -1241,8 +1254,8 @@ static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Vd = fieldFromInstruction32(Val, 8, 5);
- unsigned regs = fieldFromInstruction32(Val, 0, 8);
+ unsigned Vd = fieldFromInstruction(Val, 8, 5);
+ unsigned regs = fieldFromInstruction(Val, 0, 8);
regs = regs >> 1;
@@ -1263,8 +1276,8 @@ static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Val,
// the mask of all bits LSB-and-lower, and then xor them to create
// the mask that's all ones on [msb, lsb]. Finally we NOT it to
// create the final mask.
- unsigned msb = fieldFromInstruction32(Val, 5, 5);
- unsigned lsb = fieldFromInstruction32(Val, 0, 5);
+ unsigned msb = fieldFromInstruction(Val, 5, 5);
+ unsigned lsb = fieldFromInstruction(Val, 0, 5);
DecodeStatus S = MCDisassembler::Success;
if (lsb > msb) Check(S, MCDisassembler::SoftFail);
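The recipe in the comment above, worked as standalone arithmetic (a sketch of the same steps the decoder performs before CreateImm):

#include <cstdint>

// Ones on [msb, lsb] via two prefix masks, then invert; e.g.
// msb = 7, lsb = 4 gives ~0x000000F0 = 0xFFFFFF0F.
uint32_t bitfieldMask(unsigned msb, unsigned lsb) {
  uint32_t msbMask = (msb == 31) ? 0xFFFFFFFFu : ((1u << (msb + 1)) - 1);
  uint32_t lsbMask = (1u << lsb) - 1;   // bits strictly below lsb
  return ~(msbMask ^ lsbMask);
}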
@@ -1281,12 +1294,12 @@ static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned pred = fieldFromInstruction32(Insn, 28, 4);
- unsigned CRd = fieldFromInstruction32(Insn, 12, 4);
- unsigned coproc = fieldFromInstruction32(Insn, 8, 4);
- unsigned imm = fieldFromInstruction32(Insn, 0, 8);
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned U = fieldFromInstruction32(Insn, 23, 1);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+ unsigned CRd = fieldFromInstruction(Insn, 12, 4);
+ unsigned coproc = fieldFromInstruction(Insn, 8, 4);
+ unsigned imm = fieldFromInstruction(Insn, 0, 8);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned U = fieldFromInstruction(Insn, 23, 1);
switch (Inst.getOpcode()) {
case ARM::LDC_OFFSET:
@@ -1426,14 +1439,14 @@ DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
- unsigned imm = fieldFromInstruction32(Insn, 0, 12);
- unsigned pred = fieldFromInstruction32(Insn, 28, 4);
- unsigned reg = fieldFromInstruction32(Insn, 25, 1);
- unsigned P = fieldFromInstruction32(Insn, 24, 1);
- unsigned W = fieldFromInstruction32(Insn, 21, 1);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned imm = fieldFromInstruction(Insn, 0, 12);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+ unsigned reg = fieldFromInstruction(Insn, 25, 1);
+ unsigned P = fieldFromInstruction(Insn, 24, 1);
+ unsigned W = fieldFromInstruction(Insn, 21, 1);
// On stores, the writeback operand precedes Rt.
switch (Inst.getOpcode()) {
@@ -1476,7 +1489,7 @@ DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
ARM_AM::AddrOpc Op = ARM_AM::add;
- if (!fieldFromInstruction32(Insn, 23, 1))
+ if (!fieldFromInstruction(Insn, 23, 1))
Op = ARM_AM::sub;
bool writeback = (P == 0) || (W == 1);
@@ -1493,7 +1506,7 @@ DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn,
if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
ARM_AM::ShiftOpc Opc = ARM_AM::lsl;
- switch( fieldFromInstruction32(Insn, 5, 2)) {
+ switch (fieldFromInstruction(Insn, 5, 2)) {
case 0:
Opc = ARM_AM::lsl;
break;
@@ -1509,7 +1522,7 @@ DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn,
default:
return MCDisassembler::Fail;
}
- unsigned amt = fieldFromInstruction32(Insn, 7, 5);
+ unsigned amt = fieldFromInstruction(Insn, 7, 5);
unsigned imm = ARM_AM::getAM2Opc(Op, amt, Opc, idx_mode);
Inst.addOperand(MCOperand::CreateImm(imm));
@@ -1529,11 +1542,11 @@ static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Val, 13, 4);
- unsigned Rm = fieldFromInstruction32(Val, 0, 4);
- unsigned type = fieldFromInstruction32(Val, 5, 2);
- unsigned imm = fieldFromInstruction32(Val, 7, 5);
- unsigned U = fieldFromInstruction32(Val, 12, 1);
+ unsigned Rn = fieldFromInstruction(Val, 13, 4);
+ unsigned Rm = fieldFromInstruction(Val, 0, 4);
+ unsigned type = fieldFromInstruction(Val, 5, 2);
+ unsigned imm = fieldFromInstruction(Val, 7, 5);
+ unsigned U = fieldFromInstruction(Val, 12, 1);
ARM_AM::ShiftOpc ShOp = ARM_AM::lsl;
switch (type) {
@@ -1570,15 +1583,15 @@ DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
- unsigned type = fieldFromInstruction32(Insn, 22, 1);
- unsigned imm = fieldFromInstruction32(Insn, 8, 4);
- unsigned U = ((~fieldFromInstruction32(Insn, 23, 1)) & 1) << 8;
- unsigned pred = fieldFromInstruction32(Insn, 28, 4);
- unsigned W = fieldFromInstruction32(Insn, 21, 1);
- unsigned P = fieldFromInstruction32(Insn, 24, 1);
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned type = fieldFromInstruction(Insn, 22, 1);
+ unsigned imm = fieldFromInstruction(Insn, 8, 4);
+ unsigned U = ((~fieldFromInstruction(Insn, 23, 1)) & 1) << 8;
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+ unsigned W = fieldFromInstruction(Insn, 21, 1);
+ unsigned P = fieldFromInstruction(Insn, 24, 1);
unsigned Rt2 = Rt + 1;
bool writeback = (W == 1) || (P == 0);
@@ -1609,7 +1622,7 @@ DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
S = MCDisassembler::SoftFail;
if (Rt2 == 15)
S = MCDisassembler::SoftFail;
- if (!type && fieldFromInstruction32(Insn, 8, 4))
+ if (!type && fieldFromInstruction(Insn, 8, 4))
S = MCDisassembler::SoftFail;
break;
case ARM::STRH:
@@ -1761,8 +1774,8 @@ static DecodeStatus DecodeRFEInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned mode = fieldFromInstruction32(Insn, 23, 2);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned mode = fieldFromInstruction(Insn, 23, 2);
switch (mode) {
case 0:
@@ -1791,9 +1804,9 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned pred = fieldFromInstruction32(Insn, 28, 4);
- unsigned reglist = fieldFromInstruction32(Insn, 0, 16);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+ unsigned reglist = fieldFromInstruction(Insn, 0, 16);
if (pred == 0xF) {
switch (Inst.getOpcode()) {
@@ -1850,9 +1863,9 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst,
}
// For stores (which become SRS's), the only operand is the mode.
- if (fieldFromInstruction32(Insn, 20, 1) == 0) {
+ if (fieldFromInstruction(Insn, 20, 1) == 0) {
Inst.addOperand(
- MCOperand::CreateImm(fieldFromInstruction32(Insn, 0, 4)));
+ MCOperand::CreateImm(fieldFromInstruction(Insn, 0, 4)));
return S;
}
@@ -1873,10 +1886,10 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst,
static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
- unsigned imod = fieldFromInstruction32(Insn, 18, 2);
- unsigned M = fieldFromInstruction32(Insn, 17, 1);
- unsigned iflags = fieldFromInstruction32(Insn, 6, 3);
- unsigned mode = fieldFromInstruction32(Insn, 0, 5);
+ unsigned imod = fieldFromInstruction(Insn, 18, 2);
+ unsigned M = fieldFromInstruction(Insn, 17, 1);
+ unsigned iflags = fieldFromInstruction(Insn, 6, 3);
+ unsigned mode = fieldFromInstruction(Insn, 0, 5);
DecodeStatus S = MCDisassembler::Success;
@@ -1913,10 +1926,10 @@ static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn,
static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
- unsigned imod = fieldFromInstruction32(Insn, 9, 2);
- unsigned M = fieldFromInstruction32(Insn, 8, 1);
- unsigned iflags = fieldFromInstruction32(Insn, 5, 3);
- unsigned mode = fieldFromInstruction32(Insn, 0, 5);
+ unsigned imod = fieldFromInstruction(Insn, 9, 2);
+ unsigned M = fieldFromInstruction(Insn, 8, 1);
+ unsigned iflags = fieldFromInstruction(Insn, 5, 3);
+ unsigned mode = fieldFromInstruction(Insn, 0, 5);
DecodeStatus S = MCDisassembler::Success;
@@ -1955,13 +1968,13 @@ static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rd = fieldFromInstruction32(Insn, 8, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 8, 4);
unsigned imm = 0;
- imm |= (fieldFromInstruction32(Insn, 0, 8) << 0);
- imm |= (fieldFromInstruction32(Insn, 12, 3) << 8);
- imm |= (fieldFromInstruction32(Insn, 16, 4) << 12);
- imm |= (fieldFromInstruction32(Insn, 26, 1) << 11);
+ imm |= (fieldFromInstruction(Insn, 0, 8) << 0);
+ imm |= (fieldFromInstruction(Insn, 12, 3) << 8);
+ imm |= (fieldFromInstruction(Insn, 16, 4) << 12);
+ imm |= (fieldFromInstruction(Insn, 26, 1) << 11);
if (Inst.getOpcode() == ARM::t2MOVTi16)
if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
@@ -1979,12 +1992,12 @@ static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
- unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
unsigned imm = 0;
- imm |= (fieldFromInstruction32(Insn, 0, 12) << 0);
- imm |= (fieldFromInstruction32(Insn, 16, 4) << 12);
+ imm |= (fieldFromInstruction(Insn, 0, 12) << 0);
+ imm |= (fieldFromInstruction(Insn, 16, 4) << 12);
if (Inst.getOpcode() == ARM::MOVTi16)
if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
@@ -2005,11 +2018,11 @@ static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rd = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rn = fieldFromInstruction32(Insn, 0, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 8, 4);
- unsigned Ra = fieldFromInstruction32(Insn, 12, 4);
- unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 8, 4);
+ unsigned Ra = fieldFromInstruction(Insn, 12, 4);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
if (pred == 0xF)
return DecodeCPSInstruction(Inst, Insn, Address, Decoder);
@@ -2033,9 +2046,9 @@ static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned add = fieldFromInstruction32(Val, 12, 1);
- unsigned imm = fieldFromInstruction32(Val, 0, 12);
- unsigned Rn = fieldFromInstruction32(Val, 13, 4);
+ unsigned add = fieldFromInstruction(Val, 12, 1);
+ unsigned imm = fieldFromInstruction(Val, 0, 12);
+ unsigned Rn = fieldFromInstruction(Val, 13, 4);
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
@@ -2053,9 +2066,9 @@ static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Val, 9, 4);
- unsigned U = fieldFromInstruction32(Val, 8, 1);
- unsigned imm = fieldFromInstruction32(Val, 0, 8);
+ unsigned Rn = fieldFromInstruction(Val, 9, 4);
+ unsigned U = fieldFromInstruction(Val, 8, 1);
+ unsigned imm = fieldFromInstruction(Val, 0, 8);
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
@@ -2077,11 +2090,11 @@ static DecodeStatus
DecodeT2BInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned imm = (fieldFromInstruction32(Insn, 0, 11) << 0) |
- (fieldFromInstruction32(Insn, 11, 1) << 18) |
- (fieldFromInstruction32(Insn, 13, 1) << 17) |
- (fieldFromInstruction32(Insn, 16, 6) << 11) |
- (fieldFromInstruction32(Insn, 26, 1) << 19);
+ unsigned imm = (fieldFromInstruction(Insn, 0, 11) << 0) |
+ (fieldFromInstruction(Insn, 11, 1) << 18) |
+ (fieldFromInstruction(Insn, 13, 1) << 17) |
+ (fieldFromInstruction(Insn, 16, 6) << 11) |
+ (fieldFromInstruction(Insn, 26, 1) << 19);
if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<20>(imm<<1) + 4,
true, 4, Inst, Decoder))
Inst.addOperand(MCOperand::CreateImm(SignExtend32<20>(imm << 1)));
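SignExtend32 here is LLVM's MathExtras helper: the scattered fields are packed, doubled to a halfword-aligned offset, and sign-extended from 20 bits. Its effect, standalone (a sketch):

#include <cstdint>

// Equivalent of llvm::SignExtend32<B>: move the B-bit value to the top,
// then arithmetic-shift it back down.
template <unsigned B> int32_t signExtend32(uint32_t X) {
  return int32_t(X << (32 - B)) >> (32 - B);
}
// e.g. signExtend32<20>(0x80000) == -524288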
@@ -2093,12 +2106,12 @@ DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned pred = fieldFromInstruction32(Insn, 28, 4);
- unsigned imm = fieldFromInstruction32(Insn, 0, 24) << 2;
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+ unsigned imm = fieldFromInstruction(Insn, 0, 24) << 2;
if (pred == 0xF) {
Inst.setOpcode(ARM::BLXi);
- imm |= fieldFromInstruction32(Insn, 24, 1) << 1;
+ imm |= fieldFromInstruction(Insn, 24, 1) << 1;
if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8,
true, 4, Inst, Decoder))
Inst.addOperand(MCOperand::CreateImm(SignExtend32<26>(imm)));
@@ -2119,8 +2132,8 @@ static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rm = fieldFromInstruction32(Val, 0, 4);
- unsigned align = fieldFromInstruction32(Val, 4, 2);
+ unsigned Rm = fieldFromInstruction(Val, 0, 4);
+ unsigned align = fieldFromInstruction(Val, 4, 2);
if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
@@ -2136,12 +2149,12 @@ static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
- Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
- unsigned wb = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- Rn |= fieldFromInstruction32(Insn, 4, 2) << 4;
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned wb = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ Rn |= fieldFromInstruction(Insn, 4, 2) << 4;
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
// First output register
switch (Inst.getOpcode()) {
@@ -2410,12 +2423,12 @@ static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
- Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
- unsigned wb = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- Rn |= fieldFromInstruction32(Insn, 4, 2) << 4;
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned wb = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ Rn |= fieldFromInstruction(Insn, 4, 2) << 4;
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
// Writeback Operand
switch (Inst.getOpcode()) {
@@ -2681,12 +2694,12 @@ static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
- Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
- unsigned align = fieldFromInstruction32(Insn, 4, 1);
- unsigned size = fieldFromInstruction32(Insn, 6, 2);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned align = fieldFromInstruction(Insn, 4, 1);
+ unsigned size = fieldFromInstruction(Insn, 6, 2);
align *= (1 << size);
@@ -2726,12 +2739,12 @@ static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
- Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
- unsigned align = fieldFromInstruction32(Insn, 4, 1);
- unsigned size = 1 << fieldFromInstruction32(Insn, 6, 2);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned align = fieldFromInstruction(Insn, 4, 1);
+ unsigned size = 1 << fieldFromInstruction(Insn, 6, 2);
align *= 2*size;
switch (Inst.getOpcode()) {
@@ -2774,11 +2787,11 @@ static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
- Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
- unsigned inc = fieldFromInstruction32(Insn, 5, 1) + 1;
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned inc = fieldFromInstruction(Insn, 5, 1) + 1;
if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
@@ -2809,13 +2822,13 @@ static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
- Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
- unsigned size = fieldFromInstruction32(Insn, 6, 2);
- unsigned inc = fieldFromInstruction32(Insn, 5, 1) + 1;
- unsigned align = fieldFromInstruction32(Insn, 4, 1);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned size = fieldFromInstruction(Insn, 6, 2);
+ unsigned inc = fieldFromInstruction(Insn, 5, 1) + 1;
+ unsigned align = fieldFromInstruction(Insn, 4, 1);
if (size == 0x3) {
size = 4;
@@ -2862,14 +2875,14 @@ DecodeNEONModImmInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
- Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
- unsigned imm = fieldFromInstruction32(Insn, 0, 4);
- imm |= fieldFromInstruction32(Insn, 16, 3) << 4;
- imm |= fieldFromInstruction32(Insn, 24, 1) << 7;
- imm |= fieldFromInstruction32(Insn, 8, 4) << 8;
- imm |= fieldFromInstruction32(Insn, 5, 1) << 12;
- unsigned Q = fieldFromInstruction32(Insn, 6, 1);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned imm = fieldFromInstruction(Insn, 0, 4);
+ imm |= fieldFromInstruction(Insn, 16, 3) << 4;
+ imm |= fieldFromInstruction(Insn, 24, 1) << 7;
+ imm |= fieldFromInstruction(Insn, 8, 4) << 8;
+ imm |= fieldFromInstruction(Insn, 5, 1) << 12;
+ unsigned Q = fieldFromInstruction(Insn, 6, 1);
if (Q) {
if (!Check(S, DecodeQPRRegisterClass(Inst, Rd, Address, Decoder)))
@@ -2907,11 +2920,11 @@ static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
- Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
- Rm |= fieldFromInstruction32(Insn, 5, 1) << 4;
- unsigned size = fieldFromInstruction32(Insn, 18, 2);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ Rm |= fieldFromInstruction(Insn, 5, 1) << 4;
+ unsigned size = fieldFromInstruction(Insn, 18, 2);
if (!Check(S, DecodeQPRRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
@@ -2950,13 +2963,13 @@ static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
- Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- Rn |= fieldFromInstruction32(Insn, 7, 1) << 4;
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
- Rm |= fieldFromInstruction32(Insn, 5, 1) << 4;
- unsigned op = fieldFromInstruction32(Insn, 6, 1);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ Rn |= fieldFromInstruction(Insn, 7, 1) << 4;
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ Rm |= fieldFromInstruction(Insn, 5, 1) << 4;
+ unsigned op = fieldFromInstruction(Insn, 6, 1);
if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
@@ -2986,8 +2999,8 @@ static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned dst = fieldFromInstruction16(Insn, 8, 3);
- unsigned imm = fieldFromInstruction16(Insn, 0, 8);
+ unsigned dst = fieldFromInstruction(Insn, 8, 3);
+ unsigned imm = fieldFromInstruction(Insn, 0, 8);
if (!Check(S, DecodetGPRRegisterClass(Inst, dst, Address, Decoder)))
return MCDisassembler::Fail;
@@ -3034,8 +3047,8 @@ static DecodeStatus DecodeThumbAddrModeRR(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Val, 0, 3);
- unsigned Rm = fieldFromInstruction32(Val, 3, 3);
+ unsigned Rn = fieldFromInstruction(Val, 0, 3);
+ unsigned Rm = fieldFromInstruction(Val, 3, 3);
if (!Check(S, DecodetGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
@@ -3049,8 +3062,8 @@ static DecodeStatus DecodeThumbAddrModeIS(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Val, 0, 3);
- unsigned imm = fieldFromInstruction32(Val, 3, 5);
+ unsigned Rn = fieldFromInstruction(Val, 0, 3);
+ unsigned imm = fieldFromInstruction(Val, 3, 5);
if (!Check(S, DecodetGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
@@ -3081,9 +3094,9 @@ static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Val, 6, 4);
- unsigned Rm = fieldFromInstruction32(Val, 2, 4);
- unsigned imm = fieldFromInstruction32(Val, 0, 2);
+ unsigned Rn = fieldFromInstruction(Val, 6, 4);
+ unsigned Rm = fieldFromInstruction(Val, 2, 4);
+ unsigned imm = fieldFromInstruction(Val, 0, 2);
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
@@ -3104,13 +3117,13 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn,
case ARM::t2PLIs:
break;
default: {
- unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder)))
return MCDisassembler::Fail;
}
}
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
if (Rn == 0xF) {
switch (Inst.getOpcode()) {
case ARM::t2LDRBs:
@@ -3133,16 +3146,16 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
}
- int imm = fieldFromInstruction32(Insn, 0, 12);
- if (!fieldFromInstruction32(Insn, 23, 1)) imm *= -1;
+ int imm = fieldFromInstruction(Insn, 0, 12);
+ if (!fieldFromInstruction(Insn, 23, 1)) imm *= -1;
Inst.addOperand(MCOperand::CreateImm(imm));
return S;
}
- unsigned addrmode = fieldFromInstruction32(Insn, 4, 2);
- addrmode |= fieldFromInstruction32(Insn, 0, 4) << 2;
- addrmode |= fieldFromInstruction32(Insn, 16, 4) << 6;
+ unsigned addrmode = fieldFromInstruction(Insn, 4, 2);
+ addrmode |= fieldFromInstruction(Insn, 0, 4) << 2;
+ addrmode |= fieldFromInstruction(Insn, 16, 4) << 6;
if (!Check(S, DecodeT2AddrModeSOReg(Inst, addrmode, Address, Decoder)))
return MCDisassembler::Fail;
@@ -3151,9 +3164,14 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn,
static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
- int imm = Val & 0xFF;
- if (!(Val & 0x100)) imm *= -1;
- Inst.addOperand(MCOperand::CreateImm(imm << 2));
+ if (Val == 0)
+ Inst.addOperand(MCOperand::CreateImm(INT32_MIN));
+ else {
+ int imm = Val & 0xFF;
+
+ if (!(Val & 0x100)) imm *= -1;
+ Inst.addOperand(MCOperand::CreateImm(imm << 2));
+ }
return MCDisassembler::Success;
}
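The zero special case above reserves the all-zero encoding (U bit clear, zero offset, i.e. "#-0") and returns INT32_MIN as its sentinel so it stays distinct from a plain "#0". Standalone restatement (a sketch):

#include <cstdint>

// 9-bit field: bit 8 is the add/subtract flag, the low 8 bits scale by 4.
int32_t decodeImm8S4(uint32_t Val) {
  if (Val == 0)
    return INT32_MIN;   // sentinel for "#-0" (U = 0, imm = 0)
  int32_t Imm = Val & 0xFF;
  if (!(Val & 0x100))
    Imm = -Imm;         // U bit clear: subtracted offset
  return Imm * 4;
}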
@@ -3162,8 +3180,8 @@ static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Val, 9, 4);
- unsigned imm = fieldFromInstruction32(Val, 0, 9);
+ unsigned Rn = fieldFromInstruction(Val, 9, 4);
+ unsigned imm = fieldFromInstruction(Val, 0, 9);
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
@@ -3177,8 +3195,8 @@ static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Val, 8, 4);
- unsigned imm = fieldFromInstruction32(Val, 0, 8);
+ unsigned Rn = fieldFromInstruction(Val, 8, 4);
+ unsigned imm = fieldFromInstruction(Val, 0, 8);
if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
@@ -3205,8 +3223,8 @@ static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Val, 9, 4);
- unsigned imm = fieldFromInstruction32(Val, 0, 9);
+ unsigned Rn = fieldFromInstruction(Val, 9, 4);
+ unsigned imm = fieldFromInstruction(Val, 0, 9);
// Some instructions always use an additive offset.
switch (Inst.getOpcode()) {
@@ -3236,12 +3254,12 @@ static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned addr = fieldFromInstruction32(Insn, 0, 8);
- addr |= fieldFromInstruction32(Insn, 9, 1) << 8;
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned addr = fieldFromInstruction(Insn, 0, 8);
+ addr |= fieldFromInstruction(Insn, 9, 1) << 8;
addr |= Rn << 9;
- unsigned load = fieldFromInstruction32(Insn, 20, 1);
+ unsigned load = fieldFromInstruction(Insn, 20, 1);
if (!load) {
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
@@ -3266,8 +3284,8 @@ static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Val, 13, 4);
- unsigned imm = fieldFromInstruction32(Val, 0, 12);
+ unsigned Rn = fieldFromInstruction(Val, 13, 4);
+ unsigned imm = fieldFromInstruction(Val, 0, 12);
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
@@ -3279,7 +3297,7 @@ static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val,
static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Insn,
uint64_t Address, const void *Decoder) {
- unsigned imm = fieldFromInstruction16(Insn, 0, 7);
+ unsigned imm = fieldFromInstruction(Insn, 0, 7);
Inst.addOperand(MCOperand::CreateReg(ARM::SP));
Inst.addOperand(MCOperand::CreateReg(ARM::SP));
@@ -3293,8 +3311,8 @@ static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
DecodeStatus S = MCDisassembler::Success;
if (Inst.getOpcode() == ARM::tADDrSP) {
- unsigned Rdm = fieldFromInstruction16(Insn, 0, 3);
- Rdm |= fieldFromInstruction16(Insn, 7, 1) << 3;
+ unsigned Rdm = fieldFromInstruction(Insn, 0, 3);
+ Rdm |= fieldFromInstruction(Insn, 7, 1) << 3;
if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder)))
return MCDisassembler::Fail;
@@ -3302,7 +3320,7 @@ static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder)))
return MCDisassembler::Fail;
} else if (Inst.getOpcode() == ARM::tADDspr) {
- unsigned Rm = fieldFromInstruction16(Insn, 3, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 3, 4);
Inst.addOperand(MCOperand::CreateReg(ARM::SP));
Inst.addOperand(MCOperand::CreateReg(ARM::SP));
@@ -3315,8 +3333,8 @@ static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn,
uint64_t Address, const void *Decoder) {
- unsigned imod = fieldFromInstruction16(Insn, 4, 1) | 0x2;
- unsigned flags = fieldFromInstruction16(Insn, 0, 3);
+ unsigned imod = fieldFromInstruction(Insn, 4, 1) | 0x2;
+ unsigned flags = fieldFromInstruction(Insn, 0, 3);
Inst.addOperand(MCOperand::CreateImm(imod));
Inst.addOperand(MCOperand::CreateImm(flags));
@@ -3327,8 +3345,8 @@ static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn,
static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
- unsigned add = fieldFromInstruction32(Insn, 4, 1);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned add = fieldFromInstruction(Insn, 4, 1);
if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
@@ -3375,8 +3393,8 @@ DecodeThumbTableBranch(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
if (Rn == ARM::SP) S = MCDisassembler::SoftFail;
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
@@ -3391,9 +3409,9 @@ DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned pred = fieldFromInstruction32(Insn, 22, 4);
+ unsigned pred = fieldFromInstruction(Insn, 22, 4);
if (pred == 0xE || pred == 0xF) {
- unsigned opc = fieldFromInstruction32(Insn, 4, 28);
+ unsigned opc = fieldFromInstruction(Insn, 4, 28);
switch (opc) {
default:
return MCDisassembler::Fail;
@@ -3408,15 +3426,15 @@ DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn,
break;
}
- unsigned imm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned imm = fieldFromInstruction(Insn, 0, 4);
return DecodeMemBarrierOption(Inst, imm, Address, Decoder);
}
- unsigned brtarget = fieldFromInstruction32(Insn, 0, 11) << 1;
- brtarget |= fieldFromInstruction32(Insn, 11, 1) << 19;
- brtarget |= fieldFromInstruction32(Insn, 13, 1) << 18;
- brtarget |= fieldFromInstruction32(Insn, 16, 6) << 12;
- brtarget |= fieldFromInstruction32(Insn, 26, 1) << 20;
+ unsigned brtarget = fieldFromInstruction(Insn, 0, 11) << 1;
+ brtarget |= fieldFromInstruction(Insn, 11, 1) << 19;
+ brtarget |= fieldFromInstruction(Insn, 13, 1) << 18;
+ brtarget |= fieldFromInstruction(Insn, 16, 6) << 12;
+ brtarget |= fieldFromInstruction(Insn, 26, 1) << 20;
if (!Check(S, DecodeT2BROperand(Inst, brtarget, Address, Decoder)))
return MCDisassembler::Fail;
@@ -3431,10 +3449,10 @@ DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn,
// a splat operation or a rotation.
static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
- unsigned ctrl = fieldFromInstruction32(Val, 10, 2);
+ unsigned ctrl = fieldFromInstruction(Val, 10, 2);
if (ctrl == 0) {
- unsigned byte = fieldFromInstruction32(Val, 8, 2);
- unsigned imm = fieldFromInstruction32(Val, 0, 8);
+ unsigned byte = fieldFromInstruction(Val, 8, 2);
+ unsigned imm = fieldFromInstruction(Val, 0, 8);
switch (byte) {
case 0:
Inst.addOperand(MCOperand::CreateImm(imm));
@@ -3451,8 +3469,8 @@ static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val,
break;
}
} else {
- unsigned unrot = fieldFromInstruction32(Val, 0, 7) | 0x80;
- unsigned rot = fieldFromInstruction32(Val, 7, 5);
+ unsigned unrot = fieldFromInstruction(Val, 0, 7) | 0x80;
+ unsigned rot = fieldFromInstruction(Val, 7, 5);
unsigned imm = (unrot >> rot) | (unrot << ((32-rot)&31));
Inst.addOperand(MCOperand::CreateImm(imm));
}
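DecodeT2SOImm above implements the Thumb2 modified-immediate expansion: ctrl == 0 selects one of four byte-splat patterns, and anything else rotates 0x80|imm7 right by the 5-bit amount (whose top bits are the ctrl field itself). The same expansion, standalone (a sketch):

#include <cstdint>

uint32_t expandT2SOImm(uint32_t Val) {
  uint32_t Ctrl = (Val >> 10) & 3;
  if (Ctrl == 0) {
    uint32_t Byte = (Val >> 8) & 3;
    uint32_t Imm  = Val & 0xFF;
    switch (Byte) {
    case 0:  return Imm;                               // 000000XY
    case 1:  return (Imm << 16) | Imm;                 // 00XY00XY
    case 2:  return (Imm << 24) | (Imm << 8);          // XY00XY00
    default: return (Imm << 24) | (Imm << 16) |
                    (Imm << 8)  | Imm;                 // XYXYXYXY
    }
  }
  uint32_t Unrot = (Val & 0x7F) | 0x80;
  uint32_t Rot   = (Val >> 7) & 0x1F;
  return (Unrot >> Rot) | (Unrot << ((32 - Rot) & 31));
}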
@@ -3494,19 +3512,8 @@ static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val,
static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
- switch (Val) {
- default:
+ if (Val & ~0xf)
return MCDisassembler::Fail;
- case 0xF: // SY
- case 0xE: // ST
- case 0xB: // ISH
- case 0xA: // ISHST
- case 0x7: // NSH
- case 0x6: // NSHST
- case 0x3: // OSH
- case 0x2: // OSHST
- break;
- }
Inst.addOperand(MCOperand::CreateImm(Val));
return MCDisassembler::Success;
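This mirrors the asm-parser change earlier in the patch: instead of enumerating the eight named encodings, any 4-bit barrier option now decodes, reserved values included. The validity test reduces to:

// Accept any 4-bit encoding; values above 15 cannot be represented.
bool isValidBarrierEncoding(unsigned Val) {
  return (Val & ~0xFu) == 0;
}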
@@ -3523,9 +3530,9 @@ static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
if ((Rt & 1) || Rt == 0xE || Rn == 0xF) return MCDisassembler::Fail;
@@ -3546,10 +3553,10 @@ static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder){
DecodeStatus S = MCDisassembler::Success;
- unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
- unsigned Rt = fieldFromInstruction32(Insn, 0, 4);
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rt = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
@@ -3573,12 +3580,12 @@ static DecodeStatus DecodeLDRPreImm(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
- unsigned imm = fieldFromInstruction32(Insn, 0, 12);
- imm |= fieldFromInstruction32(Insn, 16, 4) << 13;
- imm |= fieldFromInstruction32(Insn, 23, 1) << 12;
- unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned imm = fieldFromInstruction(Insn, 0, 12);
+ imm |= fieldFromInstruction(Insn, 16, 4) << 13;
+ imm |= fieldFromInstruction(Insn, 23, 1) << 12;
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail;
@@ -3598,13 +3605,13 @@ static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
- unsigned imm = fieldFromInstruction32(Insn, 0, 12);
- imm |= fieldFromInstruction32(Insn, 16, 4) << 13;
- imm |= fieldFromInstruction32(Insn, 23, 1) << 12;
- unsigned pred = fieldFromInstruction32(Insn, 28, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned imm = fieldFromInstruction(Insn, 0, 12);
+ imm |= fieldFromInstruction(Insn, 16, 4) << 13;
+ imm |= fieldFromInstruction(Insn, 23, 1) << 12;
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail;
if (Rm == 0xF) S = MCDisassembler::SoftFail;
@@ -3626,12 +3633,12 @@ static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
- unsigned imm = fieldFromInstruction32(Insn, 0, 12);
- imm |= fieldFromInstruction32(Insn, 16, 4) << 13;
- imm |= fieldFromInstruction32(Insn, 23, 1) << 12;
- unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned imm = fieldFromInstruction(Insn, 0, 12);
+ imm |= fieldFromInstruction(Insn, 16, 4) << 13;
+ imm |= fieldFromInstruction(Insn, 23, 1) << 12;
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail;
@@ -3651,12 +3658,12 @@ static DecodeStatus DecodeSTRPreReg(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
- unsigned imm = fieldFromInstruction32(Insn, 0, 12);
- imm |= fieldFromInstruction32(Insn, 16, 4) << 13;
- imm |= fieldFromInstruction32(Insn, 23, 1) << 12;
- unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned imm = fieldFromInstruction(Insn, 0, 12);
+ imm |= fieldFromInstruction(Insn, 16, 4) << 13;
+ imm |= fieldFromInstruction(Insn, 23, 1) << 12;
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail;
@@ -3676,11 +3683,11 @@ static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
- unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
- Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
- unsigned size = fieldFromInstruction32(Insn, 10, 2);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction(Insn, 10, 2);
unsigned align = 0;
unsigned index = 0;
@@ -3688,22 +3695,22 @@ static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn,
default:
return MCDisassembler::Fail;
case 0:
- if (fieldFromInstruction32(Insn, 4, 1))
+ if (fieldFromInstruction(Insn, 4, 1))
return MCDisassembler::Fail; // UNDEFINED
- index = fieldFromInstruction32(Insn, 5, 3);
+ index = fieldFromInstruction(Insn, 5, 3);
break;
case 1:
- if (fieldFromInstruction32(Insn, 5, 1))
+ if (fieldFromInstruction(Insn, 5, 1))
return MCDisassembler::Fail; // UNDEFINED
- index = fieldFromInstruction32(Insn, 6, 2);
- if (fieldFromInstruction32(Insn, 4, 1))
+ index = fieldFromInstruction(Insn, 6, 2);
+ if (fieldFromInstruction(Insn, 4, 1))
align = 2;
break;
case 2:
- if (fieldFromInstruction32(Insn, 6, 1))
+ if (fieldFromInstruction(Insn, 6, 1))
return MCDisassembler::Fail; // UNDEFINED
- index = fieldFromInstruction32(Insn, 7, 1);
- if (fieldFromInstruction32(Insn, 4, 2) != 0)
+ index = fieldFromInstruction(Insn, 7, 1);
+ if (fieldFromInstruction(Insn, 4, 2) != 0)
align = 4;
}
@@ -3735,11 +3742,11 @@ static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
- unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
- Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
- unsigned size = fieldFromInstruction32(Insn, 10, 2);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction(Insn, 10, 2);
unsigned align = 0;
unsigned index = 0;
@@ -3747,22 +3754,22 @@ static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn,
default:
return MCDisassembler::Fail;
case 0:
- if (fieldFromInstruction32(Insn, 4, 1))
+ if (fieldFromInstruction(Insn, 4, 1))
return MCDisassembler::Fail; // UNDEFINED
- index = fieldFromInstruction32(Insn, 5, 3);
+ index = fieldFromInstruction(Insn, 5, 3);
break;
case 1:
- if (fieldFromInstruction32(Insn, 5, 1))
+ if (fieldFromInstruction(Insn, 5, 1))
return MCDisassembler::Fail; // UNDEFINED
- index = fieldFromInstruction32(Insn, 6, 2);
- if (fieldFromInstruction32(Insn, 4, 1))
+ index = fieldFromInstruction(Insn, 6, 2);
+ if (fieldFromInstruction(Insn, 4, 1))
align = 2;
break;
case 2:
- if (fieldFromInstruction32(Insn, 6, 1))
+ if (fieldFromInstruction(Insn, 6, 1))
return MCDisassembler::Fail; // UNDEFINED
- index = fieldFromInstruction32(Insn, 7, 1);
- if (fieldFromInstruction32(Insn, 4, 2) != 0)
+ index = fieldFromInstruction(Insn, 7, 1);
+ if (fieldFromInstruction(Insn, 4, 2) != 0)
align = 4;
}
@@ -3793,11 +3800,11 @@ static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
- unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
- Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
- unsigned size = fieldFromInstruction32(Insn, 10, 2);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction(Insn, 10, 2);
unsigned align = 0;
unsigned index = 0;
@@ -3806,24 +3813,24 @@ static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn,
default:
return MCDisassembler::Fail;
case 0:
- index = fieldFromInstruction32(Insn, 5, 3);
- if (fieldFromInstruction32(Insn, 4, 1))
+ index = fieldFromInstruction(Insn, 5, 3);
+ if (fieldFromInstruction(Insn, 4, 1))
align = 2;
break;
case 1:
- index = fieldFromInstruction32(Insn, 6, 2);
- if (fieldFromInstruction32(Insn, 4, 1))
+ index = fieldFromInstruction(Insn, 6, 2);
+ if (fieldFromInstruction(Insn, 4, 1))
align = 4;
- if (fieldFromInstruction32(Insn, 5, 1))
+ if (fieldFromInstruction(Insn, 5, 1))
inc = 2;
break;
case 2:
- if (fieldFromInstruction32(Insn, 5, 1))
+ if (fieldFromInstruction(Insn, 5, 1))
return MCDisassembler::Fail; // UNDEFINED
- index = fieldFromInstruction32(Insn, 7, 1);
- if (fieldFromInstruction32(Insn, 4, 1) != 0)
+ index = fieldFromInstruction(Insn, 7, 1);
+ if (fieldFromInstruction(Insn, 4, 1) != 0)
align = 8;
- if (fieldFromInstruction32(Insn, 6, 1))
+ if (fieldFromInstruction(Insn, 6, 1))
inc = 2;
break;
}
@@ -3860,11 +3867,11 @@ static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
- unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
- Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
- unsigned size = fieldFromInstruction32(Insn, 10, 2);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction(Insn, 10, 2);
unsigned align = 0;
unsigned index = 0;
@@ -3873,24 +3880,24 @@ static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn,
default:
return MCDisassembler::Fail;
case 0:
- index = fieldFromInstruction32(Insn, 5, 3);
- if (fieldFromInstruction32(Insn, 4, 1))
+ index = fieldFromInstruction(Insn, 5, 3);
+ if (fieldFromInstruction(Insn, 4, 1))
align = 2;
break;
case 1:
- index = fieldFromInstruction32(Insn, 6, 2);
- if (fieldFromInstruction32(Insn, 4, 1))
+ index = fieldFromInstruction(Insn, 6, 2);
+ if (fieldFromInstruction(Insn, 4, 1))
align = 4;
- if (fieldFromInstruction32(Insn, 5, 1))
+ if (fieldFromInstruction(Insn, 5, 1))
inc = 2;
break;
case 2:
- if (fieldFromInstruction32(Insn, 5, 1))
+ if (fieldFromInstruction(Insn, 5, 1))
return MCDisassembler::Fail; // UNDEFINED
- index = fieldFromInstruction32(Insn, 7, 1);
- if (fieldFromInstruction32(Insn, 4, 1) != 0)
+ index = fieldFromInstruction(Insn, 7, 1);
+ if (fieldFromInstruction(Insn, 4, 1) != 0)
align = 8;
- if (fieldFromInstruction32(Insn, 6, 1))
+ if (fieldFromInstruction(Insn, 6, 1))
inc = 2;
break;
}
@@ -3924,11 +3931,11 @@ static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
- unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
- Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
- unsigned size = fieldFromInstruction32(Insn, 10, 2);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction(Insn, 10, 2);
unsigned align = 0;
unsigned index = 0;
@@ -3937,22 +3944,22 @@ static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn,
default:
return MCDisassembler::Fail;
case 0:
- if (fieldFromInstruction32(Insn, 4, 1))
+ if (fieldFromInstruction(Insn, 4, 1))
return MCDisassembler::Fail; // UNDEFINED
- index = fieldFromInstruction32(Insn, 5, 3);
+ index = fieldFromInstruction(Insn, 5, 3);
break;
case 1:
- if (fieldFromInstruction32(Insn, 4, 1))
+ if (fieldFromInstruction(Insn, 4, 1))
return MCDisassembler::Fail; // UNDEFINED
- index = fieldFromInstruction32(Insn, 6, 2);
- if (fieldFromInstruction32(Insn, 5, 1))
+ index = fieldFromInstruction(Insn, 6, 2);
+ if (fieldFromInstruction(Insn, 5, 1))
inc = 2;
break;
case 2:
- if (fieldFromInstruction32(Insn, 4, 2))
+ if (fieldFromInstruction(Insn, 4, 2))
return MCDisassembler::Fail; // UNDEFINED
- index = fieldFromInstruction32(Insn, 7, 1);
- if (fieldFromInstruction32(Insn, 6, 1))
+ index = fieldFromInstruction(Insn, 7, 1);
+ if (fieldFromInstruction(Insn, 6, 1))
inc = 2;
break;
}
@@ -3994,11 +4001,11 @@ static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
- unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
- Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
- unsigned size = fieldFromInstruction32(Insn, 10, 2);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction(Insn, 10, 2);
unsigned align = 0;
unsigned index = 0;
@@ -4007,22 +4014,22 @@ static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn,
default:
return MCDisassembler::Fail;
case 0:
- if (fieldFromInstruction32(Insn, 4, 1))
+ if (fieldFromInstruction(Insn, 4, 1))
return MCDisassembler::Fail; // UNDEFINED
- index = fieldFromInstruction32(Insn, 5, 3);
+ index = fieldFromInstruction(Insn, 5, 3);
break;
case 1:
- if (fieldFromInstruction32(Insn, 4, 1))
+ if (fieldFromInstruction(Insn, 4, 1))
return MCDisassembler::Fail; // UNDEFINED
- index = fieldFromInstruction32(Insn, 6, 2);
- if (fieldFromInstruction32(Insn, 5, 1))
+ index = fieldFromInstruction(Insn, 6, 2);
+ if (fieldFromInstruction(Insn, 5, 1))
inc = 2;
break;
case 2:
- if (fieldFromInstruction32(Insn, 4, 2))
+ if (fieldFromInstruction(Insn, 4, 2))
return MCDisassembler::Fail; // UNDEFINED
- index = fieldFromInstruction32(Insn, 7, 1);
- if (fieldFromInstruction32(Insn, 6, 1))
+ index = fieldFromInstruction(Insn, 7, 1);
+ if (fieldFromInstruction(Insn, 6, 1))
inc = 2;
break;
}
@@ -4058,11 +4065,11 @@ static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
- unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
- Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
- unsigned size = fieldFromInstruction32(Insn, 10, 2);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction(Insn, 10, 2);
unsigned align = 0;
unsigned index = 0;
@@ -4071,22 +4078,22 @@ static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn,
default:
return MCDisassembler::Fail;
case 0:
- if (fieldFromInstruction32(Insn, 4, 1))
+ if (fieldFromInstruction(Insn, 4, 1))
align = 4;
- index = fieldFromInstruction32(Insn, 5, 3);
+ index = fieldFromInstruction(Insn, 5, 3);
break;
case 1:
- if (fieldFromInstruction32(Insn, 4, 1))
+ if (fieldFromInstruction(Insn, 4, 1))
align = 8;
- index = fieldFromInstruction32(Insn, 6, 2);
- if (fieldFromInstruction32(Insn, 5, 1))
+ index = fieldFromInstruction(Insn, 6, 2);
+ if (fieldFromInstruction(Insn, 5, 1))
inc = 2;
break;
case 2:
- if (fieldFromInstruction32(Insn, 4, 2))
- align = 4 << fieldFromInstruction32(Insn, 4, 2);
- index = fieldFromInstruction32(Insn, 7, 1);
- if (fieldFromInstruction32(Insn, 6, 1))
+ if (fieldFromInstruction(Insn, 4, 2))
+ align = 4 << fieldFromInstruction(Insn, 4, 2);
+ index = fieldFromInstruction(Insn, 7, 1);
+ if (fieldFromInstruction(Insn, 6, 1))
inc = 2;
break;
}
@@ -4132,11 +4139,11 @@ static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
- unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
- Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
- unsigned size = fieldFromInstruction32(Insn, 10, 2);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+ Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+ unsigned size = fieldFromInstruction(Insn, 10, 2);
unsigned align = 0;
unsigned index = 0;
@@ -4145,22 +4152,22 @@ static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn,
default:
return MCDisassembler::Fail;
case 0:
- if (fieldFromInstruction32(Insn, 4, 1))
+ if (fieldFromInstruction(Insn, 4, 1))
align = 4;
- index = fieldFromInstruction32(Insn, 5, 3);
+ index = fieldFromInstruction(Insn, 5, 3);
break;
case 1:
- if (fieldFromInstruction32(Insn, 4, 1))
+ if (fieldFromInstruction(Insn, 4, 1))
align = 8;
- index = fieldFromInstruction32(Insn, 6, 2);
- if (fieldFromInstruction32(Insn, 5, 1))
+ index = fieldFromInstruction(Insn, 6, 2);
+ if (fieldFromInstruction(Insn, 5, 1))
inc = 2;
break;
case 2:
- if (fieldFromInstruction32(Insn, 4, 2))
- align = 4 << fieldFromInstruction32(Insn, 4, 2);
- index = fieldFromInstruction32(Insn, 7, 1);
- if (fieldFromInstruction32(Insn, 6, 1))
+ if (fieldFromInstruction(Insn, 4, 2))
+ align = 4 << fieldFromInstruction(Insn, 4, 2);
+ index = fieldFromInstruction(Insn, 7, 1);
+ if (fieldFromInstruction(Insn, 6, 1))
inc = 2;
break;
}
@@ -4196,11 +4203,11 @@ static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn,
static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
- unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 5, 1);
- unsigned pred = fieldFromInstruction32(Insn, 28, 4);
- Rm |= fieldFromInstruction32(Insn, 0, 4) << 1;
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rt2 = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 5, 1);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+ Rm |= fieldFromInstruction(Insn, 0, 4) << 1;
if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F)
S = MCDisassembler::SoftFail;
@@ -4222,11 +4229,11 @@ static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn,
static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
- unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4);
- unsigned Rm = fieldFromInstruction32(Insn, 5, 1);
- unsigned pred = fieldFromInstruction32(Insn, 28, 4);
- Rm |= fieldFromInstruction32(Insn, 0, 4) << 1;
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rt2 = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 5, 1);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+ Rm |= fieldFromInstruction(Insn, 0, 4) << 1;
if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F)
S = MCDisassembler::SoftFail;
@@ -4248,8 +4255,8 @@ static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn,
static DecodeStatus DecodeIT(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned pred = fieldFromInstruction16(Insn, 4, 4);
- unsigned mask = fieldFromInstruction16(Insn, 0, 4);
+ unsigned pred = fieldFromInstruction(Insn, 4, 4);
+ unsigned mask = fieldFromInstruction(Insn, 0, 4);
if (pred == 0xF) {
pred = 0xE;
@@ -4271,13 +4278,13 @@ DecodeT2LDRDPreInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
- unsigned Rt2 = fieldFromInstruction32(Insn, 8, 4);
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned addr = fieldFromInstruction32(Insn, 0, 8);
- unsigned W = fieldFromInstruction32(Insn, 21, 1);
- unsigned U = fieldFromInstruction32(Insn, 23, 1);
- unsigned P = fieldFromInstruction32(Insn, 24, 1);
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rt2 = fieldFromInstruction(Insn, 8, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned addr = fieldFromInstruction(Insn, 0, 8);
+ unsigned W = fieldFromInstruction(Insn, 21, 1);
+ unsigned U = fieldFromInstruction(Insn, 23, 1);
+ unsigned P = fieldFromInstruction(Insn, 24, 1);
bool writeback = (W == 1) | (P == 0);
addr |= (U << 8) | (Rn << 9);
@@ -4308,13 +4315,13 @@ DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
- unsigned Rt2 = fieldFromInstruction32(Insn, 8, 4);
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned addr = fieldFromInstruction32(Insn, 0, 8);
- unsigned W = fieldFromInstruction32(Insn, 21, 1);
- unsigned U = fieldFromInstruction32(Insn, 23, 1);
- unsigned P = fieldFromInstruction32(Insn, 24, 1);
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rt2 = fieldFromInstruction(Insn, 8, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned addr = fieldFromInstruction(Insn, 0, 8);
+ unsigned W = fieldFromInstruction(Insn, 21, 1);
+ unsigned U = fieldFromInstruction(Insn, 23, 1);
+ unsigned P = fieldFromInstruction(Insn, 24, 1);
bool writeback = (W == 1) | (P == 0);
addr |= (U << 8) | (Rn << 9);
@@ -4340,13 +4347,13 @@ DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn,
static DecodeStatus DecodeT2Adr(MCInst &Inst, uint32_t Insn,
uint64_t Address, const void *Decoder) {
- unsigned sign1 = fieldFromInstruction32(Insn, 21, 1);
- unsigned sign2 = fieldFromInstruction32(Insn, 23, 1);
+ unsigned sign1 = fieldFromInstruction(Insn, 21, 1);
+ unsigned sign2 = fieldFromInstruction(Insn, 23, 1);
if (sign1 != sign2) return MCDisassembler::Fail;
- unsigned Val = fieldFromInstruction32(Insn, 0, 8);
- Val |= fieldFromInstruction32(Insn, 12, 3) << 8;
- Val |= fieldFromInstruction32(Insn, 26, 1) << 11;
+ unsigned Val = fieldFromInstruction(Insn, 0, 8);
+ Val |= fieldFromInstruction(Insn, 12, 3) << 8;
+ Val |= fieldFromInstruction(Insn, 26, 1) << 11;
Val |= sign1 << 12;
Inst.addOperand(MCOperand::CreateImm(SignExtend32<13>(Val)));
@@ -4366,10 +4373,10 @@ static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, uint32_t Val,
static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
- unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
- unsigned Rt2 = fieldFromInstruction32(Insn, 0, 4);
- unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
- unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+ unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+ unsigned Rt2 = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
if (pred == 0xF)
return DecodeCPSInstruction(Inst, Insn, Address, Decoder);
@@ -4393,12 +4400,12 @@ static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn,
static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
- unsigned Vd = (fieldFromInstruction32(Insn, 12, 4) << 0);
- Vd |= (fieldFromInstruction32(Insn, 22, 1) << 4);
- unsigned Vm = (fieldFromInstruction32(Insn, 0, 4) << 0);
- Vm |= (fieldFromInstruction32(Insn, 5, 1) << 4);
- unsigned imm = fieldFromInstruction32(Insn, 16, 6);
- unsigned cmode = fieldFromInstruction32(Insn, 8, 4);
+ unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 0);
+ Vd |= (fieldFromInstruction(Insn, 22, 1) << 4);
+ unsigned Vm = (fieldFromInstruction(Insn, 0, 4) << 0);
+ Vm |= (fieldFromInstruction(Insn, 5, 1) << 4);
+ unsigned imm = fieldFromInstruction(Insn, 16, 6);
+ unsigned cmode = fieldFromInstruction(Insn, 8, 4);
DecodeStatus S = MCDisassembler::Success;
@@ -4421,12 +4428,12 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
- unsigned Vd = (fieldFromInstruction32(Insn, 12, 4) << 0);
- Vd |= (fieldFromInstruction32(Insn, 22, 1) << 4);
- unsigned Vm = (fieldFromInstruction32(Insn, 0, 4) << 0);
- Vm |= (fieldFromInstruction32(Insn, 5, 1) << 4);
- unsigned imm = fieldFromInstruction32(Insn, 16, 6);
- unsigned cmode = fieldFromInstruction32(Insn, 8, 4);
+ unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 0);
+ Vd |= (fieldFromInstruction(Insn, 22, 1) << 4);
+ unsigned Vm = (fieldFromInstruction(Insn, 0, 4) << 0);
+ Vm |= (fieldFromInstruction(Insn, 5, 1) << 4);
+ unsigned imm = fieldFromInstruction(Insn, 16, 6);
+ unsigned cmode = fieldFromInstruction(Insn, 8, 4);
DecodeStatus S = MCDisassembler::Success;
@@ -4451,13 +4458,13 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- unsigned Rn = fieldFromInstruction32(Val, 16, 4);
- unsigned Rt = fieldFromInstruction32(Val, 12, 4);
- unsigned Rm = fieldFromInstruction32(Val, 0, 4);
- Rm |= (fieldFromInstruction32(Val, 23, 1) << 4);
- unsigned Cond = fieldFromInstruction32(Val, 28, 4);
+ unsigned Rn = fieldFromInstruction(Val, 16, 4);
+ unsigned Rt = fieldFromInstruction(Val, 12, 4);
+ unsigned Rm = fieldFromInstruction(Val, 0, 4);
+ Rm |= (fieldFromInstruction(Val, 23, 1) << 4);
+ unsigned Cond = fieldFromInstruction(Val, 28, 4);
- if (fieldFromInstruction32(Val, 8, 4) != 0 || Rn == Rt)
+ if (fieldFromInstruction(Val, 8, 4) != 0 || Rn == Rt)
S = MCDisassembler::SoftFail;
if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder)))
@@ -4479,11 +4486,11 @@ static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val,
DecodeStatus S = MCDisassembler::Success;
- unsigned CRm = fieldFromInstruction32(Val, 0, 4);
- unsigned opc1 = fieldFromInstruction32(Val, 4, 4);
- unsigned cop = fieldFromInstruction32(Val, 8, 4);
- unsigned Rt = fieldFromInstruction32(Val, 12, 4);
- unsigned Rt2 = fieldFromInstruction32(Val, 16, 4);
+ unsigned CRm = fieldFromInstruction(Val, 0, 4);
+ unsigned opc1 = fieldFromInstruction(Val, 4, 4);
+ unsigned cop = fieldFromInstruction(Val, 8, 4);
+ unsigned Rt = fieldFromInstruction(Val, 12, 4);
+ unsigned Rt2 = fieldFromInstruction(Val, 16, 4);
if ((cop & ~0x1) == 0xa)
return MCDisassembler::Fail;
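
The rename running through this file replaces the width-specific helpers fieldFromInstruction32/fieldFromInstruction16 with a single width-generic extractor. A minimal sketch of what such a helper can look like, assuming it is templated over the instruction word type (the real definition ships with the fixed-length decoder support and may differ in detail):

    #include <cassert>

    template <typename InsnType>
    static InsnType fieldFromInstruction(InsnType Insn, unsigned StartBit,
                                         unsigned NumBits) {
      assert(StartBit + NumBits <= sizeof(InsnType) * 8 &&
             "instruction field out of bounds");
      if (NumBits == sizeof(InsnType) * 8)
        return Insn; // whole-word field; shifting by the full width is UB
      InsnType Mask = ((InsnType)1 << NumBits) - 1;
      return (Insn >> StartBit) & Mask;
    }

With one template, a 16-bit Thumb halfword and a 32-bit ARM word go through the same code path, which is what lets every call site above drop the width suffix.
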
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 2f6b1b0..8b9109e 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -792,6 +792,25 @@ void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum,
llvm_unreachable("Unhandled PC-relative pseudo-instruction!");
}
+void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+
+ if (MO.isExpr()) {
+ O << *MO.getExpr();
+ return;
+ }
+
+ int32_t OffImm = (int32_t)MO.getImm();
+
+ if (OffImm == INT32_MIN)
+ O << "#-0";
+ else if (OffImm < 0)
+ O << "#-" << -OffImm;
+ else
+ O << "#" << OffImm;
+}
+
void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
O << "#" << MI->getOperand(OpNum).getImm() * 4;
@@ -953,12 +972,17 @@ void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI,
O << "[" << getRegisterName(MO1.getReg());
- int32_t OffImm = (int32_t)MO2.getImm() / 4;
+ int32_t OffImm = (int32_t)MO2.getImm();
+
+ assert(((OffImm & 0x3) == 0) && "Not a valid immediate!");
+
// Don't print +0.
- if (OffImm < 0)
- O << ", #-" << -OffImm * 4;
+ if (OffImm == INT32_MIN)
+ O << ", #-0";
+ else if (OffImm < 0)
+ O << ", #-" << -OffImm;
else if (OffImm > 0)
- O << ", #" << OffImm * 4;
+ O << ", #" << OffImm;
O << "]";
}
@@ -990,15 +1014,17 @@ void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI,
unsigned OpNum,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- int32_t OffImm = (int32_t)MO1.getImm() / 4;
+ int32_t OffImm = (int32_t)MO1.getImm();
+
+ assert(((OffImm & 0x3) == 0) && "Not a valid immediate!");
+
// Don't print +0.
- if (OffImm != 0) {
- O << ", ";
- if (OffImm < 0)
- O << "#-" << -OffImm * 4;
- else if (OffImm > 0)
- O << "#" << OffImm * 4;
- }
+ if (OffImm == INT32_MIN)
+ O << ", #-0";
+ else if (OffImm < 0)
+ O << ", #-" << -OffImm;
+ else if (OffImm > 0)
+ O << ", #" << OffImm;
}
void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI,
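
The INT32_MIN special cases added above all serve the same purpose: ARM addressing modes distinguish "add zero" from "subtract zero", the operand encodes the latter as INT32_MIN, and negating INT32_MIN would overflow a 32-bit int. A standalone illustration of the printing rule (my example, not patch code):

    #include <cstdint>
    #include <cstdio>

    // Print an offset the way the patched printers do: the sentinel
    // INT32_MIN means "subtract zero" and must never be negated.
    static void printOffset(int32_t OffImm) {
      if (OffImm == INT32_MIN)
        std::printf(", #-0");
      else if (OffImm < 0)
        std::printf(", #-%d", -OffImm);
      else if (OffImm > 0)
        std::printf(", #%d", OffImm);
    }

Note also that the imm8s4 printers now expect the operand pre-scaled (hence the OffImm & 0x3 assertions) instead of multiplying by 4 at print time.
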
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index 8acb7ee..73d7bfd 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -73,6 +73,7 @@ public:
void printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printThumbSRImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printThumbITMask(const MCInst *MI, unsigned OpNum, raw_ostream &O);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index ae11be8..de48a0e 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -120,14 +120,22 @@ namespace ARM_MB {
// The Memory Barrier Option constants map directly to the 4-bit encoding of
// the option field for memory barrier operations.
enum MemBOpt {
- SY = 15,
- ST = 14,
- ISH = 11,
- ISHST = 10,
- NSH = 7,
- NSHST = 6,
+ RESERVED_0 = 0,
+ RESERVED_1 = 1,
+ OSHST = 2,
OSH = 3,
- OSHST = 2
+ RESERVED_4 = 4,
+ RESERVED_5 = 5,
+ NSHST = 6,
+ NSH = 7,
+ RESERVED_8 = 8,
+ RESERVED_9 = 9,
+ ISHST = 10,
+ ISH = 11,
+ RESERVED_12 = 12,
+ RESERVED_13 = 13,
+ ST = 14,
+ SY = 15
};
inline static const char *MemBOptToString(unsigned val) {
@@ -135,92 +143,24 @@ namespace ARM_MB {
default: llvm_unreachable("Unknown memory operation");
case SY: return "sy";
case ST: return "st";
+ case RESERVED_13: return "#0xd";
+ case RESERVED_12: return "#0xc";
case ISH: return "ish";
case ISHST: return "ishst";
+ case RESERVED_9: return "#0x9";
+ case RESERVED_8: return "#0x8";
case NSH: return "nsh";
case NSHST: return "nshst";
+ case RESERVED_5: return "#0x5";
+ case RESERVED_4: return "#0x4";
case OSH: return "osh";
case OSHST: return "oshst";
+ case RESERVED_1: return "#0x1";
+ case RESERVED_0: return "#0x0";
}
}
} // namespace ARM_MB
-/// getARMRegisterNumbering - Given the enum value for some register, e.g.
-/// ARM::LR, return the number that it corresponds to (e.g. 14).
-inline static unsigned getARMRegisterNumbering(unsigned Reg) {
- using namespace ARM;
- switch (Reg) {
- default:
- llvm_unreachable("Unknown ARM register!");
- case R0: case S0: case D0: case Q0: return 0;
- case R1: case S1: case D1: case Q1: return 1;
- case R2: case S2: case D2: case Q2: return 2;
- case R3: case S3: case D3: case Q3: return 3;
- case R4: case S4: case D4: case Q4: return 4;
- case R5: case S5: case D5: case Q5: return 5;
- case R6: case S6: case D6: case Q6: return 6;
- case R7: case S7: case D7: case Q7: return 7;
- case R8: case S8: case D8: case Q8: return 8;
- case R9: case S9: case D9: case Q9: return 9;
- case R10: case S10: case D10: case Q10: return 10;
- case R11: case S11: case D11: case Q11: return 11;
- case R12: case S12: case D12: case Q12: return 12;
- case SP: case S13: case D13: case Q13: return 13;
- case LR: case S14: case D14: case Q14: return 14;
- case PC: case S15: case D15: case Q15: return 15;
-
- case S16: case D16: return 16;
- case S17: case D17: return 17;
- case S18: case D18: return 18;
- case S19: case D19: return 19;
- case S20: case D20: return 20;
- case S21: case D21: return 21;
- case S22: case D22: return 22;
- case S23: case D23: return 23;
- case S24: case D24: return 24;
- case S25: case D25: return 25;
- case S26: case D26: return 26;
- case S27: case D27: return 27;
- case S28: case D28: return 28;
- case S29: case D29: return 29;
- case S30: case D30: return 30;
- case S31: case D31: return 31;
-
- // Composite registers use the regnum of the first register in the list.
- /* Q0 */ case D0_D2: return 0;
- case D1_D2: case D1_D3: return 1;
- /* Q1 */ case D2_D4: return 2;
- case D3_D4: case D3_D5: return 3;
- /* Q2 */ case D4_D6: return 4;
- case D5_D6: case D5_D7: return 5;
- /* Q3 */ case D6_D8: return 6;
- case D7_D8: case D7_D9: return 7;
- /* Q4 */ case D8_D10: return 8;
- case D9_D10: case D9_D11: return 9;
- /* Q5 */ case D10_D12: return 10;
- case D11_D12: case D11_D13: return 11;
- /* Q6 */ case D12_D14: return 12;
- case D13_D14: case D13_D15: return 13;
- /* Q7 */ case D14_D16: return 14;
- case D15_D16: case D15_D17: return 15;
- /* Q8 */ case D16_D18: return 16;
- case D17_D18: case D17_D19: return 17;
- /* Q9 */ case D18_D20: return 18;
- case D19_D20: case D19_D21: return 19;
- /* Q10 */ case D20_D22: return 20;
- case D21_D22: case D21_D23: return 21;
- /* Q11 */ case D22_D24: return 22;
- case D23_D24: case D23_D25: return 23;
- /* Q12 */ case D24_D26: return 24;
- case D25_D26: case D25_D27: return 25;
- /* Q13 */ case D26_D28: return 26;
- case D27_D28: case D27_D29: return 27;
- /* Q14 */ case D28_D30: return 28;
- case D29_D30: case D29_D31: return 29;
- /* Q15 */
- }
-}
-
/// isARMLowRegister - Returns true if the register is a low register (r0-r7).
///
static inline bool isARMLowRegister(unsigned Reg) {
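
With the RESERVED_* entries added above, every value of the 4-bit barrier option field maps to an enumerator and a printable spelling, so a disassembler can print reserved encodings instead of rejecting them. A hypothetical round-trip check, assuming the ARM_MB declarations above are in scope:

    #include <cstdio>

    // Every 4-bit option now has a spelling; reserved values print as raw
    // immediates like "#0x9" rather than hitting the llvm_unreachable default.
    void dumpBarrierOptions() {
      for (unsigned Val = 0; Val != 16; ++Val)
        std::printf("%2u -> %s\n", Val, ARM_MB::MemBOptToString(Val));
    }
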
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 1964bcd..94f1082 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -18,6 +18,7 @@
#include "MCTargetDesc/ARMMCExpr.h"
#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -38,11 +39,12 @@ class ARMMCCodeEmitter : public MCCodeEmitter {
void operator=(const ARMMCCodeEmitter &); // DO NOT IMPLEMENT
const MCInstrInfo &MCII;
const MCSubtargetInfo &STI;
+ const MCContext &CTX;
public:
ARMMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
MCContext &ctx)
- : MCII(mcii), STI(sti) {
+ : MCII(mcii), STI(sti), CTX(ctx) {
}
~ARMMCCodeEmitter() {}
@@ -405,7 +407,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups) const {
if (MO.isReg()) {
unsigned Reg = MO.getReg();
- unsigned RegNo = getARMRegisterNumbering(Reg);
+ unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg);
// Q registers are encoded as 2x their register number.
switch (Reg) {
@@ -434,7 +436,7 @@ EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx, unsigned &Reg,
const MCOperand &MO = MI.getOperand(OpIdx);
const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
- Reg = getARMRegisterNumbering(MO.getReg());
+ Reg = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
int32_t SImm = MO1.getImm();
bool isAdd = true;
@@ -641,8 +643,8 @@ getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
return Val;
}
-/// getAdrLabelOpValue - Return encoding info for 12-bit immediate ADR label
-/// target.
+/// getAdrLabelOpValue - Return encoding info for 12-bit shifted-immediate
+/// ADR label target.
uint32_t ARMMCCodeEmitter::
getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const {
@@ -652,15 +654,23 @@ getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
Fixups);
int32_t offset = MO.getImm();
uint32_t Val = 0x2000;
- if (offset < 0) {
+
+ if (offset == INT32_MIN) {
+ Val = 0x1000;
+ offset = 0;
+ } else if (offset < 0) {
Val = 0x1000;
offset *= -1;
}
- Val |= offset;
+
+ int SoImmVal = ARM_AM::getSOImmVal(offset);
+ assert(SoImmVal != -1 && "Not a valid so_imm value!");
+
+ Val |= SoImmVal;
return Val;
}
-/// getAdrLabelOpValue - Return encoding info for 12-bit immediate ADR label
+/// getT2AdrLabelOpValue - Return encoding info for 12-bit immediate ADR label
/// target.
uint32_t ARMMCCodeEmitter::
getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
@@ -670,14 +680,16 @@ getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_adr_pcrel_12,
Fixups);
int32_t Val = MO.getImm();
- if (Val < 0) {
+ if (Val == INT32_MIN)
+ Val = 0x1000;
+ else if (Val < 0) {
Val *= -1;
Val |= 0x1000;
}
return Val;
}
-/// getAdrLabelOpValue - Return encoding info for 8-bit immediate ADR label
+/// getThumbAdrLabelOpValue - Return encoding info for 8-bit immediate ADR label
/// target.
uint32_t ARMMCCodeEmitter::
getThumbAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
@@ -699,8 +711,8 @@ getThumbAddrModeRegRegOpValue(const MCInst &MI, unsigned OpIdx,
// {2-0} = Rn
const MCOperand &MO1 = MI.getOperand(OpIdx);
const MCOperand &MO2 = MI.getOperand(OpIdx + 1);
- unsigned Rn = getARMRegisterNumbering(MO1.getReg());
- unsigned Rm = getARMRegisterNumbering(MO2.getReg());
+ unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO1.getReg());
+ unsigned Rm = CTX.getRegisterInfo().getEncodingValue(MO2.getReg());
return (Rm << 3) | Rn;
}
@@ -716,7 +728,7 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
// If the first operand isn't a register, we have a label reference.
const MCOperand &MO = MI.getOperand(OpIdx);
if (!MO.isReg()) {
- Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC.
+ Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC.
Imm12 = 0;
isAdd = false; // 'U' bit is set as part of the fixup.
@@ -796,7 +808,7 @@ getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx,
// If the first operand isn't a register, we have a label reference.
const MCOperand &MO = MI.getOperand(OpIdx);
if (!MO.isReg()) {
- Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC.
+ Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC.
Imm8 = 0;
isAdd = false; // 'U' bit is set as part of the fixup.
@@ -832,7 +844,7 @@ getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx,
// {7-0} = imm8
const MCOperand &MO = MI.getOperand(OpIdx);
const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
- unsigned Reg = getARMRegisterNumbering(MO.getReg());
+ unsigned Reg = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
unsigned Imm8 = MO1.getImm();
return (Reg << 8) | Imm8;
}
@@ -915,8 +927,8 @@ getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx,
const MCOperand &MO = MI.getOperand(OpIdx);
const MCOperand &MO1 = MI.getOperand(OpIdx+1);
const MCOperand &MO2 = MI.getOperand(OpIdx+2);
- unsigned Rn = getARMRegisterNumbering(MO.getReg());
- unsigned Rm = getARMRegisterNumbering(MO1.getReg());
+ unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
+ unsigned Rm = CTX.getRegisterInfo().getEncodingValue(MO1.getReg());
unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm());
bool isAdd = ARM_AM::getAM2Op(MO2.getImm()) == ARM_AM::add;
ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(MO2.getImm());
@@ -946,7 +958,7 @@ getAddrMode2OpValue(const MCInst &MI, unsigned OpIdx,
// {12} isAdd
// {11-0} imm12/Rm
const MCOperand &MO = MI.getOperand(OpIdx);
- unsigned Rn = getARMRegisterNumbering(MO.getReg());
+ unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
uint32_t Binary = getAddrMode2OffsetOpValue(MI, OpIdx + 1, Fixups);
Binary |= Rn << 14;
return Binary;
@@ -969,7 +981,7 @@ getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx,
ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(Imm);
Binary <<= 7; // Shift amount is bits [11:7]
Binary |= getShiftOp(ShOp) << 5; // Shift type is bits [6:5]
- Binary |= getARMRegisterNumbering(MO.getReg()); // Rm is bits [3:0]
+ Binary |= CTX.getRegisterInfo().getEncodingValue(MO.getReg()); // Rm is bits [3:0]
}
return Binary | (isAdd << 12) | (isReg << 13);
}
@@ -982,7 +994,7 @@ getPostIdxRegOpValue(const MCInst &MI, unsigned OpIdx,
const MCOperand &MO = MI.getOperand(OpIdx);
const MCOperand &MO1 = MI.getOperand(OpIdx+1);
bool isAdd = MO1.getImm() != 0;
- return getARMRegisterNumbering(MO.getReg()) | (isAdd << 4);
+ return CTX.getRegisterInfo().getEncodingValue(MO.getReg()) | (isAdd << 4);
}
uint32_t ARMMCCodeEmitter::
@@ -1000,7 +1012,7 @@ getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx,
uint32_t Imm8 = ARM_AM::getAM3Offset(Imm);
// if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8
if (!isImm)
- Imm8 = getARMRegisterNumbering(MO.getReg());
+ Imm8 = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
return Imm8 | (isAdd << 8) | (isImm << 9);
}
@@ -1018,7 +1030,7 @@ getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx,
// If the first operand isn't a register, we have a label reference.
if (!MO.isReg()) {
- unsigned Rn = getARMRegisterNumbering(ARM::PC); // Rn is PC.
+ unsigned Rn = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC.
assert(MO.isExpr() && "Unexpected machine operand type!");
const MCExpr *Expr = MO.getExpr();
@@ -1028,14 +1040,14 @@ getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx,
++MCNumCPRelocations;
return (Rn << 9) | (1 << 13);
}
- unsigned Rn = getARMRegisterNumbering(MO.getReg());
+ unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
unsigned Imm = MO2.getImm();
bool isAdd = ARM_AM::getAM3Op(Imm) == ARM_AM::add;
bool isImm = MO1.getReg() == 0;
uint32_t Imm8 = ARM_AM::getAM3Offset(Imm);
// if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8
if (!isImm)
- Imm8 = getARMRegisterNumbering(MO1.getReg());
+ Imm8 = CTX.getRegisterInfo().getEncodingValue(MO1.getReg());
return (Rn << 9) | Imm8 | (isAdd << 8) | (isImm << 13);
}
@@ -1063,7 +1075,7 @@ getAddrModeISOpValue(const MCInst &MI, unsigned OpIdx,
// {2-0} = Rn
const MCOperand &MO = MI.getOperand(OpIdx);
const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
- unsigned Rn = getARMRegisterNumbering(MO.getReg());
+ unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
unsigned Imm5 = MO1.getImm();
return ((Imm5 & 0x1f) << 3) | Rn;
}
@@ -1090,7 +1102,7 @@ getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx,
// If the first operand isn't a register, we have a label reference.
const MCOperand &MO = MI.getOperand(OpIdx);
if (!MO.isReg()) {
- Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC.
+ Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC.
Imm8 = 0;
isAdd = false; // 'U' bit is handled as part of the fixup.
@@ -1136,7 +1148,7 @@ getSORegRegOpValue(const MCInst &MI, unsigned OpIdx,
ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm());
// Encode Rm.
- unsigned Binary = getARMRegisterNumbering(MO.getReg());
+ unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
// Encode the shift opcode.
unsigned SBits = 0;
@@ -1161,7 +1173,7 @@ getSORegRegOpValue(const MCInst &MI, unsigned OpIdx,
// Encode the shift operation Rs.
// Encode Rs bit[11:8].
assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0);
- return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift);
+ return Binary | (CTX.getRegisterInfo().getEncodingValue(Rs) << ARMII::RegRsShift);
}
unsigned ARMMCCodeEmitter::
@@ -1180,7 +1192,7 @@ getSORegImmOpValue(const MCInst &MI, unsigned OpIdx,
ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm());
// Encode Rm.
- unsigned Binary = getARMRegisterNumbering(MO.getReg());
+ unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
// Encode the shift opcode.
unsigned SBits = 0;
@@ -1219,9 +1231,9 @@ getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum,
// Encoded as [Rn, Rm, imm].
// FIXME: Needs fixup support.
- unsigned Value = getARMRegisterNumbering(MO1.getReg());
+ unsigned Value = CTX.getRegisterInfo().getEncodingValue(MO1.getReg());
Value <<= 4;
- Value |= getARMRegisterNumbering(MO2.getReg());
+ Value |= CTX.getRegisterInfo().getEncodingValue(MO2.getReg());
Value <<= 2;
Value |= MO3.getImm();
@@ -1235,7 +1247,7 @@ getT2AddrModeImm8OpValue(const MCInst &MI, unsigned OpNum,
const MCOperand &MO2 = MI.getOperand(OpNum+1);
// FIXME: Needs fixup support.
- unsigned Value = getARMRegisterNumbering(MO1.getReg());
+ unsigned Value = CTX.getRegisterInfo().getEncodingValue(MO1.getReg());
// Even though the immediate is 8 bits long, we need 9 bits in order
// to represent the (inverse of the) sign bit.
@@ -1297,7 +1309,7 @@ getT2SORegOpValue(const MCInst &MI, unsigned OpIdx,
ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm());
// Encode Rm.
- unsigned Binary = getARMRegisterNumbering(MO.getReg());
+ unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg());
// Encode the shift opcode.
unsigned SBits = 0;
@@ -1353,7 +1365,7 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op,
if (SPRRegs || DPRRegs) {
// VLDM/VSTM
- unsigned RegNo = getARMRegisterNumbering(Reg);
+ unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg);
unsigned NumRegs = (MI.getNumOperands() - Op) & 0xff;
Binary |= (RegNo & 0x1f) << 8;
if (SPRRegs)
@@ -1362,7 +1374,7 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op,
Binary |= NumRegs * 2;
} else {
for (unsigned I = Op, E = MI.getNumOperands(); I < E; ++I) {
- unsigned RegNo = getARMRegisterNumbering(MI.getOperand(I).getReg());
+ unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(MI.getOperand(I).getReg());
Binary |= 1 << RegNo;
}
}
@@ -1378,7 +1390,7 @@ getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op,
const MCOperand &Reg = MI.getOperand(Op);
const MCOperand &Imm = MI.getOperand(Op + 1);
- unsigned RegNo = getARMRegisterNumbering(Reg.getReg());
+ unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg());
unsigned Align = 0;
switch (Imm.getImm()) {
@@ -1401,7 +1413,7 @@ getAddrMode6OneLane32AddressOpValue(const MCInst &MI, unsigned Op,
const MCOperand &Reg = MI.getOperand(Op);
const MCOperand &Imm = MI.getOperand(Op + 1);
- unsigned RegNo = getARMRegisterNumbering(Reg.getReg());
+ unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg());
unsigned Align = 0;
switch (Imm.getImm()) {
@@ -1427,7 +1439,7 @@ getAddrMode6DupAddressOpValue(const MCInst &MI, unsigned Op,
const MCOperand &Reg = MI.getOperand(Op);
const MCOperand &Imm = MI.getOperand(Op + 1);
- unsigned RegNo = getARMRegisterNumbering(Reg.getReg());
+ unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg());
unsigned Align = 0;
switch (Imm.getImm()) {
@@ -1446,7 +1458,7 @@ getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups) const {
const MCOperand &MO = MI.getOperand(Op);
if (MO.getReg() == 0) return 0x0D;
- return getARMRegisterNumbering(MO.getReg());
+ return CTX.getRegisterInfo().getEncodingValue(MO.getReg());
}
unsigned ARMMCCodeEmitter::
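
Every change in this file follows one pattern: the hand-maintained getARMRegisterNumbering() switch (deleted from ARMBaseInfo.h above) gives way to the encoding stored in the TableGen'd register tables, reached through the MCContext the emitter now keeps. The shape of the lookup, as an illustrative sketch (the accessor chain matches the call sites above):

    #include "llvm/MC/MCContext.h"
    #include "llvm/MC/MCRegisterInfo.h"

    // getEncodingValue() returns the HWEncoding that TableGen records for
    // each register, so ARM no longer needs a parallel hand-written table.
    unsigned encodeRegister(const llvm::MCContext &Ctx, unsigned Reg) {
      return Ctx.getRegisterInfo().getEncodingValue(Reg);
    }
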
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 78faf59..a51e0fa 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -408,15 +408,22 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
// Even when it's not a scattered relocation, movw/movt always uses
// a PAIR relocation.
if (Type == macho::RIT_ARM_Half) {
- // The other-half value only gets populated for the movt relocation.
+ // The other-half value only gets populated for the movt and movw
+ // relocation entries.
uint32_t Value = 0;
switch ((unsigned)Fixup.getKind()) {
default: break;
+ case ARM::fixup_arm_movw_lo16:
+ case ARM::fixup_arm_movw_lo16_pcrel:
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movw_lo16_pcrel:
+ Value = (FixedValue >> 16) & 0xffff;
+ break;
case ARM::fixup_arm_movt_hi16:
case ARM::fixup_arm_movt_hi16_pcrel:
case ARM::fixup_t2_movt_hi16:
case ARM::fixup_t2_movt_hi16_pcrel:
- Value = FixedValue;
+ Value = FixedValue & 0xffff;
break;
}
macho::RelocationEntry MREPair;
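
The fix above makes the movw/movt PAIR entries symmetric: each half-relocation now records the other half of the 32-bit constant, so the linker can reassemble the full value when it relocates either instruction. In isolation (my illustration, not patch code):

    #include <cstdint>

    // movw encodes the low half, so its pair entry carries the high half;
    // movt encodes the high half, so its pair entry carries the low half.
    uint32_t otherHalfForMovw(uint32_t FixedValue) {
      return (FixedValue >> 16) & 0xffff;
    }
    uint32_t otherHalfForMovt(uint32_t FixedValue) {
      return FixedValue & 0xffff;
    }
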
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 2097bb9..e9e20dd 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -563,48 +563,6 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
return Offset == 0;
}
-/// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the
-/// two-address instruction inserted by the two-address pass.
-void
-Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI,
- MachineInstr *UseMI,
- const TargetRegisterInfo &TRI) const {
- if (SrcMI->getOpcode() != ARM::tMOVr || SrcMI->getOperand(1).isKill())
- return;
-
- unsigned PredReg = 0;
- ARMCC::CondCodes CC = getInstrPredicate(UseMI, PredReg);
- if (CC == ARMCC::AL || PredReg != ARM::CPSR)
- return;
-
- // Schedule the copy so it doesn't come between previous instructions
- // and UseMI which can form an IT block.
- unsigned SrcReg = SrcMI->getOperand(1).getReg();
- ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
- MachineBasicBlock *MBB = UseMI->getParent();
- MachineBasicBlock::iterator MBBI = SrcMI;
- unsigned NumInsts = 0;
- while (--MBBI != MBB->begin()) {
- if (MBBI->isDebugValue())
- continue;
-
- MachineInstr *NMI = &*MBBI;
- ARMCC::CondCodes NCC = getInstrPredicate(NMI, PredReg);
- if (!(NCC == CC || NCC == OCC) ||
- NMI->modifiesRegister(SrcReg, &TRI) ||
- NMI->modifiesRegister(ARM::CPSR, &TRI))
- break;
- if (++NumInsts == 4)
- // Too many in a row!
- return;
- }
-
- if (NumInsts) {
- MBB->remove(SrcMI);
- MBB->insert(++MBBI, SrcMI);
- }
-}
-
ARMCC::CondCodes
llvm::getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
unsigned Opc = MI->getOpcode();
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index 0911f8a..2cdcd06 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -57,11 +57,6 @@ public:
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
- /// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the
- /// two-address instruction inserted by the two-address pass.
- void scheduleTwoAddrSource(MachineInstr *SrcMI, MachineInstr *UseMI,
- const TargetRegisterInfo &TRI) const;
-
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index c8e757b..4ddcd38 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -285,14 +285,14 @@ void CppWriter::printLinkageType(GlobalValue::LinkageTypes LT) {
Out << "GlobalValue::LinkerPrivateLinkage"; break;
case GlobalValue::LinkerPrivateWeakLinkage:
Out << "GlobalValue::LinkerPrivateWeakLinkage"; break;
- case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
- Out << "GlobalValue::LinkerPrivateWeakDefAutoLinkage"; break;
case GlobalValue::AvailableExternallyLinkage:
Out << "GlobalValue::AvailableExternallyLinkage "; break;
case GlobalValue::LinkOnceAnyLinkage:
Out << "GlobalValue::LinkOnceAnyLinkage "; break;
case GlobalValue::LinkOnceODRLinkage:
Out << "GlobalValue::LinkOnceODRLinkage "; break;
+ case GlobalValue::LinkOnceODRAutoHideLinkage:
+ Out << "GlobalValue::LinkOnceODRAutoHideLinkage"; break;
case GlobalValue::WeakAnyLinkage:
Out << "GlobalValue::WeakAnyLinkage"; break;
case GlobalValue::WeakODRLinkage:
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 1357cc5..d756aec 100644
--- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -328,7 +328,10 @@ CountValue *HexagonHardwareLoops::getTripCount(MachineLoop *L) const {
// can get a useful trip count. The trip count can
// be either a register or an immediate. The location
// of the value depends upon the type (reg or imm).
- while ((IV_Opnd = IV_Opnd->getNextOperandForReg())) {
+ for (MachineRegisterInfo::reg_iterator
+ RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end();
+ RI != RE; ++RI) {
+ IV_Opnd = &RI.getOperand();
const MachineInstr *MI = IV_Opnd->getParent();
if (L->contains(MI) && isCompareEqualsImm(MI)) {
const MachineOperand &MO = MI->getOperand(2);
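
The loop rewrite above replaces the removed MachineOperand::getNextOperandForReg() chain with iteration over the register's use/def list in MachineRegisterInfo. The general idiom, sketched with an assumed MRI pointer to the function's MachineRegisterInfo:

    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/MachineRegisterInfo.h"

    // Visit every operand that mentions Reg; each iterator position is one
    // MachineOperand, and the owning instruction is recovered from it.
    void visitUses(llvm::MachineRegisterInfo *MRI, unsigned Reg) {
      using namespace llvm;
      for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(Reg),
                                             RE = MRI->reg_end();
           RI != RE; ++RI) {
        MachineOperand &MO = RI.getOperand();
        const MachineInstr *UserMI = MO.getParent();
        (void)UserMI; // ... examine UserMI and MO here ...
      }
    }
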
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td
index c7be5ce..c0c0df6 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.td
+++ b/lib/Target/Hexagon/HexagonInstrInfo.td
@@ -2580,22 +2580,16 @@ let isCall = 1, neverHasSideEffects = 1,
}
// Tail Calls.
-let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
- Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
- R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1 in {
def TCRETURNtg : JInst<(outs), (ins calltarget:$dst),
"jump $dst // TAILCALL", []>;
}
-let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
- Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
- R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1 in {
def TCRETURNtext : JInst<(outs), (ins calltarget:$dst),
"jump $dst // TAILCALL", []>;
}
-let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
- Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
- R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1 in {
def TCRETURNR : JInst<(outs), (ins IntRegs:$dst),
"jumpr $dst // TAILCALL", []>;
}
diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp
index 5d087db..4bacb8f 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -40,28 +40,27 @@ EnableIEEERndNear(
HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS):
HexagonGenSubtargetInfo(TT, CPU, FS),
- HexagonArchVersion(V2),
CPUString(CPU.str()) {
- ParseSubtargetFeatures(CPU, FS);
- switch(HexagonArchVersion) {
- case HexagonSubtarget::V2:
- break;
- case HexagonSubtarget::V3:
- EnableV3 = true;
- break;
- case HexagonSubtarget::V4:
- break;
- case HexagonSubtarget::V5:
- break;
- default:
- // If the programmer has not specified a Hexagon version, default
- // to -mv4.
+ // If the programmer has not specified a Hexagon version, default to -mv4.
+ if (CPUString.empty())
CPUString = "hexagonv4";
- HexagonArchVersion = HexagonSubtarget::V4;
- break;
+
+ if (CPUString == "hexagonv2") {
+ HexagonArchVersion = V2;
+ } else if (CPUString == "hexagonv3") {
+ EnableV3 = true;
+ HexagonArchVersion = V3;
+ } else if (CPUString == "hexagonv4") {
+ HexagonArchVersion = V4;
+ } else if (CPUString == "hexagonv5") {
+ HexagonArchVersion = V5;
+ } else {
+ llvm_unreachable("Unrecognized Hexagon processor version");
}
+ ParseSubtargetFeatures(CPUString, FS);
+
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUString);
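
The rewritten constructor above derives the architecture version from the CPU string before calling ParseSubtargetFeatures, rather than after, so the "hexagonv4" default actually takes part in feature parsing. As an aside, the mapping itself could be phrased with llvm::StringSwitch; a sketch with a stand-in enum (the patch keeps the explicit chain, which also lets it set EnableV3):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/StringSwitch.h"

    enum ArchVersion { V2, V3, V4, V5 }; // stand-in for the subtarget's enum

    static ArchVersion archFromCPU(llvm::StringRef CPU) {
      // Default(V4) stands in for the empty-string default; the real code
      // instead reports llvm_unreachable on unrecognized names.
      return llvm::StringSwitch<ArchVersion>(CPU)
          .Case("hexagonv2", V2)
          .Case("hexagonv3", V3)
          .Case("hexagonv5", V5)
          .Default(V4);
    }
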
diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp
index 786a0c5..05f6fa6 100644
--- a/lib/Target/Mangler.cpp
+++ b/lib/Target/Mangler.cpp
@@ -183,8 +183,7 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
ManglerPrefixTy PrefixTy = Mangler::Default;
if (GV->hasPrivateLinkage() || isImplicitlyPrivate)
PrefixTy = Mangler::Private;
- else if (GV->hasLinkerPrivateLinkage() || GV->hasLinkerPrivateWeakLinkage() ||
- GV->hasLinkerPrivateWeakDefAutoLinkage())
+ else if (GV->hasLinkerPrivateLinkage() || GV->hasLinkerPrivateWeakLinkage())
PrefixTy = Mangler::LinkerPrivate;
// If this global has a name, handle it simply.
diff --git a/lib/Target/Mips/AsmParser/CMakeLists.txt b/lib/Target/Mips/AsmParser/CMakeLists.txt
index 6c7343b..28f5219 100644
--- a/lib/Target/Mips/AsmParser/CMakeLists.txt
+++ b/lib/Target/Mips/AsmParser/CMakeLists.txt
@@ -1,3 +1,4 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
add_llvm_library(LLVMMipsAsmParser
MipsAsmParser.cpp
)
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 58b5590..43bd345 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -11,11 +11,20 @@
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/MathExtras.h"
using namespace llvm;
namespace {
class MipsAsmParser : public MCTargetAsmParser {
+
+#define GET_ASSEMBLER_HEADER
+#include "MipsGenAsmMatcher.inc"
+
bool MatchAndEmitInstruction(SMLoc IDLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCStreamer &Out);
@@ -23,10 +32,11 @@ class MipsAsmParser : public MCTargetAsmParser {
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
bool ParseInstruction(StringRef Name, SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
bool ParseDirective(AsmToken DirectiveID);
+ OperandMatchResultTy parseMemOperand(SmallVectorImpl<MCParsedAsmOperand*>&);
public:
MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
: MCTargetAsmParser() {
@@ -35,6 +45,57 @@ public:
};
}
+namespace {
+
+/// MipsOperand - Instances of this class represent a parsed Mips machine
+/// instruction.
+class MipsOperand : public MCParsedAsmOperand {
+ enum KindTy {
+ k_CondCode,
+ k_CoprocNum,
+ k_Immediate,
+ k_Memory,
+ k_PostIndexRegister,
+ k_Register,
+ k_Token
+ } Kind;
+
+ MipsOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+public:
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ llvm_unreachable("unimplemented!");
+ }
+ void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+ llvm_unreachable("unimplemented!");
+ }
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ llvm_unreachable("unimplemented!");
+ }
+ void addMemOperands(MCInst &Inst, unsigned N) const {
+ llvm_unreachable("unimplemented!");
+ }
+
+ bool isReg() const { return Kind == k_Register; }
+ bool isImm() const { return Kind == k_Immediate; }
+ bool isToken() const { return Kind == k_Token; }
+ bool isMem() const { return Kind == k_Memory; }
+
+ StringRef getToken() const {
+ assert(Kind == k_Token && "Invalid access!");
+ return "";
+ }
+
+ unsigned getReg() const {
+ assert((Kind == k_Register) && "Invalid access!");
+ return 0;
+ }
+
+ virtual void print(raw_ostream &OS) const {
+ llvm_unreachable("unimplemented!");
+ }
+};
+}
+
bool MipsAsmParser::
MatchAndEmitInstruction(SMLoc IDLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
@@ -58,6 +119,11 @@ ParseDirective(AsmToken DirectiveID) {
return true;
}
+MipsAsmParser::OperandMatchResultTy MipsAsmParser::
+ parseMemOperand(SmallVectorImpl<MCParsedAsmOperand*>&) {
+ return MatchOperand_ParseFail;
+}
+
extern "C" void LLVMInitializeMipsAsmParser() {
RegisterMCAsmParser<MipsAsmParser> X(TheMipsTarget);
RegisterMCAsmParser<MipsAsmParser> Y(TheMipselTarget);
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index e9a228c..f535c50 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -10,13 +10,18 @@ tablegen(LLVM MipsGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM MipsGenCallingConv.inc -gen-callingconv)
tablegen(LLVM MipsGenSubtargetInfo.inc -gen-subtarget)
tablegen(LLVM MipsGenEDInfo.inc -gen-enhanced-disassembly-info)
+tablegen(LLVM MipsGenAsmMatcher.inc -gen-asm-matcher)
add_public_tablegen_target(MipsCommonTableGen)
add_llvm_target(MipsCodeGen
+ Mips16FrameLowering.cpp
+ Mips16InstrInfo.cpp
+ Mips16RegisterInfo.cpp
MipsAnalyzeImmediate.cpp
MipsAsmPrinter.cpp
MipsCodeEmitter.cpp
MipsDelaySlotFiller.cpp
+ MipsELFWriterInfo.cpp
MipsJITInfo.cpp
MipsInstrInfo.cpp
MipsISelDAGToDAG.cpp
@@ -26,6 +31,9 @@ add_llvm_target(MipsCodeGen
MipsMCInstLower.cpp
MipsMachineFunction.cpp
MipsRegisterInfo.cpp
+ MipsSEFrameLowering.cpp
+ MipsSEInstrInfo.cpp
+ MipsSERegisterInfo.cpp
MipsSubtarget.cpp
MipsTargetMachine.cpp
MipsTargetObjectFile.cpp
diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index 042b456..aa57472 100644
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -16,6 +16,7 @@
#include "MipsRegisterInfo.h"
#include "llvm/MC/EDInstInfo.h"
#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/MC/MCSubtargetInfo.h"
@@ -274,7 +275,8 @@ MipsDisassembler::getInstruction(MCInst &instr,
return MCDisassembler::Fail;
// Calling the auto-generated decoder function.
- Result = decodeMipsInstruction32(instr, Insn, Address, this, STI);
+ Result = decodeInstruction(DecoderTableMips32, instr, Insn, Address,
+ this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
return Result;
@@ -298,13 +300,15 @@ Mips64Disassembler::getInstruction(MCInst &instr,
return MCDisassembler::Fail;
// Calling the auto-generated decoder function.
- Result = decodeMips64Instruction32(instr, Insn, Address, this, STI);
+ Result = decodeInstruction(DecoderTableMips6432, instr, Insn, Address,
+ this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
return Result;
}
// If we fail to decode in Mips64 decoder space we can try in Mips32
- Result = decodeMipsInstruction32(instr, Insn, Address, this, STI);
+ Result = decodeInstruction(DecoderTableMips32, instr, Insn, Address,
+ this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
return Result;
@@ -379,8 +383,8 @@ static DecodeStatus DecodeMem(MCInst &Inst,
uint64_t Address,
const void *Decoder) {
int Offset = SignExtend32<16>(Insn & 0xffff);
- unsigned Reg = fieldFromInstruction32(Insn, 16, 5);
- unsigned Base = fieldFromInstruction32(Insn, 21, 5);
+ unsigned Reg = fieldFromInstruction(Insn, 16, 5);
+ unsigned Base = fieldFromInstruction(Insn, 21, 5);
Reg = getReg(Decoder, Mips::CPURegsRegClassID, Reg);
Base = getReg(Decoder, Mips::CPURegsRegClassID, Base);
@@ -401,8 +405,8 @@ static DecodeStatus DecodeFMem(MCInst &Inst,
uint64_t Address,
const void *Decoder) {
int Offset = SignExtend32<16>(Insn & 0xffff);
- unsigned Reg = fieldFromInstruction32(Insn, 16, 5);
- unsigned Base = fieldFromInstruction32(Insn, 21, 5);
+ unsigned Reg = fieldFromInstruction(Insn, 16, 5);
+ unsigned Base = fieldFromInstruction(Insn, 21, 5);
Reg = getReg(Decoder, Mips::FGR64RegClassID, Reg);
Base = getReg(Decoder, Mips::CPURegsRegClassID, Base);
@@ -484,7 +488,7 @@ static DecodeStatus DecodeJumpTarget(MCInst &Inst,
uint64_t Address,
const void *Decoder) {
- unsigned JumpOffset = fieldFromInstruction32(Insn, 0, 26) << 2;
+ unsigned JumpOffset = fieldFromInstruction(Insn, 0, 26) << 2;
Inst.addOperand(MCOperand::CreateImm(JumpOffset));
return MCDisassembler::Success;
}
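
Both getInstruction() changes above move from per-target generated entry points to the generic decodeInstruction() declared in MCFixedLenDisassembler.h, with the decoder table passed explicitly. That makes the Mips64 fallback read as a plain table chain; a condensed sketch of the pattern inside getInstruction():

    // Try the Mips64 table first, then fall back to the base Mips32 table
    // through the same generic entry point (table names come from the
    // generated MipsGenDisassemblerTables.inc).
    DecodeStatus Result = decodeInstruction(DecoderTableMips6432, instr,
                                            Insn, Address, this, STI);
    if (Result == MCDisassembler::Fail)
      Result = decodeInstruction(DecoderTableMips32, instr, Insn, Address,
                                 this, STI);
    if (Result != MCDisassembler::Fail)
      Size = 4;
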
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 6fe0c11..18961fd 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -35,6 +35,7 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
return 0;
case FK_GPRel_4:
case FK_Data_4:
+ case FK_Data_8:
case Mips::fixup_Mips_LO16:
case Mips::fixup_Mips_GPOFF_HI:
case Mips::fixup_Mips_GPOFF_LO:
@@ -59,9 +60,17 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
break;
case Mips::fixup_Mips_HI16:
case Mips::fixup_Mips_GOT_Local:
- // Get the higher 16-bits. Also add 1 if bit 15 is 1.
+ // Get the second 16 bits. Also add 1 if bit 15 is 1.
Value = ((Value + 0x8000) >> 16) & 0xffff;
break;
+ case Mips::fixup_Mips_HIGHER:
+ // Get the third 16 bits.
+ Value = ((Value + 0x80008000LL) >> 32) & 0xffff;
+ break;
+ case Mips::fixup_Mips_HIGHEST:
+ // Get the fourth 16 bits.
+ Value = ((Value + 0x800080008000LL) >> 48) & 0xffff;
+ break;
}
return Value;
@@ -168,7 +177,9 @@ public:
{ "fixup_Mips_GPOFF_LO", 0, 16, 0 },
{ "fixup_Mips_GOT_PAGE", 0, 16, 0 },
{ "fixup_Mips_GOT_OFST", 0, 16, 0 },
- { "fixup_Mips_GOT_DISP", 0, 16, 0 }
+ { "fixup_Mips_GOT_DISP", 0, 16, 0 },
+ { "fixup_Mips_HIGHER", 0, 16, 0 },
+ { "fixup_Mips_HIGHEST", 0, 16, 0 }
};
if (Kind < FirstTargetFixupKind)
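
The new HIGHER/HIGHEST adjustments extend the existing %hi rounding trick upward: each constant adds half of every lower 16-bit field, so a set bit 15/31/47 carries into the field being extracted. A self-contained check of the identity (my example, not patch code):

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t V = 0x123480005678ffffULL;
      int16_t lo      = (int16_t)(V & 0xffff);
      int16_t hi      = (int16_t)(((V + 0x8000) >> 16) & 0xffff);
      int16_t higher  = (int16_t)(((V + 0x80008000ULL) >> 32) & 0xffff);
      int16_t highest = (int16_t)(((V + 0x800080008000ULL) >> 48) & 0xffff);
      // Sign-extend each piece and add it back at its position; the carry
      // baked into the rounding constants makes this reconstruct V.
      uint64_t R = ((uint64_t)(int64_t)highest << 48) +
                   ((uint64_t)(int64_t)higher << 32) +
                   ((uint64_t)(int64_t)hi << 16) +
                   (uint64_t)(int64_t)lo;
      std::printf("%s\n", R == V ? "round-trips" : "mismatch");
      return 0;
    }
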
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index 77c1524..b8489ca 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -34,7 +34,8 @@ namespace {
class MipsELFObjectWriter : public MCELFObjectTargetWriter {
public:
- MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI, bool _isN64);
+ MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI,
+ bool _isN64, bool IsLittleEndian);
virtual ~MipsELFObjectWriter();
@@ -53,7 +54,7 @@ namespace {
}
MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI,
- bool _isN64)
+ bool _isN64, bool IsLittleEndian)
: MCELFObjectTargetWriter(_is64Bit, OSABI, ELF::EM_MIPS,
/*HasRelocationAddend*/ false,
/*IsN64*/ _isN64) {}
@@ -103,6 +104,9 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
case FK_Data_4:
Type = ELF::R_MIPS_32;
break;
+ case FK_Data_8:
+ Type = ELF::R_MIPS_64;
+ break;
case FK_GPRel_4:
Type = ELF::R_MIPS_GPREL32;
break;
@@ -169,6 +173,12 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
Type = setRType2((unsigned)ELF::R_MIPS_SUB, Type);
Type = setRType3((unsigned)ELF::R_MIPS_LO16, Type);
break;
+ case Mips::fixup_Mips_HIGHER:
+ Type = ELF::R_MIPS_HIGHER;
+ break;
+ case Mips::fixup_Mips_HIGHEST:
+ Type = ELF::R_MIPS_HIGHEST;
+ break;
}
return Type;
}
@@ -265,6 +275,7 @@ MCObjectWriter *llvm::createMipsELFObjectWriter(raw_ostream &OS,
bool IsLittleEndian,
bool Is64Bit) {
MCELFObjectTargetWriter *MOTW = new MipsELFObjectWriter(Is64Bit, OSABI,
- (Is64Bit) ? true : false);
+ Is64Bit,
+ IsLittleEndian);
return createELFObjectWriter(MOTW, OS, IsLittleEndian);
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
index f5cbbd5..77faec5 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -110,6 +110,12 @@ namespace Mips {
// resulting in - R_MIPS_GOT_DISP
fixup_Mips_GOT_DISP,
+ // resulting in - R_MIPS_HIGHER
+ fixup_Mips_HIGHER,
+
+ // resulting in - R_MIPS_HIGHEST
+ fixup_Mips_HIGHEST,
+
// Marker
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index ff3b3a7..8dab62d 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -255,6 +255,12 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
case MCSymbolRefExpr::VK_Mips_TPREL_LO:
FixupKind = Mips::fixup_Mips_TPREL_LO;
break;
+ case MCSymbolRefExpr::VK_Mips_HIGHER:
+ FixupKind = Mips::fixup_Mips_HIGHER;
+ break;
+ case MCSymbolRefExpr::VK_Mips_HIGHEST:
+ FixupKind = Mips::fixup_Mips_HIGHEST;
+ break;
} // switch
Fixups.push_back(MCFixup::Create(0, MO.getExpr(), MCFixupKind(FixupKind)));
diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile
index 596f071..93de517 100644
--- a/lib/Target/Mips/Makefile
+++ b/lib/Target/Mips/Makefile
@@ -16,7 +16,9 @@ BUILT_SOURCES = MipsGenRegisterInfo.inc MipsGenInstrInfo.inc \
MipsGenAsmWriter.inc MipsGenCodeEmitter.inc \
MipsGenDAGISel.inc MipsGenCallingConv.inc \
MipsGenSubtargetInfo.inc MipsGenMCCodeEmitter.inc \
- MipsGenEDInfo.inc MipsGenDisassemblerTables.inc
+ MipsGenEDInfo.inc MipsGenDisassemblerTables.inc \
+ MipsGenAsmMatcher.inc
+
DIRS = InstPrinter Disassembler AsmParser TargetInfo MCTargetDesc
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index 8548ae0..7cec531 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -44,6 +44,8 @@ def FeatureN64 : SubtargetFeature<"n64", "MipsABI", "N64",
"Enable n64 ABI">;
def FeatureEABI : SubtargetFeature<"eabi", "MipsABI", "EABI",
"Enable eabi ABI">;
+def FeatureAndroid : SubtargetFeature<"android", "IsAndroid", "true",
+ "Target is android">;
def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU",
"true", "Enable vector FPU instructions.">;
def FeatureSEInReg : SubtargetFeature<"seinreg", "HasSEInReg", "true",
@@ -93,9 +95,20 @@ def MipsAsmWriter : AsmWriter {
bit isMCAsmWriter = 1;
}
+def MipsAsmParser : AsmParser {
+ let ShouldEmitMatchRegisterName = 0;
+}
+
+def MipsAsmParserVariant : AsmParserVariant {
+ int Variant = 0;
+
+ // Recognize hard coded registers.
+ string RegisterPrefix = "$";
+}
+
def Mips : Target {
let InstructionSet = MipsInstrInfo;
-
+ let AssemblyParsers = [MipsAsmParser];
let AssemblyWriters = [MipsAsmWriter];
+ let AssemblyParserVariants = [MipsAsmParserVariant];
}
-
diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp
new file mode 100644
index 0000000..030042f
--- /dev/null
+++ b/lib/Target/Mips/Mips16FrameLowering.cpp
@@ -0,0 +1,87 @@
+//===-- Mips16FrameLowering.cpp - Mips16 Frame Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips16 implementation of the TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Mips16FrameLowering.h"
+#include "MipsInstrInfo.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+void Mips16FrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MipsInstrInfo &TII =
+ *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo());
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ uint64_t StackSize = MFI->getStackSize();
+
+ // No need to allocate space on the stack.
+ if (StackSize == 0 && !MFI->adjustsStack()) return;
+
+ // Adjust stack.
+ if (isInt<16>(-StackSize))
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::SaveRaF16)).addImm(StackSize);
+}
+
+void Mips16FrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MipsInstrInfo &TII =
+ *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo());
+ DebugLoc dl = MBBI->getDebugLoc();
+ uint64_t StackSize = MFI->getStackSize();
+
+ if (!StackSize)
+ return;
+
+ // Adjust stack.
+ if (isInt<16>(StackSize))
+    // Assumes the stack size is a multiple of 8.
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::RestoreRaF16)).addImm(StackSize);
+}
+
+bool Mips16FrameLowering::
+spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ // FIXME: implement.
+ return true;
+}
+
+bool
+Mips16FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ // FIXME: implement.
+ return true;
+}
+
+void Mips16FrameLowering::
+processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+}
+
+const MipsFrameLowering *
+llvm::createMips16FrameLowering(const MipsSubtarget &ST) {
+ return new Mips16FrameLowering(ST);
+}
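
Both the prologue and the epilogue above guard Save/Restore emission with isInt<16>, since the frame-size immediate of SaveRaF16/RestoreRaF16 is limited; larger frames are simply not handled at this stage. A self-contained sketch of the check (mirroring llvm::isInt, which the file pulls in through its headers):

    #include <cstdint>

    // Equivalent of llvm::isInt<N>: does the value fit in an N-bit
    // two's-complement immediate?
    template <unsigned N> static bool isIntN(int64_t x) {
      return x >= -(INT64_C(1) << (N - 1)) && x < (INT64_C(1) << (N - 1));
    }

    // The prologue checks the *negated* stack size, as it would be encoded
    // by an "addiu sp, sp, -size" style adjustment; larger frames would need
    // a multi-instruction sequence (left unimplemented here, as in the patch).
    static bool frameFitsSave(uint64_t StackSize) {
      return isIntN<16>(-(int64_t)StackSize);
    }
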
diff --git a/lib/Target/Mips/Mips16FrameLowering.h b/lib/Target/Mips/Mips16FrameLowering.h
new file mode 100644
index 0000000..25cc37b
--- /dev/null
+++ b/lib/Target/Mips/Mips16FrameLowering.h
@@ -0,0 +1,43 @@
+//===-- Mips16FrameLowering.h - Mips16 frame lowering ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Mips16 implementation of the TargetFrameLowering
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPS16_FRAMEINFO_H
+#define MIPS16_FRAMEINFO_H
+
+#include "MipsFrameLowering.h"
+
+namespace llvm {
+class Mips16FrameLowering : public MipsFrameLowering {
+public:
+ explicit Mips16FrameLowering(const MipsSubtarget &STI)
+ : MipsFrameLowering(STI) {}
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp
new file mode 100644
index 0000000..2bc286b
--- /dev/null
+++ b/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -0,0 +1,132 @@
+//===-- Mips16InstrInfo.cpp - Mips16 Instruction Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips16 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Mips16InstrInfo.h"
+#include "MipsTargetMachine.h"
+#include "MipsMachineFunction.h"
+#include "InstPrinter/MipsInstPrinter.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+
+using namespace llvm;
+
+Mips16InstrInfo::Mips16InstrInfo(MipsTargetMachine &tm)
+ : MipsInstrInfo(tm, /* FIXME: set mips16 unconditional br */ 0),
+ RI(*tm.getSubtargetImpl(), *this) {}
+
+const MipsRegisterInfo &Mips16InstrInfo::getRegisterInfo() const {
+ return RI;
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned Mips16InstrInfo::
+isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const
+{
+ return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned Mips16InstrInfo::
+isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const
+{
+ return 0;
+}
+
+void Mips16InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ unsigned Opc = 0, ZeroReg = 0;
+
+ if (Mips::CPURegsRegClass.contains(DestReg)) { // Copy to CPU Reg.
+ if (Mips::CPURegsRegClass.contains(SrcReg))
+ Opc = Mips::Mov32R16;
+ }
+
+ assert(Opc && "Cannot copy registers");
+
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc));
+
+ if (DestReg)
+ MIB.addReg(DestReg, RegState::Define);
+
+ if (ZeroReg)
+ MIB.addReg(ZeroReg);
+
+ if (SrcReg)
+ MIB.addReg(SrcReg, getKillRegState(KillSrc));
+}
+
+void Mips16InstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ assert(false && "Implement this function.");
+}
+
+void Mips16InstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ assert(false && "Implement this function.");
+}
+
+bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
+ MachineBasicBlock &MBB = *MI->getParent();
+
+ switch(MI->getDesc().getOpcode()) {
+ default:
+ return false;
+ case Mips::RetRA16:
+ ExpandRetRA16(MBB, MI, Mips::JrRa16);
+ break;
+ }
+
+ MBB.erase(MI);
+ return true;
+}
+
+/// GetOppositeBranchOpc - Return the inverse of the specified
+/// opcode, e.g. turning BEQ to BNE.
+unsigned Mips16InstrInfo::GetOppositeBranchOpc(unsigned Opc) const {
+ assert(false && "Implement this function.");
+ return 0;
+}
+
+unsigned Mips16InstrInfo::GetAnalyzableBrOpc(unsigned Opc) const {
+ return 0;
+}
+
+void Mips16InstrInfo::ExpandRetRA16(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned Opc) const {
+ BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
+}
+
+const MipsInstrInfo *llvm::createMips16InstrInfo(MipsTargetMachine &TM) {
+ return new Mips16InstrInfo(TM);
+}
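
expandPostRAPseudo is the only nontrivial hook so far: it rewrites the RetRA16 pseudo into a real JrRa16 and erases the pseudo. A toy model of that expand-then-erase shape, with stand-in opcodes rather than LLVM's:

    #include <list>

    // Toy post-RA pseudo expansion: emit the real instruction in place of a
    // pseudo, then erase the pseudo. Opcodes are stand-ins, not LLVM's.
    enum Opcode { RetRA16, JrRa16, Other };
    struct Instr { Opcode Op; };

    static bool expandPostRAPseudo(std::list<Instr> &MBB,
                                   std::list<Instr>::iterator MI) {
      switch (MI->Op) {
      default:
        return false;                    // Not a pseudo we know how to expand.
      case RetRA16:
        MBB.insert(MI, Instr{JrRa16});   // Emit the real return before it.
        break;
      }
      MBB.erase(MI);                     // Remove the pseudo itself.
      return true;
    }
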
diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h
new file mode 100644
index 0000000..260c5b6
--- /dev/null
+++ b/lib/Target/Mips/Mips16InstrInfo.h
@@ -0,0 +1,76 @@
+//===-- Mips16InstrInfo.h - Mips16 Instruction Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips16 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPS16INSTRUCTIONINFO_H
+#define MIPS16INSTRUCTIONINFO_H
+
+#include "MipsInstrInfo.h"
+#include "Mips16RegisterInfo.h"
+
+namespace llvm {
+
+class Mips16InstrInfo : public MipsInstrInfo {
+ const Mips16RegisterInfo RI;
+
+public:
+ explicit Mips16InstrInfo(MipsTargetMachine &TM);
+
+ virtual const MipsRegisterInfo &getRegisterInfo() const;
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+ /// the source reg along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+
+ virtual unsigned GetOppositeBranchOpc(unsigned Opc) const;
+
+private:
+ virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const;
+
+ void ExpandRetRA16(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned Opc) const;
+};
+
+}
+
+#endif
diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td
index c852042..94cf984 100644
--- a/lib/Target/Mips/Mips16InstrInfo.td
+++ b/lib/Target/Mips/Mips16InstrInfo.td
@@ -11,10 +11,6 @@
//
//===----------------------------------------------------------------------===//
-def uimm5 : Operand<i8> {
- let DecoderMethod= "DecodeSimm16";
-}
-
//
// RRR-type instruction format
//
@@ -46,9 +42,32 @@ class FEXT_RI16_ins<bits<5> _op, string asmstr,
class FEXT_RI16_PC_ins<bits<5> _op, string asmstr, InstrItinClass itin>:
FEXT_RI16_ins_base<_op, asmstr, "\t$rx, $$pc, $imm", itin>;
+
+class FEXT_2RI16_ins<bits<5> _op, string asmstr,
+ InstrItinClass itin>:
+ FEXT_RI16<_op, (outs CPU16Regs:$rx), (ins CPU16Regs:$rx_, simm16:$imm),
+ !strconcat(asmstr, "\t$rx, $imm"), [], itin> {
+ let Constraints = "$rx_ = $rx";
+}
+
+
//
// RR-type instruction format
//
+
+class FRR16_ins<bits<5> f, string asmstr, InstrItinClass itin> :
+ FRR16<f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry),
+ !strconcat(asmstr, "\t$rx, $ry"), [], itin> {
+}
+
+class FRxRxRy16_ins<bits<5> f, string asmstr,
+ InstrItinClass itin> :
+ FRR16<f, (outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry),
+ !strconcat(asmstr, "\t$rz, $ry"),
+ [], itin> {
+ let Constraints = "$rx = $rz";
+}
+
let rx=0 in
class FRR16_JALRC_RA_only_ins<bits<1> nd_, bits<1> l_,
string asmstr, InstrItinClass itin>:
@@ -64,11 +83,16 @@ class FEXT_RRI16_mem_ins<bits<5> op, string asmstr, Operand MemOpnd,
FEXT_RRI16<op, (outs CPU16Regs:$ry), (ins MemOpnd:$addr),
!strconcat(asmstr, "\t$ry, $addr"), [], itin>;
+class FEXT_RRI16_mem2_ins<bits<5> op, string asmstr, Operand MemOpnd,
+ InstrItinClass itin>:
+ FEXT_RRI16<op, (outs ), (ins CPU16Regs:$ry, MemOpnd:$addr),
+ !strconcat(asmstr, "\t$ry, $addr"), [], itin>;
+
//
// EXT-SHIFT instruction format
//
class FEXT_SHIFT16_ins<bits<2> _f, string asmstr, InstrItinClass itin>:
- FEXT_SHIFT16<_f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry, uimm5:$sa),
+ FEXT_SHIFT16<_f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry, shamt:$sa),
!strconcat(asmstr, "\t$rx, $ry, $sa"), [], itin>;
//
@@ -80,20 +104,49 @@ def mem16 : Operand<i32> {
}
//
+// Some general instruction class info
+//
+//
+
+class ArithLogic16Defs<bit isCom=0> {
+ bits<5> shamt = 0;
+ bit isCommutable = isCom;
+ bit isReMaterializable = 1;
+ bit neverHasSideEffects = 1;
+}
+
+//
+
+// Format: ADDIU rx, immediate MIPS16e
+// Purpose: Add Immediate Unsigned Word (2-Operand, Extended)
+// To add a constant to a 32-bit integer.
+//
+def AddiuRxImmX16: FEXT_RI16_ins<0b01001, "addiu", IIAlu>;
+
+def AddiuRxRxImmX16: FEXT_2RI16_ins<0b01001, "addiu", IIAlu>,
+ ArithLogic16Defs<0>;
+
+//
+
// Format: ADDIU rx, pc, immediate MIPS16e
// Purpose: Add Immediate Unsigned Word (3-Operand, PC-Relative, Extended)
// To add a constant to the program counter.
//
-class AddiuRxPcImmX16_base : FEXT_RI16_PC_ins<0b00001, "addiu", IIAlu>;
-def AddiuRxPcImmX16 : AddiuRxPcImmX16_base;
+def AddiuRxPcImmX16: FEXT_RI16_PC_ins<0b00001, "addiu", IIAlu>;
//
// Format: ADDU rz, rx, ry MIPS16e
// Purpose: Add Unsigned Word (3-Operand)
// To add 32-bit integers.
//
-class AdduRxRyRz16_base: FRRR16_ins<01, "addu", IIAlu>;
-def AdduRxRyRz16: AdduRxRyRz16_base;
+def AdduRxRyRz16: FRRR16_ins<01, "addu", IIAlu>, ArithLogic16Defs<1>;
+
+//
+// Format: AND rx, ry MIPS16e
+// Purpose: AND
+// To do a bitwise logical AND.
+
+def AndRxRxRy16: FRxRxRy16_ins<0b01100, "and", IIAlu>, ArithLogic16Defs<1>;
//
// Format: JR ra MIPS16e
@@ -105,6 +158,34 @@ def AdduRxRyRz16: AdduRxRyRz16_base;
def JrRa16: FRR16_JALRC_RA_only_ins<0, 0, "jr", IIAlu>;
//
+// Format: LB ry, offset(rx) MIPS16e
+// Purpose: Load Byte (Extended)
+// To load a byte from memory as a signed value.
+//
+def LbRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10000, "lb", mem16, IIAlu>;
+
+//
+// Format: LBU ry, offset(rx) MIPS16e
+// Purpose: Load Byte Unsigned (Extended)
+//  To load a byte from memory as an unsigned value.
+//
+def LbuRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lbu", mem16, IIAlu>;
+
+//
+// Format: LH ry, offset(rx) MIPS16e
+// Purpose: Load Halfword signed (Extended)
+// To load a halfword from memory as a signed value.
+//
+def LhRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10001, "lh", mem16, IIAlu>;
+
+//
+// Format: LHU ry, offset(rx) MIPS16e
+// Purpose: Load Halfword unsigned (Extended)
+// To load a halfword from memory as an unsigned value.
+//
+def LhuRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10101, "lhu", mem16, IIAlu>;
+
+//
// Format: LI rx, immediate MIPS16e
// Purpose: Load Immediate (Extended)
// To load a constant into a GPR.
@@ -116,8 +197,7 @@ def LiRxImmX16: FEXT_RI16_ins<0b01101, "li", IIAlu>;
// Purpose: Load Word (Extended)
// To load a word from memory as a signed value.
//
-class LwRxRyOffMemX16_base: FEXT_RRI16_mem_ins<0b10011, "lw", mem16, IIAlu>;
-def LwRxRyOffMemX16: LwRxRyOffMemX16_base;
+def LwRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lw", mem16, IIAlu>;
//
// Format: MOVE r32, rz MIPS16e
@@ -125,6 +205,28 @@ def LwRxRyOffMemX16: LwRxRyOffMemX16_base;
// To move the contents of a GPR to a GPR.
//
def Mov32R16: FI8_MOV32R16_ins<"move", IIAlu>;
+
+//
+// Format: NEG rx, ry MIPS16e
+// Purpose: Negate
+// To negate an integer value.
+//
+def NegRxRy16: FRR16_ins<0b11101, "neg", IIAlu>;
+
+//
+// Format: NOT rx, ry MIPS16e
+// Purpose: Not
+// To complement an integer value
+//
+def NotRxRy16: FRR16_ins<0b01111, "not", IIAlu>;
+
+//
+// Format: OR rx, ry MIPS16e
+// Purpose: Or
+// To do a bitwise logical OR.
+//
+def OrRxRxRy16: FRxRxRy16_ins<0b01101, "or", IIAlu>, ArithLogic16Defs<1>;
+
//
// Format: RESTORE {ra,}{s0/s1/s0-1,}{framesize}
// (All args are optional) MIPS16e
@@ -156,6 +258,20 @@ def SaveRaF16:
"save \t$$ra, $frame_size", [], IILoad >;
//
+// Format: SB ry, offset(rx) MIPS16e
+// Purpose: Store Byte (Extended)
+// To store a byte to memory.
+//
+def SbRxRyOffMemX16: FEXT_RRI16_mem2_ins<0b11000, "sb", mem16, IIAlu>;
+
+//
+// Format: SH ry, offset(rx) MIPS16e
+// Purpose: Store Halfword (Extended)
+// To store a halfword to memory.
+//
+def ShRxRyOffMemX16: FEXT_RRI16_mem2_ins<0b11001, "sh", mem16, IIAlu>;
+
+//
// Format: SLL rx, ry, sa MIPS16e
// Purpose: Shift Word Left Logical (Extended)
//     To execute a left-shift of a word by a fixed number of bits, 0 to 31 bits.
@@ -163,57 +279,127 @@ def SaveRaF16:
def SllX16: FEXT_SHIFT16_ins<0b00, "sll", IIAlu>;
//
+// Format: SLLV ry, rx MIPS16e
+// Purpose: Shift Word Left Logical Variable
+// To execute a left-shift of a word by a variable number of bits.
+//
+def SllvRxRy16 : FRxRxRy16_ins<0b00100, "sllv", IIAlu>;
+
+
+//
+// Format: SRAV ry, rx MIPS16e
+// Purpose: Shift Word Right Arithmetic Variable
+// To execute an arithmetic right-shift of a word by a variable
+// number of bits.
+//
+def SravRxRy16: FRxRxRy16_ins<0b00111, "srav", IIAlu>;
+
+
+//
+// Format: SRA rx, ry, sa MIPS16e
+// Purpose: Shift Word Right Arithmetic (Extended)
+// To execute an arithmetic right-shift of a word by a fixed
+//  number of bits, 1 to 8 bits.
+//
+def SraX16: FEXT_SHIFT16_ins<0b11, "sra", IIAlu>;
+
+
+//
+// Format: SRLV ry, rx MIPS16e
+// Purpose: Shift Word Right Logical Variable
+// To execute a logical right-shift of a word by a variable
+// number of bits.
+//
+def SrlvRxRy16: FRxRxRy16_ins<0b00110, "srlv", IIAlu>;
+
+
+//
+// Format: SRL rx, ry, sa MIPS16e
+// Purpose: Shift Word Right Logical (Extended)
+// To execute a logical right-shift of a word by a fixed
+//  number of bits, 1 to 31 bits.
+//
+def SrlX16: FEXT_SHIFT16_ins<0b10, "srl", IIAlu>;
+
+//
+// Format: SUBU rz, rx, ry MIPS16e
+// Purpose: Subtract Unsigned Word
+// To subtract 32-bit integers
+//
+def SubuRxRyRz16: FRRR16_ins<0b11, "subu", IIAlu>, ArithLogic16Defs<0>;
+
+//
// Format: SW ry, offset(rx) MIPS16e
// Purpose: Store Word (Extended)
// To store a word to memory.
//
-class SwRxRyOffMemX16_base: FEXT_RRI16_mem_ins<0b11011, "sw", mem16, IIAlu>;
-def SwRxRyOffMemX16: SwRxRyOffMemX16_base;
+def SwRxRyOffMemX16: FEXT_RRI16_mem2_ins<0b11011, "sw", mem16, IIAlu>;
+
+//
+// Format: XOR rx, ry MIPS16e
+// Purpose: Xor
+// To do a bitwise logical XOR.
+//
+def XorRxRxRy16: FRxRxRy16_ins<0b01110, "xor", IIAlu>, ArithLogic16Defs<1>;
class Mips16Pat<dag pattern, dag result> : Pat<pattern, result> {
let Predicates = [InMips16Mode];
}
-class ArithLogicR16Defs<SDNode OpNode, bit isComm = 0> {
- dag OutOperandList = (outs CPU16Regs:$rz);
- dag InOperandList = (ins CPU16Regs:$rx, CPU16Regs:$ry);
- list<dag> Pattern = [(set CPU16Regs:$rz,
- (OpNode CPU16Regs:$rx, CPU16Regs:$ry))];
-}
+// Unary Arith/Logic
+//
+class ArithLogicU_pat<PatFrag OpNode, Instruction I> :
+ Mips16Pat<(OpNode CPU16Regs:$r),
+ (I CPU16Regs:$r)>;
-multiclass ArithLogicR16_base {
- def _add: AdduRxRyRz16_base, ArithLogicR16Defs<add, 1>;
-}
+def: ArithLogicU_pat<not, NotRxRy16>;
+def: ArithLogicU_pat<ineg, NegRxRy16>;
-defm ArithLogicR16_patt : ArithLogicR16_base;
+class ArithLogic16_pat<SDNode OpNode, Instruction I> :
+ Mips16Pat<(OpNode CPU16Regs:$l, CPU16Regs:$r),
+ (I CPU16Regs:$l, CPU16Regs:$r)>;
-class LoadM16Defs<PatFrag OpNode, Operand _MemOpnd, bit Pseudo=0> {
- bit isPseudo = Pseudo;
- Operand MemOpnd = _MemOpnd;
- dag OutOperandList = (outs CPU16Regs:$ry);
- dag InOperandList = (ins MemOpnd:$addr);
- list<dag> Pattern = [(set CPU16Regs:$ry, (OpNode addr:$addr))];
-}
+def: ArithLogic16_pat<add, AdduRxRyRz16>;
+def: ArithLogic16_pat<and, AndRxRxRy16>;
+def: ArithLogic16_pat<or, OrRxRxRy16>;
+def: ArithLogic16_pat<sub, SubuRxRyRz16>;
+def: ArithLogic16_pat<xor, XorRxRxRy16>;
-multiclass LoadM16_base {
- def _LwRxRyOffMemX16: LwRxRyOffMemX16_base, LoadM16Defs<load_a, mem16>;
-}
+// Arithmetic and logical instructions with a register and an immediate operand.
-defm LoadM16: LoadM16_base;
+class ArithLogicI16_pat<SDNode OpNode, PatFrag imm_type, Instruction I> :
+ Mips16Pat<(OpNode CPU16Regs:$in, imm_type:$imm),
+ (I CPU16Regs:$in, imm_type:$imm)>;
-class StoreM16Defs<PatFrag OpNode, Operand _MemOpnd, bit Pseudo=0> {
- bit isPseudo = Pseudo;
- Operand MemOpnd = _MemOpnd;
- dag OutOperandList = (outs );
- dag InOperandList = (ins CPU16Regs:$ry, MemOpnd:$addr);
- list<dag> Pattern = [(OpNode CPU16Regs:$ry, addr:$addr)];
-}
+def: ArithLogicI16_pat<add, immSExt16, AddiuRxRxImmX16>;
+def: ArithLogicI16_pat<shl, immZExt5, SllX16>;
+def: ArithLogicI16_pat<srl, immZExt5, SrlX16>;
+def: ArithLogicI16_pat<sra, immZExt5, SraX16>;
-multiclass StoreM16_base {
- def _SwRxRyOffMemX16: SwRxRyOffMemX16_base, StoreM16Defs<store_a, mem16>;
-}
+class shift_rotate_reg16_pat<SDNode OpNode, Instruction I> :
+ Mips16Pat<(OpNode CPU16Regs:$r, CPU16Regs:$ra),
+ (I CPU16Regs:$r, CPU16Regs:$ra)>;
+
+def: shift_rotate_reg16_pat<shl, SllvRxRy16>;
+def: shift_rotate_reg16_pat<sra, SravRxRy16>;
+def: shift_rotate_reg16_pat<srl, SrlvRxRy16>;
+
+class LoadM16_pat<PatFrag OpNode, Instruction I> :
+ Mips16Pat<(OpNode addr:$addr), (I addr:$addr)>;
+
+def: LoadM16_pat<sextloadi8, LbRxRyOffMemX16>;
+def: LoadM16_pat<zextloadi8, LbuRxRyOffMemX16>;
+def: LoadM16_pat<sextloadi16_a, LhRxRyOffMemX16>;
+def: LoadM16_pat<zextloadi16_a, LhuRxRyOffMemX16>;
+def: LoadM16_pat<load_a, LwRxRyOffMemX16>;
+
+class StoreM16_pat<PatFrag OpNode, Instruction I> :
+ Mips16Pat<(OpNode CPU16Regs:$r, addr:$addr), (I CPU16Regs:$r, addr:$addr)>;
+
+def: StoreM16_pat<truncstorei8, SbRxRyOffMemX16>;
+def: StoreM16_pat<truncstorei16_a, ShRxRyOffMemX16>;
+def: StoreM16_pat<store_a, SwRxRyOffMemX16>;
-defm StoreM16: StoreM16_base;
// Jump and Link (Call)
let isCall=1, hasDelaySlot=1 in
@@ -226,18 +412,8 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, hasCtrlDep=1,
hasExtraSrcRegAllocReq = 1 in
def RetRA16 : MipsPseudo16<(outs), (ins), "", [(MipsRet)]>;
-// As stack alignment is always done with addiu, we need a 16-bit immediate
-// This is basically deprecated code but needs to be there for things
-// to work.
-let Defs = [SP], Uses = [SP] in {
-def ADJCALLSTACKDOWN16 : MipsPseudo16<(outs), (ins uimm16:$amt),
- ";",
- [(callseq_start timm:$amt)]>;
-def ADJCALLSTACKUP16 : MipsPseudo16<(outs), (ins uimm16:$amt1, uimm16:$amt2),
- ";",
- [(callseq_end timm:$amt1, timm:$amt2)]>;
-}
-
// Small immediates
-def : Mips16Pat<(i32 immZExt16:$in), (LiRxImmX16 immZExt16:$in)>;
-def : Mips16Pat<(MipsLo tglobaladdr:$in), (LiRxImmX16 tglobaladdr:$in)>;
+def: Mips16Pat<(i32 immZExt16:$in), (LiRxImmX16 immZExt16:$in)>;
+
+def: Mips16Pat<(add CPU16Regs:$hi, (MipsLo tglobaladdr:$lo)),
+ (AddiuRxRxImmX16 CPU16Regs:$hi, tglobaladdr:$lo)>;
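
The new ArithLogicI16_pat instances only fire when the constant satisfies the immSExt16 or immZExt5 predicate, which keeps out-of-range immediates from matching. Their meaning, written out as plain predicates (equivalent checks, not the actual TableGen PatLeaf definitions):

    #include <cstdint>

    // immSExt16: constant representable as a signed 16-bit immediate
    // (used by AddiuRxRxImmX16).
    static bool immSExt16(int64_t Imm) { return Imm >= -32768 && Imm <= 32767; }

    // immZExt5: constant representable as an unsigned 5-bit shift amount
    // (used by SllX16/SrlX16/SraX16).
    static bool immZExt5(int64_t Imm) { return Imm >= 0 && Imm <= 31; }
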
diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp
new file mode 100644
index 0000000..c15d1bf
--- /dev/null
+++ b/lib/Target/Mips/Mips16RegisterInfo.cpp
@@ -0,0 +1,111 @@
+//===-- Mips16RegisterInfo.cpp - MIPS16 Register Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MIPS16 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Mips16RegisterInfo.h"
+#include "Mips.h"
+#include "MipsAnalyzeImmediate.h"
+#include "MipsInstrInfo.h"
+#include "MipsSubtarget.h"
+#include "MipsMachineFunction.h"
+#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/Type.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+
+using namespace llvm;
+
+Mips16RegisterInfo::Mips16RegisterInfo(const MipsSubtarget &ST,
+ const TargetInstrInfo &TII)
+ : MipsRegisterInfo(ST, TII) {}
+
+// This function eliminates the ADJCALLSTACKDOWN and ADJCALLSTACKUP
+// pseudo instructions.
+void Mips16RegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
+
+void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
+ unsigned OpNo, int FrameIndex,
+ uint64_t StackSize,
+ int64_t SPOffset) const {
+ MachineInstr &MI = *II;
+ MachineFunction &MF = *MI.getParent()->getParent();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ int MinCSFI = 0;
+ int MaxCSFI = -1;
+
+ if (CSI.size()) {
+ MinCSFI = CSI[0].getFrameIdx();
+ MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
+ }
+
+ // The following stack frame objects are always
+ // referenced relative to $sp:
+ // 1. Outgoing arguments.
+ // 2. Pointer to dynamically allocated stack space.
+ // 3. Locations for callee-saved registers.
+ // Everything else is referenced relative to whatever register
+ // getFrameRegister() returns.
+ unsigned FrameReg;
+
+ if (MipsFI->isOutArgFI(FrameIndex) ||
+ (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI))
+ FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP;
+ else
+ FrameReg = getFrameRegister(MF);
+
+  // Calculate final offset.
+  // - There is no need to change the offset if the frame object is one of the
+  //   following: an outgoing argument, pointer to a dynamically allocated
+  //   stack space or a $gp restore location.
+  // - If the frame object is any of the following, its offset must be
+  //   adjusted by adding the size of the stack: incoming argument,
+  //   callee-saved register location or local variable.
+ int64_t Offset;
+
+ if (MipsFI->isOutArgFI(FrameIndex))
+ Offset = SPOffset;
+ else
+ Offset = SPOffset + (int64_t)StackSize;
+
+ Offset += MI.getOperand(OpNo + 1).getImm();
+
+ DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
+
+ MI.getOperand(OpNo).ChangeToRegister(FrameReg, false);
+ MI.getOperand(OpNo + 1).ChangeToImmediate(Offset);
+}
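
The operand rewrite at the end of eliminateFI reduces to a small offset computation: $sp-relative objects keep their SPOffset, everything else is biased by the stack size, and the immediate already on the instruction is folded in. As a hypothetical pure function over the same inputs:

    #include <cstdint>

    // Final offset for a frame-index operand: outgoing arguments (and
    // callee-saved slots) are addressed from $sp directly; other objects sit
    // above the allocated frame, so the stack size is added. OldImm is the
    // immediate already present on the instruction.
    static int64_t finalFrameOffset(bool IsSPRelative, int64_t SPOffset,
                                    uint64_t StackSize, int64_t OldImm) {
      int64_t Offset = IsSPRelative ? SPOffset : SPOffset + (int64_t)StackSize;
      return Offset + OldImm;
    }
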
diff --git a/lib/Target/Mips/Mips16RegisterInfo.h b/lib/Target/Mips/Mips16RegisterInfo.h
new file mode 100644
index 0000000..3f4b3a7
--- /dev/null
+++ b/lib/Target/Mips/Mips16RegisterInfo.h
@@ -0,0 +1,37 @@
+//===-- Mips16RegisterInfo.h - Mips16 Register Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips16 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPS16REGISTERINFO_H
+#define MIPS16REGISTERINFO_H
+
+#include "MipsRegisterInfo.h"
+
+namespace llvm {
+
+class Mips16RegisterInfo : public MipsRegisterInfo {
+public:
+ Mips16RegisterInfo(const MipsSubtarget &Subtarget,
+ const TargetInstrInfo &TII);
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+private:
+ virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo,
+ int FrameIndex, uint64_t StackSize,
+ int64_t SPOffset) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index cceee24..20fc178 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -208,26 +208,25 @@ def DCLO : CountLeading1<0x25, "dclo", CPU64Regs>;
def DSBH : SubwordSwap<0x24, 0x2, "dsbh", CPU64Regs>;
def DSHD : SubwordSwap<0x24, 0x5, "dshd", CPU64Regs>;
-def LEA_ADDiu64 : EffectiveAddress<"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>;
+def LEA_ADDiu64 : EffectiveAddress<0x19, "daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>;
}
let Uses = [SP_64], DecoderNamespace = "Mips64" in
-def DynAlloc64 : EffectiveAddress<"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>,
- Requires<[IsN64, HasStandardEncoding]> {
- let isCodeGenOnly = 1;
-}
+def DynAlloc64 : EffectiveAddress<0x19, "daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>,
+ Requires<[IsN64, HasStandardEncoding]>;
let DecoderNamespace = "Mips64" in {
def RDHWR64 : ReadHardware<CPU64Regs, HWRegs64>;
def DEXT : ExtBase<3, "dext", CPU64Regs>;
def DINS : InsBase<7, "dins", CPU64Regs>;
-def DSLL64_32 : FR<0x3c, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt),
- "dsll\t$rd, $rt, 32", [], IIAlu>;
-def SLL64_32 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt),
- "sll\t$rd, $rt, 0", [], IIAlu>;
-let isCodeGenOnly = 1 in
-def SLL64_64 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPU64Regs:$rt),
- "sll\t$rd, $rt, 0", [], IIAlu>;
+let isCodeGenOnly = 1, rs = 0, shamt = 0 in {
+ def DSLL64_32 : FR<0x00, 0x3c, (outs CPU64Regs:$rd), (ins CPURegs:$rt),
+ "dsll\t$rd, $rt, 32", [], IIAlu>;
+ def SLL64_32 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt),
+ "sll\t$rd, $rt, 0", [], IIAlu>;
+ def SLL64_64 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPU64Regs:$rt),
+ "sll\t$rd, $rt, 0", [], IIAlu>;
+}
}
//===----------------------------------------------------------------------===//
// Arbitrary patterns that map to one or more instructions
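
Pinning rs = 0 and shamt = 0 fixes the otherwise-unset fields of the R-format encoding for these codegen-only shifts. For reference, the standard MIPS R-format packing as a standalone sketch (the DSLL32 funct value 0x3c matches the def above; the helper names are ours):

    #include <cassert>
    #include <cstdint>

    // Standard MIPS R-format: opcode(6) rs(5) rt(5) rd(5) shamt(5) funct(6).
    static uint32_t encodeR(unsigned Opcode, unsigned Rs, unsigned Rt,
                            unsigned Rd, unsigned Shamt, unsigned Funct) {
      assert(Opcode < 64 && Rs < 32 && Rt < 32 && Rd < 32 && Shamt < 32 &&
             Funct < 64 && "field out of range");
      return (Opcode << 26) | (Rs << 21) | (Rt << 16) | (Rd << 11) |
             (Shamt << 5) | Funct;
    }

    // "dsll rd, rt, 32" is encoded as DSLL32 (funct 0x3c) with shamt = 32 - 32:
    // both rs and shamt end up zero, which is exactly what the let binding pins.
    static uint32_t encodeDSLL64_32(unsigned Rd, unsigned Rt) {
      return encodeR(/*SPECIAL*/ 0, /*rs*/ 0, Rt, Rd, /*shamt*/ 0,
                     /*funct*/ 0x3c);
    }
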
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
index 8aadefd..19213fa 100644
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -145,6 +145,17 @@ def RetCC_MipsEABI : CallingConv<[
]>;
//===----------------------------------------------------------------------===//
+// Mips Android Calling Convention
+//===----------------------------------------------------------------------===//
+
+def RetCC_MipsAndroid : CallingConv<[
+ // f32 are returned in registers F0, F2, F1, F3
+ CCIfType<[f32], CCAssignToReg<[F0, F2, F1, F3]>>,
+
+ CCDelegateTo<RetCC_MipsO32>
+]>;
+
+//===----------------------------------------------------------------------===//
// Mips FastCC Calling Convention
//===----------------------------------------------------------------------===//
def CC_MipsO32_FastCC : CallingConv<[
@@ -210,6 +221,7 @@ def RetCC_Mips : CallingConv<[
CCIfSubtarget<"isABI_EABI()", CCDelegateTo<RetCC_MipsEABI>>,
CCIfSubtarget<"isABI_N32()", CCDelegateTo<RetCC_MipsN>>,
CCIfSubtarget<"isABI_N64()", CCDelegateTo<RetCC_MipsN>>,
+ CCIfSubtarget<"isAndroid()", CCDelegateTo<RetCC_MipsAndroid>>,
CCDelegateTo<RetCC_MipsO32>
]>;
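
CCAssignToReg hands out registers from its list in order, one per returned value, so a single f32 result lands in F0 and a second in F2; once the list is exhausted, the convention falls through to RetCC_MipsO32. A small model of that allocator (register names as strings, purely illustrative):

    #include <cstddef>

    // Model of CCAssignToReg<[F0, F2, F1, F3]>: each f32 return value takes
    // the next free register from the list; returns nullptr once the list
    // runs out (where the real convention delegates to RetCC_MipsO32).
    static const char *assignF32ReturnReg(unsigned &NextIdx) {
      static const char *Regs[] = {"F0", "F2", "F1", "F3"};
      if (NextIdx >= sizeof(Regs) / sizeof(Regs[0]))
        return nullptr;
      return Regs[NextIdx++];
    }
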
diff --git a/lib/Target/Mips/MipsELFWriterInfo.cpp b/lib/Target/Mips/MipsELFWriterInfo.cpp
new file mode 100644
index 0000000..ac3a547
--- /dev/null
+++ b/lib/Target/Mips/MipsELFWriterInfo.cpp
@@ -0,0 +1,92 @@
+//===-- MipsELFWriterInfo.cpp - ELF Writer Info for the Mips backend ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the Mips backend.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsELFWriterInfo.h"
+#include "MipsRelocations.h"
+#include "llvm/Function.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/ELF.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Implementation of the MipsELFWriterInfo class
+//===----------------------------------------------------------------------===//
+
+MipsELFWriterInfo::MipsELFWriterInfo(bool is64Bit_, bool isLittleEndian_)
+ : TargetELFWriterInfo(is64Bit_, isLittleEndian_) {
+ EMachine = EM_MIPS;
+}
+
+MipsELFWriterInfo::~MipsELFWriterInfo() {}
+
+unsigned MipsELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
+ switch(MachineRelTy) {
+ case Mips::reloc_mips_pc16:
+ return ELF::R_MIPS_GOT16;
+ case Mips::reloc_mips_hi:
+ return ELF::R_MIPS_HI16;
+ case Mips::reloc_mips_lo:
+ return ELF::R_MIPS_LO16;
+ case Mips::reloc_mips_26:
+ return ELF::R_MIPS_26;
+ default:
+ llvm_unreachable("unknown Mips machine relocation type");
+ }
+}
+
+long int MipsELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier) const {
+ switch(RelTy) {
+ case ELF::R_MIPS_26: return Modifier;
+ default:
+ llvm_unreachable("unknown Mips relocation type");
+ }
+}
+
+unsigned MipsELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
+ switch(RelTy) {
+ case ELF::R_MIPS_GOT16:
+ case ELF::R_MIPS_26:
+ return 32;
+ default:
+ llvm_unreachable("unknown Mips relocation type");
+ }
+}
+
+bool MipsELFWriterInfo::isPCRelativeRel(unsigned RelTy) const {
+ switch(RelTy) {
+ case ELF::R_MIPS_GOT16:
+ return true;
+ case ELF::R_MIPS_26:
+ return false;
+ default:
+ llvm_unreachable("unknown Mips relocation type");
+ }
+}
+
+unsigned MipsELFWriterInfo::getAbsoluteLabelMachineRelTy() const {
+ return Mips::reloc_mips_26;
+}
+
+long int MipsELFWriterInfo::computeRelocation(unsigned SymOffset,
+ unsigned RelOffset,
+ unsigned RelTy) const {
+
+ if (RelTy == ELF::R_MIPS_GOT16)
+ return SymOffset - (RelOffset + 4);
+
+ llvm_unreachable("computeRelocation unknown for this relocation type");
+}
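
computeRelocation resolves the one relocation it handles, R_MIPS_GOT16, as a PC-relative distance measured from just past the 4-byte relocated instruction. In isolation:

    #include <cstdint>

    // PC-relative value for a resolved R_MIPS_GOT16-style fixup: the distance
    // from just past the 4-byte relocated instruction to the symbol.
    static int64_t computePCRel(uint64_t SymOffset, uint64_t RelOffset) {
      return (int64_t)(SymOffset - (RelOffset + 4));
    }
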
diff --git a/lib/Target/Mips/MipsELFWriterInfo.h b/lib/Target/Mips/MipsELFWriterInfo.h
new file mode 100644
index 0000000..23f3f03
--- /dev/null
+++ b/lib/Target/Mips/MipsELFWriterInfo.h
@@ -0,0 +1,59 @@
+//===-- MipsELFWriterInfo.h - ELF Writer Info for Mips ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the Mips backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPS_ELF_WRITER_INFO_H
+#define MIPS_ELF_WRITER_INFO_H
+
+#include "llvm/Target/TargetELFWriterInfo.h"
+
+namespace llvm {
+
+ class MipsELFWriterInfo : public TargetELFWriterInfo {
+
+ public:
+ MipsELFWriterInfo(bool is64Bit_, bool isLittleEndian_);
+ virtual ~MipsELFWriterInfo();
+
+    /// getRelocationType - Returns the target-specific ELF relocation type.
+    /// 'MachineRelTy' contains the object-code-independent relocation type.
+ virtual unsigned getRelocationType(unsigned MachineRelTy) const;
+
+ /// hasRelocationAddend - True if the target uses an addend in the
+ /// ELF relocation entry.
+    virtual bool hasRelocationAddend() const { return is64Bit; }
+
+ /// getDefaultAddendForRelTy - Gets the default addend value for a
+ /// relocation entry based on the target ELF relocation type.
+ virtual long int getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier = 0) const;
+
+    /// getRelocationTySize - Returns the size of the relocatable field in bits.
+ virtual unsigned getRelocationTySize(unsigned RelTy) const;
+
+ /// isPCRelativeRel - True if the relocation type is pc relative
+ virtual bool isPCRelativeRel(unsigned RelTy) const;
+
+    /// getAbsoluteLabelMachineRelTy - Returns the machine relocation type
+    /// used to reference an absolute label.
+ virtual unsigned getAbsoluteLabelMachineRelTy() const;
+
+    /// computeRelocation - Some relocatable fields can be relocated directly,
+    /// avoiding relocation symbol emission; computes the final relocation
+    /// value for the symbol.
+ virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset,
+ unsigned RelTy) const;
+ };
+
+} // end llvm namespace
+
+#endif // MIPS_ELF_WRITER_INFO_H
diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp
index 6338f3c..8c0474b 100644
--- a/lib/Target/Mips/MipsFrameLowering.cpp
+++ b/lib/Target/Mips/MipsFrameLowering.cpp
@@ -15,6 +15,7 @@
#include "MipsAnalyzeImmediate.h"
#include "MipsInstrInfo.h"
#include "MipsMachineFunction.h"
+#include "MipsTargetMachine.h"
#include "MCTargetDesc/MipsBaseInfo.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -81,6 +82,14 @@ using namespace llvm;
//
//===----------------------------------------------------------------------===//
+const MipsFrameLowering *MipsFrameLowering::create(MipsTargetMachine &TM,
+ const MipsSubtarget &ST) {
+ if (TM.getSubtargetImpl()->inMips16Mode())
+ return llvm::createMips16FrameLowering(ST);
+
+ return llvm::createMipsSEFrameLowering(ST);
+}
+
// hasFP - Return true if the specified function should have a dedicated frame
// pointer register. This is true if the function has variable sized allocas or
// if frame pointer elimination is disabled.
@@ -89,218 +98,3 @@ bool MipsFrameLowering::hasFP(const MachineFunction &MF) const {
return MF.getTarget().Options.DisableFramePointerElim(MF) ||
MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken();
}
-
-bool MipsFrameLowering::targetHandlesStackFrameRounding() const {
- return true;
-}
-
-void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
- const MipsRegisterInfo *RegInfo =
- static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo());
- const MipsInstrInfo &TII =
- *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo());
- MachineBasicBlock::iterator MBBI = MBB.begin();
- DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
- unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
- unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
- unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
- unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
- unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu;
-
- // First, compute final stack size.
- unsigned StackAlign = getStackAlignment();
- uint64_t StackSize = RoundUpToAlignment(MFI->getStackSize(), StackAlign);
-
- if (MipsFI->globalBaseRegSet())
- StackSize += MFI->getObjectOffset(MipsFI->getGlobalRegFI()) + StackAlign;
- else
- StackSize += RoundUpToAlignment(MipsFI->getMaxCallFrameSize(), StackAlign);
-
- // Update stack size
- MFI->setStackSize(StackSize);
-
- // No need to allocate space on the stack.
- if (StackSize == 0 && !MFI->adjustsStack()) return;
-
- MachineModuleInfo &MMI = MF.getMMI();
- std::vector<MachineMove> &Moves = MMI.getFrameMoves();
- MachineLocation DstML, SrcML;
-
- // Adjust stack.
- if (isInt<16>(-StackSize)) {// addi sp, sp, (-stacksize)
- if (STI.inMips16Mode())
- BuildMI(MBB, MBBI, dl,
- TII.get(Mips::SaveRaF16)).addImm(StackSize); // cleanup
- else
- BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(SP).addImm(-StackSize);
- }
- else { // Expand immediate that doesn't fit in 16-bit.
- unsigned ATReg = STI.isABI_N64() ? Mips::AT_64 : Mips::AT;
-
- MF.getInfo<MipsFunctionInfo>()->setEmitNOAT();
- Mips::loadImmediate(-StackSize, STI.isABI_N64(), TII, MBB, MBBI, dl, false,
- 0);
- BuildMI(MBB, MBBI, dl, TII.get(ADDu), SP).addReg(SP).addReg(ATReg);
- }
-
- // emit ".cfi_def_cfa_offset StackSize"
- MCSymbol *AdjustSPLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl,
- TII.get(TargetOpcode::PROLOG_LABEL)).addSym(AdjustSPLabel);
- DstML = MachineLocation(MachineLocation::VirtualFP);
- SrcML = MachineLocation(MachineLocation::VirtualFP, -StackSize);
- Moves.push_back(MachineMove(AdjustSPLabel, DstML, SrcML));
-
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
-
- if (CSI.size()) {
- // Find the instruction past the last instruction that saves a callee-saved
- // register to the stack.
- for (unsigned i = 0; i < CSI.size(); ++i)
- ++MBBI;
-
- // Iterate over list of callee-saved registers and emit .cfi_offset
- // directives.
- MCSymbol *CSLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl,
- TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel);
-
- for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
- E = CSI.end(); I != E; ++I) {
- int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
- unsigned Reg = I->getReg();
-
- // If Reg is a double precision register, emit two cfa_offsets,
- // one for each of the paired single precision registers.
- if (Mips::AFGR64RegClass.contains(Reg)) {
- MachineLocation DstML0(MachineLocation::VirtualFP, Offset);
- MachineLocation DstML1(MachineLocation::VirtualFP, Offset + 4);
- MachineLocation SrcML0(RegInfo->getSubReg(Reg, Mips::sub_fpeven));
- MachineLocation SrcML1(RegInfo->getSubReg(Reg, Mips::sub_fpodd));
-
- if (!STI.isLittle())
- std::swap(SrcML0, SrcML1);
-
- Moves.push_back(MachineMove(CSLabel, DstML0, SrcML0));
- Moves.push_back(MachineMove(CSLabel, DstML1, SrcML1));
- } else {
- // Reg is either in CPURegs or FGR32.
- DstML = MachineLocation(MachineLocation::VirtualFP, Offset);
- SrcML = MachineLocation(Reg);
- Moves.push_back(MachineMove(CSLabel, DstML, SrcML));
- }
- }
- }
-
- // if framepointer enabled, set it to point to the stack pointer.
- if (hasFP(MF)) {
- // Insert instruction "move $fp, $sp" at this location.
- BuildMI(MBB, MBBI, dl, TII.get(ADDu), FP).addReg(SP).addReg(ZERO);
-
- // emit ".cfi_def_cfa_register $fp"
- MCSymbol *SetFPLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl,
- TII.get(TargetOpcode::PROLOG_LABEL)).addSym(SetFPLabel);
- DstML = MachineLocation(FP);
- SrcML = MachineLocation(MachineLocation::VirtualFP);
- Moves.push_back(MachineMove(SetFPLabel, DstML, SrcML));
- }
-}
-
-void MipsFrameLowering::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- const MipsInstrInfo &TII =
- *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo());
- DebugLoc dl = MBBI->getDebugLoc();
- unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
- unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
- unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
- unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
- unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu;
-
- // if framepointer enabled, restore the stack pointer.
- if (hasFP(MF)) {
- // Find the first instruction that restores a callee-saved register.
- MachineBasicBlock::iterator I = MBBI;
-
- for (unsigned i = 0; i < MFI->getCalleeSavedInfo().size(); ++i)
- --I;
-
- // Insert instruction "move $sp, $fp" at this location.
- BuildMI(MBB, I, dl, TII.get(ADDu), SP).addReg(FP).addReg(ZERO);
- }
-
- // Get the number of bytes from FrameInfo
- uint64_t StackSize = MFI->getStackSize();
-
- if (!StackSize)
- return;
-
- // Adjust stack.
- if (isInt<16>(StackSize)) { // addi sp, sp, (-stacksize)
- if (STI.inMips16Mode())
- // assumes stacksize multiple of 8
- BuildMI(MBB, MBBI, dl,
- TII.get(Mips::RestoreRaF16)).addImm(StackSize);
- else
- BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(SP).addImm(StackSize);
- }
- else { // Expand immediate that doesn't fit in 16-bit.
- unsigned ATReg = STI.isABI_N64() ? Mips::AT_64 : Mips::AT;
-
- MF.getInfo<MipsFunctionInfo>()->setEmitNOAT();
- Mips::loadImmediate(StackSize, STI.isABI_N64(), TII, MBB, MBBI, dl, false,
- 0);
- BuildMI(MBB, MBBI, dl, TII.get(ADDu), SP).addReg(SP).addReg(ATReg);
- }
-}
-
-void MipsFrameLowering::
-processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const {
- MachineRegisterInfo &MRI = MF.getRegInfo();
- unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
-
- // FIXME: remove this code if register allocator can correctly mark
- // $fp and $ra used or unused.
-
- // Mark $fp and $ra as used or unused.
- if (hasFP(MF))
- MRI.setPhysRegUsed(FP);
-}
-
-bool MipsFrameLowering::
-spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
- MachineFunction *MF = MBB.getParent();
- MachineBasicBlock *EntryBlock = MF->begin();
- const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
-
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- // Add the callee-saved register as live-in. Do not add if the register is
- // RA and return address is taken, because it has already been added in
- // method MipsTargetLowering::LowerRETURNADDR.
- // It's killed at the spill, unless the register is RA and return address
- // is taken.
- unsigned Reg = CSI[i].getReg();
- bool IsRAAndRetAddrIsTaken = (Reg == Mips::RA || Reg == Mips::RA_64)
- && MF->getFrameInfo()->isReturnAddressTaken();
- if (!IsRAAndRetAddrIsTaken)
- EntryBlock->addLiveIn(Reg);
-
- // Insert the spill to the stack frame.
- bool IsKill = !IsRAAndRetAddrIsTaken;
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.storeRegToStackSlot(*EntryBlock, MI, Reg, IsKill,
- CSI[i].getFrameIdx(), RC, TRI);
- }
-
- return true;
-}
diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h
index e364ded..ed7b7fe 100644
--- a/lib/Target/Mips/MipsFrameLowering.h
+++ b/lib/Target/Mips/MipsFrameLowering.h
@@ -27,28 +27,19 @@ protected:
public:
explicit MipsFrameLowering(const MipsSubtarget &sti)
- : TargetFrameLowering(StackGrowsDown, sti.hasMips64() ? 16 : 8, 0),
- STI(sti) {
- }
+ : TargetFrameLowering(StackGrowsDown, sti.hasMips64() ? 16 : 8, 0,
+ sti.hasMips64() ? 16 : 8), STI(sti) {}
- bool targetHandlesStackFrameRounding() const;
-
- /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
- /// the function.
- void emitPrologue(MachineFunction &MF) const;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
- bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const;
+ static const MipsFrameLowering *create(MipsTargetMachine &TM,
+ const MipsSubtarget &ST);
bool hasFP(const MachineFunction &MF) const;
-
- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const;
};
+/// Create MipsFrameLowering objects.
+const MipsFrameLowering *createMips16FrameLowering(const MipsSubtarget &ST);
+const MipsFrameLowering *createMipsSEFrameLowering(const MipsSubtarget &ST);
+
} // End llvm namespace
#endif
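
MipsFrameLowering::create (added in the .cpp hunk above) is a plain factory: the subtarget picks the concrete frame-lowering flavor once, at target construction time. The idiom reduced to a standalone sketch (class names echo the patch; everything else is simplified):

    // Reduced model of the factory split introduced here: one abstract frame
    // lowering, two concrete flavors, and a create() that picks by subtarget.
    struct MipsSubtargetModel { bool InMips16Mode; };

    struct FrameLoweringModel {
      virtual ~FrameLoweringModel() {}
      virtual void emitPrologue() const = 0;
    };
    struct Mips16Flavor : FrameLoweringModel { void emitPrologue() const {} };
    struct MipsSEFlavor : FrameLoweringModel { void emitPrologue() const {} };

    static const FrameLoweringModel *create(const MipsSubtargetModel &ST) {
      if (ST.InMips16Mode)
        return new Mips16Flavor();
      return new MipsSEFlavor();
    }
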
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index ea33b74..5a97c17 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -117,28 +117,23 @@ private:
void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) {
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
- if (((MF.getTarget().getRelocationModel() == Reloc::Static) ||
- Subtarget.inMips16Mode()) && !MipsFI->globalBaseRegSet())
+ if (!MipsFI->globalBaseRegSet())
return;
MachineBasicBlock &MBB = MF.front();
MachineBasicBlock::iterator I = MBB.begin();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
- const MipsRegisterInfo *TargetRegInfo = TM.getRegisterInfo();
- const MipsInstrInfo *MII = TM.getInstrInfo();
const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg();
- int FI = 0;
+ const TargetRegisterClass *RC;
- FI= MipsFI->initGlobalRegFI();
-
- const TargetRegisterClass *RC = Subtarget.isABI_N64() ?
- (const TargetRegisterClass*)&Mips::CPU64RegsRegClass :
- (const TargetRegisterClass*)&Mips::CPURegsRegClass;
-
- if (Subtarget.inMips16Mode())
- RC=(const TargetRegisterClass*)&Mips::CPU16RegsRegClass;
+ if (Subtarget.isABI_N64())
+ RC = (const TargetRegisterClass*)&Mips::CPU64RegsRegClass;
+ else if (Subtarget.inMips16Mode())
+ RC = (const TargetRegisterClass*)&Mips::CPU16RegsRegClass;
+ else
+ RC = (const TargetRegisterClass*)&Mips::CPURegsRegClass;
V0 = RegInfo.createVirtualRegister(RC);
V1 = RegInfo.createVirtualRegister(RC);
@@ -158,23 +153,17 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) {
.addReg(Mips::T9_64);
BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1)
.addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
- MII->storeRegToStackSlot(MBB, I, GlobalBaseReg, false, FI, RC,
- TargetRegInfo);
return;
}
if (Subtarget.inMips16Mode()) {
BuildMI(MBB, I, DL, TII.get(Mips::LiRxImmX16), V0)
- .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI);
- BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxPcImmX16),
- V1)
- .addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO);
- BuildMI(MBB, I, DL, TII.get(Mips::SllX16),
- V2 ).addReg(V0).addImm(16);
+ .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxPcImmX16), V1)
+ .addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO);
+ BuildMI(MBB, I, DL, TII.get(Mips::SllX16), V2).addReg(V0).addImm(16);
BuildMI(MBB, I, DL, TII.get(Mips::AdduRxRyRz16), GlobalBaseReg)
.addReg(V1).addReg(V2);
-
-
return;
}
@@ -203,19 +192,11 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) {
BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9);
BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1)
.addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
- MII->storeRegToStackSlot(MBB, I, GlobalBaseReg, false, FI, RC,
- TargetRegInfo);
return;
}
assert(Subtarget.isABI_O32());
-
- //if (Subtarget.inMips16Mode())
- // return; // no need to load GP. It can be calculated anywhere
-
-
-
// For O32 ABI, the following instruction sequence is emitted to initialize
// the global base register:
//
@@ -237,7 +218,6 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) {
MBB.addLiveIn(Mips::V0);
BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg)
.addReg(Mips::V0).addReg(Mips::T9);
- MII->storeRegToStackSlot(MBB, I, GlobalBaseReg, false, FI, RC, TargetRegInfo);
}
bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI,
@@ -262,13 +242,14 @@ bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI,
// Replace uses with ZeroReg.
for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg),
- E = MRI->use_end(); U != E; ++U) {
+ E = MRI->use_end(); U != E;) {
MachineOperand &MO = U.getOperand();
+ unsigned OpNo = U.getOperandNo();
MachineInstr *MI = MO.getParent();
+ ++U;
// Do not replace if it is a phi's operand or is tied to def operand.
- if (MI->isPHI() || MI->isRegTiedToDefOperand(U.getOperandNo()) ||
- MI->isPseudo())
+ if (MI->isPHI() || MI->isRegTiedToDefOperand(OpNo) || MI->isPseudo())
continue;
MO.setReg(ZeroReg);
@@ -309,21 +290,6 @@ bool MipsDAGToDAGISel::
SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) {
EVT ValTy = Addr.getValueType();
- // If Parent is an unaligned f32 load or store, select a (base + index)
- // floating point load/store instruction (luxc1 or suxc1).
- const LSBaseSDNode *LS = 0;
-
- if (Parent && (LS = dyn_cast<LSBaseSDNode>(Parent))) {
- EVT VT = LS->getMemoryVT();
-
- if (VT.getSizeInBits() / 8 > LS->getAlignment()) {
- assert(TLI.allowsUnalignedMemoryAccesses(VT) &&
- "Unaligned loads/stores not supported for this type.");
- if (VT == MVT::f32)
- return false;
- }
- }
-
// if Address is FI, get the TargetFrameIndex.
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
@@ -382,6 +348,8 @@ SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) {
}
// If an indexed floating point load/store can be emitted, return false.
+ const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent);
+
if (LS &&
(LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) &&
Subtarget.hasMips32r2Or64())
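
The loop change in ReplaceUsesWithZeroReg is a classic erase-while-iterating fix: MO.setReg() moves the operand off DstReg's use list and would invalidate the use_iterator, so the iterator is advanced (and the operand index saved) before the list is mutated. The same hazard in miniature with a std::list:

    #include <cassert>
    #include <list>

    // Removing the element an iterator points at invalidates that iterator,
    // so advance first and mutate through the saved copy; this is the same
    // pattern as saving MO/OpNo and doing ++U before MO.setReg(ZeroReg).
    static void dropEvens(std::list<int> &Uses) {
      for (std::list<int>::iterator U = Uses.begin(), E = Uses.end(); U != E;) {
        std::list<int>::iterator Cur = U++;  // Advance before mutating.
        if (*Cur % 2 == 0)
          Uses.erase(Cur);                   // Safe: U already points past Cur.
      }
    }

    int main() {
      std::list<int> L = {1, 2, 3, 4};
      dropEvens(L);
      assert(L.size() == 2);
    }
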
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 7741f9f..c5207c6 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -157,7 +157,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::SETCC, MVT::f32, Custom);
setOperationAction(ISD::SETCC, MVT::f64, Custom);
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
@@ -178,7 +177,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::JumpTable, MVT::i64, Custom);
setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
setOperationAction(ISD::SELECT, MVT::i64, Custom);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
setOperationAction(ISD::LOAD, MVT::i64, Custom);
setOperationAction(ISD::STORE, MVT::i64, Custom);
}
@@ -217,6 +215,8 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
setOperationAction(ISD::ROTL, MVT::i32, Expand);
setOperationAction(ISD::ROTL, MVT::i64, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
if (!Subtarget->hasMips32r2())
setOperationAction(ISD::ROTR, MVT::i32, Expand);
@@ -314,8 +314,6 @@ bool MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
case MVT::i64:
case MVT::i32:
return true;
- case MVT::f32:
- return Subtarget->hasMips32r2Or64();
default:
return false;
}
@@ -794,7 +792,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
- case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
@@ -1504,42 +1501,6 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
// Misc Lower Operation implementation
//===----------------------------------------------------------------------===//
SDValue MipsTargetLowering::
-LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
-{
- MachineFunction &MF = DAG.getMachineFunction();
- MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
- unsigned SP = IsN64 ? Mips::SP_64 : Mips::SP;
-
- assert(getTargetMachine().getFrameLowering()->getStackAlignment() >=
- cast<ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue() &&
- "Cannot lower if the alignment of the allocated space is larger than \
- that of the stack.");
-
- SDValue Chain = Op.getOperand(0);
- SDValue Size = Op.getOperand(1);
- DebugLoc dl = Op.getDebugLoc();
-
- // Get a reference from Mips stack pointer
- SDValue StackPointer = DAG.getCopyFromReg(Chain, dl, SP, getPointerTy());
-
- // Subtract the dynamic size from the actual stack size to
- // obtain the new stack size.
- SDValue Sub = DAG.getNode(ISD::SUB, dl, getPointerTy(), StackPointer, Size);
-
- // The Sub result contains the new stack start address, so it
- // must be placed in the stack pointer register.
- Chain = DAG.getCopyToReg(StackPointer.getValue(1), dl, SP, Sub, SDValue());
-
- // This node always has two return values: a new stack pointer
- // value and a chain
- SDVTList VTLs = DAG.getVTList(getPointerTy(), MVT::Other);
- SDValue Ptr = DAG.getFrameIndex(MipsFI->getDynAllocFI(), getPointerTy());
- SDValue Ops[] = { Chain, Ptr, Chain.getValue(1) };
-
- return DAG.getNode(MipsISD::DynAlloc, dl, VTLs, Ops, 3);
-}
-
-SDValue MipsTargetLowering::
LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
{
// The first operand is the chain, the second is the condition, the third is
@@ -2455,9 +2416,9 @@ static unsigned getNextIntArgReg(unsigned Reg) {
// Write ByVal Arg to arg registers and stack.
static void
-WriteByValArg(SDValue& ByValChain, SDValue Chain, DebugLoc dl,
+WriteByValArg(SDValue Chain, DebugLoc dl,
SmallVector<std::pair<unsigned, SDValue>, 16> &RegsToPass,
- SmallVector<SDValue, 8> &MemOpChains, int &LastFI,
+ SmallVector<SDValue, 8> &MemOpChains, SDValue StackPtr,
MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg,
const CCValAssign &VA, const ISD::ArgFlagsTy &Flags,
MVT PtrType, bool isLittle) {
@@ -2531,24 +2492,24 @@ WriteByValArg(SDValue& ByValChain, SDValue Chain, DebugLoc dl,
return;
}
- // Create a fixed object on stack at offset LocMemOffset and copy
- // remaining part of byval arg to it using memcpy.
+ // Copy remaining part of byval arg using memcpy.
SDValue Src = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
DAG.getConstant(Offset, MVT::i32));
- LastFI = MFI->CreateFixedObject(RemainingSize, LocMemOffset, true);
- SDValue Dst = DAG.getFrameIndex(LastFI, PtrType);
- ByValChain = DAG.getMemcpy(ByValChain, dl, Dst, Src,
- DAG.getConstant(RemainingSize, MVT::i32),
- std::min(ByValAlign, (unsigned)4),
- /*isVolatile=*/false, /*AlwaysInline=*/false,
- MachinePointerInfo(0), MachinePointerInfo(0));
+ SDValue Dst = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr,
+ DAG.getIntPtrConstant(LocMemOffset));
+ Chain = DAG.getMemcpy(Chain, dl, Dst, Src,
+ DAG.getConstant(RemainingSize, MVT::i32),
+ std::min(ByValAlign, (unsigned)4),
+ /*isVolatile=*/false, /*AlwaysInline=*/false,
+ MachinePointerInfo(0), MachinePointerInfo(0));
+ MemOpChains.push_back(Chain);
}
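
A worked sketch of the split WriteByValArg performs, under assumed example values (hypothetical helper, not part of the patch): a 24-byte byval argument whose first two words already went to registers leaves a 16-byte tail for the memcpy at StackPtr + LocMemOffset.

    // Assumed example values; O32 register words are 4 bytes.
    unsigned byValTailSize(unsigned ByValSize, unsigned RegWordsUsed) {
      unsigned Offset = RegWordsUsed * 4; // bytes already passed in registers
      return ByValSize - Offset;          // bytes left for the memcpy
    }
    // byValTailSize(24, 2) == 16
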
// Copy Mips64 byVal arg to registers and stack.
static void
-PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl,
+PassByValArg64(SDValue Chain, DebugLoc dl,
SmallVector<std::pair<unsigned, SDValue>, 16> &RegsToPass,
- SmallVector<SDValue, 8> &MemOpChains, int &LastFI,
+ SmallVector<SDValue, 8> &MemOpChains, SDValue StackPtr,
MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg,
const CCValAssign &VA, const ISD::ArgFlagsTy &Flags,
EVT PtrTy, bool isLittle) {
@@ -2620,16 +2581,16 @@ PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl,
assert(MemCpySize && "MemCpySize must not be zero.");
- // Create a fixed object on stack at offset LocMemOffset and copy
- // remainder of byval arg to it with memcpy.
+  // Copy the remainder of the byval arg with memcpy.
SDValue Src = DAG.getNode(ISD::ADD, dl, PtrTy, Arg,
DAG.getConstant(Offset, PtrTy));
- LastFI = MFI->CreateFixedObject(MemCpySize, LocMemOffset, true);
- SDValue Dst = DAG.getFrameIndex(LastFI, PtrTy);
- ByValChain = DAG.getMemcpy(ByValChain, dl, Dst, Src,
- DAG.getConstant(MemCpySize, PtrTy), Alignment,
- /*isVolatile=*/false, /*AlwaysInline=*/false,
- MachinePointerInfo(0), MachinePointerInfo(0));
+ SDValue Dst = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr,
+ DAG.getIntPtrConstant(LocMemOffset));
+ Chain = DAG.getMemcpy(Chain, dl, Dst, Src,
+ DAG.getConstant(MemCpySize, PtrTy), Alignment,
+ /*isVolatile=*/false, /*AlwaysInline=*/false,
+ MachinePointerInfo(0), MachinePointerInfo(0));
+ MemOpChains.push_back(Chain);
}
/// LowerCall - function arguments are copied from virtual regs to
@@ -2643,9 +2604,8 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
- SDValue InChain = CLI.Chain;
+ SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
- SDValue CalleeSave = CLI.Callee;
bool &isTailCall = CLI.IsTailCall;
CallingConv::ID CallConv = CLI.CallConv;
bool isVarArg = CLI.IsVarArg;
@@ -2675,18 +2635,8 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Get a count of how many bytes are to be pushed on the stack.
unsigned NextStackOffset = CCInfo.getNextStackOffset();
-
- // Chain is the output chain of the last Load/Store or CopyToReg node.
- // ByValChain is the output chain of the last Memcpy node created for copying
- // byval arguments to the stack.
- SDValue Chain, CallSeqStart, ByValChain;
- SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, true);
- Chain = CallSeqStart = DAG.getCALLSEQ_START(InChain, NextStackOffsetVal);
- ByValChain = InChain;
-
- // Get the frame index of the stack frame object that points to the location
- // of dynamically allocated area on the stack.
- int DynAllocFI = MipsFI->getDynAllocFI();
+ unsigned StackAlignment = TFL->getStackAlignment();
+ NextStackOffset = RoundUpToAlignment(NextStackOffset, StackAlignment);
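
RoundUpToAlignment (from llvm/Support/MathExtras.h) replaces the open-coded rounding removed in the next hunk; its arithmetic is equivalent to:

    #include <cstdint>

    // Equivalent to the deleted inline computation below.
    uint64_t roundUpToAlignment(uint64_t Value, uint64_t Align) {
      return (Value + Align - 1) / Align * Align;
    }
    // roundUpToAlignment(20, 8) == 24
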
// Update size of the maximum argument space.
// For O32, a minimum of four words (16 bytes) of argument space is
@@ -2694,27 +2644,23 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (IsO32 && (CallConv != CallingConv::Fast))
NextStackOffset = std::max(NextStackOffset, (unsigned)16);
- unsigned MaxCallFrameSize = MipsFI->getMaxCallFrameSize();
-
- if (MaxCallFrameSize < NextStackOffset) {
- MipsFI->setMaxCallFrameSize(NextStackOffset);
+  // Chain is the output chain of the last Load/Store or CopyToReg node.
+ SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, true);
+ Chain = DAG.getCALLSEQ_START(Chain, NextStackOffsetVal);
- // Set the offsets relative to $sp of the $gp restore slot and dynamically
- // allocated stack space. These offsets must be aligned to a boundary
- // determined by the stack alignment of the ABI.
- unsigned StackAlignment = TFL->getStackAlignment();
- NextStackOffset = (NextStackOffset + StackAlignment - 1) /
- StackAlignment * StackAlignment;
+ SDValue StackPtr = DAG.getCopyFromReg(Chain, dl,
+ IsN64 ? Mips::SP_64 : Mips::SP,
+ getPointerTy());
- MFI->setObjectOffset(DynAllocFI, NextStackOffset);
- }
+ if (MipsFI->getMaxCallFrameSize() < NextStackOffset)
+ MipsFI->setMaxCallFrameSize(NextStackOffset);
  // With EABI it is possible to have 16 args in registers.
SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
- int FirstFI = -MFI->getNumFixedObjects() - 1, LastFI = 0;
-
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
SDValue Arg = OutVals[i];
@@ -2727,11 +2673,11 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
assert(Flags.getByValSize() &&
"ByVal args of size 0 should have been ignored by front-end.");
if (IsO32)
- WriteByValArg(ByValChain, Chain, dl, RegsToPass, MemOpChains, LastFI,
+ WriteByValArg(Chain, dl, RegsToPass, MemOpChains, StackPtr,
MFI, DAG, Arg, VA, Flags, getPointerTy(),
Subtarget->isLittle());
else
- PassByValArg64(ByValChain, Chain, dl, RegsToPass, MemOpChains, LastFI,
+ PassByValArg64(Chain, dl, RegsToPass, MemOpChains, StackPtr,
MFI, DAG, Arg, VA, Flags, getPointerTy(),
Subtarget->isLittle());
continue;
@@ -2781,29 +2727,14 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Register can't get to this point...
assert(VA.isMemLoc());
- // Create the frame index object for this incoming parameter
- LastFI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
- VA.getLocMemOffset(), true);
- SDValue PtrOff = DAG.getFrameIndex(LastFI, getPointerTy());
-
    // Emit an ISD::STORE that stores the
    // parameter value to a stack location.
+ SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
+ DAG.getIntPtrConstant(VA.getLocMemOffset()));
MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
MachinePointerInfo(), false, false, 0));
}
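
Note the change of addressing mode here: previously each outgoing argument got its own fixed frame object; now the store address is computed directly as $sp plus the CCValAssign offset. A sketch with an assumed LocMemOffset of 16 (illustration only):

    // Assumed offset of 16; the store lands inside the area reserved
    // by CALLSEQ_START.
    SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
                                 DAG.getIntPtrConstant(16));
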
- // Extend range of indices of frame objects for outgoing arguments that were
- // created during this function call. Skip this step if no such objects were
- // created.
- if (LastFI)
- MipsFI->extendOutArgFIRange(FirstFI, LastFI);
-
- // If a memcpy has been created to copy a byval arg to a stack, replace the
- // chain input of CallSeqStart with ByValChain.
- if (InChain != ByValChain)
- DAG.UpdateNodeOperands(CallSeqStart.getNode(), ByValChain,
- NextStackOffsetVal);
-
// Transform all store nodes into one single node because all store
// nodes are independent of each other.
if (!MemOpChains.empty())
@@ -2867,6 +2798,9 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
}
+ // T9 register operand.
+ SDValue T9;
+
// T9 should contain the address of the callee function if
  // -relocation-model=pic or it is an indirect call.
if (IsPICCall || !GlobalOrExternal) {
@@ -2874,7 +2808,11 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9;
Chain = DAG.getCopyToReg(Chain, dl, T9Reg, Callee, SDValue(0, 0));
InFlag = Chain.getValue(1);
- Callee = DAG.getRegister(T9Reg, getPointerTy());
+
+ if (Subtarget->inMips16Mode())
+ T9 = DAG.getRegister(T9Reg, getPointerTy());
+ else
+ Callee = DAG.getRegister(T9Reg, getPointerTy());
}
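
Background for the $t9 handling above: MIPS PIC calling conventions expect the callee's entry address in $t9 ($25) so the callee's prologue can rederive $gp from it; in mips16 mode the register rides along as an extra call operand instead of becoming the call target itself. A compact sketch of the operand placement (mirrors the logic above, assumed names in scope):

    // Sketch only: both paths funnel the address through $t9.
    SDValue CallTarget = Subtarget->inMips16Mode()
        ? Callee                                  // keep the symbolic target
        : DAG.getRegister(T9Reg, getPointerTy()); // call through $t9
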
// Insert node "GP copy globalreg" before call to function.
@@ -2902,7 +2840,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SmallVector<SDValue, 8> Ops;
Ops.push_back(Chain);
- Ops.push_back(Subtarget->inMips16Mode()? CalleeSave: Callee);
+ Ops.push_back(Callee);
// Add argument registers to the end of the list so that they are
// known live into the call.
@@ -2910,8 +2848,10 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
- if (Subtarget->inMips16Mode())
- Ops.push_back(Callee);
+ // Add T9 register operand.
+ if (T9.getNode())
+ Ops.push_back(T9);
+
// Add a register mask operand representing the call-preserved registers.
const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
@@ -2925,8 +2865,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
InFlag = Chain.getValue(1);
// Create the CALLSEQ_END node.
- Chain = DAG.getCALLSEQ_END(Chain,
- DAG.getIntPtrConstant(NextStackOffset, true),
+ Chain = DAG.getCALLSEQ_END(Chain, NextStackOffsetVal,
DAG.getIntPtrConstant(0, true), InFlag);
InFlag = Chain.getValue(1);
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index edab03c..95ea8fa 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -132,7 +132,6 @@ namespace llvm {
// Lower Operand specifics
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index 9654b86..df45df4 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -101,18 +101,18 @@ class FPStore<bits<6> op, string opstr, RegisterClass RC, Operand MemOpnd>:
}
// FP indexed load.
class FPIdxLoad<bits<6> funct, string opstr, RegisterClass DRC,
- RegisterClass PRC, PatFrag FOp>:
+ RegisterClass PRC, SDPatternOperator FOp = null_frag>:
FFMemIdx<funct, (outs DRC:$fd), (ins PRC:$base, PRC:$index),
- !strconcat(opstr, "\t$fd, $index($base)"),
+ !strconcat(opstr, "\t$fd, ${index}(${base})"),
[(set DRC:$fd, (FOp (add PRC:$base, PRC:$index)))]> {
let fs = 0;
}
// FP indexed store.
class FPIdxStore<bits<6> funct, string opstr, RegisterClass DRC,
- RegisterClass PRC, PatFrag FOp>:
+                 RegisterClass PRC, SDPatternOperator FOp = null_frag>:
FFMemIdx<funct, (outs), (ins DRC:$fs, PRC:$base, PRC:$index),
- !strconcat(opstr, "\t$fs, $index($base)"),
+ !strconcat(opstr, "\t$fs, ${index}(${base})"),
[(FOp DRC:$fs, (add PRC:$base, PRC:$index))]> {
let fd = 0;
}
@@ -270,7 +270,7 @@ let Predicates = [NotN64, HasStandardEncoding] in {
}
let Predicates = [NotN64, HasMips64, HasStandardEncoding],
- DecoderNamespace = "Mips64" in {
+ DecoderNamespace = "Mips64" in {
def LDC164 : FPLoad<0x35, "ldc1", FGR64, mem>;
def SDC164 : FPStore<0x3d, "sdc1", FGR64, mem>;
}
@@ -283,9 +283,7 @@ let Predicates = [NotN64, NotMips64, HasStandardEncoding] in {
// Indexed loads and stores.
let Predicates = [HasMips32r2Or64, HasStandardEncoding] in {
def LWXC1 : FPIdxLoad<0x0, "lwxc1", FGR32, CPURegs, load_a>;
- def LUXC1 : FPIdxLoad<0x5, "luxc1", FGR32, CPURegs, load_u>;
def SWXC1 : FPIdxStore<0x8, "swxc1", FGR32, CPURegs, store_a>;
- def SUXC1 : FPIdxStore<0xd, "suxc1", FGR32, CPURegs, store_u>;
}
let Predicates = [HasMips32r2, NotMips64, HasStandardEncoding] in {
@@ -301,13 +299,23 @@ let Predicates = [HasMips64, NotN64, HasStandardEncoding], DecoderNamespace="Mip
// n64
let Predicates = [IsN64, HasStandardEncoding], isCodeGenOnly=1 in {
def LWXC1_P8 : FPIdxLoad<0x0, "lwxc1", FGR32, CPU64Regs, load_a>;
- def LUXC1_P8 : FPIdxLoad<0x5, "luxc1", FGR32, CPU64Regs, load_u>;
def LDXC164_P8 : FPIdxLoad<0x1, "ldxc1", FGR64, CPU64Regs, load_a>;
def SWXC1_P8 : FPIdxStore<0x8, "swxc1", FGR32, CPU64Regs, store_a>;
- def SUXC1_P8 : FPIdxStore<0xd, "suxc1", FGR32, CPU64Regs, store_u>;
def SDXC164_P8 : FPIdxStore<0x9, "sdxc1", FGR64, CPU64Regs, store_a>;
}
+// Load/store doubleword indexed unaligned.
+let Predicates = [NotMips64, HasStandardEncoding] in {
+ def LUXC1 : FPIdxLoad<0x5, "luxc1", AFGR64, CPURegs>;
+ def SUXC1 : FPIdxStore<0xd, "suxc1", AFGR64, CPURegs>;
+}
+
+let Predicates = [HasMips64, HasStandardEncoding],
+ DecoderNamespace="Mips64" in {
+ def LUXC164 : FPIdxLoad<0x5, "luxc1", FGR64, CPURegs>;
+ def SUXC164 : FPIdxStore<0xd, "suxc1", FGR64, CPURegs>;
+}
+
/// Floating-point Arithmetic
defm FADD : FFR2P_M<0x00, "add", fadd, 1>;
defm FDIV : FFR2P_M<0x03, "div", fdiv>;
@@ -408,25 +416,23 @@ let Defs=[FCR31] in {
//===----------------------------------------------------------------------===//
// Floating Point Pseudo-Instructions
//===----------------------------------------------------------------------===//
-def MOVCCRToCCR : MipsPseudo<(outs CCR:$dst), (ins CCR:$src),
- "# MOVCCRToCCR", []>;
+def MOVCCRToCCR : PseudoSE<(outs CCR:$dst), (ins CCR:$src),
+ "# MOVCCRToCCR", []>;
// This pseudo instr gets expanded into 2 mtc1 instrs after register
// allocation.
def BuildPairF64 :
- MipsPseudo<(outs AFGR64:$dst),
- (ins CPURegs:$lo, CPURegs:$hi), "",
- [(set AFGR64:$dst, (MipsBuildPairF64 CPURegs:$lo, CPURegs:$hi))]>;
+ PseudoSE<(outs AFGR64:$dst),
+ (ins CPURegs:$lo, CPURegs:$hi), "",
+ [(set AFGR64:$dst, (MipsBuildPairF64 CPURegs:$lo, CPURegs:$hi))]>;
// This pseudo instr gets expanded into 2 mfc1 instrs after register
// allocation.
// if n is 0, lower part of src is extracted.
// if n is 1, higher part of src is extracted.
def ExtractElementF64 :
- MipsPseudo<(outs CPURegs:$dst),
- (ins AFGR64:$src, i32imm:$n), "",
- [(set CPURegs:$dst,
- (MipsExtractElementF64 AFGR64:$src, imm:$n))]>;
+ PseudoSE<(outs CPURegs:$dst), (ins AFGR64:$src, i32imm:$n), "",
+ [(set CPURegs:$dst, (MipsExtractElementF64 AFGR64:$src, imm:$n))]>;
//===----------------------------------------------------------------------===//
// Floating Point Patterns
@@ -466,17 +472,3 @@ let Predicates = [IsFP64bit, HasStandardEncoding] in {
def : MipsPat<(f32 (fround FGR64:$src)), (CVT_S_D64 FGR64:$src)>;
def : MipsPat<(f64 (fextend FGR32:$src)), (CVT_D64_S FGR32:$src)>;
}
-
-// Patterns for unaligned floating point loads and stores.
-let Predicates = [HasMips32r2Or64, NotN64, HasStandardEncoding] in {
- def : MipsPat<(f32 (load_u CPURegs:$addr)), (LUXC1 CPURegs:$addr, ZERO)>;
- def : MipsPat<(store_u FGR32:$src, CPURegs:$addr),
- (SUXC1 FGR32:$src, CPURegs:$addr, ZERO)>;
-}
-
-let Predicates = [IsN64, HasStandardEncoding] in {
- def : MipsPat<(f32 (load_u CPU64Regs:$addr)),
- (LUXC1_P8 CPU64Regs:$addr, ZERO_64)>;
- def : MipsPat<(store_u FGR32:$src, CPU64Regs:$addr),
- (SUXC1_P8 FGR32:$src, CPU64Regs:$addr, ZERO_64)>;
-}
diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td
index 15a77fb..8feb853 100644
--- a/lib/Target/Mips/MipsInstrFormats.td
+++ b/lib/Target/Mips/MipsInstrFormats.td
@@ -70,25 +70,35 @@ class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern,
let DecoderNamespace = "Mips";
field bits<32> SoftFail = 0;
+}
+// Mips32/64 Instruction Format
+class InstSE<dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin, Format f>:
+ MipsInst<outs, ins, asmstr, pattern, itin, f> {
let Predicates = [HasStandardEncoding];
-
}
// Mips Pseudo Instructions Format
class MipsPseudo<dag outs, dag ins, string asmstr, list<dag> pattern>:
- MipsInst<outs, ins, asmstr, pattern, IIPseudo, Pseudo> {
+ MipsInst<outs, ins, asmstr, pattern, IIPseudo, Pseudo> {
let isCodeGenOnly = 1;
let isPseudo = 1;
}
+// Mips32/64 Pseudo Instruction Format
+class PseudoSE<dag outs, dag ins, string asmstr, list<dag> pattern>:
+ MipsPseudo<outs, ins, asmstr, pattern> {
+ let Predicates = [HasStandardEncoding];
+}
+
//===----------------------------------------------------------------------===//
// Format R instruction class in Mips : <|opcode|rs|rt|rd|shamt|funct|>
//===----------------------------------------------------------------------===//
class FR<bits<6> op, bits<6> _funct, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst<outs, ins, asmstr, pattern, itin, FrmR>
+ InstSE<outs, ins, asmstr, pattern, itin, FrmR>
{
bits<5> rd;
bits<5> rs;
@@ -111,7 +121,7 @@ class FR<bits<6> op, bits<6> _funct, dag outs, dag ins, string asmstr,
//===----------------------------------------------------------------------===//
class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin>: MipsInst<outs, ins, asmstr, pattern, itin, FrmI>
+ InstrItinClass itin>: InstSE<outs, ins, asmstr, pattern, itin, FrmI>
{
bits<5> rt;
bits<5> rs;
@@ -126,7 +136,7 @@ class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
class BranchBase<bits<6> op, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst<outs, ins, asmstr, pattern, itin, FrmI>
+ InstSE<outs, ins, asmstr, pattern, itin, FrmI>
{
bits<5> rs;
bits<5> rt;
@@ -144,7 +154,7 @@ class BranchBase<bits<6> op, dag outs, dag ins, string asmstr,
//===----------------------------------------------------------------------===//
class FJ<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin>: MipsInst<outs, ins, asmstr, pattern, itin, FrmJ>
+ InstrItinClass itin>: InstSE<outs, ins, asmstr, pattern, itin, FrmJ>
{
bits<26> addr;
@@ -172,7 +182,7 @@ class FJ<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
class FFR<bits<6> op, bits<6> _funct, bits<5> _fmt, dag outs, dag ins,
string asmstr, list<dag> pattern> :
- MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmFR>
+ InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmFR>
{
bits<5> fd;
bits<5> fs;
@@ -196,7 +206,7 @@ class FFR<bits<6> op, bits<6> _funct, bits<5> _fmt, dag outs, dag ins,
//===----------------------------------------------------------------------===//
class FFI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern>:
- MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmFI>
+ InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmFI>
{
bits<5> ft;
bits<5> base;
@@ -214,7 +224,7 @@ class FFI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern>:
//===----------------------------------------------------------------------===//
class FCC<bits<5> _fmt, dag outs, dag ins, string asmstr, list<dag> pattern> :
- MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmOther>
+ InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther>
{
bits<5> fs;
bits<5> ft;
@@ -235,7 +245,7 @@ class FCC<bits<5> _fmt, dag outs, dag ins, string asmstr, list<dag> pattern> :
class FCMOV<bits<1> _tf, dag outs, dag ins, string asmstr,
list<dag> pattern> :
- MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmOther>
+ InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther>
{
bits<5> rd;
bits<5> rs;
@@ -256,7 +266,7 @@ class FCMOV<bits<1> _tf, dag outs, dag ins, string asmstr,
class FFCMOV<bits<5> _fmt, bits<1> _tf, dag outs, dag ins, string asmstr,
list<dag> pattern> :
- MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmOther>
+ InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther>
{
bits<5> fd;
bits<5> fs;
@@ -303,7 +313,7 @@ class FFR2P<bits<6> funct, bits<5> fmt, string opstr,
// Floating point madd/msub/nmadd/nmsub.
class FFMADDSUB<bits<3> funct, bits<3> fmt, dag outs, dag ins, string asmstr,
list<dag> pattern>
- : MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmOther> {
+ : InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther> {
bits<5> fd;
bits<5> fr;
bits<5> fs;
@@ -321,7 +331,7 @@ class FFMADDSUB<bits<3> funct, bits<3> fmt, dag outs, dag ins, string asmstr,
// FP indexed load/store instructions.
class FFMemIdx<bits<6> funct, dag outs, dag ins, string asmstr,
list<dag> pattern> :
- MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmOther>
+ InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther>
{
bits<5> base;
bits<5> index;
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index 458e4f7..50e3eb5 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -27,68 +27,19 @@
using namespace llvm;
-MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm)
+MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm, unsigned UncondBr)
: MipsGenInstrInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP),
- TM(tm), IsN64(TM.getSubtarget<MipsSubtarget>().isABI_N64()),
- InMips16Mode(TM.getSubtarget<MipsSubtarget>().inMips16Mode()),
- RI(*TM.getSubtargetImpl(), *this),
- UncondBrOpc(TM.getRelocationModel() == Reloc::PIC_ ? Mips::B : Mips::J) {}
+ TM(tm), UncondBrOpc(UncondBr) {}
-const MipsRegisterInfo &MipsInstrInfo::getRegisterInfo() const {
- return RI;
-}
+const MipsInstrInfo *MipsInstrInfo::create(MipsTargetMachine &TM) {
+ if (TM.getSubtargetImpl()->inMips16Mode())
+ return llvm::createMips16InstrInfo(TM);
-static bool isZeroImm(const MachineOperand &op) {
- return op.isImm() && op.getImm() == 0;
+ return llvm::createMipsSEInstrInfo(TM);
}
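
A likely call site for the new factory (a sketch; the actual wiring in MipsTargetMachine is outside this diff):

    // TM is an assumed MipsTargetMachine reference.
    const MipsInstrInfo *TII = MipsInstrInfo::create(TM);
    // Returns the Mips16 variant in mips16 mode, the standard-encoding
    // (SE) variant otherwise.
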
-/// isLoadFromStackSlot - If the specified machine instruction is a direct
-/// load from a stack slot, return the virtual or physical register number of
-/// the destination along with the FrameIndex of the loaded stack slot. If
-/// not, return 0. This predicate must return 0 if the instruction has
-/// any side effects other than loading from the stack slot.
-unsigned MipsInstrInfo::
-isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const
-{
- unsigned Opc = MI->getOpcode();
-
- if ((Opc == Mips::LW) || (Opc == Mips::LW_P8) || (Opc == Mips::LD) ||
- (Opc == Mips::LD_P8) || (Opc == Mips::LWC1) || (Opc == Mips::LWC1_P8) ||
- (Opc == Mips::LDC1) || (Opc == Mips::LDC164) ||
- (Opc == Mips::LDC164_P8)) {
- if ((MI->getOperand(1).isFI()) && // is a stack slot
- (MI->getOperand(2).isImm()) && // the imm is zero
- (isZeroImm(MI->getOperand(2)))) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
- }
-
- return 0;
-}
-
-/// isStoreToStackSlot - If the specified machine instruction is a direct
-/// store to a stack slot, return the virtual or physical register number of
-/// the source reg along with the FrameIndex of the loaded stack slot. If
-/// not, return 0. This predicate must return 0 if the instruction has
-/// any side effects other than storing to the stack slot.
-unsigned MipsInstrInfo::
-isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const
-{
- unsigned Opc = MI->getOpcode();
-
- if ((Opc == Mips::SW) || (Opc == Mips::SW_P8) || (Opc == Mips::SD) ||
- (Opc == Mips::SD_P8) || (Opc == Mips::SWC1) || (Opc == Mips::SWC1_P8) ||
- (Opc == Mips::SDC1) || (Opc == Mips::SDC164) ||
- (Opc == Mips::SDC164_P8)) {
- if ((MI->getOperand(1).isFI()) && // is a stack slot
- (MI->getOperand(2).isImm()) && // the imm is zero
- (isZeroImm(MI->getOperand(2)))) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
- }
- return 0;
+bool MipsInstrInfo::isZeroImm(const MachineOperand &op) const {
+ return op.isImm() && op.getImm() == 0;
}
/// insertNoop - If a data hazard condition is found, insert the target nop
@@ -100,83 +51,8 @@ insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const
BuildMI(MBB, MI, DL, get(Mips::NOP));
}
-void MipsInstrInfo::
-copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const {
- unsigned Opc = 0, ZeroReg = 0;
-
- if (Mips::CPURegsRegClass.contains(DestReg)) { // Copy to CPU Reg.
- if (Mips::CPURegsRegClass.contains(SrcReg)) {
- if (InMips16Mode)
- Opc=Mips::Mov32R16;
- else {
- Opc = Mips::ADDu, ZeroReg = Mips::ZERO;
- }
- }
- else if (Mips::CCRRegClass.contains(SrcReg))
- Opc = Mips::CFC1;
- else if (Mips::FGR32RegClass.contains(SrcReg))
- Opc = Mips::MFC1;
- else if (SrcReg == Mips::HI)
- Opc = Mips::MFHI, SrcReg = 0;
- else if (SrcReg == Mips::LO)
- Opc = Mips::MFLO, SrcReg = 0;
- }
- else if (Mips::CPURegsRegClass.contains(SrcReg)) { // Copy from CPU Reg.
- if (Mips::CCRRegClass.contains(DestReg))
- Opc = Mips::CTC1;
- else if (Mips::FGR32RegClass.contains(DestReg))
- Opc = Mips::MTC1;
- else if (DestReg == Mips::HI)
- Opc = Mips::MTHI, DestReg = 0;
- else if (DestReg == Mips::LO)
- Opc = Mips::MTLO, DestReg = 0;
- }
- else if (Mips::FGR32RegClass.contains(DestReg, SrcReg))
- Opc = Mips::FMOV_S;
- else if (Mips::AFGR64RegClass.contains(DestReg, SrcReg))
- Opc = Mips::FMOV_D32;
- else if (Mips::FGR64RegClass.contains(DestReg, SrcReg))
- Opc = Mips::FMOV_D64;
- else if (Mips::CCRRegClass.contains(DestReg, SrcReg))
- Opc = Mips::MOVCCRToCCR;
- else if (Mips::CPU64RegsRegClass.contains(DestReg)) { // Copy to CPU64 Reg.
- if (Mips::CPU64RegsRegClass.contains(SrcReg))
- Opc = Mips::DADDu, ZeroReg = Mips::ZERO_64;
- else if (SrcReg == Mips::HI64)
- Opc = Mips::MFHI64, SrcReg = 0;
- else if (SrcReg == Mips::LO64)
- Opc = Mips::MFLO64, SrcReg = 0;
- else if (Mips::FGR64RegClass.contains(SrcReg))
- Opc = Mips::DMFC1;
- }
- else if (Mips::CPU64RegsRegClass.contains(SrcReg)) { // Copy from CPU64 Reg.
- if (DestReg == Mips::HI64)
- Opc = Mips::MTHI64, DestReg = 0;
- else if (DestReg == Mips::LO64)
- Opc = Mips::MTLO64, DestReg = 0;
- else if (Mips::FGR64RegClass.contains(DestReg))
- Opc = Mips::DMTC1;
- }
-
- assert(Opc && "Cannot copy registers");
-
- MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc));
-
- if (DestReg)
- MIB.addReg(DestReg, RegState::Define);
-
- if (ZeroReg)
- MIB.addReg(ZeroReg);
-
- if (SrcReg)
- MIB.addReg(SrcReg, getKillRegState(KillSrc));
-}
-
-static MachineMemOperand* GetMemOperand(MachineBasicBlock &MBB, int FI,
- unsigned Flag) {
+MachineMemOperand *MipsInstrInfo::GetMemOperand(MachineBasicBlock &MBB, int FI,
+ unsigned Flag) const {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
@@ -185,130 +61,6 @@ static MachineMemOperand* GetMemOperand(MachineBasicBlock &MBB, int FI,
MFI.getObjectSize(FI), Align);
}
-void MipsInstrInfo::
-storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned SrcReg, bool isKill, int FI,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
- MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore);
-
- unsigned Opc = 0;
-
- if (Mips::CPURegsRegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::SW_P8 : Mips::SW;
- else if (Mips::CPU64RegsRegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::SD_P8 : Mips::SD;
- else if (Mips::FGR32RegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::SWC1_P8 : Mips::SWC1;
- else if (Mips::AFGR64RegClass.hasSubClassEq(RC))
- Opc = Mips::SDC1;
- else if (Mips::FGR64RegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::SDC164_P8 : Mips::SDC164;
-
- assert(Opc && "Register class not handled!");
- BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
-}
-
-void MipsInstrInfo::
-loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned DestReg, int FI,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const
-{
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
- MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad);
- unsigned Opc = 0;
-
- if (Mips::CPURegsRegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::LW_P8 : Mips::LW;
- else if (Mips::CPU64RegsRegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::LD_P8 : Mips::LD;
- else if (Mips::FGR32RegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::LWC1_P8 : Mips::LWC1;
- else if (Mips::AFGR64RegClass.hasSubClassEq(RC))
- Opc = Mips::LDC1;
- else if (Mips::FGR64RegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::LDC164_P8 : Mips::LDC164;
-
- assert(Opc && "Register class not handled!");
- BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0)
- .addMemOperand(MMO);
-}
-
-void MipsInstrInfo::ExpandRetRA(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned Opc) const {
- BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(Opc))
- .addReg(Mips::RA);
-}
-
-void MipsInstrInfo::ExpandRetRA16(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned Opc) const {
- BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(Opc));
-}
-
-void MipsInstrInfo::ExpandExtractElementF64(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const TargetInstrInfo *TII = TM.getInstrInfo();
- unsigned DstReg = I->getOperand(0).getReg();
- unsigned SrcReg = I->getOperand(1).getReg();
- unsigned N = I->getOperand(2).getImm();
- const MCInstrDesc& Mfc1Tdd = TII->get(Mips::MFC1);
- DebugLoc dl = I->getDebugLoc();
-
- assert(N < 2 && "Invalid immediate");
- unsigned SubIdx = N ? Mips::sub_fpodd : Mips::sub_fpeven;
- unsigned SubReg = TM.getRegisterInfo()->getSubReg(SrcReg, SubIdx);
-
- BuildMI(MBB, I, dl, Mfc1Tdd, DstReg).addReg(SubReg);
-}
-
-void MipsInstrInfo::ExpandBuildPairF64(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const TargetInstrInfo *TII = TM.getInstrInfo();
- unsigned DstReg = I->getOperand(0).getReg();
- unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg();
- const MCInstrDesc& Mtc1Tdd = TII->get(Mips::MTC1);
- DebugLoc dl = I->getDebugLoc();
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
-
- // mtc1 Lo, $fp
- // mtc1 Hi, $fp + 1
- BuildMI(MBB, I, dl, Mtc1Tdd, TRI->getSubReg(DstReg, Mips::sub_fpeven))
- .addReg(LoReg);
- BuildMI(MBB, I, dl, Mtc1Tdd, TRI->getSubReg(DstReg, Mips::sub_fpodd))
- .addReg(HiReg);
-}
-
-bool MipsInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
- MachineBasicBlock &MBB = *MI->getParent();
-
- switch(MI->getDesc().getOpcode()) {
- default:
- return false;
- case Mips::RetRA:
- ExpandRetRA(MBB, MI, Mips::RET);
- break;
- case Mips::RetRA16:
- ExpandRetRA16(MBB, MI, Mips::JrRa16);
- break;
- case Mips::BuildPairF64:
- ExpandBuildPairF64(MBB, MI);
- break;
- case Mips::ExtractElementF64:
- ExpandExtractElementF64(MBB, MI);
- break;
- }
-
- MBB.erase(MI);
- return true;
-}
-
MachineInstr*
MipsInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
uint64_t Offset, const MDNode *MDPtr,
@@ -322,42 +74,9 @@ MipsInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
// Branch Analysis
//===----------------------------------------------------------------------===//
-static unsigned GetAnalyzableBrOpc(unsigned Opc) {
- return (Opc == Mips::BEQ || Opc == Mips::BNE || Opc == Mips::BGTZ ||
- Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ ||
- Opc == Mips::BEQ64 || Opc == Mips::BNE64 || Opc == Mips::BGTZ64 ||
- Opc == Mips::BGEZ64 || Opc == Mips::BLTZ64 || Opc == Mips::BLEZ64 ||
- Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::B ||
- Opc == Mips::J) ?
- Opc : 0;
-}
-
-/// GetOppositeBranchOpc - Return the inverse of the specified
-/// opcode, e.g. turning BEQ to BNE.
-unsigned Mips::GetOppositeBranchOpc(unsigned Opc)
-{
- switch (Opc) {
- default: llvm_unreachable("Illegal opcode!");
- case Mips::BEQ: return Mips::BNE;
- case Mips::BNE: return Mips::BEQ;
- case Mips::BGTZ: return Mips::BLEZ;
- case Mips::BGEZ: return Mips::BLTZ;
- case Mips::BLTZ: return Mips::BGEZ;
- case Mips::BLEZ: return Mips::BGTZ;
- case Mips::BEQ64: return Mips::BNE64;
- case Mips::BNE64: return Mips::BEQ64;
- case Mips::BGTZ64: return Mips::BLEZ64;
- case Mips::BGEZ64: return Mips::BLTZ64;
- case Mips::BLTZ64: return Mips::BGEZ64;
- case Mips::BLEZ64: return Mips::BGTZ64;
- case Mips::BC1T: return Mips::BC1F;
- case Mips::BC1F: return Mips::BC1T;
- }
-}
-
-static void AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc,
- MachineBasicBlock *&BB,
- SmallVectorImpl<MachineOperand> &Cond) {
+void MipsInstrInfo::AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc,
+ MachineBasicBlock *&BB,
+ SmallVectorImpl<MachineOperand> &Cond) const {
assert(GetAnalyzableBrOpc(Opc) && "Not an analyzable branch");
int NumOp = Inst->getNumExplicitOperands();
@@ -527,7 +246,7 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
{
assert( (Cond.size() && Cond.size() <= 3) &&
"Invalid Mips branch condition!");
- Cond[0].setImm(Mips::GetOppositeBranchOpc(Cond[0].getImm()));
+ Cond[0].setImm(GetOppositeBranchOpc(Cond[0].getImm()));
return false;
}
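
GetOppositeBranchOpc is now a virtual hook (see the header diff below); a sketch of the standard-encoding override, presumably carrying over the mapping from the free function deleted above:

    unsigned MipsSEInstrInfo::GetOppositeBranchOpc(unsigned Opc) const {
      switch (Opc) {
      default:         llvm_unreachable("Illegal opcode!");
      case Mips::BEQ:  return Mips::BNE;
      case Mips::BNE:  return Mips::BEQ;
      case Mips::BGTZ: return Mips::BLEZ;
      case Mips::BGEZ: return Mips::BLTZ;
      case Mips::BLTZ: return Mips::BGEZ;
      case Mips::BLEZ: return Mips::BGTZ;
      case Mips::BC1T: return Mips::BC1F;
      case Mips::BC1F: return Mips::BC1T;
      // 64-bit variants pair up the same way (BEQ64 <-> BNE64, etc.).
      }
    }
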
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index 358f817..7d56259 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -26,99 +26,69 @@
namespace llvm {
class MipsInstrInfo : public MipsGenInstrInfo {
+protected:
MipsTargetMachine &TM;
- bool IsN64; bool InMips16Mode;
- const MipsRegisterInfo RI;
unsigned UncondBrOpc;
+
public:
- explicit MipsInstrInfo(MipsTargetMachine &TM);
+ explicit MipsInstrInfo(MipsTargetMachine &TM, unsigned UncondBrOpc);
- /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
- /// such, whenever a client has an instance of instruction info, it should
- /// always be able to get register info as well (through this method).
- ///
- virtual const MipsRegisterInfo &getRegisterInfo() const;
-
- /// isLoadFromStackSlot - If the specified machine instruction is a direct
- /// load from a stack slot, return the virtual or physical register number of
- /// the destination along with the FrameIndex of the loaded stack slot. If
- /// not, return 0. This predicate must return 0 if the instruction has
- /// any side effects other than loading from the stack slot.
- virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
-
- /// isStoreToStackSlot - If the specified machine instruction is a direct
- /// store to a stack slot, return the virtual or physical register number of
- /// the source reg along with the FrameIndex of the loaded stack slot. If
- /// not, return 0. This predicate must return 0 if the instruction has
- /// any side effects other than storing to the stack slot.
- virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
+ static const MipsInstrInfo *create(MipsTargetMachine &TM);
/// Branch Analysis
virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const;
- virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
-
-private:
- void ExpandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned Opc) const;
- void ExpandRetRA16(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned Opc) const;
- void BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB, DebugLoc DL,
- const SmallVectorImpl<MachineOperand>& Cond) const;
- void ExpandExtractElementF64(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
- void ExpandBuildPairF64(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
-public:
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond,
DebugLoc DL) const;
- virtual void copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const;
- virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
-
- virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
-
- virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+
+ virtual
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
virtual MachineInstr* emitFrameIndexDebugValue(MachineFunction &MF,
int FrameIx, uint64_t Offset,
const MDNode *MDPtr,
DebugLoc DL) const;
- virtual
- bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
-
/// Insert nop instruction when hazard condition is found
virtual void insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const;
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const MipsRegisterInfo &getRegisterInfo() const = 0;
+
+ virtual unsigned GetOppositeBranchOpc(unsigned Opc) const = 0;
+
/// Return the number of bytes of code the specified instruction may be.
unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
+
+protected:
+ bool isZeroImm(const MachineOperand &op) const;
+
+ MachineMemOperand *GetMemOperand(MachineBasicBlock &MBB, int FI,
+ unsigned Flag) const;
+
+private:
+ virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const = 0;
+
+ void AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc,
+ MachineBasicBlock *&BB,
+ SmallVectorImpl<MachineOperand> &Cond) const;
+
+ void BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB, DebugLoc DL,
+ const SmallVectorImpl<MachineOperand>& Cond) const;
};
namespace Mips {
- /// GetOppositeBranchOpc - Return the inverse of the specified
- /// opcode, e.g. turning BEQ to BNE.
- unsigned GetOppositeBranchOpc(unsigned Opc);
-
/// Emit a series of instructions to load an immediate. All instructions
/// except for the last one are emitted. The function returns the number of
/// MachineInstrs generated. The opcode-immediate pair of the last
@@ -130,6 +100,10 @@ namespace Mips {
MipsAnalyzeImmediate::Inst *LastInst);
}
+/// Create MipsInstrInfo objects.
+const MipsInstrInfo *createMips16InstrInfo(MipsTargetMachine &TM);
+const MipsInstrInfo *createMipsSEInstrInfo(MipsTargetMachine &TM);
+
}
#endif
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index f1aada4..fd952ef 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -208,17 +208,24 @@ def uimm16 : Operand<i32> {
let PrintMethod = "printUnsignedImm";
}
+def MipsMemAsmOperand : AsmOperandClass {
+ let Name = "Mem";
+ let ParserMethod = "parseMemOperand";
+}
+
// Address operand
def mem : Operand<i32> {
let PrintMethod = "printMemOperand";
let MIOperandInfo = (ops CPURegs, simm16);
let EncoderMethod = "getMemEncoding";
+ let ParserMatchClass = MipsMemAsmOperand;
}
def mem64 : Operand<i64> {
let PrintMethod = "printMemOperand";
let MIOperandInfo = (ops CPU64Regs, simm16_64);
let EncoderMethod = "getMemEncoding";
+ let ParserMatchClass = MipsMemAsmOperand;
}
def mem_ea : Operand<i32> {
@@ -722,9 +729,11 @@ class MoveToLOHI<bits<6> func, string instr_asm, RegisterClass RC,
let neverHasSideEffects = 1;
}
-class EffectiveAddress<string instr_asm, RegisterClass RC, Operand Mem> :
- FMem<0x09, (outs RC:$rt), (ins Mem:$addr),
- instr_asm, [(set RC:$rt, addr:$addr)], IIAlu>;
+class EffectiveAddress<bits<6> opc, string instr_asm, RegisterClass RC, Operand Mem> :
+ FMem<opc, (outs RC:$rt), (ins Mem:$addr),
+ instr_asm, [(set RC:$rt, addr:$addr)], IIAlu> {
+ let isCodeGenOnly = 1;
+}
// Count Leading Ones/Zeros in Word
class CountLeading0<bits<6> func, string instr_asm, RegisterClass RC>:
@@ -803,9 +812,9 @@ class InsBase<bits<6> _funct, string instr_asm, RegisterClass RC>:
// Atomic instructions with 2 source operands (ATOMIC_SWAP & ATOMIC_LOAD_*).
class Atomic2Ops<PatFrag Op, string Opstr, RegisterClass DRC,
RegisterClass PRC> :
- MipsPseudo<(outs DRC:$dst), (ins PRC:$ptr, DRC:$incr),
- !strconcat("atomic_", Opstr, "\t$dst, $ptr, $incr"),
- [(set DRC:$dst, (Op PRC:$ptr, DRC:$incr))]>;
+ PseudoSE<(outs DRC:$dst), (ins PRC:$ptr, DRC:$incr),
+ !strconcat("atomic_", Opstr, "\t$dst, $ptr, $incr"),
+ [(set DRC:$dst, (Op PRC:$ptr, DRC:$incr))]>;
multiclass Atomic2Ops32<PatFrag Op, string Opstr> {
def #NAME# : Atomic2Ops<Op, Opstr, CPURegs, CPURegs>,
@@ -819,9 +828,9 @@ multiclass Atomic2Ops32<PatFrag Op, string Opstr> {
// Atomic Compare & Swap.
class AtomicCmpSwap<PatFrag Op, string Width, RegisterClass DRC,
RegisterClass PRC> :
- MipsPseudo<(outs DRC:$dst), (ins PRC:$ptr, DRC:$cmp, DRC:$swap),
- !strconcat("atomic_cmp_swap_", Width, "\t$dst, $ptr, $cmp, $swap"),
- [(set DRC:$dst, (Op PRC:$ptr, DRC:$cmp, DRC:$swap))]>;
+ PseudoSE<(outs DRC:$dst), (ins PRC:$ptr, DRC:$cmp, DRC:$swap),
+ !strconcat("atomic_cmp_swap_", Width, "\t$dst, $ptr, $cmp, $swap"),
+ [(set DRC:$dst, (Op PRC:$ptr, DRC:$cmp, DRC:$swap))]>;
multiclass AtomicCmpSwap32<PatFrag Op, string Width> {
def #NAME# : AtomicCmpSwap<Op, Width, CPURegs, CPURegs>,
@@ -851,14 +860,13 @@ class SCBase<bits<6> Opc, string opstring, RegisterClass RC, Operand Mem> :
// Return RA.
let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, hasCtrlDep=1 in
-def RetRA : MipsPseudo<(outs), (ins), "", [(MipsRet)]>;
+def RetRA : PseudoSE<(outs), (ins), "", [(MipsRet)]>;
-// As stack alignment is always done with addiu, we need a 16-bit immediate
-let Defs = [SP], Uses = [SP] in {
-def ADJCALLSTACKDOWN : MipsPseudo<(outs), (ins uimm16:$amt),
+let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
+def ADJCALLSTACKDOWN : MipsPseudo<(outs), (ins i32imm:$amt),
"!ADJCALLSTACKDOWN $amt",
[(callseq_start timm:$amt)]>;
-def ADJCALLSTACKUP : MipsPseudo<(outs), (ins uimm16:$amt1, uimm16:$amt2),
+def ADJCALLSTACKUP : MipsPseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
"!ADJCALLSTACKUP $amt1",
[(callseq_end timm:$amt1, timm:$amt2)]>;
}
@@ -868,8 +876,8 @@ def ADJCALLSTACKUP : MipsPseudo<(outs), (ins uimm16:$amt1, uimm16:$amt2),
// are used, we have the same behavior, but also get a bunch of warnings
// from the assembler.
let neverHasSideEffects = 1 in
-def CPRESTORE : MipsPseudo<(outs), (ins i32imm:$loc, CPURegs:$gp),
- ".cprestore\t$loc", []>;
+def CPRESTORE : PseudoSE<(outs), (ins i32imm:$loc, CPURegs:$gp),
+ ".cprestore\t$loc", []>;
let usesCustomInserter = 1 in {
defm ATOMIC_LOAD_ADD_I8 : Atomic2Ops32<atomic_load_add_8, "load_add_8">;
@@ -969,8 +977,8 @@ defm SWL : StoreLeftRightM32<0x2a, "swl", MipsSWL>;
defm SWR : StoreLeftRightM32<0x2e, "swr", MipsSWR>;
let hasSideEffects = 1 in
-def SYNC : MipsInst<(outs), (ins i32imm:$stype), "sync $stype",
- [(MipsSync imm:$stype)], NoItinerary, FrmOther>
+def SYNC : InstSE<(outs), (ins i32imm:$stype), "sync $stype",
+ [(MipsSync imm:$stype)], NoItinerary, FrmOther>
{
bits<5> stype;
let Opcode = 0;
@@ -1046,17 +1054,13 @@ let addr=0 in
// instructions. The same not happens for stack address copies, so an
// add op with mem ComplexPattern is used and the stack address copy
// can be matched. It's similar to Sparc LEA_ADDRi
-def LEA_ADDiu : EffectiveAddress<"addiu\t$rt, $addr", CPURegs, mem_ea> {
- let isCodeGenOnly = 1;
-}
+def LEA_ADDiu : EffectiveAddress<0x09, "addiu\t$rt, $addr", CPURegs, mem_ea>;
// DynAlloc node points to dynamically allocated stack space.
// $sp is added to the list of implicitly used registers to prevent dead code
// elimination from removing instructions that modify $sp.
let Uses = [SP] in
-def DynAlloc : EffectiveAddress<"addiu\t$rt, $addr", CPURegs, mem_ea> {
- let isCodeGenOnly = 1;
-}
+def DynAlloc : EffectiveAddress<0x09, "addiu\t$rt, $addr", CPURegs, mem_ea>;
// MADD*/MSUB*
def MADD : MArithR<0, "madd", MipsMAdd, 1>;
diff --git a/lib/Target/Mips/MipsJITInfo.cpp b/lib/Target/Mips/MipsJITInfo.cpp
index 150bdbb..052046a 100644
--- a/lib/Target/Mips/MipsJITInfo.cpp
+++ b/lib/Target/Mips/MipsJITInfo.cpp
@@ -27,7 +27,52 @@ using namespace llvm;
void MipsJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
- report_fatal_error("MipsJITInfo::replaceMachineCodeForFunction");
+ unsigned NewAddr = (intptr_t)New;
+ unsigned OldAddr = (intptr_t)Old;
+ const unsigned NopInstr = 0x0;
+
+ // If the functions are in the same memory segment, insert PC-region branch.
+ if ((NewAddr & 0xF0000000) == ((OldAddr + 4) & 0xF0000000)) {
+ unsigned *OldInstruction = (unsigned *)Old;
+ *OldInstruction = 0x08000000;
+ unsigned JTargetAddr = NewAddr & 0x0FFFFFFC;
+
+ JTargetAddr >>= 2;
+ *OldInstruction |= JTargetAddr;
+
+ // Insert a NOP.
+ OldInstruction++;
+ *OldInstruction = NopInstr;
+
+ sys::Memory::InvalidateInstructionCache(Old, 2 * 4);
+ } else {
+ // We need to clear hint bits from the instruction, in case it is 'jr ra'.
+ const unsigned HintMask = 0xFFFFF83F, ReturnSequence = 0x03e00008;
+ unsigned* CurrentInstr = (unsigned*)Old;
+ unsigned CurrInstrHintClear = (*CurrentInstr) & HintMask;
+ unsigned* NextInstr = CurrentInstr + 1;
+ unsigned NextInstrHintClear = (*NextInstr) & HintMask;
+
+    // Do an absolute jump if there are two or more instructions before the
+    // return from the old function.
+ if ((CurrInstrHintClear != ReturnSequence) &&
+ (NextInstrHintClear != ReturnSequence)) {
+ const unsigned LuiT0Instr = 0x3c080000, AddiuT0Instr = 0x25080000;
+ const unsigned JrT0Instr = 0x01000008;
+ // lui t0, high 16 bit of the NewAddr
+ (*(CurrentInstr++)) = LuiT0Instr | ((NewAddr & 0xffff0000) >> 16);
+ // addiu t0, t0, low 16 bit of the NewAddr
+ (*(CurrentInstr++)) = AddiuT0Instr | (NewAddr & 0x0000ffff);
+ // jr t0
+ (*(CurrentInstr++)) = JrT0Instr;
+ (*CurrentInstr) = NopInstr;
+
+ sys::Memory::InvalidateInstructionCache(Old, 4 * 4);
+ } else {
+ // Unsupported case
+ report_fatal_error("MipsJITInfo::replaceMachineCodeForFunction");
+ }
+ }
}
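
For reference, the magic constants above decode as real MIPS encodings: 0x08000000 is 'j' (opcode 000010) with a 26-bit word-index target field, 0x3c080000 is 'lui $t0, 0', 0x25080000 is 'addiu $t0, $t0, 0', and 0x01000008 is 'jr $t0'. A standalone sketch of the near-jump encoding:

    #include <cstdint>

    // Encode 'j NewAddr'. Only valid when NewAddr lies in the same 256 MB
    // PC region as the instruction following the patched site.
    uint32_t encodeJ(uint32_t NewAddr) {
      const uint32_t JOpcode = 0x08000000;            // 000010 << 26
      return JOpcode | ((NewAddr & 0x0FFFFFFC) >> 2); // 26-bit word index
    }
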
/// JITCompilerFunction - This contains the address of the JIT function used to
diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp
index 70ecbc1..f78203f 100644
--- a/lib/Target/Mips/MipsLongBranch.cpp
+++ b/lib/Target/Mips/MipsLongBranch.cpp
@@ -207,7 +207,7 @@ int64_t MipsLongBranch::computeOffset(const MachineInstr *Br) {
// MachineBasicBlock operand MBBOpnd.
void MipsLongBranch::replaceBranch(MachineBasicBlock &MBB, Iter Br,
DebugLoc DL, MachineBasicBlock *MBBOpnd) {
- unsigned NewOpc = Mips::GetOppositeBranchOpc(Br->getOpcode());
+ unsigned NewOpc = TII->GetOppositeBranchOpc(Br->getOpcode());
const MCInstrDesc &NewDesc = TII->get(NewOpc);
MachineInstrBuilder MIB = BuildMI(MBB, Br, DL, NewDesc);
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index b2232c6..df3c4c0 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -48,8 +48,6 @@ class MipsFunctionInfo : public MachineFunctionInfo {
// OutArgFIRange: Range of indices of all frame objects created during call to
// LowerCall except for the frame object for restoring $gp.
std::pair<int, int> InArgFIRange, OutArgFIRange;
- int GlobalRegFI;
- mutable int DynAllocFI; // Frame index of dynamically allocated stack area.
unsigned MaxCallFrameSize;
bool EmitNOAT;
@@ -58,8 +56,7 @@ public:
MipsFunctionInfo(MachineFunction& MF)
: MF(MF), SRetReturnReg(0), GlobalBaseReg(0),
VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)),
- OutArgFIRange(std::make_pair(-1, 0)), GlobalRegFI(0), DynAllocFI(0),
- MaxCallFrameSize(0), EmitNOAT(false)
+ OutArgFIRange(std::make_pair(-1, 0)), MaxCallFrameSize(0), EmitNOAT(false)
{}
bool isInArgFI(int FI) const {
@@ -77,34 +74,6 @@ public:
OutArgFIRange.second = LastFI;
}
- bool isGlobalRegFI(int FI) const {
- return GlobalRegFI && (FI == GlobalRegFI);
- }
-
- int getGlobalRegFI() const {
- return GlobalRegFI;
- }
-
- int initGlobalRegFI() {
- const TargetMachine &TM = MF.getTarget();
- unsigned RegSize = TM.getSubtarget<MipsSubtarget>().isABI_N64() ? 8 : 4;
- int64_t StackAlignment = TM.getFrameLowering()->getStackAlignment();
- uint64_t Offset = RoundUpToAlignment(MaxCallFrameSize, StackAlignment);
-
- GlobalRegFI = MF.getFrameInfo()->CreateFixedObject(RegSize, Offset, true);
- return GlobalRegFI;
- }
-
- // The first call to this function creates a frame object for dynamically
- // allocated stack area.
- int getDynAllocFI() const {
- if (!DynAllocFI)
- DynAllocFI = MF.getFrameInfo()->CreateFixedObject(4, 0, true);
-
- return DynAllocFI;
- }
- bool isDynAllocFI(int FI) const { return DynAllocFI && DynAllocFI == FI; }
-
unsigned getSRetReturnReg() const { return SRetReturnReg; }
void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index a3ce236..ae6ae3a 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -144,15 +144,6 @@ MipsRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
return true;
}
-// This function eliminate ADJCALLSTACKDOWN,
-// ADJCALLSTACKUP pseudo instructions
-void MipsRegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
- MBB.erase(I);
-}
-
// FrameIndex represents objects inside an abstract stack.
// We must replace FrameIndex with a stack/frame pointer
// direct reference.
@@ -161,8 +152,6 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
RegScavenger *RS) const {
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
unsigned i = 0;
while (!MI.getOperand(i).isFI()) {
@@ -182,68 +171,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
<< "spOffset : " << spOffset << "\n"
<< "stackSize : " << stackSize << "\n");
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
- int MinCSFI = 0;
- int MaxCSFI = -1;
-
- if (CSI.size()) {
- MinCSFI = CSI[0].getFrameIdx();
- MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
- }
-
- // The following stack frame objects are always referenced relative to $sp:
- // 1. Outgoing arguments.
- // 2. Pointer to dynamically allocated stack space.
- // 3. Locations for callee-saved registers.
- // Everything else is referenced relative to whatever register
- // getFrameRegister() returns.
- unsigned FrameReg;
-
- if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isDynAllocFI(FrameIndex) ||
- (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI))
- FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP;
- else
- FrameReg = getFrameRegister(MF);
-
- // Calculate final offset.
- // - There is no need to change the offset if the frame object is one of the
- // following: an outgoing argument, pointer to a dynamically allocated
- // stack space or a $gp restore location,
- // - If the frame object is any of the following, its offset must be adjusted
- // by adding the size of the stack:
- // incoming argument, callee-saved register location or local variable.
- int64_t Offset;
-
- if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isDynAllocFI(FrameIndex) ||
- MipsFI->isGlobalRegFI(FrameIndex))
- Offset = spOffset;
- else
- Offset = spOffset + (int64_t)stackSize;
-
- Offset += MI.getOperand(i+1).getImm();
-
- DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
-
- // If MI is not a debug value, make sure Offset fits in the 16-bit immediate
- // field.
- if (!MI.isDebugValue() && !isInt<16>(Offset)) {
- MachineBasicBlock &MBB = *MI.getParent();
- DebugLoc DL = II->getDebugLoc();
- unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu;
- unsigned ATReg = Subtarget.isABI_N64() ? Mips::AT_64 : Mips::AT;
- MipsAnalyzeImmediate::Inst LastInst(0, 0);
-
- MipsFI->setEmitNOAT();
- Mips::loadImmediate(Offset, Subtarget.isABI_N64(), TII, MBB, II, DL, true,
- &LastInst);
- BuildMI(MBB, II, DL, TII.get(ADDu), ATReg).addReg(FrameReg).addReg(ATReg);
-
- FrameReg = ATReg;
- Offset = SignExtend64<16>(LastInst.ImmOpnd);
- }
-
- MI.getOperand(i).ChangeToRegister(FrameReg, false);
- MI.getOperand(i+1).ChangeToImmediate(Offset);
+ eliminateFI(MI, i, FrameIndex, stackSize, spOffset);
}
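
The deleted body now lives behind a subtarget-specific hook; a simplified sketch of the override's shape (hypothetical code — the real implementation belongs to the new SE/16 register-info files and mirrors the logic removed above, choosing $sp or the frame register before folding the offset):

    void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
                                         unsigned OpNo, int FrameIndex,
                                         uint64_t StackSize,
                                         int64_t SPOffset) const {
      // FrameIndex selects between the SP- and FP-relative cases (omitted).
      MachineInstr &MI = *II;
      MachineFunction &MF = *MI.getParent()->getParent();
      int64_t Offset = SPOffset + (int64_t)StackSize +
                       MI.getOperand(OpNo + 1).getImm();
      MI.getOperand(OpNo).ChangeToRegister(getFrameRegister(MF), false);
      MI.getOperand(OpNo + 1).ChangeToImmediate(Offset);
    }
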
unsigned MipsRegisterInfo::
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index f320bae..9a05e94 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -25,10 +25,12 @@ class MipsSubtarget;
class TargetInstrInfo;
class Type;
-struct MipsRegisterInfo : public MipsGenRegisterInfo {
+class MipsRegisterInfo : public MipsGenRegisterInfo {
+protected:
const MipsSubtarget &Subtarget;
const TargetInstrInfo &TII;
+public:
MipsRegisterInfo(const MipsSubtarget &Subtarget, const TargetInstrInfo &tii);
/// getRegisterNumbering - Given the enum value for some register, e.g.
@@ -51,10 +53,6 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo {
virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
/// Stack Frame Processing Methods
void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, RegScavenger *RS = NULL) const;
@@ -67,6 +65,11 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo {
/// Exception handling queries.
unsigned getEHExceptionRegister() const;
unsigned getEHHandlerRegister() const;
+
+private:
+ virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo,
+ int FrameIndex, uint64_t StackSize,
+ int64_t SPOffset) const = 0;
};
} // end namespace llvm
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index b255e42..4015add 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -239,6 +239,9 @@ let Namespace = "Mips" in {
// fcc0 register
def FCC0 : Register<"fcc0">;
+ // PC register
+ def PC : Register<"pc">;
+
// Hardware register $29
def HWR29 : Register<"29">;
def HWR29_64 : Register<"29">;
diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp
new file mode 100644
index 0000000..1c59847
--- /dev/null
+++ b/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -0,0 +1,210 @@
+//===-- MipsSEFrameLowering.cpp - Mips32/64 Frame Information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips32/64 implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsSEFrameLowering.h"
+#include "MipsAnalyzeImmediate.h"
+#include "MipsSEInstrInfo.h"
+#include "MipsMachineFunction.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MipsRegisterInfo *RegInfo =
+ static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const MipsSEInstrInfo &TII =
+ *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo());
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
+ unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
+ unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
+ unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
+
+ // First, compute final stack size.
+ uint64_t StackSize = MFI->getStackSize();
+
+ // No need to allocate space on the stack.
+ if (StackSize == 0 && !MFI->adjustsStack()) return;
+
+ MachineModuleInfo &MMI = MF.getMMI();
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+ MachineLocation DstML, SrcML;
+
+ // Adjust stack.
+ TII.adjustStackPtr(SP, -StackSize, MBB, MBBI);
+
+ // emit ".cfi_def_cfa_offset StackSize"
+ MCSymbol *AdjustSPLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl,
+ TII.get(TargetOpcode::PROLOG_LABEL)).addSym(AdjustSPLabel);
+ DstML = MachineLocation(MachineLocation::VirtualFP);
+ SrcML = MachineLocation(MachineLocation::VirtualFP, -StackSize);
+ Moves.push_back(MachineMove(AdjustSPLabel, DstML, SrcML));
+
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+
+ if (CSI.size()) {
+ // Find the instruction past the last instruction that saves a callee-saved
+ // register to the stack.
+ for (unsigned i = 0; i < CSI.size(); ++i)
+ ++MBBI;
+
+ // Iterate over list of callee-saved registers and emit .cfi_offset
+ // directives.
+ MCSymbol *CSLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl,
+ TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel);
+
+ for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
+ E = CSI.end(); I != E; ++I) {
+ int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
+ unsigned Reg = I->getReg();
+
+ // If Reg is a double precision register, emit two cfa_offsets,
+ // one for each of the paired single precision registers.
+ if (Mips::AFGR64RegClass.contains(Reg)) {
+ MachineLocation DstML0(MachineLocation::VirtualFP, Offset);
+ MachineLocation DstML1(MachineLocation::VirtualFP, Offset + 4);
+ MachineLocation SrcML0(RegInfo->getSubReg(Reg, Mips::sub_fpeven));
+ MachineLocation SrcML1(RegInfo->getSubReg(Reg, Mips::sub_fpodd));
+
+ if (!STI.isLittle())
+ std::swap(SrcML0, SrcML1);
+
+ Moves.push_back(MachineMove(CSLabel, DstML0, SrcML0));
+ Moves.push_back(MachineMove(CSLabel, DstML1, SrcML1));
+ } else {
+ // Reg is either in CPURegs or FGR32.
+ DstML = MachineLocation(MachineLocation::VirtualFP, Offset);
+ SrcML = MachineLocation(Reg);
+ Moves.push_back(MachineMove(CSLabel, DstML, SrcML));
+ }
+ }
+ }
+
+  // If the frame pointer is enabled, set it to point to the stack pointer.
+ if (hasFP(MF)) {
+ // Insert instruction "move $fp, $sp" at this location.
+ BuildMI(MBB, MBBI, dl, TII.get(ADDu), FP).addReg(SP).addReg(ZERO);
+
+ // emit ".cfi_def_cfa_register $fp"
+ MCSymbol *SetFPLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl,
+ TII.get(TargetOpcode::PROLOG_LABEL)).addSym(SetFPLabel);
+ DstML = MachineLocation(FP);
+ SrcML = MachineLocation(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(SetFPLabel, DstML, SrcML));
+ }
+}
+
+void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MipsSEInstrInfo &TII =
+ *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo());
+ DebugLoc dl = MBBI->getDebugLoc();
+ unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
+ unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
+ unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
+ unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
+
+  // If the frame pointer is enabled, restore the stack pointer from it.
+ if (hasFP(MF)) {
+ // Find the first instruction that restores a callee-saved register.
+ MachineBasicBlock::iterator I = MBBI;
+
+ for (unsigned i = 0; i < MFI->getCalleeSavedInfo().size(); ++i)
+ --I;
+
+ // Insert instruction "move $sp, $fp" at this location.
+ BuildMI(MBB, I, dl, TII.get(ADDu), SP).addReg(FP).addReg(ZERO);
+ }
+
+  // Get the number of bytes allocated in the frame from FrameInfo.
+ uint64_t StackSize = MFI->getStackSize();
+
+ if (!StackSize)
+ return;
+
+ // Adjust stack.
+ TII.adjustStackPtr(SP, StackSize, MBB, MBBI);
+}
+
+bool MipsSEFrameLowering::
+spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ MachineFunction *MF = MBB.getParent();
+ MachineBasicBlock *EntryBlock = MF->begin();
+ const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ // Add the callee-saved register as live-in. Do not add if the register is
+ // RA and return address is taken, because it has already been added in
+ // method MipsTargetLowering::LowerRETURNADDR.
+ // It's killed at the spill, unless the register is RA and return address
+ // is taken.
+ unsigned Reg = CSI[i].getReg();
+ bool IsRAAndRetAddrIsTaken = (Reg == Mips::RA || Reg == Mips::RA_64)
+ && MF->getFrameInfo()->isReturnAddressTaken();
+ if (!IsRAAndRetAddrIsTaken)
+ EntryBlock->addLiveIn(Reg);
+
+ // Insert the spill to the stack frame.
+ bool IsKill = !IsRAAndRetAddrIsTaken;
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(*EntryBlock, MI, Reg, IsKill,
+ CSI[i].getFrameIdx(), RC, TRI);
+ }
+
+ return true;
+}
+
+bool
+MipsSEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Reserve call frame if the size of the maximum call frame fits into 16-bit
+ // immediate field and there are no variable sized objects on the stack.
+ return isInt<16>(MFI->getMaxCallFrameSize()) && !MFI->hasVarSizedObjects();
+}
+
+void MipsSEFrameLowering::
+processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
+
+ // Mark $fp as used if function has dedicated frame pointer.
+ if (hasFP(MF))
+ MRI.setPhysRegUsed(FP);
+}
+
+const MipsFrameLowering *
+llvm::createMipsSEFrameLowering(const MipsSubtarget &ST) {
+ return new MipsSEFrameLowering(ST);
+}
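A minimal, self-contained sketch of the reserved-call-frame test implemented by hasReservedCallFrame above; isInt16 is a local stand-in for LLVM's isInt<16>, and the two inputs are assumed to come from MachineFrameInfo:

#include <cstdint>
#include <iostream>

// Local stand-in for llvm::isInt<16> (assumed equivalent semantics).
static bool isInt16(int64_t X) { return X >= -32768 && X <= 32767; }

// Mirrors the predicate: reserve the call frame only when the maximum call
// frame size fits a signed 16-bit immediate and there are no variable-sized
// objects on the stack.
static bool hasReservedCallFrame(uint64_t MaxCallFrameSize,
                                 bool HasVarSizedObjects) {
  return isInt16((int64_t)MaxCallFrameSize) && !HasVarSizedObjects;
}

int main() {
  std::cout << hasReservedCallFrame(64, false) << "\n";    // 1: reserved
  std::cout << hasReservedCallFrame(40000, false) << "\n"; // 0: too large
}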
diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h
new file mode 100644
index 0000000..6481a0a
--- /dev/null
+++ b/lib/Target/Mips/MipsSEFrameLowering.h
@@ -0,0 +1,44 @@
+//===-- MipsSEFrameLowering.h - Mips32/64 frame lowering --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips32/64 declaration of the TargetFrameLowering
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSSE_FRAMEINFO_H
+#define MIPSSE_FRAMEINFO_H
+
+#include "MipsFrameLowering.h"
+
+namespace llvm {
+
+class MipsSEFrameLowering : public MipsFrameLowering {
+public:
+ explicit MipsSEFrameLowering(const MipsSubtarget &STI)
+ : MipsFrameLowering(STI) {}
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp
new file mode 100644
index 0000000..eeb1de3
--- /dev/null
+++ b/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -0,0 +1,320 @@
+//===-- MipsSEInstrInfo.cpp - Mips32/64 Instruction Information -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips32/64 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsSEInstrInfo.h"
+#include "MipsTargetMachine.h"
+#include "MipsMachineFunction.h"
+#include "InstPrinter/MipsInstPrinter.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/ADT/STLExtras.h"
+
+using namespace llvm;
+
+MipsSEInstrInfo::MipsSEInstrInfo(MipsTargetMachine &tm)
+ : MipsInstrInfo(tm,
+ tm.getRelocationModel() == Reloc::PIC_ ? Mips::B : Mips::J),
+ RI(*tm.getSubtargetImpl(), *this),
+ IsN64(tm.getSubtarget<MipsSubtarget>().isABI_N64()) {}
+
+const MipsRegisterInfo &MipsSEInstrInfo::getRegisterInfo() const {
+ return RI;
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned MipsSEInstrInfo::
+isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const
+{
+ unsigned Opc = MI->getOpcode();
+
+ if ((Opc == Mips::LW) || (Opc == Mips::LW_P8) || (Opc == Mips::LD) ||
+ (Opc == Mips::LD_P8) || (Opc == Mips::LWC1) || (Opc == Mips::LWC1_P8) ||
+ (Opc == Mips::LDC1) || (Opc == Mips::LDC164) ||
+ (Opc == Mips::LDC164_P8)) {
+ if ((MI->getOperand(1).isFI()) && // is a stack slot
+ (MI->getOperand(2).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(2)))) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+
+ return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the stored stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned MipsSEInstrInfo::
+isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const
+{
+ unsigned Opc = MI->getOpcode();
+
+ if ((Opc == Mips::SW) || (Opc == Mips::SW_P8) || (Opc == Mips::SD) ||
+ (Opc == Mips::SD_P8) || (Opc == Mips::SWC1) || (Opc == Mips::SWC1_P8) ||
+ (Opc == Mips::SDC1) || (Opc == Mips::SDC164) ||
+ (Opc == Mips::SDC164_P8)) {
+ if ((MI->getOperand(1).isFI()) && // is a stack slot
+ (MI->getOperand(2).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(2)))) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+ return 0;
+}
+
+void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ unsigned Opc = 0, ZeroReg = 0;
+
+ if (Mips::CPURegsRegClass.contains(DestReg)) { // Copy to CPU Reg.
+ if (Mips::CPURegsRegClass.contains(SrcReg))
+ Opc = Mips::ADDu, ZeroReg = Mips::ZERO;
+ else if (Mips::CCRRegClass.contains(SrcReg))
+ Opc = Mips::CFC1;
+ else if (Mips::FGR32RegClass.contains(SrcReg))
+ Opc = Mips::MFC1;
+ else if (SrcReg == Mips::HI)
+ Opc = Mips::MFHI, SrcReg = 0;
+ else if (SrcReg == Mips::LO)
+ Opc = Mips::MFLO, SrcReg = 0;
+ }
+ else if (Mips::CPURegsRegClass.contains(SrcReg)) { // Copy from CPU Reg.
+ if (Mips::CCRRegClass.contains(DestReg))
+ Opc = Mips::CTC1;
+ else if (Mips::FGR32RegClass.contains(DestReg))
+ Opc = Mips::MTC1;
+ else if (DestReg == Mips::HI)
+ Opc = Mips::MTHI, DestReg = 0;
+ else if (DestReg == Mips::LO)
+ Opc = Mips::MTLO, DestReg = 0;
+ }
+ else if (Mips::FGR32RegClass.contains(DestReg, SrcReg))
+ Opc = Mips::FMOV_S;
+ else if (Mips::AFGR64RegClass.contains(DestReg, SrcReg))
+ Opc = Mips::FMOV_D32;
+ else if (Mips::FGR64RegClass.contains(DestReg, SrcReg))
+ Opc = Mips::FMOV_D64;
+ else if (Mips::CCRRegClass.contains(DestReg, SrcReg))
+ Opc = Mips::MOVCCRToCCR;
+ else if (Mips::CPU64RegsRegClass.contains(DestReg)) { // Copy to CPU64 Reg.
+ if (Mips::CPU64RegsRegClass.contains(SrcReg))
+ Opc = Mips::DADDu, ZeroReg = Mips::ZERO_64;
+ else if (SrcReg == Mips::HI64)
+ Opc = Mips::MFHI64, SrcReg = 0;
+ else if (SrcReg == Mips::LO64)
+ Opc = Mips::MFLO64, SrcReg = 0;
+ else if (Mips::FGR64RegClass.contains(SrcReg))
+ Opc = Mips::DMFC1;
+ }
+ else if (Mips::CPU64RegsRegClass.contains(SrcReg)) { // Copy from CPU64 Reg.
+ if (DestReg == Mips::HI64)
+ Opc = Mips::MTHI64, DestReg = 0;
+ else if (DestReg == Mips::LO64)
+ Opc = Mips::MTLO64, DestReg = 0;
+ else if (Mips::FGR64RegClass.contains(DestReg))
+ Opc = Mips::DMTC1;
+ }
+
+ assert(Opc && "Cannot copy registers");
+
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc));
+
+ if (DestReg)
+ MIB.addReg(DestReg, RegState::Define);
+
+ if (ZeroReg)
+ MIB.addReg(ZeroReg);
+
+ if (SrcReg)
+ MIB.addReg(SrcReg, getKillRegState(KillSrc));
+}
+
+void MipsSEInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore);
+
+ unsigned Opc = 0;
+
+ if (Mips::CPURegsRegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::SW_P8 : Mips::SW;
+ else if (Mips::CPU64RegsRegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::SD_P8 : Mips::SD;
+ else if (Mips::FGR32RegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::SWC1_P8 : Mips::SWC1;
+ else if (Mips::AFGR64RegClass.hasSubClassEq(RC))
+ Opc = Mips::SDC1;
+ else if (Mips::FGR64RegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::SDC164_P8 : Mips::SDC164;
+
+ assert(Opc && "Register class not handled!");
+ BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+}
+
+void MipsSEInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const
+{
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad);
+ unsigned Opc = 0;
+
+ if (Mips::CPURegsRegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::LW_P8 : Mips::LW;
+ else if (Mips::CPU64RegsRegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::LD_P8 : Mips::LD;
+ else if (Mips::FGR32RegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::LWC1_P8 : Mips::LWC1;
+ else if (Mips::AFGR64RegClass.hasSubClassEq(RC))
+ Opc = Mips::LDC1;
+ else if (Mips::FGR64RegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::LDC164_P8 : Mips::LDC164;
+
+ assert(Opc && "Register class not handled!");
+ BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0)
+ .addMemOperand(MMO);
+}
+
+bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
+ MachineBasicBlock &MBB = *MI->getParent();
+
+ switch(MI->getDesc().getOpcode()) {
+ default:
+ return false;
+ case Mips::RetRA:
+ ExpandRetRA(MBB, MI, Mips::RET);
+ break;
+ case Mips::BuildPairF64:
+ ExpandBuildPairF64(MBB, MI);
+ break;
+ case Mips::ExtractElementF64:
+ ExpandExtractElementF64(MBB, MI);
+ break;
+ }
+
+ MBB.erase(MI);
+ return true;
+}
+
+/// GetOppositeBranchOpc - Return the inverse of the specified
+/// opcode, e.g. turning BEQ to BNE.
+unsigned MipsSEInstrInfo::GetOppositeBranchOpc(unsigned Opc) const {
+ switch (Opc) {
+ default: llvm_unreachable("Illegal opcode!");
+ case Mips::BEQ: return Mips::BNE;
+ case Mips::BNE: return Mips::BEQ;
+ case Mips::BGTZ: return Mips::BLEZ;
+ case Mips::BGEZ: return Mips::BLTZ;
+ case Mips::BLTZ: return Mips::BGEZ;
+ case Mips::BLEZ: return Mips::BGTZ;
+ case Mips::BEQ64: return Mips::BNE64;
+ case Mips::BNE64: return Mips::BEQ64;
+ case Mips::BGTZ64: return Mips::BLEZ64;
+ case Mips::BGEZ64: return Mips::BLTZ64;
+ case Mips::BLTZ64: return Mips::BGEZ64;
+ case Mips::BLEZ64: return Mips::BGTZ64;
+ case Mips::BC1T: return Mips::BC1F;
+ case Mips::BC1F: return Mips::BC1T;
+ }
+}
+
+/// Adjust SP by Amount bytes.
+void MipsSEInstrInfo::adjustStackPtr(unsigned SP, int64_t Amount,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const MipsSubtarget &STI = TM.getSubtarget<MipsSubtarget>();
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
+ unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu;
+
+  if (isInt<16>(Amount)) // addiu $sp, $sp, Amount
+    BuildMI(MBB, I, DL, get(ADDiu), SP).addReg(SP).addImm(Amount);
+  else { // Expand an immediate that doesn't fit into 16 bits.
+ unsigned ATReg = STI.isABI_N64() ? Mips::AT_64 : Mips::AT;
+
+ MBB.getParent()->getInfo<MipsFunctionInfo>()->setEmitNOAT();
+ Mips::loadImmediate(Amount, STI.isABI_N64(), *this, MBB, I, DL, false, 0);
+ BuildMI(MBB, I, DL, get(ADDu), SP).addReg(SP).addReg(ATReg);
+ }
+}
+
+unsigned MipsSEInstrInfo::GetAnalyzableBrOpc(unsigned Opc) const {
+ return (Opc == Mips::BEQ || Opc == Mips::BNE || Opc == Mips::BGTZ ||
+ Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ ||
+ Opc == Mips::BEQ64 || Opc == Mips::BNE64 || Opc == Mips::BGTZ64 ||
+ Opc == Mips::BGEZ64 || Opc == Mips::BLTZ64 || Opc == Mips::BLEZ64 ||
+ Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::B ||
+ Opc == Mips::J) ?
+ Opc : 0;
+}
+
+void MipsSEInstrInfo::ExpandRetRA(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned Opc) const {
+ BuildMI(MBB, I, I->getDebugLoc(), get(Opc)).addReg(Mips::RA);
+}
+
+void MipsSEInstrInfo::ExpandExtractElementF64(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ unsigned DstReg = I->getOperand(0).getReg();
+ unsigned SrcReg = I->getOperand(1).getReg();
+ unsigned N = I->getOperand(2).getImm();
+ const MCInstrDesc& Mfc1Tdd = get(Mips::MFC1);
+ DebugLoc dl = I->getDebugLoc();
+
+ assert(N < 2 && "Invalid immediate");
+ unsigned SubIdx = N ? Mips::sub_fpodd : Mips::sub_fpeven;
+ unsigned SubReg = getRegisterInfo().getSubReg(SrcReg, SubIdx);
+
+ BuildMI(MBB, I, dl, Mfc1Tdd, DstReg).addReg(SubReg);
+}
+
+void MipsSEInstrInfo::ExpandBuildPairF64(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ unsigned DstReg = I->getOperand(0).getReg();
+ unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg();
+ const MCInstrDesc& Mtc1Tdd = get(Mips::MTC1);
+ DebugLoc dl = I->getDebugLoc();
+ const TargetRegisterInfo &TRI = getRegisterInfo();
+
+ // mtc1 Lo, $fp
+ // mtc1 Hi, $fp + 1
+ BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_fpeven))
+ .addReg(LoReg);
+ BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_fpodd))
+ .addReg(HiReg);
+}
+
+const MipsInstrInfo *llvm::createMipsSEInstrInfo(MipsTargetMachine &TM) {
+ return new MipsSEInstrInfo(TM);
+}
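A hedged sketch of the two paths in adjustStackPtr above: a 16-bit-representable amount becomes a single ADDiu, while a larger amount is first materialized into $at and then added with ADDu. The real code uses Mips::loadImmediate (backed by MipsAnalyzeImmediate); the lui/ori pair below is an illustrative 32-bit expansion only:

#include <cstdint>
#include <iostream>

static bool isInt16(int64_t X) { return X >= -32768 && X <= 32767; }

// Sketch of MipsSEInstrInfo::adjustStackPtr's decision (32-bit case).
static void adjustStackPtr(int64_t Amount) {
  if (isInt16(Amount)) { // a single addiu covers 16-bit amounts
    std::cout << "addiu $sp, $sp, " << Amount << "\n";
  } else { // materialize the amount into $at first, then add it
    std::cout << "lui  $at, " << ((Amount >> 16) & 0xffff) << "\n";
    std::cout << "ori  $at, $at, " << (Amount & 0xffff) << "\n";
    std::cout << "addu $sp, $sp, $at\n";
  }
}

int main() {
  adjustStackPtr(-32);    // small frame: one instruction
  adjustStackPtr(-70000); // large frame: lui/ori + addu
}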
diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h
new file mode 100644
index 0000000..346e74d
--- /dev/null
+++ b/lib/Target/Mips/MipsSEInstrInfo.h
@@ -0,0 +1,86 @@
+//===-- MipsSEInstrInfo.h - Mips32/64 Instruction Information ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips32/64 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSSEINSTRUCTIONINFO_H
+#define MIPSSEINSTRUCTIONINFO_H
+
+#include "MipsInstrInfo.h"
+#include "MipsAnalyzeImmediate.h"
+#include "MipsSERegisterInfo.h"
+
+namespace llvm {
+
+class MipsSEInstrInfo : public MipsInstrInfo {
+ const MipsSERegisterInfo RI;
+ bool IsN64;
+
+public:
+ explicit MipsSEInstrInfo(MipsTargetMachine &TM);
+
+ virtual const MipsRegisterInfo &getRegisterInfo() const;
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+  /// the source reg along with the FrameIndex of the stored stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+
+ virtual unsigned GetOppositeBranchOpc(unsigned Opc) const;
+
+ /// Adjust SP by Amount bytes.
+ void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+private:
+ virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const;
+
+ void ExpandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned Opc) const;
+ void ExpandExtractElementF64(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+ void ExpandBuildPairF64(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp
new file mode 100644
index 0000000..043a1ef
--- /dev/null
+++ b/lib/Target/Mips/MipsSERegisterInfo.cpp
@@ -0,0 +1,138 @@
+//===-- MipsSERegisterInfo.cpp - MIPS32/64 Register Information -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MIPS32/64 implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsSERegisterInfo.h"
+#include "Mips.h"
+#include "MipsAnalyzeImmediate.h"
+#include "MipsSEInstrInfo.h"
+#include "MipsSubtarget.h"
+#include "MipsMachineFunction.h"
+#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/Type.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+
+using namespace llvm;
+
+MipsSERegisterInfo::MipsSERegisterInfo(const MipsSubtarget &ST,
+ const TargetInstrInfo &TII)
+ : MipsRegisterInfo(ST, TII) {}
+
+// This function eliminates the ADJCALLSTACKDOWN and ADJCALLSTACKUP pseudo
+// instructions.
+void MipsSERegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (!TFI->hasReservedCallFrame(MF)) {
+ int64_t Amount = I->getOperand(0).getImm();
+
+ if (I->getOpcode() == Mips::ADJCALLSTACKDOWN)
+ Amount = -Amount;
+
+ const MipsSEInstrInfo *II = static_cast<const MipsSEInstrInfo*>(&TII);
+ unsigned SP = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP;
+
+ II->adjustStackPtr(SP, Amount, MBB, I);
+ }
+
+ MBB.erase(I);
+}
+
+void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
+ unsigned OpNo, int FrameIndex,
+ uint64_t StackSize,
+ int64_t SPOffset) const {
+ MachineInstr &MI = *II;
+ MachineFunction &MF = *MI.getParent()->getParent();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ int MinCSFI = 0;
+ int MaxCSFI = -1;
+
+ if (CSI.size()) {
+ MinCSFI = CSI[0].getFrameIdx();
+ MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
+ }
+
+ // The following stack frame objects are always referenced relative to $sp:
+ // 1. Outgoing arguments.
+ // 2. Pointer to dynamically allocated stack space.
+ // 3. Locations for callee-saved registers.
+ // Everything else is referenced relative to whatever register
+ // getFrameRegister() returns.
+ unsigned FrameReg;
+
+ if (MipsFI->isOutArgFI(FrameIndex) ||
+ (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI))
+ FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP;
+ else
+ FrameReg = getFrameRegister(MF);
+
+ // Calculate final offset.
+ // - There is no need to change the offset if the frame object is one of the
+ // following: an outgoing argument, pointer to a dynamically allocated
+  //   stack space, or a $gp restore location.
+ // - If the frame object is any of the following, its offset must be adjusted
+ // by adding the size of the stack:
+ // incoming argument, callee-saved register location or local variable.
+ int64_t Offset;
+
+ if (MipsFI->isOutArgFI(FrameIndex))
+ Offset = SPOffset;
+ else
+ Offset = SPOffset + (int64_t)StackSize;
+
+ Offset += MI.getOperand(OpNo + 1).getImm();
+
+ DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
+
+ // If MI is not a debug value, make sure Offset fits in the 16-bit immediate
+ // field.
+ if (!MI.isDebugValue() && !isInt<16>(Offset)) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = II->getDebugLoc();
+ unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu;
+ unsigned ATReg = Subtarget.isABI_N64() ? Mips::AT_64 : Mips::AT;
+ MipsAnalyzeImmediate::Inst LastInst(0, 0);
+
+ MipsFI->setEmitNOAT();
+ Mips::loadImmediate(Offset, Subtarget.isABI_N64(), TII, MBB, II, DL, true,
+ &LastInst);
+ BuildMI(MBB, II, DL, TII.get(ADDu), ATReg).addReg(FrameReg).addReg(ATReg);
+
+ FrameReg = ATReg;
+ Offset = SignExtend64<16>(LastInst.ImmOpnd);
+ }
+
+ MI.getOperand(OpNo).ChangeToRegister(FrameReg, false);
+ MI.getOperand(OpNo + 1).ChangeToImmediate(Offset);
+}
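A hedged sketch of the final-offset rule implemented in eliminateFI above: outgoing-argument objects stay $sp-relative as-is, while everything else is biased by the frame's stack size before the operand's own immediate is added. Names here are illustrative stand-ins:

#include <cstdint>
#include <iostream>

// Sketch of eliminateFI's offset math.
static int64_t finalOffset(bool IsOutArgFI, int64_t SPOffset,
                           uint64_t StackSize, int64_t OperandImm) {
  int64_t Offset = IsOutArgFI ? SPOffset : SPOffset + (int64_t)StackSize;
  return Offset + OperandImm;
}

int main() {
  // A local at SPOffset -8 in a 32-byte frame is addressed at $sp + 24.
  std::cout << finalOffset(false, -8, 32, 0) << "\n"; // 24
  // An outgoing argument at SPOffset 16 stays at $sp + 16.
  std::cout << finalOffset(true, 16, 32, 0) << "\n";  // 16
}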
diff --git a/lib/Target/Mips/MipsSERegisterInfo.h b/lib/Target/Mips/MipsSERegisterInfo.h
new file mode 100644
index 0000000..4b17b33
--- /dev/null
+++ b/lib/Target/Mips/MipsSERegisterInfo.h
@@ -0,0 +1,39 @@
+//===-- MipsSERegisterInfo.h - Mips32/64 Register Information ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips32/64 implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSSEREGISTERINFO_H
+#define MIPSSEREGISTERINFO_H
+
+#include "MipsRegisterInfo.h"
+
+namespace llvm {
+
+class MipsSERegisterInfo : public MipsRegisterInfo {
+public:
+ MipsSERegisterInfo(const MipsSubtarget &Subtarget,
+ const TargetInstrInfo &TII);
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+private:
+ virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo,
+ int FrameIndex, uint64_t StackSize,
+ int64_t SPOffset) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index 3215c44..ba15362 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -89,6 +89,9 @@ protected:
// InMips16 -- can process Mips16 instructions
bool InMips16Mode;
+  // IsAndroid -- target is Android
+ bool IsAndroid;
+
InstrItineraryData InstrItins;
public:
@@ -128,6 +131,7 @@ public:
bool isNotSingleFloat() const { return !IsSingleFloat; }
bool hasVFPU() const { return HasVFPU; }
bool inMips16Mode() const { return InMips16Mode; }
+ bool isAndroid() const { return IsAndroid; }
bool isLinux() const { return IsLinux; }
bool hasStandardEncoding() const { return !inMips16Mode(); }
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index dd5d35f..2928a73 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -13,6 +13,8 @@
#include "MipsTargetMachine.h"
#include "Mips.h"
+#include "MipsFrameLowering.h"
+#include "MipsInstrInfo.h"
#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/TargetRegistry.h"
@@ -22,8 +24,8 @@ extern "C" void LLVMInitializeMipsTarget() {
// Register the target.
RegisterTargetMachine<MipsebTargetMachine> X(TheMipsTarget);
RegisterTargetMachine<MipselTargetMachine> Y(TheMipselTarget);
- RegisterTargetMachine<Mips64ebTargetMachine> A(TheMips64Target);
- RegisterTargetMachine<Mips64elTargetMachine> B(TheMips64elTarget);
+ RegisterTargetMachine<MipsebTargetMachine> A(TheMips64Target);
+ RegisterTargetMachine<MipselTargetMachine> B(TheMips64elTarget);
}
// DataLayout --> Big-endian, 32-bit pointer/ABI/alignment
@@ -48,9 +50,10 @@ MipsTargetMachine(const Target &T, StringRef TT,
(Subtarget.isABI_N64() ?
"E-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" :
"E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")),
- InstrInfo(*this),
- FrameLowering(Subtarget),
- TLInfo(*this), TSInfo(*this), JITInfo() {
+ InstrInfo(MipsInstrInfo::create(*this)),
+ FrameLowering(MipsFrameLowering::create(*this, Subtarget)),
+ TLInfo(*this), TSInfo(*this), JITInfo(),
+ ELFWriterInfo(false, isLittle) {
}
void MipsebTargetMachine::anchor() { }
@@ -71,24 +74,6 @@ MipselTargetMachine(const Target &T, StringRef TT,
CodeGenOpt::Level OL)
: MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
-void Mips64ebTargetMachine::anchor() { }
-
-Mips64ebTargetMachine::
-Mips64ebTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
- : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
-
-void Mips64elTargetMachine::anchor() { }
-
-Mips64elTargetMachine::
-Mips64elTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
- : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
-
namespace {
/// Mips Code Generator Pass Configuration Options.
class MipsPassConfig : public TargetPassConfig {
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index 5cbf057..a542ef6 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -20,59 +20,67 @@
#include "MipsJITInfo.h"
#include "MipsSelectionDAGInfo.h"
#include "MipsSubtarget.h"
+#include "MipsELFWriterInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
- class formatted_raw_ostream;
-
- class MipsTargetMachine : public LLVMTargetMachine {
- MipsSubtarget Subtarget;
- const TargetData DataLayout; // Calculates type size & alignment
- MipsInstrInfo InstrInfo;
- MipsFrameLowering FrameLowering;
- MipsTargetLowering TLInfo;
- MipsSelectionDAGInfo TSInfo;
- MipsJITInfo JITInfo;
-
- public:
- MipsTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL,
- bool isLittle);
-
- virtual const MipsInstrInfo *getInstrInfo() const
- { return &InstrInfo; }
- virtual const TargetFrameLowering *getFrameLowering() const
- { return &FrameLowering; }
- virtual const MipsSubtarget *getSubtargetImpl() const
- { return &Subtarget; }
- virtual const TargetData *getTargetData() const
- { return &DataLayout;}
- virtual MipsJITInfo *getJITInfo()
- { return &JITInfo; }
-
-
- virtual const MipsRegisterInfo *getRegisterInfo() const {
- return &InstrInfo.getRegisterInfo();
- }
-
- virtual const MipsTargetLowering *getTargetLowering() const {
- return &TLInfo;
- }
-
- virtual const MipsSelectionDAGInfo* getSelectionDAGInfo() const {
- return &TSInfo;
- }
-
- // Pass Pipeline Configuration
- virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
- virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE);
- };
-
-/// MipsebTargetMachine - Mips32 big endian target machine.
+class formatted_raw_ostream;
+class MipsRegisterInfo;
+
+class MipsTargetMachine : public LLVMTargetMachine {
+ MipsSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ const MipsInstrInfo *InstrInfo;
+ const MipsFrameLowering *FrameLowering;
+ MipsTargetLowering TLInfo;
+ MipsSelectionDAGInfo TSInfo;
+ MipsJITInfo JITInfo;
+ MipsELFWriterInfo ELFWriterInfo;
+
+public:
+ MipsTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL,
+ bool isLittle);
+
+ virtual ~MipsTargetMachine() { delete InstrInfo; }
+
+ virtual const MipsInstrInfo *getInstrInfo() const
+ { return InstrInfo; }
+ virtual const TargetFrameLowering *getFrameLowering() const
+ { return FrameLowering; }
+ virtual const MipsSubtarget *getSubtargetImpl() const
+ { return &Subtarget; }
+ virtual const TargetData *getTargetData() const
+ { return &DataLayout;}
+ virtual MipsJITInfo *getJITInfo()
+ { return &JITInfo; }
+
+ virtual const MipsRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo->getRegisterInfo();
+ }
+
+ virtual const MipsTargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ virtual const MipsSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ virtual const MipsELFWriterInfo *getELFWriterInfo() const {
+ return &ELFWriterInfo;
+ }
+
+ // Pass Pipeline Configuration
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+ virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE);
+};
+
+/// MipsebTargetMachine - Mips32/64 big endian target machine.
///
class MipsebTargetMachine : public MipsTargetMachine {
virtual void anchor();
@@ -83,7 +91,7 @@ public:
CodeGenOpt::Level OL);
};
-/// MipselTargetMachine - Mips32 little endian target machine.
+/// MipselTargetMachine - Mips32/64 little endian target machine.
///
class MipselTargetMachine : public MipsTargetMachine {
virtual void anchor();
@@ -94,29 +102,6 @@ public:
CodeGenOpt::Level OL);
};
-/// Mips64ebTargetMachine - Mips64 big endian target machine.
-///
-class Mips64ebTargetMachine : public MipsTargetMachine {
- virtual void anchor();
-public:
- Mips64ebTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL);
-};
-
-/// Mips64elTargetMachine - Mips64 little endian target machine.
-///
-class Mips64elTargetMachine : public MipsTargetMachine {
- virtual void anchor();
-public:
- Mips64elTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL);
-};
} // End llvm namespace
#endif
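A minimal sketch of the ownership change in MipsTargetMachine above: the class now holds pointers produced by static create() factories, so a single TargetMachine can select the Mips16 or Mips32/64 (SE) variants at construction time. The class and factory names below are illustrative stand-ins, not the real LLVM types:

#include <iostream>
#include <memory>

struct InstrInfoBase {
  virtual ~InstrInfoBase() {}
  virtual const char *name() const = 0;
};
struct SEInstrInfo : InstrInfoBase {
  const char *name() const { return "Mips32/64 (SE)"; }
};
struct Mips16InstrInfoToy : InstrInfoBase {
  const char *name() const { return "Mips16"; }
};

// Stand-in for MipsInstrInfo::create(TM): dispatch on the subtarget mode.
static InstrInfoBase *createInstrInfo(bool InMips16Mode) {
  if (InMips16Mode)
    return new Mips16InstrInfoToy();
  return new SEInstrInfo();
}

int main() {
  std::unique_ptr<InstrInfoBase> II(createInstrInfo(false));
  std::cout << II->name() << "\n"; // Mips32/64 (SE)
}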
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index f50f9b5..2a2abb1 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -337,7 +337,10 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
// can get a useful trip count. The trip count can
// be either a register or an immediate. The location
// of the value depends upon the type (reg or imm).
- while ((IV_Opnd = IV_Opnd->getNextOperandForReg())) {
+ for (MachineRegisterInfo::reg_iterator
+ RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end();
+ RI != RE; ++RI) {
+ IV_Opnd = &RI.getOperand();
bool SignedCmp;
MachineInstr *MI = IV_Opnd->getParent();
if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp) &&
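A toy illustration of the PPCCTRLoops change above: rather than chasing a per-operand getNextOperandForReg() chain, the loop now walks the register's use/def list through MachineRegisterInfo-style iterators. All types here are stand-ins:

#include <iostream>
#include <vector>

struct Operand { int ParentMI; }; // stand-in for MachineOperand

struct RegInfo { // stand-in for MachineRegisterInfo's per-register list
  std::vector<Operand> Ops;
  std::vector<Operand>::iterator reg_begin() { return Ops.begin(); }
  std::vector<Operand>::iterator reg_end() { return Ops.end(); }
};

int main() {
  RegInfo MRI{{{10}, {20}, {30}}};
  // The new pattern: visit every operand that references the IV register.
  for (auto RI = MRI.reg_begin(), RE = MRI.reg_end(); RI != RE; ++RI)
    std::cout << "inspect operand of MI#" << RI->ParentMI << "\n";
}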
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 13250b3..61d44c5 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -106,7 +106,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// from FP_ROUND: that rounds to nearest, this rounds to zero.
setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
- // We do not currently implment this libm ops for PowerPC.
+ // We do not currently implement these libm ops for PowerPC.
setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
@@ -394,8 +394,10 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
}
- if (Subtarget->has64BitSupport())
+ if (Subtarget->has64BitSupport()) {
setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
+ }
setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 91c5366..39778a5 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -265,6 +265,15 @@ def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins G8RC:$rS),
PPC970_DGroup_First, PPC970_Unit_FXU;
}
+let Pattern = [(set G8RC:$rT, readcyclecounter)] in
+def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs G8RC:$rT), (ins),
+ "mfspr $rT, 268", SprMFTB>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+// Note that encoding mftb using mfspr is now the preferred form,
+// and has been since at least ISA v2.03. The mftb instruction has
+// now been phased out. Using mfspr, however, is known not to work on
+// the POWER3.
+
let Defs = [X1], Uses = [X1] in
def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),"",
[(set G8RC:$result,
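A hedged usage note for the READCYCLECOUNTER support enabled above: on a 64-bit-capable PowerPC subtarget, the Clang builtin below can now select directly to MFTB8 (mfspr rT, 268) rather than falling back. __builtin_readcyclecounter is a Clang builtin; availability in other compilers is not assumed:

#include <cstdint>
#include <iostream>

int main() {
  // Lowers through ISD::READCYCLECOUNTER, now Legal on 64-bit PPC.
  uint64_t T0 = __builtin_readcyclecounter();
  uint64_t T1 = __builtin_readcyclecounter();
  std::cout << "elapsed timebase ticks: " << (T1 - T0) << "\n";
}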
diff --git a/lib/Target/PowerPC/TargetInfo/Makefile b/lib/Target/PowerPC/TargetInfo/Makefile
index a101aa4..2d0560d 100644
--- a/lib/Target/PowerPC/TargetInfo/Makefile
+++ b/lib/Target/PowerPC/TargetInfo/Makefile
@@ -10,6 +10,6 @@ LEVEL = ../../../..
LIBRARYNAME = LLVMPowerPCInfo
# Hack: we need to include 'main' target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+override CPPFLAGS += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index cbfa4cf..9c27f27 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -2367,8 +2367,3 @@ unsigned foo(unsigned x, unsigned y) { return x > y && x != 0; }
should fold to x > y.
//===---------------------------------------------------------------------===//
-
-int f(double x) { return __builtin_fabs(x) < 0.0; }
-should fold to false.
-
-//===---------------------------------------------------------------------===//
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index 6357468..ff8d3c5 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -109,9 +109,6 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
}
-void SparcRegisterInfo::
-processFunctionBeforeFrameFinalized(MachineFunction &MF) const {}
-
unsigned SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return SP::I6;
}
diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp
index ec95ad4..8e215a7 100644
--- a/lib/Target/TargetLibraryInfo.cpp
+++ b/lib/Target/TargetLibraryInfo.cpp
@@ -24,64 +24,72 @@ void TargetLibraryInfo::anchor() { }
const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
{
+ "__cxa_atexit",
+ "__cxa_guard_abort",
+ "__cxa_guard_acquire",
+ "__cxa_guard_release",
+ "__memcpy_chk",
"acos",
- "acosl",
"acosf",
+ "acosl",
"asin",
- "asinl",
"asinf",
+ "asinl",
"atan",
- "atanl",
- "atanf",
"atan2",
- "atan2l",
"atan2f",
+ "atan2l",
+ "atanf",
+ "atanl",
"ceil",
- "ceill",
"ceilf",
+ "ceill",
"copysign",
"copysignf",
"copysignl",
"cos",
- "cosl",
"cosf",
"cosh",
- "coshl",
"coshf",
+ "coshl",
+ "cosl",
"exp",
- "expl",
- "expf",
"exp2",
- "exp2l",
"exp2f",
+ "exp2l",
+ "expf",
+ "expl",
"expm1",
- "expm1l",
"expm1f",
+ "expm1l",
"fabs",
- "fabsl",
"fabsf",
+ "fabsl",
+ "fiprintf",
"floor",
- "floorl",
"floorf",
- "fiprintf",
+ "floorl",
"fmod",
- "fmodl",
"fmodf",
+ "fmodl",
+ "fputc",
"fputs",
"fwrite",
"iprintf",
"log",
- "logl",
- "logf",
- "log2",
- "log2l",
- "log2f",
"log10",
- "log10l",
"log10f",
+ "log10l",
"log1p",
- "log1pl",
"log1pf",
+ "log1pl",
+ "log2",
+ "log2f",
+ "log2l",
+ "logf",
+ "logl",
+ "memchr",
+ "memcmp",
"memcpy",
"memmove",
"memset",
@@ -92,6 +100,8 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"pow",
"powf",
"powl",
+ "putchar",
+ "puts",
"rint",
"rintf",
"rintl",
@@ -99,36 +109,48 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"roundf",
"roundl",
"sin",
- "sinl",
"sinf",
"sinh",
- "sinhl",
"sinhf",
+ "sinhl",
+ "sinl",
"siprintf",
"sqrt",
- "sqrtl",
"sqrtf",
+ "sqrtl",
+ "strcat",
+ "strchr",
+ "strcpy",
+ "strlen",
+ "strncat",
+ "strncmp",
+ "strncpy",
+ "strnlen",
"tan",
- "tanl",
"tanf",
"tanh",
- "tanhl",
"tanhf",
+ "tanhl",
+ "tanl",
"trunc",
"truncf",
- "truncl",
- "__cxa_atexit",
- "__cxa_guard_abort",
- "__cxa_guard_acquire",
- "__cxa_guard_release"
+ "truncl"
};
/// initialize - Initialize the set of available library functions based on the
/// specified target triple. This should be carefully written so that a missing
/// target triple gets a sane set of defaults.
-static void initialize(TargetLibraryInfo &TLI, const Triple &T) {
+static void initialize(TargetLibraryInfo &TLI, const Triple &T,
+ const char **StandardNames) {
initializeTargetLibraryInfoPass(*PassRegistry::getPassRegistry());
+#ifndef NDEBUG
+ // Verify that the StandardNames array is in alphabetical order.
+ for (unsigned F = 1; F < LibFunc::NumLibFuncs; ++F) {
+ if (strcmp(StandardNames[F-1], StandardNames[F]) >= 0)
+ llvm_unreachable("TargetLibraryInfo function names must be sorted");
+ }
+#endif // !NDEBUG
// memset_pattern16 is only available on iOS 3.0 and Mac OS/X 10.5 and later.
if (T.isMacOSX()) {
@@ -240,14 +262,14 @@ TargetLibraryInfo::TargetLibraryInfo() : ImmutablePass(ID) {
// Default to everything being available.
memset(AvailableArray, -1, sizeof(AvailableArray));
- initialize(*this, Triple());
+ initialize(*this, Triple(), StandardNames);
}
TargetLibraryInfo::TargetLibraryInfo(const Triple &T) : ImmutablePass(ID) {
// Default to everything being available.
memset(AvailableArray, -1, sizeof(AvailableArray));
- initialize(*this, T);
+ initialize(*this, T, StandardNames);
}
TargetLibraryInfo::TargetLibraryInfo(const TargetLibraryInfo &TLI)
@@ -256,6 +278,17 @@ TargetLibraryInfo::TargetLibraryInfo(const TargetLibraryInfo &TLI)
CustomNames = TLI.CustomNames;
}
+bool TargetLibraryInfo::getLibFunc(StringRef funcName,
+ LibFunc::Func &F) const {
+ const char **Start = &StandardNames[0];
+ const char **End = &StandardNames[LibFunc::NumLibFuncs];
+ const char **I = std::lower_bound(Start, End, funcName);
+ if (I != End && *I == funcName) {
+ F = (LibFunc::Func)(I - Start);
+ return true;
+ }
+ return false;
+}
/// disableAllFunctions - This disables all builtins, which is used for options
/// like -fno-builtin.
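A sketch of the lookup strategy the getLibFunc addition above relies on: because the StandardNames table is now kept alphabetically sorted (and verified under !NDEBUG), std::lower_bound can binary-search it instead of a linear scan. The table below is a small stand-in; the real code compares StringRef values directly rather than via strcmp:

#include <algorithm>
#include <cstring>
#include <iostream>

// Small sorted table standing in for StandardNames.
static const char *Names[] = {"acos", "asin", "atan", "memcpy", "pow", "sin"};
static const unsigned NumNames = sizeof(Names) / sizeof(Names[0]);

static bool lookup(const char *Fn, unsigned &Index) {
  const char **Start = Names;
  const char **End = Names + NumNames;
  const char **I = std::lower_bound(
      Start, End, Fn,
      [](const char *A, const char *B) { return std::strcmp(A, B) < 0; });
  if (I != End && std::strcmp(*I, Fn) == 0) {
    Index = unsigned(I - Start); // index doubles as the LibFunc enum value
    return true;
  }
  return false;
}

int main() {
  unsigned Idx;
  if (lookup("memcpy", Idx))
    std::cout << "found at index " << Idx << "\n"; // found at index 3
}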
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 95e83ec..73a0095 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -39,7 +39,9 @@ private:
MCAsmLexer &getLexer() const { return Parser.getLexer(); }
bool Error(SMLoc L, const Twine &Msg,
- ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) {
+ ArrayRef<SMRange> Ranges = ArrayRef<SMRange>(),
+ bool matchingInlineAsm = false) {
+ if (matchingInlineAsm) return true;
return Parser.Error(L, Msg, Ranges);
}
@@ -65,6 +67,12 @@ private:
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCStreamer &Out);
+ bool MatchInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ SmallVectorImpl<MCInst> &MCInsts,
+ unsigned &OrigErrorInfo,
+ bool matchingInlineAsm = false);
+
/// isSrcOp - Returns true if operand is either (%rsi) or %ds:(%rsi)
/// in 64bit mode or (%esi) or %es:(%esi) in 32bit mode.
bool isSrcOp(X86Operand &Op);
@@ -1508,9 +1516,24 @@ bool X86AsmParser::
MatchAndEmitInstruction(SMLoc IDLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCStreamer &Out) {
+ SmallVector<MCInst, 2> Insts;
+ unsigned ErrorInfo;
+ bool Error = MatchInstruction(IDLoc, Operands, Insts, ErrorInfo);
+ if (!Error)
+ for (unsigned i = 0, e = Insts.size(); i != e; ++i)
+ Out.EmitInstruction(Insts[i]);
+ return Error;
+}
+
+bool X86AsmParser::
+MatchInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ SmallVectorImpl<MCInst> &MCInsts, unsigned &OrigErrorInfo,
+ bool matchingInlineAsm) {
assert(!Operands.empty() && "Unexpected empty operand list!");
X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
assert(Op->isToken() && "Leading operand should always be a mnemonic!");
+ ArrayRef<SMRange> EmptyRanges = ArrayRef<SMRange>();
// First, handle aliases that expand to multiple instructions.
// FIXME: This should be replaced with a real .td file alias mechanism.
@@ -1523,7 +1546,7 @@ MatchAndEmitInstruction(SMLoc IDLoc,
MCInst Inst;
Inst.setOpcode(X86::WAIT);
Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst);
+ MCInsts.push_back(Inst);
const char *Repl =
StringSwitch<const char*>(Op->getToken())
@@ -1542,7 +1565,6 @@ MatchAndEmitInstruction(SMLoc IDLoc,
}
bool WasOriginallyInvalidOperand = false;
- unsigned OrigErrorInfo;
MCInst Inst;
// First, try a direct match.
@@ -1557,13 +1579,15 @@ MatchAndEmitInstruction(SMLoc IDLoc,
;
Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst);
+ MCInsts.push_back(Inst);
return false;
case Match_MissingFeature:
- Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ Error(IDLoc, "instruction requires a CPU feature not currently enabled",
+ EmptyRanges, matchingInlineAsm);
return true;
case Match_ConversionFail:
- return Error(IDLoc, "unable to convert operands to instruction");
+ return Error(IDLoc, "unable to convert operands to instruction",
+ EmptyRanges, matchingInlineAsm);
case Match_InvalidOperand:
WasOriginallyInvalidOperand = true;
break;
@@ -1615,7 +1639,7 @@ MatchAndEmitInstruction(SMLoc IDLoc,
(Match3 == Match_Success) + (Match4 == Match_Success);
if (NumSuccessfulMatches == 1) {
Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst);
+ MCInsts.push_back(Inst);
return false;
}
@@ -1642,7 +1666,7 @@ MatchAndEmitInstruction(SMLoc IDLoc,
OS << "'" << Base << MatchChars[i] << "'";
}
OS << ")";
- Error(IDLoc, OS.str());
+ Error(IDLoc, OS.str(), EmptyRanges, matchingInlineAsm);
return true;
}
@@ -1654,30 +1678,33 @@ MatchAndEmitInstruction(SMLoc IDLoc,
(Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
if (!WasOriginallyInvalidOperand) {
return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
- Op->getLocRange());
+ Op->getLocRange(), matchingInlineAsm);
}
// Recover location info for the operand if we know which was the problem.
if (OrigErrorInfo != ~0U) {
if (OrigErrorInfo >= Operands.size())
- return Error(IDLoc, "too few operands for instruction");
+ return Error(IDLoc, "too few operands for instruction",
+ EmptyRanges, matchingInlineAsm);
X86Operand *Operand = (X86Operand*)Operands[OrigErrorInfo];
if (Operand->getStartLoc().isValid()) {
SMRange OperandRange = Operand->getLocRange();
return Error(Operand->getStartLoc(), "invalid operand for instruction",
- OperandRange);
+ OperandRange, matchingInlineAsm);
}
}
- return Error(IDLoc, "invalid operand for instruction");
+ return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
+ matchingInlineAsm);
}
// If one instruction matched with a missing feature, report this as a
// missing feature.
if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
(Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
- Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ Error(IDLoc, "instruction requires a CPU feature not currently enabled",
+ EmptyRanges, matchingInlineAsm);
return true;
}
@@ -1685,12 +1712,14 @@ MatchAndEmitInstruction(SMLoc IDLoc,
// operand failure.
if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
(Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
- Error(IDLoc, "invalid operand for instruction");
+ Error(IDLoc, "invalid operand for instruction", EmptyRanges,
+ matchingInlineAsm);
return true;
}
// If all of these were an outright failure, report it in a useless way.
- Error(IDLoc, "unknown use of instruction mnemonic without a size suffix");
+ Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
+ EmptyRanges, matchingInlineAsm);
return true;
}
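A hedged sketch of the split introduced above: MatchInstruction now fills a vector of instructions instead of emitting to the streamer, and the new matchingInlineAsm flag makes the Error helper return early, so inline-asm matching can probe the matcher with diagnostics suppressed. All types below are stand-ins:

#include <iostream>
#include <vector>

struct Inst { int Opcode; }; // stand-in for MCInst

static bool matchInstruction(std::vector<Inst> &Out, bool MatchingInlineAsm) {
  (void)MatchingInlineAsm; // when true, errors are recorded but not printed
  Out.push_back(Inst{1});  // e.g. an implicit WAIT expanded from an alias
  Out.push_back(Inst{2});  // the matched instruction itself
  return false;            // false == success, as in the LLVM convention
}

int main() {
  std::vector<Inst> Insts;
  // The emit path only runs on success, mirroring MatchAndEmitInstruction.
  if (!matchInstruction(Insts, /*MatchingInlineAsm=*/false))
    for (unsigned i = 0; i != Insts.size(); ++i)
      std::cout << "emit opcode " << Insts[i].Opcode << "\n";
}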
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 4bbfe95..5039887 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -327,7 +327,7 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
if (type == TYPE_RELv) {
isBranch = true;
pcrel = insn.startLocation +
- insn.displacementOffset + insn.displacementSize;
+ insn.immediateOffset + insn.immediateSize;
switch (insn.displacementSize) {
default:
break;
@@ -762,8 +762,7 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
translateRegister(mcInst, insn.vvvv);
return false;
case ENCODING_DUP:
- return translateOperand(mcInst,
- insn.spec->operands[operand.type - TYPE_DUP0],
+ return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0],
insn, Dis);
}
}
@@ -789,8 +788,8 @@ static bool translateInstruction(MCInst &mcInst,
insn.numImmediatesTranslated = 0;
for (index = 0; index < X86_MAX_OPERANDS; ++index) {
- if (insn.spec->operands[index].encoding != ENCODING_NONE) {
- if (translateOperand(mcInst, insn.spec->operands[index], insn, Dis)) {
+ if (insn.operands[index].encoding != ENCODING_NONE) {
+ if (translateOperand(mcInst, insn.operands[index], insn, Dis)) {
return true;
}
}
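A sketch of the pcrel fix above: for TYPE_RELv branch immediates the PC-relative anchor is the end of the immediate field (the next instruction, for a trailing relative immediate), so the base uses immediateOffset and immediateSize rather than the displacement fields. A minimal illustration under those assumptions:

#include <cstdint>
#include <iostream>

static uint64_t branchTarget(uint64_t Start, uint8_t ImmOffset,
                             uint8_t ImmSize, int64_t Imm) {
  // base = start + immediateOffset + immediateSize; target = base + imm
  return Start + ImmOffset + ImmSize + Imm;
}

int main() {
  // jmp rel32 at 0x1000: opcode at offset 0, immediate at offset 1, size 4.
  std::cout << std::hex << branchTarget(0x1000, 1, 4, 0x10) << "\n"; // 1015
}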
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.h b/lib/Target/X86/Disassembler/X86Disassembler.h
index c11f51c..0dbfa26 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.h
+++ b/lib/Target/X86/Disassembler/X86Disassembler.h
@@ -20,7 +20,7 @@
// 2. Read the opcode, and determine what kind of opcode it is. The
// disassembler distinguishes four kinds of opcodes, which are enumerated in
// OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
-// (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
+// (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
// (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context.
//
// 3. Depending on the opcode type, look in one of four ClassDecision structures
@@ -74,8 +74,8 @@
#ifndef X86DISASSEMBLER_H
#define X86DISASSEMBLER_H
-#define INSTRUCTION_SPECIFIER_FIELDS \
- const char* name;
+#define INSTRUCTION_SPECIFIER_FIELDS \
+ uint16_t operands;
#define INSTRUCTION_IDS \
unsigned instructionIDs;
@@ -88,7 +88,7 @@
#include "llvm/MC/MCDisassembler.h"
namespace llvm {
-
+
class MCInst;
class MCInstrInfo;
class MCSubtargetInfo;
@@ -96,7 +96,7 @@ class MemoryObject;
class raw_ostream;
struct EDInstInfo;
-
+
namespace X86Disassembler {
/// X86GenericDisassembler - Generic disassembler for all X86 platforms.
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
index 6020877..0c92912 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -1495,14 +1495,14 @@ static int readOperands(struct InternalInstruction* insn) {
needVVVV = hasVVVV && (insn->vvvv != 0);
for (index = 0; index < X86_MAX_OPERANDS; ++index) {
- switch (insn->spec->operands[index].encoding) {
+ switch (x86OperandSets[insn->spec->operands][index].encoding) {
case ENCODING_NONE:
break;
case ENCODING_REG:
case ENCODING_RM:
if (readModRM(insn))
return -1;
- if (fixupReg(insn, &insn->spec->operands[index]))
+ if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
return -1;
break;
case ENCODING_CB:
@@ -1524,14 +1524,14 @@ static int readOperands(struct InternalInstruction* insn) {
}
if (readImmediate(insn, 1))
return -1;
- if (insn->spec->operands[index].type == TYPE_IMM3 &&
+ if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM3 &&
insn->immediates[insn->numImmediatesConsumed - 1] > 7)
return -1;
- if (insn->spec->operands[index].type == TYPE_IMM5 &&
+ if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM5 &&
insn->immediates[insn->numImmediatesConsumed - 1] > 31)
return -1;
- if (insn->spec->operands[index].type == TYPE_XMM128 ||
- insn->spec->operands[index].type == TYPE_XMM256)
+ if (x86OperandSets[insn->spec->operands][index].type == TYPE_XMM128 ||
+ x86OperandSets[insn->spec->operands][index].type == TYPE_XMM256)
sawRegImm = 1;
break;
case ENCODING_IW:
@@ -1582,7 +1582,7 @@ static int readOperands(struct InternalInstruction* insn) {
needVVVV = 0; /* Mark that we have found a VVVV operand. */
if (!hasVVVV)
return -1;
- if (fixupReg(insn, &insn->spec->operands[index]))
+ if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
return -1;
break;
case ENCODING_DUP:
@@ -1644,6 +1644,8 @@ int decodeInstruction(struct InternalInstruction* insn,
insn->instructionID == 0 ||
readOperands(insn))
return -1;
+
+ insn->operands = &x86OperandSets[insn->spec->operands][0];
insn->length = insn->readerCursor - insn->startLocation;
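A hedged sketch of the table compression applied above: the specifier's operands field becomes a 16-bit index into a shared x86OperandSets table rather than an embedded operand array, which deduplicates identical operand lists across instructions. Field values and shapes below are placeholders:

#include <cstdint>
#include <iostream>

struct OperandSpecifier { uint8_t encoding, type; }; // illustrative shape

// Shared operand-set table standing in for x86OperandSets.
static const OperandSpecifier OperandSets[][2] = {
  {{0, 0}, {0, 0}}, // set 0: no operands
  {{1, 5}, {2, 7}}, // set 1: placeholder encoding/type values
};

struct InstructionSpecifier { uint16_t operands; }; // an index, not an array

int main() {
  InstructionSpecifier Spec = {1};
  // Every former insn->spec->operands[i] access becomes a table lookup.
  const OperandSpecifier *Ops = OperandSets[Spec.operands];
  std::cout << "encoding of operand 0: " << int(Ops[0].encoding) << "\n";
}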
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index e2caf6a..797703f 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -19,17 +19,18 @@
#ifdef __cplusplus
extern "C" {
#endif
-
-#define INSTRUCTION_SPECIFIER_FIELDS
+
+#define INSTRUCTION_SPECIFIER_FIELDS \
+ uint16_t operands;
#define INSTRUCTION_IDS \
unsigned instructionIDs;
#include "X86DisassemblerDecoderCommon.h"
-
+
#undef INSTRUCTION_SPECIFIER_FIELDS
#undef INSTRUCTION_IDS
-
+
/*
* Accessor functions for various fields of an Intel instruction
*/
@@ -43,7 +44,7 @@ extern "C" {
#define rFromREX(rex) (((rex) & 0x4) >> 2)
#define xFromREX(rex) (((rex) & 0x2) >> 1)
#define bFromREX(rex) ((rex) & 0x1)
-
+
#define rFromVEX2of3(vex) (((~(vex)) & 0x80) >> 7)
#define xFromVEX2of3(vex) (((~(vex)) & 0x40) >> 6)
#define bFromVEX2of3(vex) (((~(vex)) & 0x20) >> 5)
@@ -237,7 +238,7 @@ extern "C" {
ENTRY(YMM13) \
ENTRY(YMM14) \
ENTRY(YMM15)
-
+
#define REGS_SEGMENT \
ENTRY(ES) \
ENTRY(CS) \
@@ -245,7 +246,7 @@ extern "C" {
ENTRY(DS) \
ENTRY(FS) \
ENTRY(GS)
-
+
#define REGS_DEBUG \
ENTRY(DR0) \
ENTRY(DR1) \
@@ -266,12 +267,12 @@ extern "C" {
ENTRY(CR6) \
ENTRY(CR7) \
ENTRY(CR8)
-
+
#define ALL_EA_BASES \
EA_BASES_16BIT \
EA_BASES_32BIT \
EA_BASES_64BIT
-
+
#define ALL_SIB_BASES \
REGS_32BIT \
REGS_64BIT
@@ -290,7 +291,7 @@ extern "C" {
ENTRY(RIP)
/*
- * EABase - All possible values of the base field for effective-address
+ * EABase - All possible values of the base field for effective-address
* computations, a.k.a. the Mod and R/M fields of the ModR/M byte. We
* distinguish between bases (EA_BASE_*) and registers that just happen to be
* referred to when Mod == 0b11 (EA_REG_*).
@@ -305,8 +306,8 @@ typedef enum {
#undef ENTRY
EA_max
} EABase;
-
-/*
+
+/*
* SIBIndex - All possible values of the SIB index field.
* Borrows entries from ALL_EA_BASES with the special case that
* sib is synonymous with NONE.
@@ -321,7 +322,7 @@ typedef enum {
#undef ENTRY
SIB_INDEX_max
} SIBIndex;
-
+
/*
* SIBBase - All possible values of the SIB base field.
*/
@@ -353,7 +354,7 @@ typedef enum {
#undef ENTRY
MODRM_REG_max
} Reg;
-
+
/*
* SegmentOverride - All possible segment overrides.
*/
@@ -367,7 +368,7 @@ typedef enum {
SEG_OVERRIDE_GS,
SEG_OVERRIDE_max
} SegmentOverride;
-
+
/*
* VEXLeadingOpcodeByte - Possible values for the VEX.m-mmmm field
*/
@@ -431,16 +432,16 @@ struct InternalInstruction {
void* dlogArg;
/* General instruction information */
-
+
/* The mode to disassemble for (64-bit, protected, real) */
DisassemblerMode mode;
/* The start of the instruction, usable with the reader */
uint64_t startLocation;
/* The length of the instruction, in bytes */
size_t length;
-
+
/* Prefix state */
-
+
/* 1 if the prefix byte corresponding to the entry is present; 0 if not */
uint8_t prefixPresent[0x100];
/* contains the location (for use with the reader) of the prefix byte */
@@ -456,7 +457,7 @@ struct InternalInstruction {
uint64_t necessaryPrefixLocation;
/* The segment override type */
SegmentOverride segmentOverride;
-
+
/* Sizes of various critical pieces of data, in bytes */
uint8_t registerSize;
uint8_t addressSize;
@@ -467,9 +468,9 @@ struct InternalInstruction {
needed to find relocation entries for adding symbolic operands */
uint8_t displacementOffset;
uint8_t immediateOffset;
-
+
/* opcode state */
-
+
/* The value of the two-byte escape prefix (usually 0x0f) */
uint8_t twoByteEscape;
/* The value of the three-byte escape prefix (usually 0x38 or 0x3a) */
@@ -478,16 +479,16 @@ struct InternalInstruction {
uint8_t opcode;
/* The ModR/M byte of the instruction, if it is an opcode extension */
uint8_t modRMExtension;
-
+
/* decode state */
-
+
/* The type of opcode, used for indexing into the array of decode tables */
OpcodeType opcodeType;
/* The instruction ID, extracted from the decode table */
uint16_t instructionID;
/* The specifier for the instruction, from the instruction info table */
const struct InstructionSpecifier *spec;
-
+
/* state for additional bytes, consumed during operand decode. Pattern:
consumed___ indicates that the byte was already consumed and does not
need to be consumed again */
@@ -495,12 +496,12 @@ struct InternalInstruction {
/* The VEX.vvvv field, which contains a third register operand for some AVX
instructions */
Reg vvvv;
-
+
/* The ModR/M byte, which contains most register operands and some portion of
all memory operands */
BOOL consumedModRM;
uint8_t modRM;
-
+
/* The SIB byte, used for more complex 32- or 64-bit memory operands */
BOOL consumedSIB;
uint8_t sib;
@@ -508,19 +509,19 @@ struct InternalInstruction {
/* The displacement, used for memory operands */
BOOL consumedDisplacement;
int32_t displacement;
-
+
/* Immediates. There can be two in some cases */
uint8_t numImmediatesConsumed;
uint8_t numImmediatesTranslated;
uint64_t immediates[2];
-
+
/* A register or immediate operand encoded into the opcode */
BOOL consumedOpcodeModifier;
uint8_t opcodeModifier;
Reg opcodeRegister;
-
+
/* Portions of the ModR/M byte */
-
+
/* These fields determine the allowable values for the ModR/M fields, which
depend on operand and address widths */
EABase eaBaseBase;
@@ -533,11 +534,13 @@ struct InternalInstruction {
EADisplacement eaDisplacement;
/* The reg field always encodes a register */
Reg reg;
-
+
/* SIB state */
SIBIndex sibIndex;
uint8_t sibScale;
SIBBase sibBase;
+
+ const struct OperandSpecifier *operands;
};
/* decodeInstruction - Decode one instruction and store the decoding results in
@@ -571,15 +574,15 @@ int decodeInstruction(struct InternalInstruction* insn,
* @param line - The line number that printed the debug message.
* @param s - The message to print.
*/
-
+
void x86DisassemblerDebug(const char *file,
unsigned line,
const char *s);
const char *x86DisassemblerGetInstrName(unsigned Opcode, void *mii);
-#ifdef __cplusplus
+#ifdef __cplusplus
}
#endif
-
+
#endif
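The header change above is where that saving gets wired up: INSTRUCTION_SPECIFIER_FIELDS used to expand to nothing and now injects the uint16_t operands index into struct InstructionSpecifier, which is declared in the shared DecoderCommon header. A sketch of the macro-injection pattern, with the shared declaration inlined for readability (an assumption about presentation, not a copy of the file):

/* shared header (simplified): leaves a hole for per-consumer fields */
struct InstructionSpecifier {
  uint8_t modifierType;
  uint8_t modifierBase;
  INSTRUCTION_SPECIFIER_FIELDS   /* expands to whatever the includer set */
};

/* includer (the decoder), before pulling in the shared header: */
#define INSTRUCTION_SPECIFIER_FIELDS \
  uint16_t operands;               /* the new table index */

The decoder and the TableGen-emitted tables must define the macro identically, or their struct layouts silently diverge.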
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
index 13e1136..b0a0e1e 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -119,7 +119,7 @@ enum attributeBits {
ENUM_ENTRY(IC_VEX_L_W_OPSIZE, 5, "requires VEX, L, W and OpSize")
-#define ENUM_ENTRY(n, r, d) n,
+#define ENUM_ENTRY(n, r, d) n,
typedef enum {
INSTRUCTION_CONTEXTS
IC_max
@@ -148,11 +148,11 @@ typedef enum {
* If a ModR/M byte is not required, "required" is left unset, and the values
* for each instructionID are identical.
*/
-
+
typedef uint16_t InstrUID;
/*
- * ModRMDecisionType - describes the type of ModR/M decision, allowing the
+ * ModRMDecisionType - describes the type of ModR/M decision, allowing the
* consumer to determine the number of entries in it.
*
* MODRM_ONEENTRY - No matter what the value of the ModR/M byte is, the decoded
@@ -172,7 +172,7 @@ typedef uint16_t InstrUID;
ENUM_ENTRY(MODRM_SPLITREG) \
ENUM_ENTRY(MODRM_FULL)
-#define ENUM_ENTRY(n) n,
+#define ENUM_ENTRY(n) n,
typedef enum {
MODRMTYPES
MODRM_max
@@ -180,13 +180,13 @@ typedef enum {
#undef ENUM_ENTRY
/*
- * ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which
+ * ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which
* instruction each possible value of the ModR/M byte corresponds to. Once
* this information is known, we have narrowed down to a single instruction.
*/
struct ModRMDecision {
uint8_t modrm_type;
-
+
/* The macro below must be defined wherever this file is included. */
INSTRUCTION_IDS
};
@@ -210,7 +210,7 @@ struct ContextDecision {
struct OpcodeDecision opcodeDecisions[IC_max];
};
-/*
+/*
* Physical encodings of instruction operands.
*/
@@ -244,14 +244,14 @@ struct ContextDecision {
ENUM_ENTRY(ENCODING_DUP, "Duplicate of another operand; ID is encoded " \
"in type")
-#define ENUM_ENTRY(n, d) n,
+#define ENUM_ENTRY(n, d) n,
typedef enum {
ENCODINGS
ENCODING_max
} OperandEncoding;
#undef ENUM_ENTRY
-/*
+/*
* Semantic interpretations of instruction operands.
*/
@@ -332,14 +332,14 @@ struct ContextDecision {
ENUM_ENTRY(TYPE_DUP4, "operand 4") \
ENUM_ENTRY(TYPE_M512, "512-bit FPU/MMX/XMM/MXCSR state")
-#define ENUM_ENTRY(n, d) n,
+#define ENUM_ENTRY(n, d) n,
typedef enum {
TYPES
TYPE_max
} OperandType;
#undef ENUM_ENTRY
-/*
+/*
* OperandSpecifier - The specification for how to extract and interpret one
* operand.
*/
@@ -374,8 +374,7 @@ typedef enum {
struct InstructionSpecifier {
uint8_t modifierType;
uint8_t modifierBase;
- struct OperandSpecifier operands[X86_MAX_OPERANDS];
-
+
/* The macro below must be defined wherever this file is included. */
INSTRUCTION_SPECIFIER_FIELDS
};
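This hunk is the payoff: the inline operands[X86_MAX_OPERANDS] array leaves InstructionSpecifier entirely. Rough arithmetic on what that buys, under the assumptions that X86_MAX_OPERANDS is 5 and OperandSpecifier is two bytes in this tree:

#include <cstdint>
#include <cstdio>

int main() {
  // Assumed sizes; see the structs in the headers above.
  const unsigned NumOperands = 5;                   // X86_MAX_OPERANDS (assumed)
  const unsigned SpecBytes   = 2;                   // encoding + type (assumed)
  const unsigned Before = NumOperands * SpecBytes;  // inline array: ~10 bytes
  const unsigned After  = sizeof(uint16_t);         // shared-table index: 2 bytes
  // Multiplied across the thousands of InstructionSpecifier entries in the
  // decode tables, with duplicate operand lists now stored once.
  std::printf("per-entry operand storage: %u -> %u bytes\n", Before, After);
}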
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index 49c07f3..b0acd7d 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -91,9 +91,10 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
// Exceptions handling
ExceptionsType = ExceptionHandling::DwarfCFI;
- // OpenBSD has buggy support for .quad in 32-bit mode, just split into two
- // .words.
- if (T.getOS() == Triple::OpenBSD && T.getArch() == Triple::x86)
+ // OpenBSD and Bitrig have buggy support for .quad in 32-bit mode, just split
+ // into two .words.
+ if ((T.getOS() == Triple::OpenBSD || T.getOS() == Triple::Bitrig) &&
+ T.getArch() == Triple::x86)
Data64bitsDirective = 0;
}
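Setting Data64bitsDirective to null is how an MCAsmInfo opts out of .quad; the asm streamer then falls back to emitting 64-bit values in smaller pieces. A sketch of that fallback, assuming little-endian x86 and the usual 32-bit data directive (this mirrors, but is not, the MC code):

#include <cstdint>
#include <ostream>

// With no 8-byte directive available, split into two 4-byte halves,
// low half first on a little-endian target.
void emitInt64(std::ostream &OS, uint64_t V) {
  OS << "\t.long\t" << (uint32_t)(V & 0xffffffff) << '\n';
  OS << "\t.long\t" << (uint32_t)(V >> 32) << '\n';
}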
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index bf05ccf..dce5b4d 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -26,7 +26,7 @@ class FunctionPass;
class JITCodeEmitter;
class X86TargetMachine;
-/// createX86ISelDag - This pass converts a legalized DAG into a
+/// createX86ISelDag - This pass converts a legalized DAG into a
/// X86-specific DAG, ready for instruction scheduling.
///
FunctionPass *createX86ISelDag(X86TargetMachine &TM,
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 6c1a816..18e6b7c 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -17,14 +17,14 @@
include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
-// X86 Subtarget state.
+// X86 Subtarget state
//
def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true",
"64-bit mode (x86_64)">;
//===----------------------------------------------------------------------===//
-// X86 Subtarget features.
+// X86 Subtarget features
//===----------------------------------------------------------------------===//
def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true",
@@ -97,7 +97,7 @@ def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
[FeatureAVX, FeatureSSE4A]>;
def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true",
"Enable XOP instructions",
- [FeatureAVX, FeatureSSE4A]>;
+ [FeatureFMA4]>;
def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem",
"HasVectorUAMem", "true",
"Allow unaligned memory operands on vector/SIMD instructions">;
@@ -226,7 +226,7 @@ def : Proc<"bdver1", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
def : Proc<"bdver2", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
FeatureAES, FeaturePCLMUL,
FeatureF16C, FeatureLZCNT,
- FeaturePOPCNT, FeatureBMI]>;
+ FeaturePOPCNT, FeatureBMI, FeatureFMA]>;
def : Proc<"winchip-c6", [FeatureMMX]>;
def : Proc<"winchip2", [Feature3DNow]>;
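The XOP change above swaps a repeated dependency list for a single implied feature, making the chain xop -> fma4 -> {avx, sse4a} explicit, and bdver2 additionally gains FeatureFMA. A small sketch of how such transitive implications resolve to a flat feature set (illustrative only; the real resolution lives in the generic SubtargetFeature machinery):

#include <map>
#include <set>
#include <string>
#include <vector>

using FeatureMap = std::map<std::string, std::vector<std::string>>;

// Collect a feature and everything it transitively implies.
void expand(const std::string &F, const FeatureMap &Implies,
            std::set<std::string> &Out) {
  if (!Out.insert(F).second)
    return;                       // already visited
  auto It = Implies.find(F);
  if (It != Implies.end())
    for (const std::string &Dep : It->second)
      expand(Dep, Implies, Out);
}

// expand("xop", {{"xop", {"fma4"}}, {"fma4", {"avx", "sse4a"}}}, S)
// leaves S = {avx, fma4, sse4a, xop}.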
diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h
index a6ed9ba..35386cd 100644
--- a/lib/Target/X86/X86AsmPrinter.h
+++ b/lib/Target/X86/X86AsmPrinter.h
@@ -37,15 +37,15 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
virtual const char *getPassName() const {
return "X86 AT&T-Style Assembly Printer";
}
-
+
const X86Subtarget &getSubtarget() const { return *Subtarget; }
virtual void EmitStartOfAsmFile(Module &M);
virtual void EmitEndOfAsmFile(Module &M);
-
+
virtual void EmitInstruction(const MachineInstr *MI);
-
+
void printSymbolOperand(const MachineOperand &MO, raw_ostream &O);
// These methods are used by the tablegen'erated instruction printer.
@@ -71,7 +71,7 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
void printPICLabel(const MachineInstr *MI, unsigned Op, raw_ostream &O);
bool runOnMachineFunction(MachineFunction &F);
-
+
void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.cpp b/lib/Target/X86/X86COFFMachineModuleInfo.cpp
index e01ff41..6a6125b 100644
--- a/lib/Target/X86/X86COFFMachineModuleInfo.cpp
+++ b/lib/Target/X86/X86COFFMachineModuleInfo.cpp
@@ -17,4 +17,3 @@ using namespace llvm;
X86COFFMachineModuleInfo::~X86COFFMachineModuleInfo() {
}
-
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h
index 0cec95a..471eb31 100644
--- a/lib/Target/X86/X86COFFMachineModuleInfo.h
+++ b/lib/Target/X86/X86COFFMachineModuleInfo.h
@@ -1,4 +1,4 @@
-//===-- X86COFFMachineModuleInfo.h - X86 COFF MMI Impl ----------*- C++ -*-===//
+//===-- X86coffmachinemoduleinfo.h - X86 COFF MMI Impl ----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -33,7 +33,7 @@ public:
void addExternalFunction(MCSymbol* Symbol) {
Externals.insert(Symbol);
}
-
+
typedef DenseSet<MCSymbol const *>::const_iterator externals_iterator;
externals_iterator externals_begin() const { return Externals.begin(); }
externals_iterator externals_end() const { return Externals.end(); }
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 585b7a5..e5952aa 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -57,7 +57,9 @@ class X86FastISel : public FastISel {
bool X86ScalarSSEf32;
public:
- explicit X86FastISel(FunctionLoweringInfo &funcInfo) : FastISel(funcInfo) {
+ explicit X86FastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo)
+ : FastISel(funcInfo, libInfo) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
X86ScalarSSEf64 = Subtarget->hasSSE2();
@@ -155,9 +157,9 @@ bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
// For now, require SSE/SSE2 for performing floating-point operations,
// since x87 requires additional work.
if (VT == MVT::f64 && !X86ScalarSSEf64)
- return false;
+ return false;
if (VT == MVT::f32 && !X86ScalarSSEf32)
- return false;
+ return false;
// Similarly, no f80 support yet.
if (VT == MVT::f80)
return false;
@@ -1516,6 +1518,22 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
return DoSelectCall(I, 0);
}
+static unsigned computeBytesPoppedByCallee(const X86Subtarget &Subtarget,
+ const ImmutableCallSite &CS) {
+ if (Subtarget.is64Bit())
+ return 0;
+ if (Subtarget.isTargetWindows())
+ return 0;
+ CallingConv::ID CC = CS.getCallingConv();
+ if (CC == CallingConv::Fast || CC == CallingConv::GHC)
+ return 0;
+ if (!CS.paramHasAttr(1, Attribute::StructRet))
+ return 0;
+ if (CS.paramHasAttr(1, Attribute::InReg))
+ return 0;
+ return 4;
+}
+
// Select either a call, or an llvm.memcpy/memmove/memset intrinsic
bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
const CallInst *CI = cast<CallInst>(I);
@@ -1862,12 +1880,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
// Issue CALLSEQ_END
unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
- unsigned NumBytesCallee = 0;
- if (!Subtarget->is64Bit() && !Subtarget->isTargetWindows() &&
- !(CS.getCallingConv() == CallingConv::Fast ||
- CS.getCallingConv() == CallingConv::GHC) &&
- CS.paramHasAttr(1, Attribute::StructRet))
- NumBytesCallee = 4;
+ const unsigned NumBytesCallee = computeBytesPoppedByCallee(*Subtarget, CS);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp))
.addImm(NumBytes).addImm(NumBytesCallee);
@@ -2129,28 +2142,28 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
unsigned Opc = 0;
const TargetRegisterClass *RC = NULL;
switch (VT.SimpleTy) {
- default: return false;
- case MVT::f32:
- if (X86ScalarSSEf32) {
- Opc = X86::FsFLD0SS;
- RC = &X86::FR32RegClass;
- } else {
- Opc = X86::LD_Fp032;
- RC = &X86::RFP32RegClass;
- }
- break;
- case MVT::f64:
- if (X86ScalarSSEf64) {
- Opc = X86::FsFLD0SD;
- RC = &X86::FR64RegClass;
- } else {
- Opc = X86::LD_Fp064;
- RC = &X86::RFP64RegClass;
- }
- break;
- case MVT::f80:
- // No f80 support yet.
- return false;
+ default: return false;
+ case MVT::f32:
+ if (X86ScalarSSEf32) {
+ Opc = X86::FsFLD0SS;
+ RC = &X86::FR32RegClass;
+ } else {
+ Opc = X86::LD_Fp032;
+ RC = &X86::RFP32RegClass;
+ }
+ break;
+ case MVT::f64:
+ if (X86ScalarSSEf64) {
+ Opc = X86::FsFLD0SD;
+ RC = &X86::FR64RegClass;
+ } else {
+ Opc = X86::LD_Fp064;
+ RC = &X86::RFP64RegClass;
+ }
+ break;
+ case MVT::f80:
+ // No f80 support yet.
+ return false;
}
unsigned ResultReg = createResultReg(RC);
@@ -2169,7 +2182,7 @@ bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
if (!X86SelectAddress(LI->getOperand(0), AM))
return false;
- X86InstrInfo &XII = (X86InstrInfo&)TII;
+ const X86InstrInfo &XII = (const X86InstrInfo&)TII;
unsigned Size = TD.getTypeAllocSize(LI->getType());
unsigned Alignment = LI->getAlignment();
@@ -2188,7 +2201,8 @@ bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
namespace llvm {
- FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) {
- return new X86FastISel(funcInfo);
+ FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo) {
+ return new X86FastISel(funcInfo, libInfo);
}
}
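computeBytesPoppedByCallee pulls the callee-pops rule out of the CALLSEQ_END site into a chain of early returns: only a 32-bit, non-Windows call whose first argument is an sret pointer on the stack (not inreg, not fastcc/GHC) makes the callee pop 4 bytes. At the source level this is the classic i386 struct-return convention, sketched below; the function names are ours, the ret $4 behavior is the SysV i386 ABI:

// Returning a large struct on i386 SysV passes a hidden pointer on the
// stack; the callee stores through it and pops it itself (ret $4), so
// the caller must account for 4 fewer bytes when tearing down the call.
struct Big { int a, b, c, d; };

Big makeBig() {            // compiled for i386: writes *hidden, then ret $4
  return Big{1, 2, 3, 4};
}
// On x86-64, Windows, fastcc/GHC, or with the pointer passed inreg, there
// is nothing extra on the stack and the function returns with a plain ret.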
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 711ee41..955c75a 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -971,7 +971,7 @@ void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
// Change from the pseudo instruction to the concrete instruction.
MI->RemoveOperand(0); // Remove the explicit ST(0) operand
MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
-
+
// Result gets pushed on the stack.
pushReg(DestReg);
}
@@ -1015,7 +1015,7 @@ void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
} else {
moveToTop(Reg, I); // Move to the top of the stack...
}
-
+
// Convert from the pseudo instruction to the concrete instruction.
MI->RemoveOperand(NumOps-1); // Remove explicit ST(0) operand
MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
@@ -1297,7 +1297,7 @@ void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) {
MI->RemoveOperand(1);
MI->getOperand(0).setReg(getSTReg(Op1));
MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
-
+
// If we kill the second operand, make sure to pop it from the stack.
if (Op0 != Op1 && KillsOp1) {
// Get this value off of the register stack.
@@ -1714,38 +1714,38 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
// Assert that the top of stack contains the right FP register.
assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) &&
"Top of stack not the right register for RET!");
-
+
// Ok, everything is good, mark the value as not being on the stack
// anymore so that our assertion about the stack being empty at end of
// block doesn't fire.
StackTop = 0;
return;
}
-
+
// Otherwise, we are returning two values:
// 2) If returning the same value for both, we only have one thing in the FP
// stack. Consider: RET FP1, FP1
if (StackTop == 1) {
assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&&
"Stack misconfiguration for RET!");
-
+
// Duplicate the TOS so that we return it twice. Just pick some other FPx
// register to hold it.
unsigned NewReg = getScratchReg();
duplicateToTop(FirstFPRegOp, NewReg, MI);
FirstFPRegOp = NewReg;
}
-
+
/// Okay we know we have two different FPx operands now:
assert(StackTop == 2 && "Must have two values live!");
-
+
/// 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently
/// in ST(1). In this case, emit an fxch.
if (getStackEntry(0) == SecondFPRegOp) {
assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live");
moveToTop(FirstFPRegOp, MI);
}
-
+
/// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in
/// ST(1). Just remove both from our understanding of the stack and return.
assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live");
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 5186482..27195b4 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -60,7 +60,7 @@ namespace {
int Base_FrameIndex;
unsigned Scale;
- SDValue IndexReg;
+ SDValue IndexReg;
int32_t Disp;
SDValue Segment;
const GlobalValue *GV;
@@ -80,11 +80,11 @@ namespace {
bool hasSymbolicDisplacement() const {
return GV != 0 || CP != 0 || ES != 0 || JT != -1 || BlockAddr != 0;
}
-
+
bool hasBaseOrIndexReg() const {
return IndexReg.getNode() != 0 || Base_Reg.getNode() != 0;
}
-
+
/// isRIPRelative - Return true if this addressing mode is already RIP
/// relative.
bool isRIPRelative() const {
@@ -94,7 +94,7 @@ namespace {
return RegNode->getReg() == X86::RIP;
return false;
}
-
+
void setBaseReg(SDValue Reg) {
BaseType = RegBase;
Base_Reg = Reg;
@@ -104,7 +104,7 @@ namespace {
dbgs() << "X86ISelAddressMode " << this << '\n';
dbgs() << "Base_Reg ";
if (Base_Reg.getNode() != 0)
- Base_Reg.getNode()->dump();
+ Base_Reg.getNode()->dump();
else
dbgs() << "nul";
dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n'
@@ -113,7 +113,7 @@ namespace {
if (IndexReg.getNode() != 0)
IndexReg.getNode()->dump();
else
- dbgs() << "nul";
+ dbgs() << "nul";
dbgs() << " Disp " << Disp << '\n'
<< "GV ";
if (GV)
@@ -213,21 +213,21 @@ namespace {
SDValue &Index, SDValue &Disp,
SDValue &Segment,
SDValue &NodeWithChain);
-
+
bool TryFoldLoad(SDNode *P, SDValue N,
SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
SDValue &Segment);
-
+
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
char ConstraintCode,
std::vector<SDValue> &OutOps);
-
+
void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);
- inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base,
+ inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment) {
Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
@@ -426,7 +426,7 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
void X86DAGToDAGISel::PreprocessISelDAG() {
// OptForSize is used in pattern predicates that isel is matching.
OptForSize = MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize);
-
+
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
E = CurDAG->allnodes_end(); I != E; ) {
SDNode *N = I++; // Preincrement iterator to avoid invalidation issues.
@@ -462,7 +462,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
++NumLoadMoved;
continue;
}
-
+
// Lower fpround and fpextend nodes that target the FP stack to be store and
// load to the stack. This is a gross hack. We would like to simply mark
// these as being illegal, but when we do that, legalize produces these when
@@ -473,7 +473,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
// FIXME: This should only happen when not compiled with -O0.
if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
continue;
-
+
EVT SrcVT = N->getOperand(0).getValueType();
EVT DstVT = N->getValueType(0);
@@ -496,7 +496,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
if (N->getConstantOperandVal(1))
continue;
}
-
+
// Here we could have an FP stack truncation or an FPStack <-> SSE convert.
// FPStack has extload and truncstore. SSE can fold direct loads into other
// operations. Based on this, decide what we want to do.
@@ -505,10 +505,10 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
else
MemVT = SrcIsSSE ? SrcVT : DstVT;
-
+
SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
DebugLoc dl = N->getDebugLoc();
-
+
// FIXME: optimize the case where the src/dest is a load or store?
SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
N->getOperand(0),
@@ -524,12 +524,12 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
// To avoid invalidating 'I', back it up to the convert node.
--I;
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
-
+
// Now that we did that, the node is dead. Increment the iterator to the
// next node to process, then delete N.
++I;
CurDAG->DeleteNode(N);
- }
+ }
}
@@ -584,7 +584,7 @@ bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset,
bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
SDValue Address = N->getOperand(1);
-
+
// load gs:0 -> GS segment register.
// load fs:0 -> FS segment register.
//
@@ -593,7 +593,7 @@ bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
// For more information see http://people.redhat.com/drepper/tls.pdf
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
if (C->getSExtValue() == 0 && AM.Segment.getNode() == 0 &&
- Subtarget->isTargetELF())
+ Subtarget->isTargetLinux())
switch (N->getPointerInfo().getAddrSpace()) {
case 256:
AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
@@ -602,7 +602,7 @@ bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
return false;
}
-
+
return true;
}
@@ -992,7 +992,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
case ISD::SHL:
if (AM.IndexReg.getNode() != 0 || AM.Scale != 1)
break;
-
+
if (ConstantSDNode
*CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) {
unsigned Val = CN->getZExtValue();
@@ -1167,7 +1167,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
!MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
return false;
AM = Backup;
-
+
// Try again after commuting the operands.
if (!MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)&&
!MatchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
@@ -1203,7 +1203,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
AM = Backup;
}
break;
-
+
case ISD::AND: {
// Perform some heroic transforms on an and of a constant-count shift
// with a constant to enable use of the scaled offset field.
@@ -1275,7 +1275,7 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment) {
X86ISelAddressMode AM;
-
+
if (Parent &&
// This list of opcodes are all the nodes that have an "addr:$ptr" operand
// that are not a MemSDNode, and thus don't have proper addrspace info.
@@ -1290,7 +1290,7 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
if (AddrSpace == 257)
AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
}
-
+
if (MatchAddress(N, AM))
return false;
@@ -1336,7 +1336,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
// elements. This is a vector shuffle from the zero vector.
if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
// Check to see if the top elements are all zeros (or bitcast of zeros).
- N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
N.getOperand(0).getNode()->hasOneUse() &&
ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) &&
N.getOperand(0).getOperand(0).hasOneUse() &&
@@ -1411,7 +1411,7 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue N,
// If it isn't worth using an LEA, reject it.
if (Complexity <= 2)
return false;
-
+
getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
return true;
}
@@ -1422,7 +1422,7 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base,
SDValue &Disp, SDValue &Segment) {
assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
-
+
X86ISelAddressMode AM;
AM.GV = GA->getGlobal();
AM.Disp += GA->getOffset();
@@ -1435,7 +1435,7 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base,
} else {
AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
}
-
+
getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
return true;
}
@@ -1449,7 +1449,7 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
!IsProfitableToFold(N, P, P) ||
!IsLegalToFold(N, P, P, OptLevel))
return false;
-
+
return SelectAddr(N.getNode(),
N.getOperand(1), Base, Scale, Index, Disp, Segment);
}
@@ -1700,7 +1700,7 @@ static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) {
if (Node->hasAnyUseOfValue(0))
return 0;
-
+
// Optimize common patterns for __sync_or_and_fetch and similar arith
// operations where the result is not used. This allows us to use the "lock"
// version of the arithmetic instruction.
@@ -1727,14 +1727,14 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) {
default:
return 0;
}
-
+
bool isCN = false;
ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val);
if (CN && (int32_t)CN->getSExtValue() == CN->getSExtValue()) {
isCN = true;
Val = CurDAG->getTargetConstant(CN->getSExtValue(), NVT);
}
-
+
unsigned Opc = 0;
switch (NVT.getSimpleVT().SimpleTy) {
default: return 0;
@@ -1772,7 +1772,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) {
}
break;
}
-
+
assert(Opc != 0 && "Invalid arith lock transform!");
DebugLoc dl = Node->getDebugLoc();
@@ -1852,7 +1852,7 @@ static bool HasNoSignedComparisonUses(SDNode *N) {
/// isLoadIncOrDecStore - Check whether or not the chain ending in StoreNode
/// is suitable for doing the {load; increment or decrement; store} to modify
/// transformation.
-static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc,
+static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc,
SDValue StoredVal, SelectionDAG *CurDAG,
LoadSDNode* &LoadNode, SDValue &InputChain) {
@@ -1876,15 +1876,15 @@ static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc,
// Return LoadNode by reference.
LoadNode = cast<LoadSDNode>(Load);
// is the size of the value one that we can handle? (i.e. 64, 32, 16, or 8)
- EVT LdVT = LoadNode->getMemoryVT();
- if (LdVT != MVT::i64 && LdVT != MVT::i32 && LdVT != MVT::i16 &&
+ EVT LdVT = LoadNode->getMemoryVT();
+ if (LdVT != MVT::i64 && LdVT != MVT::i32 && LdVT != MVT::i16 &&
LdVT != MVT::i8)
return false;
// Is store the only read of the loaded value?
if (!Load.hasOneUse())
return false;
-
+
// Is the address of the store the same as the load?
if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
LoadNode->getOffset() != StoreNode->getOffset())
@@ -1990,7 +1990,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
unsigned Opc, MOpc;
unsigned Opcode = Node->getOpcode();
DebugLoc dl = Node->getDebugLoc();
-
+
DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n');
if (Node->isMachineOpcode()) {
@@ -2062,7 +2062,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
case X86ISD::ATOMSWAP64_DAG: {
unsigned Opc;
switch (Opcode) {
- default: llvm_unreachable("Impossible intrinsic");
+ default: llvm_unreachable("Impossible opcode");
case X86ISD::ATOMOR64_DAG: Opc = X86::ATOMOR6432; break;
case X86ISD::ATOMXOR64_DAG: Opc = X86::ATOMXOR6432; break;
case X86ISD::ATOMADD64_DAG: Opc = X86::ATOMADD6432; break;
@@ -2119,7 +2119,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
if (Opcode != ISD::AND && ((Val >> ShlVal) << ShlVal) != Val)
break;
- unsigned ShlOp, Op = 0;
+ unsigned ShlOp, Op;
EVT CstVT = NVT;
// Check the minimum bitwidth for the new constant.
@@ -2142,6 +2142,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
ShlOp = X86::SHL32ri;
switch (Opcode) {
+ default: llvm_unreachable("Impossible opcode");
case ISD::AND: Op = X86::AND32ri8; break;
case ISD::OR: Op = X86::OR32ri8; break;
case ISD::XOR: Op = X86::XOR32ri8; break;
@@ -2152,6 +2153,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
ShlOp = X86::SHL64ri;
switch (Opcode) {
+ default: llvm_unreachable("Impossible opcode");
case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break;
case ISD::OR: Op = CstVT==MVT::i8? X86::OR64ri8 : X86::OR64ri32; break;
case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break;
@@ -2168,7 +2170,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
case X86ISD::UMUL: {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
-
+
unsigned LoReg;
switch (NVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
@@ -2177,20 +2179,20 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break;
case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break;
}
-
+
SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
N0, SDValue()).getValue(1);
-
+
SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
SDValue Ops[] = {N1, InFlag};
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, 2);
-
+
ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1));
ReplaceUses(SDValue(Node, 2), SDValue(CNode, 2));
return NULL;
}
-
+
case ISD::SMUL_LOHI:
case ISD::UMUL_LOHI: {
SDValue N0 = Node->getOperand(0);
@@ -2287,7 +2289,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
ReplaceUses(SDValue(Node, 1), Result);
DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
}
-
+
return NULL;
}
@@ -2438,7 +2440,12 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
return NULL;
}
- case X86ISD::CMP: {
+ case X86ISD::CMP:
+ case X86ISD::SUB: {
+ // Sometimes a SUB is used to perform comparison.
+ if (Opcode == X86ISD::SUB && Node->hasAnyUseOfValue(0))
+ // This node is not a CMP.
+ break;
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
@@ -2555,7 +2562,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
// a simple increment or decrement through memory of that value, if the
// uses of the modified value and its address are suitable.
// The DEC64m tablegen pattern is currently not able to match the case where
- // the EFLAGS on the original DEC are used. (This also applies to
+ // the EFLAGS on the original DEC are used. (This also applies to
// {INC,DEC}X{64,32,16,8}.)
// We'll need to improve tablegen to allow flags to be transferred from a
// node in the pattern to the result node. probably with a new keyword
@@ -2587,7 +2594,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
MemOp[0] = StoreNode->getMemOperand();
MemOp[1] = LoadNode->getMemOperand();
const SDValue Ops[] = { Base, Scale, Index, Disp, Segment, InputChain };
- EVT LdVT = LoadNode->getMemoryVT();
+ EVT LdVT = LoadNode->getMemoryVT();
unsigned newOpc = getFusedLdStOpcode(LdVT, Opc);
MachineSDNode *Result = CurDAG->getMachineNode(newOpc,
Node->getDebugLoc(),
@@ -2600,6 +2607,85 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
return Result;
}
+
+ // FIXME: Custom handling because TableGen doesn't support multiple implicit
+ // defs in an instruction pattern
+ case X86ISD::PCMPESTRI: {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+ SDValue N2 = Node->getOperand(2);
+ SDValue N3 = Node->getOperand(3);
+ SDValue N4 = Node->getOperand(4);
+
+ // Make sure last argument is a constant
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N4);
+ if (!Cst)
+ break;
+
+ uint64_t Imm = Cst->getZExtValue();
+
+ SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
+ X86::EAX, N1, SDValue()).getValue(1);
+ InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX,
+ N3, InFlag).getValue(1);
+
+ SDValue Ops[] = { N0, N2, getI8Imm(Imm), InFlag };
+ unsigned Opc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr :
+ X86::PCMPESTRIrr;
+ InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops,
+ array_lengthof(Ops)), 0);
+
+ if (!SDValue(Node, 0).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::ECX, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(SDValue(Node, 0), Result);
+ }
+ if (!SDValue(Node, 1).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::EFLAGS, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(SDValue(Node, 1), Result);
+ }
+
+ return NULL;
+ }
+
+ // FIXME: Custom handling because TableGen doesn't support multiple implicit
+ // defs in an instruction pattern
+ case X86ISD::PCMPISTRI: {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+ SDValue N2 = Node->getOperand(2);
+
+ // Make sure last argument is a constant
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N2);
+ if (!Cst)
+ break;
+
+ uint64_t Imm = Cst->getZExtValue();
+
+ SDValue Ops[] = { N0, N1, getI8Imm(Imm) };
+ unsigned Opc = Subtarget->hasAVX() ? X86::VPCMPISTRIrr :
+ X86::PCMPISTRIrr;
+ SDValue InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops,
+ array_lengthof(Ops)), 0);
+
+ if (!SDValue(Node, 0).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::ECX, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(SDValue(Node, 0), Result);
+ }
+ if (!SDValue(Node, 1).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::EFLAGS, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(SDValue(Node, 1), Result);
+ }
+
+ return NULL;
+ }
}
SDNode *ResNode = SelectCode(Node);
@@ -2627,7 +2713,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
return true;
break;
}
-
+
OutOps.push_back(Op0);
OutOps.push_back(Op1);
OutOps.push_back(Op2);
@@ -2636,7 +2722,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
return false;
}
-/// createX86ISelDag - This pass converts a legalized DAG into a
+/// createX86ISelDag - This pass converts a legalized DAG into a
/// X86-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
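Two things in this file deserve a note. The new SUB case folds into CMP because a compare is just a subtract whose integer result nobody reads: if value 0 of the X86ISD::SUB has uses, the case breaks out to normal selection. And the hand-written PCMPESTRI/PCMPISTRI selection exists because one instruction defines two results, ECX and EFLAGS, which TableGen patterns of this era could not express (the FIXMEs above). At the C level the same instruction is reached through several SSE4.2 intrinsics, one per result; a sketch:

#include <nmmintrin.h>  // SSE4.2: _mm_cmpestri / _mm_cmpestrc

// Both intrinsics compile to one PCMPESTRI, with EAX/EDX holding the
// explicit lengths: _mm_cmpestrc reads the EFLAGS result (CF) and
// _mm_cmpestri reads the ECX result (the match index).
int findAnyByte(__m128i needle, int needleLen, __m128i hay, int hayLen) {
  constexpr int Mode = _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY;
  if (_mm_cmpestrc(needle, needleLen, hay, hayLen, Mode))
    return _mm_cmpestri(needle, needleLen, hay, hayLen, Mode);
  return -1;  // no byte of 'needle' occurs in 'hay'
}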
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b88f2fa..7954170 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -66,7 +66,7 @@ static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
SelectionDAG &DAG, DebugLoc dl) {
EVT VT = Vec.getValueType();
- assert(VT.getSizeInBits() == 256 && "Unexpected vector size!");
+ assert(VT.is256BitVector() && "Unexpected vector size!");
EVT ElVT = VT.getVectorElementType();
unsigned Factor = VT.getSizeInBits()/128;
EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
@@ -105,7 +105,7 @@ static SDValue Insert128BitVector(SDValue Result, SDValue Vec,
return Result;
EVT VT = Vec.getValueType();
- assert(VT.getSizeInBits() == 128 && "Unexpected vector size!");
+ assert(VT.is128BitVector() && "Unexpected vector size!");
EVT ElVT = VT.getVectorElementType();
EVT ResultVT = Result.getValueType();
@@ -174,7 +174,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// For 64-bit since we have so many registers use the ILP scheduler, for
// 32-bit code use the register pressure specific scheduling.
// For Atom, always use ILP scheduling.
- if (Subtarget->isAtom())
+ if (Subtarget->isAtom())
setSchedulingPreference(Sched::ILP);
else if (Subtarget->is64Bit())
setSchedulingPreference(Sched::ILP);
@@ -731,6 +731,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FSIN, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FCOS, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FREM, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FMA, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FPOWI, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FSQRT, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand);
@@ -828,7 +829,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
- setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
}
if (!TM.Options.UseSoftFloat && Subtarget->hasSSE2()) {
@@ -869,27 +869,18 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
-
// Custom lower build_vector, vector_shuffle, and extract_vector_elt.
for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
- EVT VT = (MVT::SimpleValueType)i;
+ MVT VT = (MVT::SimpleValueType)i;
// Do not attempt to custom lower non-power-of-2 vectors
if (!isPowerOf2_32(VT.getVectorNumElements()))
continue;
// Do not attempt to custom lower non-128-bit vectors
if (!VT.is128BitVector())
continue;
- setOperationAction(ISD::BUILD_VECTOR,
- VT.getSimpleVT().SimpleTy, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE,
- VT.getSimpleVT().SimpleTy, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT,
- VT.getSimpleVT().SimpleTy, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
}
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
@@ -906,23 +897,22 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
- MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
- EVT VT = SVT;
+ MVT VT = (MVT::SimpleValueType)i;
// Do not attempt to promote non-128-bit vectors
if (!VT.is128BitVector())
continue;
- setOperationAction(ISD::AND, SVT, Promote);
- AddPromotedToType (ISD::AND, SVT, MVT::v2i64);
- setOperationAction(ISD::OR, SVT, Promote);
- AddPromotedToType (ISD::OR, SVT, MVT::v2i64);
- setOperationAction(ISD::XOR, SVT, Promote);
- AddPromotedToType (ISD::XOR, SVT, MVT::v2i64);
- setOperationAction(ISD::LOAD, SVT, Promote);
- AddPromotedToType (ISD::LOAD, SVT, MVT::v2i64);
- setOperationAction(ISD::SELECT, SVT, Promote);
- AddPromotedToType (ISD::SELECT, SVT, MVT::v2i64);
+ setOperationAction(ISD::AND, VT, Promote);
+ AddPromotedToType (ISD::AND, VT, MVT::v2i64);
+ setOperationAction(ISD::OR, VT, Promote);
+ AddPromotedToType (ISD::OR, VT, MVT::v2i64);
+ setOperationAction(ISD::XOR, VT, Promote);
+ AddPromotedToType (ISD::XOR, VT, MVT::v2i64);
+ setOperationAction(ISD::LOAD, VT, Promote);
+ AddPromotedToType (ISD::LOAD, VT, MVT::v2i64);
+ setOperationAction(ISD::SELECT, VT, Promote);
+ AddPromotedToType (ISD::SELECT, VT, MVT::v2i64);
}
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
@@ -1009,9 +999,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
}
}
- if (Subtarget->hasSSE42())
- setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
-
if (!TM.Options.UseSoftFloat && Subtarget->hasAVX()) {
addRegisterClass(MVT::v32i8, &X86::VR256RegClass);
addRegisterClass(MVT::v16i16, &X86::VR256RegClass);
@@ -1042,13 +1029,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f64, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i64, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f32, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i32, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i8, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i16, Custom);
-
setOperationAction(ISD::SRL, MVT::v16i16, Custom);
setOperationAction(ISD::SRL, MVT::v32i8, Custom);
@@ -1072,6 +1052,15 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::VSELECT, MVT::v8i32, Legal);
setOperationAction(ISD::VSELECT, MVT::v8f32, Legal);
+ if (Subtarget->hasFMA()) {
+ setOperationAction(ISD::FMA, MVT::v8f32, Custom);
+ setOperationAction(ISD::FMA, MVT::v4f64, Custom);
+ setOperationAction(ISD::FMA, MVT::v4f32, Custom);
+ setOperationAction(ISD::FMA, MVT::v2f64, Custom);
+ setOperationAction(ISD::FMA, MVT::f32, Custom);
+ setOperationAction(ISD::FMA, MVT::f64, Custom);
+ }
+
if (Subtarget->hasAVX2()) {
setOperationAction(ISD::ADD, MVT::v4i64, Legal);
setOperationAction(ISD::ADD, MVT::v8i32, Legal);
@@ -1125,45 +1114,44 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// Custom lower several nodes for 256-bit types.
for (int i = MVT::FIRST_VECTOR_VALUETYPE;
i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
- MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
- EVT VT = SVT;
+ MVT VT = (MVT::SimpleValueType)i;
// Extract subvector is special because the value type
// (result) is 128-bit but the source is 256-bit wide.
if (VT.is128BitVector())
- setOperationAction(ISD::EXTRACT_SUBVECTOR, SVT, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
// Do not attempt to custom lower other non-256-bit vectors
if (!VT.is256BitVector())
continue;
- setOperationAction(ISD::BUILD_VECTOR, SVT, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, SVT, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, SVT, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, SVT, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, SVT, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, SVT, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
}
// Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
for (int i = MVT::v32i8; i != MVT::v4i64; ++i) {
- MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
- EVT VT = SVT;
+ MVT VT = (MVT::SimpleValueType)i;
// Do not attempt to promote non-256-bit vectors
if (!VT.is256BitVector())
continue;
- setOperationAction(ISD::AND, SVT, Promote);
- AddPromotedToType (ISD::AND, SVT, MVT::v4i64);
- setOperationAction(ISD::OR, SVT, Promote);
- AddPromotedToType (ISD::OR, SVT, MVT::v4i64);
- setOperationAction(ISD::XOR, SVT, Promote);
- AddPromotedToType (ISD::XOR, SVT, MVT::v4i64);
- setOperationAction(ISD::LOAD, SVT, Promote);
- AddPromotedToType (ISD::LOAD, SVT, MVT::v4i64);
- setOperationAction(ISD::SELECT, SVT, Promote);
- AddPromotedToType (ISD::SELECT, SVT, MVT::v4i64);
+ setOperationAction(ISD::AND, VT, Promote);
+ AddPromotedToType (ISD::AND, VT, MVT::v4i64);
+ setOperationAction(ISD::OR, VT, Promote);
+ AddPromotedToType (ISD::OR, VT, MVT::v4i64);
+ setOperationAction(ISD::XOR, VT, Promote);
+ AddPromotedToType (ISD::XOR, VT, MVT::v4i64);
+ setOperationAction(ISD::LOAD, VT, Promote);
+ AddPromotedToType (ISD::LOAD, VT, MVT::v4i64);
+ setOperationAction(ISD::SELECT, VT, Promote);
+ AddPromotedToType (ISD::SELECT, VT, MVT::v4i64);
}
}
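The Promote/AddPromotedToType pairs above (and their 128-bit counterparts earlier in the constructor) work because bitwise operations are lane-agnostic: performing a v32i8 AND as a v4i64 AND on the same bits gives the same answer, so only the v2i64/v4i64 patterns need to exist. A scalar analogue of that reinterpretation, as a sketch:

#include <cstdint>
#include <cstring>

// AND eight bytes at once by viewing them as one 64-bit lane; the result
// is bit-identical to eight per-byte ANDs, which is what lets the
// legalizer retype vector AND/OR/XOR/LOAD to the widest integer element.
void and8(uint8_t r[8], const uint8_t a[8], const uint8_t b[8]) {
  uint64_t x, y;
  std::memcpy(&x, a, 8);
  std::memcpy(&y, b, 8);
  const uint64_t z = x & y;
  std::memcpy(r, &z, 8);
}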
@@ -1221,6 +1209,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::FSUB);
+ setTargetDAGCombine(ISD::FMA);
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::STORE);
@@ -1718,21 +1707,37 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
/// CallIsStructReturn - Determines whether a call uses struct return
/// semantics.
-static bool CallIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
+enum StructReturnType {
+ NotStructReturn,
+ RegStructReturn,
+ StackStructReturn
+};
+static StructReturnType
+callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
if (Outs.empty())
- return false;
+ return NotStructReturn;
- return Outs[0].Flags.isSRet();
+ const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
+ if (!Flags.isSRet())
+ return NotStructReturn;
+ if (Flags.isInReg())
+ return RegStructReturn;
+ return StackStructReturn;
}
/// ArgsAreStructReturn - Determines whether a function uses struct
/// return semantics.
-static bool
-ArgsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
+static StructReturnType
+argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
if (Ins.empty())
- return false;
+ return NotStructReturn;
- return Ins[0].Flags.isSRet();
+ const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
+ if (!Flags.isSRet())
+ return NotStructReturn;
+ if (Flags.isInReg())
+ return RegStructReturn;
+ return StackStructReturn;
}
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
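Widening the old bool to a three-way StructReturnType matters later in this file: only StackStructReturn leaves a hidden pointer on the stack for the callee to pop, while an sret pointer marked inreg travels in a register. The consumers updated further down reduce to something like this sketch (a simplification of the LowerFormalArguments and LowerCall hunks below, not their actual shape):

// Only the stack variant makes the callee pop the hidden pointer.
unsigned bytesToPop(StructReturnType SR, bool Is64Bit, bool IsWindows) {
  if (Is64Bit || IsWindows)
    return 0;                              // no hidden-pointer pop on these ABIs
  return SR == StackStructReturn ? 4 : 0;  // RegStructReturn pops nothing
}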
@@ -1876,9 +1881,9 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
RC = &X86::FR32RegClass;
else if (RegVT == MVT::f64)
RC = &X86::FR64RegClass;
- else if (RegVT.isVector() && RegVT.getSizeInBits() == 256)
+ else if (RegVT.is256BitVector())
RC = &X86::VR256RegClass;
- else if (RegVT.isVector() && RegVT.getSizeInBits() == 128)
+ else if (RegVT.is128BitVector())
RC = &X86::VR128RegClass;
else if (RegVT == MVT::x86mmx)
RC = &X86::VR64RegClass;
@@ -2073,7 +2078,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
// If this is an sret function, the return should pop the hidden pointer.
if (!Is64Bit && !IsTailCallConvention(CallConv) && !IsWindows &&
- ArgsAreStructReturn(Ins))
+ argsAreStructReturn(Ins) == StackStructReturn)
FuncInfo->setBytesToPopOnReturn(4);
}
@@ -2163,7 +2168,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool Is64Bit = Subtarget->is64Bit();
bool IsWin64 = Subtarget->isTargetWin64();
bool IsWindows = Subtarget->isTargetWindows();
- bool IsStructRet = CallIsStructReturn(Outs);
+ StructReturnType SR = callIsStructReturn(Outs);
bool IsSibcall = false;
if (MF.getTarget().Options.DisableTailCalls)
@@ -2172,8 +2177,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (isTailCall) {
// Check if it's really possible to do a tail call.
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
- isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
- Outs, OutVals, Ins, DAG);
+ isVarArg, SR != NotStructReturn,
+ MF.getFunction()->hasStructRetAttr(),
+ Outs, OutVals, Ins, DAG);
// Sibcalls are automatically detected tailcalls which do not require
// ABI changes.
@@ -2255,7 +2261,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
break;
case CCValAssign::AExt:
- if (RegVT.isVector() && RegVT.getSizeInBits() == 128) {
+ if (RegVT.is128BitVector()) {
// Special case: passing MMX values in XMM registers.
Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
@@ -2549,7 +2555,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
getTargetMachine().Options.GuaranteedTailCallOpt))
NumBytesForCalleeToPush = NumBytes; // Callee pops everything
else if (!Is64Bit && !IsTailCallConvention(CallConv) && !IsWindows &&
- IsStructRet)
+ SR == StackStructReturn)
// If this is a call to a struct-return function, the callee
// pops the hidden struct pointer, so we have to push it back.
// This is common for Darwin/X86, Linux & Mingw32 targets.
@@ -2870,8 +2876,9 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
}
FastISel *
-X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
- return X86::createFastISel(funcInfo);
+X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo) const {
+ return X86::createFastISel(funcInfo, libInfo);
}
@@ -3397,11 +3404,11 @@ static bool isSHUFPMask(ArrayRef<int> Mask, EVT VT, bool HasAVX,
/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
static bool isMOVHLPSMask(ArrayRef<int> Mask, EVT VT) {
- unsigned NumElems = VT.getVectorNumElements();
-
- if (VT.getSizeInBits() != 128)
+ if (!VT.is128BitVector())
return false;
+ unsigned NumElems = VT.getVectorNumElements();
+
if (NumElems != 4)
return false;
@@ -3416,11 +3423,11 @@ static bool isMOVHLPSMask(ArrayRef<int> Mask, EVT VT) {
/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
/// <2, 3, 2, 3>
static bool isMOVHLPS_v_undef_Mask(ArrayRef<int> Mask, EVT VT) {
- unsigned NumElems = VT.getVectorNumElements();
-
- if (VT.getSizeInBits() != 128)
+ if (!VT.is128BitVector())
return false;
+ unsigned NumElems = VT.getVectorNumElements();
+
if (NumElems != 4)
return false;
@@ -3433,7 +3440,7 @@ static bool isMOVHLPS_v_undef_Mask(ArrayRef<int> Mask, EVT VT) {
/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
static bool isMOVLPMask(ArrayRef<int> Mask, EVT VT) {
- if (VT.getSizeInBits() != 128)
+ if (!VT.is128BitVector())
return false;
unsigned NumElems = VT.getVectorNumElements();
@@ -3455,10 +3462,12 @@ static bool isMOVLPMask(ArrayRef<int> Mask, EVT VT) {
/// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLHPS.
static bool isMOVLHPSMask(ArrayRef<int> Mask, EVT VT) {
+ if (!VT.is128BitVector())
+ return false;
+
unsigned NumElems = VT.getVectorNumElements();
- if ((NumElems != 2 && NumElems != 4)
- || VT.getSizeInBits() > 128)
+ if (NumElems != 2 && NumElems != 4)
return false;
for (unsigned i = 0, e = NumElems/2; i != e; ++i)
@@ -3675,7 +3684,7 @@ static bool isUNPCKH_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasAVX2) {
static bool isMOVLMask(ArrayRef<int> Mask, EVT VT) {
if (VT.getVectorElementType().getSizeInBits() < 32)
return false;
- if (VT.getSizeInBits() == 256)
+ if (!VT.is128BitVector())
return false;
unsigned NumElts = VT.getVectorNumElements();
@@ -3697,7 +3706,7 @@ static bool isMOVLMask(ArrayRef<int> Mask, EVT VT) {
/// The first half comes from the second half of V1 and the second half from the
/// the second half of V2.
static bool isVPERM2X128Mask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
- if (!HasAVX || VT.getSizeInBits() != 256)
+ if (!HasAVX || !VT.is256BitVector())
return false;
// The shuffle result is divided into half A and half B. In total the two
@@ -3789,9 +3798,10 @@ static bool isVPERMILPMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
/// element of vector 2 and the other elements to come from vector 1 in order.
static bool isCommutedMOVLMask(ArrayRef<int> Mask, EVT VT,
bool V2IsSplat = false, bool V2IsUndef = false) {
- unsigned NumOps = VT.getVectorNumElements();
- if (VT.getSizeInBits() == 256)
+ if (!VT.is128BitVector())
return false;
+
+ unsigned NumOps = VT.getVectorNumElements();
if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
return false;
@@ -3857,9 +3867,11 @@ static bool isMOVSLDUPMask(ArrayRef<int> Mask, EVT VT,
/// specifies a shuffle of elements that is suitable for input to 256-bit
/// version of MOVDDUP.
static bool isMOVDDUPYMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
- unsigned NumElts = VT.getVectorNumElements();
+ if (!HasAVX || !VT.is256BitVector())
+ return false;
- if (!HasAVX || VT.getSizeInBits() != 256 || NumElts != 4)
+ unsigned NumElts = VT.getVectorNumElements();
+ if (NumElts != 4)
return false;
for (unsigned i = 0; i != NumElts/2; ++i)
@@ -3875,7 +3887,7 @@ static bool isMOVDDUPYMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
/// specifies a shuffle of elements that is suitable for input to 128-bit
/// version of MOVDDUP.
static bool isMOVDDUPMask(ArrayRef<int> Mask, EVT VT) {
- if (VT.getSizeInBits() != 128)
+ if (!VT.is128BitVector())
return false;
unsigned e = VT.getVectorNumElements() / 2;
@@ -4120,7 +4132,7 @@ static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp,
/// V1 (and in order), and the upper half elements should come from the upper
/// half of V2 (and in order).
static bool ShouldXformToMOVHLPS(ArrayRef<int> Mask, EVT VT) {
- if (VT.getSizeInBits() != 128)
+ if (!VT.is128BitVector())
return false;
if (VT.getVectorNumElements() != 4)
return false;
@@ -4177,7 +4189,7 @@ static bool WillBeConstantPoolLoad(SDNode *N) {
/// MOVLP, it must be either a vector load or a scalar load to vector.
static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
ArrayRef<int> Mask, EVT VT) {
- if (VT.getSizeInBits() != 128)
+ if (!VT.is128BitVector())
return false;
if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
@@ -4719,7 +4731,7 @@ static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
// Although the logic below support any bitwidth size, there are no
// shift instructions which handle more than 128-bit vectors.
- if (SVOp->getValueType(0).getSizeInBits() > 128)
+ if (!SVOp->getValueType(0).is128BitVector())
return false;
if (isVectorShiftLeft(SVOp, DAG, isLeft, ShVal, ShAmt) ||
@@ -4814,7 +4826,7 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
unsigned NumBits, SelectionDAG &DAG,
const TargetLowering &TLI, DebugLoc dl) {
- assert(VT.getSizeInBits() == 128 && "Unknown type for VShift");
+ assert(VT.is128BitVector() && "Unknown type for VShift");
EVT ShVT = MVT::v2i64;
unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp);
@@ -5047,7 +5059,7 @@ X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const {
}
}
- bool Is256 = VT.getSizeInBits() == 256;
+ bool Is256 = VT.is256BitVector();
// Handle the broadcasting a single constant scalar from the constant pool
// into a vector. On Sandybridge it is still better to load a constant vector
@@ -5102,6 +5114,86 @@ X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const {
return SDValue();
}
+// LowerVectorFpExtend - Recognize the scalarized FP_EXTEND from v2f32 to v2f64
+// and convert it into X86ISD::VFPEXT due to the current ISD::FP_EXTEND has the
+// constraint of matching input/output vector elements.
+SDValue
+X86TargetLowering::LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ SDNode *N = Op.getNode();
+ EVT VT = Op.getValueType();
+ unsigned NumElts = Op.getNumOperands();
+
+ // Check supported types and sub-targets.
+ //
+ // Only v2f32 -> v2f64 needs special handling.
+ if (VT != MVT::v2f64 || !Subtarget->hasSSE2())
+ return SDValue();
+
+ SDValue VecIn;
+ EVT VecInVT;
+ SmallVector<int, 8> Mask;
+ EVT SrcVT = MVT::Other;
+
+  // Check whether the pattern can be translated into X86ISD::VFPEXT.
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue In = N->getOperand(i);
+ unsigned Opcode = In.getOpcode();
+
+ // Skip if the element is undefined.
+ if (Opcode == ISD::UNDEF) {
+ Mask.push_back(-1);
+ continue;
+ }
+
+    // Quit if one of the elements is not defined by an 'fpext'.
+ if (Opcode != ISD::FP_EXTEND)
+ return SDValue();
+
+ // Check how the source of 'fpext' is defined.
+ SDValue L2In = In.getOperand(0);
+ EVT L2InVT = L2In.getValueType();
+
+ // Check the original type
+ if (SrcVT == MVT::Other)
+ SrcVT = L2InVT;
+    else if (SrcVT != L2InVT) // Quit if the source types are not homogeneous.
+ return SDValue();
+
+ // Check whether the value being 'fpext'ed is extracted from the same
+ // source.
+ Opcode = L2In.getOpcode();
+
+ // Quit if it's not extracted with a constant index.
+ if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
+ !isa<ConstantSDNode>(L2In.getOperand(1)))
+ return SDValue();
+
+ SDValue ExtractedFromVec = L2In.getOperand(0);
+
+ if (VecIn.getNode() == 0) {
+ VecIn = ExtractedFromVec;
+ VecInVT = ExtractedFromVec.getValueType();
+ } else if (VecIn != ExtractedFromVec) // Quit if built from more than 1 vec.
+ return SDValue();
+
+ Mask.push_back(cast<ConstantSDNode>(L2In.getOperand(1))->getZExtValue());
+ }
+
+ // Quit if all operands of BUILD_VECTOR are undefined.
+ if (!VecIn.getNode())
+ return SDValue();
+
+ // Fill the remaining mask as undef.
+ for (unsigned i = NumElts; i < VecInVT.getVectorNumElements(); ++i)
+ Mask.push_back(-1);
+
+ return DAG.getNode(X86ISD::VFPEXT, DL, VT,
+ DAG.getVectorShuffle(VecInVT, DL,
+ VecIn, DAG.getUNDEF(VecInVT),
+ &Mask[0]));
+}
+
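+// Editorial sketch (not part of this patch): the BUILD_VECTOR pattern
+// recognized by LowerVectorFpExtend looks roughly like this in DAG form,
+// assuming a v4f32 source vector t0:
+//
+//   t1: f32 = extract_vector_elt t0, Constant:i32<0>
+//   t2: f32 = extract_vector_elt t0, Constant:i32<1>
+//   t3: f64 = fp_extend t1
+//   t4: f64 = fp_extend t2
+//   t5: v2f64 = BUILD_VECTOR t3, t4
+//
+// and is rewritten as a single shuffle of t0 feeding one X86ISD::VFPEXT
+// node, which matches the cvtps2pd instruction.
+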
SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
@@ -5134,6 +5226,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (Broadcast.getNode())
return Broadcast;
+ SDValue FpExt = LowerVectorFpExtend(Op, DAG);
+ if (FpExt.getNode())
+ return FpExt;
+
unsigned EVTBits = ExtVT.getSizeInBits();
unsigned NumZero = 0;
@@ -5209,12 +5305,12 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
(ExtVT == MVT::i64 && Subtarget->is64Bit())) {
- if (VT.getSizeInBits() == 256) {
+ if (VT.is256BitVector()) {
SDValue ZeroVec = getZeroVector(VT, Subtarget, DAG, dl);
return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec,
Item, DAG.getIntPtrConstant(0));
}
- assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!");
+ assert(VT.is128BitVector() && "Expected an SSE value type!");
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
@@ -5223,11 +5319,11 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
- if (VT.getSizeInBits() == 256) {
+ if (VT.is256BitVector()) {
SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl);
Item = Insert128BitVector(ZeroVec, Item, 0, DAG, dl);
} else {
- assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!");
+ assert(VT.is128BitVector() && "Expected an SSE value type!");
Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
}
return DAG.getNode(ISD::BITCAST, dl, VT, Item);
@@ -5287,7 +5383,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// For AVX-length vectors, build the individual 128-bit pieces and use
// shuffles to put them in place.
- if (VT.getSizeInBits() == 256) {
+ if (VT.is256BitVector()) {
SmallVector<SDValue, 32> V;
for (unsigned i = 0; i != NumElems; ++i)
V.push_back(Op.getOperand(i));
@@ -5368,7 +5464,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return DAG.getVectorShuffle(VT, dl, V[0], V[1], &MaskVec[0]);
}
- if (Values.size() > 1 && VT.getSizeInBits() == 128) {
+ if (Values.size() > 1 && VT.is128BitVector()) {
// Check for a build vector of consecutive loads.
for (unsigned i = 0; i < NumElems; ++i)
V[i] = Op.getOperand(i);
@@ -5429,39 +5525,13 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
-// LowerMMXCONCAT_VECTORS - We support concatenate two MMX registers and place
-// them in a MMX register. This is better than doing a stack convert.
-static SDValue LowerMMXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
- DebugLoc dl = Op.getDebugLoc();
- EVT ResVT = Op.getValueType();
-
- assert(ResVT == MVT::v2i64 || ResVT == MVT::v4i32 ||
- ResVT == MVT::v8i16 || ResVT == MVT::v16i8);
- int Mask[2];
- SDValue InVec = DAG.getNode(ISD::BITCAST,dl, MVT::v1i64, Op.getOperand(0));
- SDValue VecOp = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec);
- InVec = Op.getOperand(1);
- if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
- unsigned NumElts = ResVT.getVectorNumElements();
- VecOp = DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp);
- VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ResVT, VecOp,
- InVec.getOperand(0), DAG.getIntPtrConstant(NumElts/2+1));
- } else {
- InVec = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, InVec);
- SDValue VecOp2 = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec);
- Mask[0] = 0; Mask[1] = 2;
- VecOp = DAG.getVectorShuffle(MVT::v2i64, dl, VecOp, VecOp2, Mask);
- }
- return DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp);
-}
-
// LowerAVXCONCAT_VECTORS - 256-bit AVX can use the vinsertf128 instruction
// to create 256-bit vectors from two other 128-bit ones.
static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
EVT ResVT = Op.getValueType();
- assert(ResVT.getSizeInBits() == 256 && "Value type must be 256-bit wide");
+ assert(ResVT.is256BitVector() && "Value type must be 256-bit wide");
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
@@ -5472,16 +5542,7 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
SDValue
X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
- EVT ResVT = Op.getValueType();
-
assert(Op.getNumOperands() == 2);
- assert((ResVT.getSizeInBits() == 128 || ResVT.getSizeInBits() == 256) &&
- "Unsupported CONCAT_VECTORS for value type");
-
- // We support concatenate two MMX registers and place them in a MMX register.
- // This is better than doing a stack convert.
- if (ResVT.is128BitVector())
- return LowerMMXCONCAT_VECTORS(Op, DAG);
// 256-bit AVX can use the vinsertf128 instruction to create 256-bit vectors
// from two other 128-bit ones.
@@ -6131,7 +6192,7 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
DebugLoc dl = SVOp->getDebugLoc();
EVT VT = SVOp->getValueType(0);
- assert(VT.getSizeInBits() == 128 && "Unsupported vector size");
+ assert(VT.is128BitVector() && "Unsupported vector size");
std::pair<int, int> Locs[4];
int Mask1[] = { -1, -1, -1, -1 };
@@ -6759,7 +6820,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// Handle all 128-bit wide vectors with 4 elements, and match them with
// several different shuffle types.
- if (NumElems == 4 && VT.getSizeInBits() == 128)
+ if (NumElems == 4 && VT.is128BitVector())
return LowerVECTOR_SHUFFLE_128v4(SVOp, DAG);
// Handle general 256-bit shuffles
@@ -6775,7 +6836,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
- if (Op.getOperand(0).getValueType().getSizeInBits() != 128)
+ if (!Op.getOperand(0).getValueType().is128BitVector())
return SDValue();
if (VT.getSizeInBits() == 8) {
@@ -6845,7 +6906,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
// If this is a 256-bit vector result, first extract the 128-bit vector and
// then extract the element from the 128-bit vector.
- if (VecVT.getSizeInBits() == 256) {
+ if (VecVT.is256BitVector()) {
DebugLoc dl = Op.getNode()->getDebugLoc();
unsigned NumElems = VecVT.getVectorNumElements();
SDValue Idx = Op.getOperand(1);
@@ -6860,7 +6921,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
DAG.getConstant(IdxVal, MVT::i32));
}
- assert(Vec.getValueSizeInBits() <= 128 && "Unexpected vector length");
+ assert(VecVT.is128BitVector() && "Unexpected vector length");
if (Subtarget->hasSSE41()) {
SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
@@ -6936,7 +6997,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op,
SDValue N1 = Op.getOperand(1);
SDValue N2 = Op.getOperand(2);
- if (VT.getSizeInBits() == 256)
+ if (!VT.is128BitVector())
return SDValue();
if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) &&
@@ -6992,7 +7053,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
// If this is a 256-bit vector result, first extract the 128-bit vector,
// insert the element into the extracted half and then place it back.
- if (VT.getSizeInBits() == 256) {
+ if (VT.is256BitVector()) {
if (!isa<ConstantSDNode>(N2))
return SDValue();
@@ -7036,7 +7097,7 @@ X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// If this is a 256-bit vector result, first insert into a 128-bit
// vector and then insert into the 256-bit vector.
- if (OpVT.getSizeInBits() > 128) {
+ if (!OpVT.is128BitVector()) {
// Insert into a 128-bit vector.
EVT VT128 = EVT::getVectorVT(*Context,
OpVT.getVectorElementType(),
@@ -7053,7 +7114,7 @@ X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0));
SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
- assert(OpVT.getSizeInBits() == 128 && "Expected an SSE type!");
+ assert(OpVT.is128BitVector() && "Expected an SSE type!");
return DAG.getNode(ISD::BITCAST, dl, OpVT,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,AnyExt));
}
@@ -7068,8 +7129,8 @@ X86TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const {
SDValue Vec = Op.getNode()->getOperand(0);
SDValue Idx = Op.getNode()->getOperand(1);
- if (Op.getNode()->getValueType(0).getSizeInBits() == 128 &&
- Vec.getNode()->getValueType(0).getSizeInBits() == 256 &&
+ if (Op.getNode()->getValueType(0).is128BitVector() &&
+ Vec.getNode()->getValueType(0).is256BitVector() &&
isa<ConstantSDNode>(Idx)) {
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
return Extract128BitVector(Vec, IdxVal, DAG, dl);
@@ -7089,8 +7150,8 @@ X86TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const {
SDValue SubVec = Op.getNode()->getOperand(1);
SDValue Idx = Op.getNode()->getOperand(2);
- if (Op.getNode()->getValueType(0).getSizeInBits() == 256 &&
- SubVec.getNode()->getValueType(0).getSizeInBits() == 128 &&
+ if (Op.getNode()->getValueType(0).is256BitVector() &&
+ SubVec.getNode()->getValueType(0).is128BitVector() &&
isa<ConstantSDNode>(Idx)) {
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
return Insert128BitVector(Vec, SubVec, IdxVal, DAG, dl);
@@ -7735,9 +7796,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
punpckldq (c0), %xmm0 // c0: (uint4){ 0x43300000U, 0x45300000U, 0U, 0U }
subpd (c1), %xmm0 // c1: (double2){ 0x1.0p52, 0x1.0p52 * 0x1.0p32 }
#ifdef __SSE3__
- haddpd %xmm0, %xmm0
+ haddpd %xmm0, %xmm0
#else
- pshufd $0x4e, %xmm0, %xmm1
+ pshufd $0x4e, %xmm0, %xmm1
addpd %xmm1, %xmm0
#endif
*/
@@ -8064,7 +8125,7 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op,
EltVT = VT.getVectorElementType();
Constant *C;
if (EltVT == MVT::f64) {
- C = ConstantVector::getSplat(2,
+ C = ConstantVector::getSplat(2,
ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63)))));
} else {
C = ConstantVector::getSplat(4,
@@ -8098,7 +8159,7 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo::getConstantPool(),
false, false, false, 16);
if (VT.isVector()) {
- MVT XORVT = VT.getSizeInBits() == 128 ? MVT::v2i64 : MVT::v4i64;
+ MVT XORVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(ISD::XOR, dl, XORVT,
DAG.getNode(ISD::BITCAST, dl, XORVT,
@@ -8226,7 +8287,33 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
unsigned Opcode = 0;
unsigned NumOperands = 0;
- switch (Op.getNode()->getOpcode()) {
+
+ // Truncate operations may prevent the merge of the SETCC instruction
+  // and the arithmetic instruction before it. Attempt to truncate the operands
+ // of the arithmetic instruction and use a reduced bit-width instruction.
+ bool NeedTruncation = false;
+ SDValue ArithOp = Op;
+ if (Op->getOpcode() == ISD::TRUNCATE && Op->hasOneUse()) {
+ SDValue Arith = Op->getOperand(0);
+ // Both the trunc and the arithmetic op need to have one user each.
+ if (Arith->hasOneUse())
+ switch (Arith.getOpcode()) {
+ default: break;
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: {
+ NeedTruncation = true;
+ ArithOp = Arith;
+ }
+ }
+ }
+
+  // NOTE: In the code below, ArithOp holds the arithmetic operation, which
+  // may sit behind a truncate. 'Op' is the original, non-truncated value,
+  // and is what we inspect when checking for possible flag users.
+ switch (ArithOp.getOpcode()) {
case ISD::ADD:
// Due to an isel shortcoming, be conservative if this add is likely to be
// selected as part of a load-modify-store instruction. When the root node
@@ -8246,7 +8333,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
goto default_case;
if (ConstantSDNode *C =
- dyn_cast<ConstantSDNode>(Op.getNode()->getOperand(1))) {
+ dyn_cast<ConstantSDNode>(ArithOp.getNode()->getOperand(1))) {
// An add of one will be selected as an INC.
if (C->getAPIntValue() == 1) {
Opcode = X86ISD::INC;
@@ -8282,7 +8369,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
if (User->getOpcode() != ISD::BRCOND &&
User->getOpcode() != ISD::SETCC &&
- (User->getOpcode() != ISD::SELECT || UOpNo != 0)) {
+ !(User->getOpcode() == ISD::SELECT && UOpNo == 0)) {
NonFlagUse = true;
break;
}
@@ -8303,15 +8390,9 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
goto default_case;
// Otherwise use a regular EFLAGS-setting instruction.
- switch (Op.getNode()->getOpcode()) {
+ switch (ArithOp.getOpcode()) {
default: llvm_unreachable("unexpected operator!");
- case ISD::SUB:
- // If the only use of SUB is EFLAGS, use CMP instead.
- if (Op.hasOneUse())
- Opcode = X86ISD::CMP;
- else
- Opcode = X86ISD::SUB;
- break;
+ case ISD::SUB: Opcode = X86ISD::SUB; break;
case ISD::OR: Opcode = X86ISD::OR; break;
case ISD::XOR: Opcode = X86ISD::XOR; break;
case ISD::AND: Opcode = X86ISD::AND; break;
@@ -8332,19 +8413,40 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
break;
}
+ // If we found that truncation is beneficial, perform the truncation and
+ // update 'Op'.
+ if (NeedTruncation) {
+ EVT VT = Op.getValueType();
+ SDValue WideVal = Op->getOperand(0);
+ EVT WideVT = WideVal.getValueType();
+ unsigned ConvertedOp = 0;
+ // Use a target machine opcode to prevent further DAGCombine
+ // optimizations that may separate the arithmetic operations
+ // from the setcc node.
+ switch (WideVal.getOpcode()) {
+ default: break;
+ case ISD::ADD: ConvertedOp = X86ISD::ADD; break;
+ case ISD::SUB: ConvertedOp = X86ISD::SUB; break;
+ case ISD::AND: ConvertedOp = X86ISD::AND; break;
+ case ISD::OR: ConvertedOp = X86ISD::OR; break;
+ case ISD::XOR: ConvertedOp = X86ISD::XOR; break;
+ }
+
+ if (ConvertedOp) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.isOperationLegal(WideVal.getOpcode(), WideVT)) {
+ SDValue V0 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(0));
+ SDValue V1 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(1));
+ Op = DAG.getNode(ConvertedOp, dl, VT, V0, V1);
+ }
+ }
+ }
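+
+  // Editorial sketch (hedged; names are illustrative). The rewrite above
+  // turns
+  //   %w = add i32 %a, %b
+  //   %t = trunc i32 %w to i8
+  //   setcc %t, 0
+  // into
+  //   %v0 = trunc i32 %a to i8
+  //   %v1 = trunc i32 %b to i8
+  //   %t  = X86ISD::ADD i8 %v0, %v1   // also defines EFLAGS
+  // so the SETCC can consume the flags of the narrow operation directly.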
+
if (Opcode == 0)
// Emit a CMP with 0, which is the TEST pattern.
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
DAG.getConstant(0, Op.getValueType()));
- if (Opcode == X86ISD::CMP) {
- SDValue New = DAG.getNode(Opcode, dl, MVT::i32, Op.getOperand(0),
- Op.getOperand(1));
- // We can't replace usage of SUB with CMP.
- // The SUB node will be removed later because there is no use of it.
- return SDValue(New.getNode(), 0);
- }
-
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
SmallVector<SDValue, 4> Ops;
for (unsigned i = 0; i != NumOperands; ++i)
@@ -8364,6 +8466,14 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
return EmitTest(Op0, X86CC, DAG);
DebugLoc dl = Op0.getDebugLoc();
+ if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 ||
+ Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) {
+ // Use SUB instead of CMP to enable CSE between SUB and CMP.
+ SDVTList VTs = DAG.getVTList(Op0.getValueType(), MVT::i32);
+ SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs,
+ Op0, Op1);
+ return SDValue(Sub.getNode(), 1);
+ }
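+
+  // Editorial note (hedged): X86ISD::SUB above yields both the difference
+  // and EFLAGS, so code such as
+  //   c = a - b; if (a < b) ...
+  // can share a single SUB instruction via CSE; a plain CMP would set the
+  // flags without making the difference available for reuse.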
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);
}
@@ -8522,7 +8632,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
- assert(VT.getSizeInBits() == 256 && Op.getOpcode() == ISD::SETCC &&
+ assert(VT.is256BitVector() && Op.getOpcode() == ISD::SETCC &&
"Unsupported value type for operation");
unsigned NumElems = VT.getVectorNumElements();
@@ -8559,10 +8669,12 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
if (isFP) {
- unsigned SSECC = 8;
+#ifndef NDEBUG
EVT EltVT = Op0.getValueType().getVectorElementType();
- assert(EltVT == MVT::f32 || EltVT == MVT::f64); (void)EltVT;
+ assert(EltVT == MVT::f32 || EltVT == MVT::f64);
+#endif
+ unsigned SSECC;
bool Swap = false;
// SSE Condition code mapping:
@@ -8575,7 +8687,7 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
// 6 - NLE
// 7 - ORD
switch (SetCCOpcode) {
- default: break;
+ default: llvm_unreachable("Unexpected SETCC condition");
case ISD::SETOEQ:
case ISD::SETEQ: SSECC = 0; break;
case ISD::SETOGT:
@@ -8589,34 +8701,33 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
case ISD::SETUO: SSECC = 3; break;
case ISD::SETUNE:
case ISD::SETNE: SSECC = 4; break;
- case ISD::SETULE: Swap = true;
+ case ISD::SETULE: Swap = true; // Fallthrough
case ISD::SETUGE: SSECC = 5; break;
- case ISD::SETULT: Swap = true;
+ case ISD::SETULT: Swap = true; // Fallthrough
case ISD::SETUGT: SSECC = 6; break;
case ISD::SETO: SSECC = 7; break;
+ case ISD::SETUEQ:
+ case ISD::SETONE: SSECC = 8; break;
}
if (Swap)
std::swap(Op0, Op1);
// In the two special cases we can't handle, emit two comparisons.
if (SSECC == 8) {
+ unsigned CC0, CC1;
+ unsigned CombineOpc;
if (SetCCOpcode == ISD::SETUEQ) {
- SDValue UNORD, EQ;
- UNORD = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
- DAG.getConstant(3, MVT::i8));
- EQ = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
- DAG.getConstant(0, MVT::i8));
- return DAG.getNode(ISD::OR, dl, VT, UNORD, EQ);
- }
- if (SetCCOpcode == ISD::SETONE) {
- SDValue ORD, NEQ;
- ORD = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
- DAG.getConstant(7, MVT::i8));
- NEQ = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
- DAG.getConstant(4, MVT::i8));
- return DAG.getNode(ISD::AND, dl, VT, ORD, NEQ);
+ CC0 = 3; CC1 = 0; CombineOpc = ISD::OR;
+ } else {
+ assert(SetCCOpcode == ISD::SETONE);
+ CC0 = 7; CC1 = 4; CombineOpc = ISD::AND;
}
- llvm_unreachable("Illegal FP comparison");
+
+ SDValue Cmp0 = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
+ DAG.getConstant(CC0, MVT::i8));
+ SDValue Cmp1 = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
+ DAG.getConstant(CC1, MVT::i8));
+ return DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1);
}
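  // Editorial sketch (hedged): one-lane C++ semantics of the two-compare
  // expansion above, for IEEE doubles:
  //
  //   bool setueq(double a, double b) {
  //     bool unord = (a != a) || (b != b); // CMPP imm 3 (UNORD)
  //     bool eq    = (a == b);             // CMPP imm 0 (EQ)
  //     return unord || eq;                // combined with OR
  //   }
  //   bool setone(double a, double b) {
  //     bool ord = (a == a) && (b == b);   // CMPP imm 7 (ORD)
  //     bool ne  = (a != b);               // CMPP imm 4 (NEQ)
  //     return ord && ne;                  // combined with AND
  //   }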
// Handle all other FP comparisons here.
return DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
@@ -8624,17 +8735,17 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
}
// Break 256-bit integer vector compare into smaller ones.
- if (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2())
+ if (VT.is256BitVector() && !Subtarget->hasAVX2())
return Lower256IntVSETCC(Op, DAG);
// We are handling one of the integer comparisons here. Since SSE only has
// GT and EQ comparisons for integer, swapping operands and multiple
// operations may be required for some comparisons.
- unsigned Opc = 0;
+ unsigned Opc;
bool Swap = false, Invert = false, FlipSigns = false;
switch (SetCCOpcode) {
- default: break;
+ default: llvm_unreachable("Unexpected SETCC condition");
case ISD::SETNE: Invert = true;
case ISD::SETEQ: Opc = X86ISD::PCMPEQ; break;
case ISD::SETLT: Swap = true;
@@ -8651,10 +8762,12 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
// Check that the operation in question is available (most are plain SSE2,
// but PCMPGTQ and PCMPEQQ have different requirements).
- if (Opc == X86ISD::PCMPGT && VT == MVT::v2i64 && !Subtarget->hasSSE42())
- return SDValue();
- if (Opc == X86ISD::PCMPEQ && VT == MVT::v2i64 && !Subtarget->hasSSE41())
- return SDValue();
+ if (VT == MVT::v2i64) {
+ if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42())
+ return SDValue();
+ if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41())
+ return SDValue();
+ }
// Since SSE has no unsigned integer comparisons, we need to flip the sign
// bits of the inputs before performing those operations.
@@ -8714,6 +8827,16 @@ static bool isAllOnes(SDValue V) {
return C && C->isAllOnesValue();
}
+static bool isTruncWithZeroHighBitsInput(SDValue V, SelectionDAG &DAG) {
+ if (V.getOpcode() != ISD::TRUNCATE)
+ return false;
+
+ SDValue VOp0 = V.getOperand(0);
+ unsigned InBits = VOp0.getValueSizeInBits();
+ unsigned Bits = V.getValueSizeInBits();
+ return DAG.MaskedValueIsZero(VOp0, APInt::getHighBitsSet(InBits,InBits-Bits));
+}
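+
+// Editorial example (hedged): given
+//   %x = and i32 %y, 1
+//   %c = trunc i32 %x to i1
+// MaskedValueIsZero proves bits [1,31] of %x are zero, so the truncate can
+// be looked through and the flag test applied to the wider value directly.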
+
SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
bool addTest = true;
SDValue Cond = Op.getOperand(0);
@@ -8728,46 +8851,6 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
Cond = NewCond;
}
- // Handle the following cases related to max and min:
- // (a > b) ? (a-b) : 0
- // (a >= b) ? (a-b) : 0
- // (b < a) ? (a-b) : 0
- // (b <= a) ? (a-b) : 0
- // Comparison is removed to use EFLAGS from SUB.
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op2))
- if (Cond.getOpcode() == X86ISD::SETCC &&
- Cond.getOperand(1).getOpcode() == X86ISD::CMP &&
- (Op1.getOpcode() == ISD::SUB || Op1.getOpcode() == X86ISD::SUB) &&
- C->getAPIntValue() == 0) {
- SDValue Cmp = Cond.getOperand(1);
- unsigned CC = cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue();
- if ((DAG.isEqualTo(Op1.getOperand(0), Cmp.getOperand(0)) &&
- DAG.isEqualTo(Op1.getOperand(1), Cmp.getOperand(1)) &&
- (CC == X86::COND_G || CC == X86::COND_GE ||
- CC == X86::COND_A || CC == X86::COND_AE)) ||
- (DAG.isEqualTo(Op1.getOperand(0), Cmp.getOperand(1)) &&
- DAG.isEqualTo(Op1.getOperand(1), Cmp.getOperand(0)) &&
- (CC == X86::COND_L || CC == X86::COND_LE ||
- CC == X86::COND_B || CC == X86::COND_BE))) {
-
- if (Op1.getOpcode() == ISD::SUB) {
- SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i32);
- SDValue New = DAG.getNode(X86ISD::SUB, DL, VTs,
- Op1.getOperand(0), Op1.getOperand(1));
- DAG.ReplaceAllUsesWith(Op1, New);
- Op1 = New;
- }
-
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
- unsigned NewCC = (CC == X86::COND_G || CC == X86::COND_GE ||
- CC == X86::COND_L ||
- CC == X86::COND_LE) ? X86::COND_GE : X86::COND_AE;
- SDValue Ops[] = { Op2, Op1, DAG.getConstant(NewCC, MVT::i8),
- SDValue(Op1.getNode(), 1) };
- return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops));
- }
- }
-
// (select (x == 0), -1, y) -> (sign_bit (x - 1)) | y
// (select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y
// (select (x != 0), y, -1) -> (sign_bit (x - 1)) | y
@@ -8788,11 +8871,11 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// (select (x != 0), -1, 0) -> neg & sbb
// (select (x == 0), 0, -1) -> neg & sbb
if (ConstantSDNode *YC = dyn_cast<ConstantSDNode>(Y))
- if (YC->isNullValue() &&
+ if (YC->isNullValue() &&
(isAllOnes(Op1) == (CondCode == X86::COND_NE))) {
SDVTList VTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32);
- SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs,
- DAG.getConstant(0, CmpOp0.getValueType()),
+ SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs,
+ DAG.getConstant(0, CmpOp0.getValueType()),
CmpOp0);
SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
DAG.getConstant(X86::COND_B, MVT::i8),
@@ -8883,9 +8966,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
}
if (addTest) {
- // Look pass the truncate.
- if (Cond.getOpcode() == ISD::TRUNCATE)
- Cond = Cond.getOperand(0);
+    // Look past the truncate if the high bits are known zero.
+ if (isTruncWithZeroHighBitsInput(Cond, DAG))
+ Cond = Cond.getOperand(0);
// We know the result of AND is compared against zero. Try to match
// it to BT.
@@ -8908,7 +8991,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// a < b ? 0 : -1 -> RES = setcc_carry
// a >= b ? -1 : 0 -> RES = setcc_carry
// a >= b ? 0 : -1 -> RES = ~setcc_carry
- if (Cond.getOpcode() == X86ISD::CMP) {
+ if (Cond.getOpcode() == X86ISD::SUB) {
Cond = ConvertCmpIfNecessary(Cond, DAG);
unsigned CondCode = cast<ConstantSDNode>(CC)->getZExtValue();
@@ -9192,9 +9275,9 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
}
if (addTest) {
- // Look pass the truncate.
- if (Cond.getOpcode() == ISD::TRUNCATE)
- Cond = Cond.getOperand(0);
+ // Look pass the truncate if the high bits are known zero.
+ if (isTruncWithZeroHighBitsInput(Cond, DAG))
+ Cond = Cond.getOperand(0);
// We know the result of AND is compared against zero. Try to match
// it to BT.
@@ -9459,8 +9542,7 @@ static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT,
SDValue ShOps[4];
ShOps[0] = ShAmt;
ShOps[1] = DAG.getConstant(0, MVT::i32);
- ShOps[2] = DAG.getUNDEF(MVT::i32);
- ShOps[3] = DAG.getUNDEF(MVT::i32);
+ ShOps[2] = ShOps[3] = DAG.getUNDEF(MVT::i32);
ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &ShOps[0], 4);
// The return type has to be a 128-bit type with the same element
@@ -9503,8 +9585,8 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::x86_sse2_ucomigt_sd:
case Intrinsic::x86_sse2_ucomige_sd:
case Intrinsic::x86_sse2_ucomineq_sd: {
- unsigned Opc = 0;
- ISD::CondCode CC = ISD::SETCC_INVALID;
+ unsigned Opc;
+ ISD::CondCode CC;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_sse_comieq_ss:
@@ -9578,55 +9660,102 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
DAG.getConstant(X86CC, MVT::i8), Cond);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
+
// Arithmetic intrinsics.
case Intrinsic::x86_sse2_pmulu_dq:
case Intrinsic::x86_avx2_pmulu_dq:
return DAG.getNode(X86ISD::PMULUDQ, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+
+ // SSE3/AVX horizontal add/sub intrinsics
case Intrinsic::x86_sse3_hadd_ps:
case Intrinsic::x86_sse3_hadd_pd:
case Intrinsic::x86_avx_hadd_ps_256:
case Intrinsic::x86_avx_hadd_pd_256:
- return DAG.getNode(X86ISD::FHADD, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_sse3_hsub_ps:
case Intrinsic::x86_sse3_hsub_pd:
case Intrinsic::x86_avx_hsub_ps_256:
case Intrinsic::x86_avx_hsub_pd_256:
- return DAG.getNode(X86ISD::FHSUB, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_ssse3_phadd_w_128:
case Intrinsic::x86_ssse3_phadd_d_128:
case Intrinsic::x86_avx2_phadd_w:
case Intrinsic::x86_avx2_phadd_d:
- return DAG.getNode(X86ISD::HADD, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_ssse3_phsub_w_128:
case Intrinsic::x86_ssse3_phsub_d_128:
case Intrinsic::x86_avx2_phsub_w:
- case Intrinsic::x86_avx2_phsub_d:
- return DAG.getNode(X86ISD::HSUB, dl, Op.getValueType(),
+ case Intrinsic::x86_avx2_phsub_d: {
+ unsigned Opcode;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_sse3_hadd_ps:
+ case Intrinsic::x86_sse3_hadd_pd:
+ case Intrinsic::x86_avx_hadd_ps_256:
+ case Intrinsic::x86_avx_hadd_pd_256:
+ Opcode = X86ISD::FHADD;
+ break;
+ case Intrinsic::x86_sse3_hsub_ps:
+ case Intrinsic::x86_sse3_hsub_pd:
+ case Intrinsic::x86_avx_hsub_ps_256:
+ case Intrinsic::x86_avx_hsub_pd_256:
+ Opcode = X86ISD::FHSUB;
+ break;
+ case Intrinsic::x86_ssse3_phadd_w_128:
+ case Intrinsic::x86_ssse3_phadd_d_128:
+ case Intrinsic::x86_avx2_phadd_w:
+ case Intrinsic::x86_avx2_phadd_d:
+ Opcode = X86ISD::HADD;
+ break;
+ case Intrinsic::x86_ssse3_phsub_w_128:
+ case Intrinsic::x86_ssse3_phsub_d_128:
+ case Intrinsic::x86_avx2_phsub_w:
+ case Intrinsic::x86_avx2_phsub_d:
+ Opcode = X86ISD::HSUB;
+ break;
+ }
+ return DAG.getNode(Opcode, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+ }
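+
+  // Editorial reminder (hedged): X86ISD::FHADD on <a0 a1 a2 a3>, <b0 b1 b2 b3>
+  // yields <a0+a1, a2+a3, b0+b1, b2+b3>, matching haddps; FHSUB, HADD and
+  // HSUB follow the same pairwise layout for their instructions.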
+
+ // AVX2 variable shift intrinsics
case Intrinsic::x86_avx2_psllv_d:
case Intrinsic::x86_avx2_psllv_q:
case Intrinsic::x86_avx2_psllv_d_256:
case Intrinsic::x86_avx2_psllv_q_256:
- return DAG.getNode(ISD::SHL, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_avx2_psrlv_d:
case Intrinsic::x86_avx2_psrlv_q:
case Intrinsic::x86_avx2_psrlv_d_256:
case Intrinsic::x86_avx2_psrlv_q_256:
- return DAG.getNode(ISD::SRL, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_avx2_psrav_d:
- case Intrinsic::x86_avx2_psrav_d_256:
- return DAG.getNode(ISD::SRA, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_avx2_psrav_d_256: {
+ unsigned Opcode;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_avx2_psllv_d:
+ case Intrinsic::x86_avx2_psllv_q:
+ case Intrinsic::x86_avx2_psllv_d_256:
+ case Intrinsic::x86_avx2_psllv_q_256:
+ Opcode = ISD::SHL;
+ break;
+ case Intrinsic::x86_avx2_psrlv_d:
+ case Intrinsic::x86_avx2_psrlv_q:
+ case Intrinsic::x86_avx2_psrlv_d_256:
+ case Intrinsic::x86_avx2_psrlv_q_256:
+ Opcode = ISD::SRL;
+ break;
+ case Intrinsic::x86_avx2_psrav_d:
+ case Intrinsic::x86_avx2_psrav_d_256:
+ Opcode = ISD::SRA;
+ break;
+ }
+ return DAG.getNode(Opcode, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+
case Intrinsic::x86_ssse3_pshuf_b_128:
case Intrinsic::x86_avx2_pshuf_b:
return DAG.getNode(X86ISD::PSHUFB, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+
case Intrinsic::x86_ssse3_psign_b_128:
case Intrinsic::x86_ssse3_psign_w_128:
case Intrinsic::x86_ssse3_psign_d_128:
@@ -9635,15 +9764,18 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::x86_avx2_psign_d:
return DAG.getNode(X86ISD::PSIGN, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+
case Intrinsic::x86_sse41_insertps:
return DAG.getNode(X86ISD::INSERTPS, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
case Intrinsic::x86_avx_vperm2f128_ps_256:
case Intrinsic::x86_avx_vperm2f128_pd_256:
case Intrinsic::x86_avx_vperm2f128_si_256:
case Intrinsic::x86_avx2_vperm2i128:
return DAG.getNode(X86ISD::VPERM2X128, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
case Intrinsic::x86_avx2_permd:
case Intrinsic::x86_avx2_permps:
// Operands intentionally swapped. Mask is last operand to intrinsic,
@@ -9673,7 +9805,7 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::x86_avx_vtestc_pd_256:
case Intrinsic::x86_avx_vtestnzc_pd_256: {
bool IsTestPacked = false;
- unsigned X86CC = 0;
+ unsigned X86CC;
switch (IntNo) {
default: llvm_unreachable("Bad fallthrough in Intrinsic lowering.");
case Intrinsic::x86_avx_vtestz_ps:
@@ -9724,44 +9856,93 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::x86_avx2_psll_w:
case Intrinsic::x86_avx2_psll_d:
case Intrinsic::x86_avx2_psll_q:
- return DAG.getNode(X86ISD::VSHL, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_sse2_psrl_w:
case Intrinsic::x86_sse2_psrl_d:
case Intrinsic::x86_sse2_psrl_q:
case Intrinsic::x86_avx2_psrl_w:
case Intrinsic::x86_avx2_psrl_d:
case Intrinsic::x86_avx2_psrl_q:
- return DAG.getNode(X86ISD::VSRL, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_sse2_psra_w:
case Intrinsic::x86_sse2_psra_d:
case Intrinsic::x86_avx2_psra_w:
- case Intrinsic::x86_avx2_psra_d:
- return DAG.getNode(X86ISD::VSRA, dl, Op.getValueType(),
+ case Intrinsic::x86_avx2_psra_d: {
+ unsigned Opcode;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_sse2_psll_w:
+ case Intrinsic::x86_sse2_psll_d:
+ case Intrinsic::x86_sse2_psll_q:
+ case Intrinsic::x86_avx2_psll_w:
+ case Intrinsic::x86_avx2_psll_d:
+ case Intrinsic::x86_avx2_psll_q:
+ Opcode = X86ISD::VSHL;
+ break;
+ case Intrinsic::x86_sse2_psrl_w:
+ case Intrinsic::x86_sse2_psrl_d:
+ case Intrinsic::x86_sse2_psrl_q:
+ case Intrinsic::x86_avx2_psrl_w:
+ case Intrinsic::x86_avx2_psrl_d:
+ case Intrinsic::x86_avx2_psrl_q:
+ Opcode = X86ISD::VSRL;
+ break;
+ case Intrinsic::x86_sse2_psra_w:
+ case Intrinsic::x86_sse2_psra_d:
+ case Intrinsic::x86_avx2_psra_w:
+ case Intrinsic::x86_avx2_psra_d:
+ Opcode = X86ISD::VSRA;
+ break;
+ }
+ return DAG.getNode(Opcode, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+ }
+
+ // SSE/AVX immediate shift intrinsics
case Intrinsic::x86_sse2_pslli_w:
case Intrinsic::x86_sse2_pslli_d:
case Intrinsic::x86_sse2_pslli_q:
case Intrinsic::x86_avx2_pslli_w:
case Intrinsic::x86_avx2_pslli_d:
case Intrinsic::x86_avx2_pslli_q:
- return getTargetVShiftNode(X86ISD::VSHLI, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2), DAG);
case Intrinsic::x86_sse2_psrli_w:
case Intrinsic::x86_sse2_psrli_d:
case Intrinsic::x86_sse2_psrli_q:
case Intrinsic::x86_avx2_psrli_w:
case Intrinsic::x86_avx2_psrli_d:
case Intrinsic::x86_avx2_psrli_q:
- return getTargetVShiftNode(X86ISD::VSRLI, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2), DAG);
case Intrinsic::x86_sse2_psrai_w:
case Intrinsic::x86_sse2_psrai_d:
case Intrinsic::x86_avx2_psrai_w:
- case Intrinsic::x86_avx2_psrai_d:
- return getTargetVShiftNode(X86ISD::VSRAI, dl, Op.getValueType(),
+ case Intrinsic::x86_avx2_psrai_d: {
+ unsigned Opcode;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_sse2_pslli_w:
+ case Intrinsic::x86_sse2_pslli_d:
+ case Intrinsic::x86_sse2_pslli_q:
+ case Intrinsic::x86_avx2_pslli_w:
+ case Intrinsic::x86_avx2_pslli_d:
+ case Intrinsic::x86_avx2_pslli_q:
+ Opcode = X86ISD::VSHLI;
+ break;
+ case Intrinsic::x86_sse2_psrli_w:
+ case Intrinsic::x86_sse2_psrli_d:
+ case Intrinsic::x86_sse2_psrli_q:
+ case Intrinsic::x86_avx2_psrli_w:
+ case Intrinsic::x86_avx2_psrli_d:
+ case Intrinsic::x86_avx2_psrli_q:
+ Opcode = X86ISD::VSRLI;
+ break;
+ case Intrinsic::x86_sse2_psrai_w:
+ case Intrinsic::x86_sse2_psrai_d:
+ case Intrinsic::x86_avx2_psrai_w:
+ case Intrinsic::x86_avx2_psrai_d:
+ Opcode = X86ISD::VSRAI;
+ break;
+ }
+ return getTargetVShiftNode(Opcode, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), DAG);
+ }
+
// Fix vector shift instructions where the last operand is a non-immediate
// i32 value.
case Intrinsic::x86_mmx_pslli_w:
@@ -9776,8 +9957,9 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
if (isa<ConstantSDNode>(ShAmt))
return SDValue();
- unsigned NewIntNo = 0;
+ unsigned NewIntNo;
switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_mmx_pslli_w:
NewIntNo = Intrinsic::x86_mmx_psll_w;
break;
@@ -9802,7 +9984,6 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::x86_mmx_psrai_d:
NewIntNo = Intrinsic::x86_mmx_psra_d;
break;
- default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
}
    // The vector shift intrinsics with scalars use 32-bit shift amounts but
@@ -9818,6 +9999,84 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
DAG.getConstant(NewIntNo, MVT::i32),
Op.getOperand(1), ShAmt);
}
+ case Intrinsic::x86_sse42_pcmpistria128:
+ case Intrinsic::x86_sse42_pcmpestria128:
+ case Intrinsic::x86_sse42_pcmpistric128:
+ case Intrinsic::x86_sse42_pcmpestric128:
+ case Intrinsic::x86_sse42_pcmpistrio128:
+ case Intrinsic::x86_sse42_pcmpestrio128:
+ case Intrinsic::x86_sse42_pcmpistris128:
+ case Intrinsic::x86_sse42_pcmpestris128:
+ case Intrinsic::x86_sse42_pcmpistriz128:
+ case Intrinsic::x86_sse42_pcmpestriz128: {
+ unsigned Opcode;
+ unsigned X86CC;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_sse42_pcmpistria128:
+ Opcode = X86ISD::PCMPISTRI;
+ X86CC = X86::COND_A;
+ break;
+ case Intrinsic::x86_sse42_pcmpestria128:
+ Opcode = X86ISD::PCMPESTRI;
+ X86CC = X86::COND_A;
+ break;
+ case Intrinsic::x86_sse42_pcmpistric128:
+ Opcode = X86ISD::PCMPISTRI;
+ X86CC = X86::COND_B;
+ break;
+ case Intrinsic::x86_sse42_pcmpestric128:
+ Opcode = X86ISD::PCMPESTRI;
+ X86CC = X86::COND_B;
+ break;
+ case Intrinsic::x86_sse42_pcmpistrio128:
+ Opcode = X86ISD::PCMPISTRI;
+ X86CC = X86::COND_O;
+ break;
+ case Intrinsic::x86_sse42_pcmpestrio128:
+ Opcode = X86ISD::PCMPESTRI;
+ X86CC = X86::COND_O;
+ break;
+ case Intrinsic::x86_sse42_pcmpistris128:
+ Opcode = X86ISD::PCMPISTRI;
+ X86CC = X86::COND_S;
+ break;
+ case Intrinsic::x86_sse42_pcmpestris128:
+ Opcode = X86ISD::PCMPESTRI;
+ X86CC = X86::COND_S;
+ break;
+ case Intrinsic::x86_sse42_pcmpistriz128:
+ Opcode = X86ISD::PCMPISTRI;
+ X86CC = X86::COND_E;
+ break;
+ case Intrinsic::x86_sse42_pcmpestriz128:
+ Opcode = X86ISD::PCMPESTRI;
+ X86CC = X86::COND_E;
+ break;
+ }
+ SmallVector<SDValue, 5> NewOps;
+ NewOps.append(Op->op_begin()+1, Op->op_end());
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+ SDValue PCMP = DAG.getNode(Opcode, dl, VTs, NewOps.data(), NewOps.size());
+ SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86CC, MVT::i8),
+ SDValue(PCMP.getNode(), 1));
+ return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
+ }
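+
+  // Editorial summary (hedged): each flag variant above emits the same
+  // PCMPISTRI/PCMPESTRI node and then reads one EFLAGS bit from result #1
+  // through X86ISD::SETCC:
+  //   *ia -> COND_A (CF == 0 && ZF == 0)
+  //   *ic -> COND_B (CF == 1)
+  //   *io -> COND_O (OF == 1)
+  //   *is -> COND_S (SF == 1)
+  //   *iz -> COND_E (ZF == 1)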
+
+ case Intrinsic::x86_sse42_pcmpistri128:
+ case Intrinsic::x86_sse42_pcmpestri128: {
+ unsigned Opcode;
+ if (IntNo == Intrinsic::x86_sse42_pcmpistri128)
+ Opcode = X86ISD::PCMPISTRI;
+ else
+ Opcode = X86ISD::PCMPESTRI;
+
+ SmallVector<SDValue, 5> NewOps;
+ NewOps.append(Op->op_begin()+1, Op->op_end());
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+ return DAG.getNode(Opcode, dl, VTs, NewOps.data(), NewOps.size());
+ }
}
}
@@ -10231,7 +10490,7 @@ SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
- assert(VT.getSizeInBits() == 256 && VT.isInteger() &&
+ assert(VT.is256BitVector() && VT.isInteger() &&
"Unsupported value type for operation");
unsigned NumElems = VT.getVectorNumElements();
@@ -10256,14 +10515,14 @@ static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG) {
}
SDValue X86TargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const {
- assert(Op.getValueType().getSizeInBits() == 256 &&
+ assert(Op.getValueType().is256BitVector() &&
Op.getValueType().isInteger() &&
"Only handle AVX 256-bit vector integer operation");
return Lower256IntArith(Op, DAG);
}
SDValue X86TargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const {
- assert(Op.getValueType().getSizeInBits() == 256 &&
+ assert(Op.getValueType().is256BitVector() &&
Op.getValueType().isInteger() &&
"Only handle AVX 256-bit vector integer operation");
return Lower256IntArith(Op, DAG);
@@ -10273,7 +10532,7 @@ SDValue X86TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
// Decompose 256-bit ops into smaller 128-bit ops.
- if (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2())
+ if (VT.is256BitVector() && !Subtarget->hasAVX2())
return Lower256IntArith(Op, DAG);
assert((VT == MVT::v2i64 || VT == MVT::v4i64) &&
@@ -10503,7 +10762,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
}
// Decompose 256-bit shifts into smaller 128-bit shifts.
- if (VT.getSizeInBits() == 256) {
+ if (VT.is256BitVector()) {
unsigned NumElems = VT.getVectorNumElements();
MVT EltVT = VT.getVectorElementType().getSimpleVT();
EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
@@ -10992,9 +11251,9 @@ static void ReplaceATOMIC_LOAD(SDNode *Node,
Results.push_back(Swap.getValue(1));
}
-void X86TargetLowering::
+static void
ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG, unsigned NewOp) const {
+ SelectionDAG &DAG, unsigned NewOp) {
DebugLoc dl = Node->getDebugLoc();
assert (Node->getValueType(0) == MVT::i64 &&
"Only know how to expand i64 atomics");
@@ -11092,7 +11351,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Regs64bit ? X86::RBX : X86::EBX,
swapInL, cpInH.getValue(1));
swapInH = DAG.getCopyToReg(swapInL.getValue(0), dl,
- Regs64bit ? X86::RCX : X86::ECX,
+ Regs64bit ? X86::RCX : X86::ECX,
swapInH, swapInL.getValue(1));
SDValue Ops[] = { swapInH.getValue(0),
N->getOperand(1),
@@ -11115,26 +11374,40 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::ATOMIC_LOAD_ADD:
- ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMADD64_DAG);
- return;
case ISD::ATOMIC_LOAD_AND:
- ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMAND64_DAG);
- return;
case ISD::ATOMIC_LOAD_NAND:
- ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMNAND64_DAG);
- return;
case ISD::ATOMIC_LOAD_OR:
- ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMOR64_DAG);
- return;
case ISD::ATOMIC_LOAD_SUB:
- ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSUB64_DAG);
- return;
case ISD::ATOMIC_LOAD_XOR:
- ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMXOR64_DAG);
- return;
- case ISD::ATOMIC_SWAP:
- ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSWAP64_DAG);
+ case ISD::ATOMIC_SWAP: {
+ unsigned Opc;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected opcode");
+ case ISD::ATOMIC_LOAD_ADD:
+ Opc = X86ISD::ATOMADD64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_AND:
+ Opc = X86ISD::ATOMAND64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_NAND:
+ Opc = X86ISD::ATOMNAND64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_OR:
+ Opc = X86ISD::ATOMOR64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_SUB:
+ Opc = X86ISD::ATOMSUB64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_XOR:
+ Opc = X86ISD::ATOMXOR64_DAG;
+ break;
+ case ISD::ATOMIC_SWAP:
+ Opc = X86ISD::ATOMSWAP64_DAG;
+ break;
+ }
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, Opc);
return;
+ }
case ISD::ATOMIC_LOAD:
ReplaceATOMIC_LOAD(N, Results, DAG);
}
@@ -11194,6 +11467,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FHSUB: return "X86ISD::FHSUB";
case X86ISD::FMAX: return "X86ISD::FMAX";
case X86ISD::FMIN: return "X86ISD::FMIN";
+ case X86ISD::FMAXC: return "X86ISD::FMAXC";
+ case X86ISD::FMINC: return "X86ISD::FMINC";
case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
case X86ISD::FRCP: return "X86ISD::FRCP";
case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
@@ -11212,7 +11487,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::ATOMAND64_DAG: return "X86ISD::ATOMAND64_DAG";
case X86ISD::ATOMNAND64_DAG: return "X86ISD::ATOMNAND64_DAG";
case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
+ case X86ISD::VSEXT_MOVL: return "X86ISD::VSEXT_MOVL";
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
+ case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ";
case X86ISD::VSHL: return "X86ISD::VSHL";
@@ -11273,6 +11550,12 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::WIN_FTOL: return "X86ISD::WIN_FTOL";
case X86ISD::SAHF: return "X86ISD::SAHF";
case X86ISD::RDRAND: return "X86ISD::RDRAND";
+ case X86ISD::FMADD: return "X86ISD::FMADD";
+ case X86ISD::FMSUB: return "X86ISD::FMSUB";
+ case X86ISD::FNMADD: return "X86ISD::FNMADD";
+ case X86ISD::FNMSUB: return "X86ISD::FNMSUB";
+ case X86ISD::FMADDSUB: return "X86ISD::FMADDSUB";
+ case X86ISD::FMSUBADD: return "X86ISD::FMSUBADD";
}
}
@@ -11408,7 +11691,7 @@ X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
// FIXME: This collection of masks seems suspect.
if (NumElts == 2)
return true;
- if (NumElts == 4 && VT.getSizeInBits() == 128) {
+ if (NumElts == 4 && VT.is128BitVector()) {
return (isMOVLMask(Mask, VT) ||
isCommutedMOVLMask(Mask, VT, true) ||
isSHUFPMask(Mask, VT, Subtarget->hasAVX()) ||
@@ -11834,8 +12117,7 @@ X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
MIB.addOperand(Op);
}
BuildMI(*BB, MI, dl,
- TII->get(Subtarget->hasAVX() ? X86::VMOVAPSrr : X86::MOVAPSrr),
- MI->getOperand(0).getReg())
+ TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg())
.addReg(X86::XMM0);
MI->eraseFromParent();
@@ -11868,24 +12150,6 @@ X86TargetLowering::EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB) const {
}
MachineBasicBlock *
-X86TargetLowering::EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const {
- DebugLoc dl = MI->getDebugLoc();
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-
- // First arg in ECX, the second in EAX.
- BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX)
- .addReg(MI->getOperand(0).getReg());
- BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EAX)
- .addReg(MI->getOperand(1).getReg());
-
- // The instruction doesn't actually take any operands though.
- BuildMI(*BB, MI, dl, TII->get(X86::MWAITrr));
-
- MI->eraseFromParent(); // The pseudo is gone now.
- return BB;
-}
-
-MachineBasicBlock *
X86TargetLowering::EmitVAARG64WithCustomInserter(
MachineInstr *MI,
MachineBasicBlock *MBB) const {
@@ -12675,185 +12939,208 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// String/text processing lowering.
case X86::PCMPISTRM128REG:
case X86::VPCMPISTRM128REG:
- return EmitPCMP(MI, BB, 3, false /* in-mem */);
case X86::PCMPISTRM128MEM:
case X86::VPCMPISTRM128MEM:
- return EmitPCMP(MI, BB, 3, true /* in-mem */);
case X86::PCMPESTRM128REG:
case X86::VPCMPESTRM128REG:
- return EmitPCMP(MI, BB, 5, false /* in mem */);
case X86::PCMPESTRM128MEM:
- case X86::VPCMPESTRM128MEM:
- return EmitPCMP(MI, BB, 5, true /* in mem */);
+ case X86::VPCMPESTRM128MEM: {
+ unsigned NumArgs;
+ bool MemArg;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("illegal opcode!");
+ case X86::PCMPISTRM128REG:
+ case X86::VPCMPISTRM128REG:
+ NumArgs = 3; MemArg = false; break;
+ case X86::PCMPISTRM128MEM:
+ case X86::VPCMPISTRM128MEM:
+ NumArgs = 3; MemArg = true; break;
+ case X86::PCMPESTRM128REG:
+ case X86::VPCMPESTRM128REG:
+ NumArgs = 5; MemArg = false; break;
+ case X86::PCMPESTRM128MEM:
+ case X86::VPCMPESTRM128MEM:
+ NumArgs = 5; MemArg = true; break;
+ }
+ return EmitPCMP(MI, BB, NumArgs, MemArg);
+ }
// Thread synchronization.
case X86::MONITOR:
return EmitMonitor(MI, BB);
- case X86::MWAIT:
- return EmitMwait(MI, BB);
// Atomic Lowering.
- case X86::ATOMAND32:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
- X86::AND32ri, X86::MOV32rm,
- X86::LCMPXCHG32,
- X86::NOT32r, X86::EAX,
- &X86::GR32RegClass);
- case X86::ATOMOR32:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr,
- X86::OR32ri, X86::MOV32rm,
- X86::LCMPXCHG32,
- X86::NOT32r, X86::EAX,
- &X86::GR32RegClass);
- case X86::ATOMXOR32:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr,
- X86::XOR32ri, X86::MOV32rm,
- X86::LCMPXCHG32,
- X86::NOT32r, X86::EAX,
- &X86::GR32RegClass);
- case X86::ATOMNAND32:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
- X86::AND32ri, X86::MOV32rm,
- X86::LCMPXCHG32,
- X86::NOT32r, X86::EAX,
- &X86::GR32RegClass, true);
case X86::ATOMMIN32:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL32rr);
case X86::ATOMMAX32:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG32rr);
case X86::ATOMUMIN32:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB32rr);
case X86::ATOMUMAX32:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA32rr);
+ case X86::ATOMMIN16:
+ case X86::ATOMMAX16:
+ case X86::ATOMUMIN16:
+ case X86::ATOMUMAX16:
+ case X86::ATOMMIN64:
+ case X86::ATOMMAX64:
+ case X86::ATOMUMIN64:
+ case X86::ATOMUMAX64: {
+ unsigned Opc;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("illegal opcode!");
+ case X86::ATOMMIN32: Opc = X86::CMOVL32rr; break;
+ case X86::ATOMMAX32: Opc = X86::CMOVG32rr; break;
+ case X86::ATOMUMIN32: Opc = X86::CMOVB32rr; break;
+ case X86::ATOMUMAX32: Opc = X86::CMOVA32rr; break;
+ case X86::ATOMMIN16: Opc = X86::CMOVL16rr; break;
+ case X86::ATOMMAX16: Opc = X86::CMOVG16rr; break;
+ case X86::ATOMUMIN16: Opc = X86::CMOVB16rr; break;
+ case X86::ATOMUMAX16: Opc = X86::CMOVA16rr; break;
+ case X86::ATOMMIN64: Opc = X86::CMOVL64rr; break;
+ case X86::ATOMMAX64: Opc = X86::CMOVG64rr; break;
+ case X86::ATOMUMIN64: Opc = X86::CMOVB64rr; break;
+ case X86::ATOMUMAX64: Opc = X86::CMOVA64rr; break;
+ // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way.
+ }
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, Opc);
+ }
+
+ case X86::ATOMAND32:
+ case X86::ATOMOR32:
+ case X86::ATOMXOR32:
+ case X86::ATOMNAND32: {
+ bool Invert = false;
+ unsigned RegOpc, ImmOpc;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("illegal opcode!");
+ case X86::ATOMAND32:
+ RegOpc = X86::AND32rr; ImmOpc = X86::AND32ri; break;
+ case X86::ATOMOR32:
+ RegOpc = X86::OR32rr; ImmOpc = X86::OR32ri; break;
+ case X86::ATOMXOR32:
+ RegOpc = X86::XOR32rr; ImmOpc = X86::XOR32ri; break;
+ case X86::ATOMNAND32:
+ RegOpc = X86::AND32rr; ImmOpc = X86::AND32ri; Invert = true; break;
+ }
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc,
+ X86::MOV32rm, X86::LCMPXCHG32,
+ X86::NOT32r, X86::EAX,
+ &X86::GR32RegClass, Invert);
+ }
case X86::ATOMAND16:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
- X86::AND16ri, X86::MOV16rm,
- X86::LCMPXCHG16,
- X86::NOT16r, X86::AX,
- &X86::GR16RegClass);
case X86::ATOMOR16:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR16rr,
- X86::OR16ri, X86::MOV16rm,
- X86::LCMPXCHG16,
- X86::NOT16r, X86::AX,
- &X86::GR16RegClass);
case X86::ATOMXOR16:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR16rr,
- X86::XOR16ri, X86::MOV16rm,
- X86::LCMPXCHG16,
- X86::NOT16r, X86::AX,
- &X86::GR16RegClass);
- case X86::ATOMNAND16:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
- X86::AND16ri, X86::MOV16rm,
- X86::LCMPXCHG16,
+ case X86::ATOMNAND16: {
+ bool Invert = false;
+ unsigned RegOpc, ImmOpc;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("illegal opcode!");
+ case X86::ATOMAND16:
+ RegOpc = X86::AND16rr; ImmOpc = X86::AND16ri; break;
+ case X86::ATOMOR16:
+ RegOpc = X86::OR16rr; ImmOpc = X86::OR16ri; break;
+ case X86::ATOMXOR16:
+ RegOpc = X86::XOR16rr; ImmOpc = X86::XOR16ri; break;
+ case X86::ATOMNAND16:
+ RegOpc = X86::AND16rr; ImmOpc = X86::AND16ri; Invert = true; break;
+ }
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc,
+ X86::MOV16rm, X86::LCMPXCHG16,
X86::NOT16r, X86::AX,
- &X86::GR16RegClass, true);
- case X86::ATOMMIN16:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL16rr);
- case X86::ATOMMAX16:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG16rr);
- case X86::ATOMUMIN16:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB16rr);
- case X86::ATOMUMAX16:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA16rr);
+ &X86::GR16RegClass, Invert);
+ }
case X86::ATOMAND8:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
- X86::AND8ri, X86::MOV8rm,
- X86::LCMPXCHG8,
- X86::NOT8r, X86::AL,
- &X86::GR8RegClass);
case X86::ATOMOR8:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR8rr,
- X86::OR8ri, X86::MOV8rm,
- X86::LCMPXCHG8,
- X86::NOT8r, X86::AL,
- &X86::GR8RegClass);
case X86::ATOMXOR8:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR8rr,
- X86::XOR8ri, X86::MOV8rm,
- X86::LCMPXCHG8,
- X86::NOT8r, X86::AL,
- &X86::GR8RegClass);
- case X86::ATOMNAND8:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
- X86::AND8ri, X86::MOV8rm,
- X86::LCMPXCHG8,
+ case X86::ATOMNAND8: {
+ bool Invert = false;
+ unsigned RegOpc, ImmOpc;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("illegal opcode!");
+ case X86::ATOMAND8:
+ RegOpc = X86::AND8rr; ImmOpc = X86::AND8ri; break;
+ case X86::ATOMOR8:
+ RegOpc = X86::OR8rr; ImmOpc = X86::OR8ri; break;
+ case X86::ATOMXOR8:
+ RegOpc = X86::XOR8rr; ImmOpc = X86::XOR8ri; break;
+ case X86::ATOMNAND8:
+ RegOpc = X86::AND8rr; ImmOpc = X86::AND8ri; Invert = true; break;
+ }
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc,
+ X86::MOV8rm, X86::LCMPXCHG8,
X86::NOT8r, X86::AL,
- &X86::GR8RegClass, true);
- // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way.
+ &X86::GR8RegClass, Invert);
+ }
+
// This group is for 64-bit host.
case X86::ATOMAND64:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
- X86::AND64ri32, X86::MOV64rm,
- X86::LCMPXCHG64,
- X86::NOT64r, X86::RAX,
- &X86::GR64RegClass);
case X86::ATOMOR64:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR64rr,
- X86::OR64ri32, X86::MOV64rm,
- X86::LCMPXCHG64,
- X86::NOT64r, X86::RAX,
- &X86::GR64RegClass);
case X86::ATOMXOR64:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR64rr,
- X86::XOR64ri32, X86::MOV64rm,
- X86::LCMPXCHG64,
- X86::NOT64r, X86::RAX,
- &X86::GR64RegClass);
- case X86::ATOMNAND64:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
- X86::AND64ri32, X86::MOV64rm,
- X86::LCMPXCHG64,
+ case X86::ATOMNAND64: {
+ bool Invert = false;
+ unsigned RegOpc, ImmOpc;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("illegal opcode!");
+ case X86::ATOMAND64:
+ RegOpc = X86::AND64rr; ImmOpc = X86::AND64ri32; break;
+ case X86::ATOMOR64:
+ RegOpc = X86::OR64rr; ImmOpc = X86::OR64ri32; break;
+ case X86::ATOMXOR64:
+ RegOpc = X86::XOR64rr; ImmOpc = X86::XOR64ri32; break;
+ case X86::ATOMNAND64:
+ RegOpc = X86::AND64rr; ImmOpc = X86::AND64ri32; Invert = true; break;
+ }
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc,
+ X86::MOV64rm, X86::LCMPXCHG64,
X86::NOT64r, X86::RAX,
- &X86::GR64RegClass, true);
- case X86::ATOMMIN64:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL64rr);
- case X86::ATOMMAX64:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG64rr);
- case X86::ATOMUMIN64:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB64rr);
- case X86::ATOMUMAX64:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA64rr);
+ &X86::GR64RegClass, Invert);
+ }
// This group does 64-bit operations on a 32-bit host.
case X86::ATOMAND6432:
- return EmitAtomicBit6432WithCustomInserter(MI, BB,
- X86::AND32rr, X86::AND32rr,
- X86::AND32ri, X86::AND32ri,
- false);
case X86::ATOMOR6432:
- return EmitAtomicBit6432WithCustomInserter(MI, BB,
- X86::OR32rr, X86::OR32rr,
- X86::OR32ri, X86::OR32ri,
- false);
case X86::ATOMXOR6432:
- return EmitAtomicBit6432WithCustomInserter(MI, BB,
- X86::XOR32rr, X86::XOR32rr,
- X86::XOR32ri, X86::XOR32ri,
- false);
case X86::ATOMNAND6432:
- return EmitAtomicBit6432WithCustomInserter(MI, BB,
- X86::AND32rr, X86::AND32rr,
- X86::AND32ri, X86::AND32ri,
- true);
case X86::ATOMADD6432:
- return EmitAtomicBit6432WithCustomInserter(MI, BB,
- X86::ADD32rr, X86::ADC32rr,
- X86::ADD32ri, X86::ADC32ri,
- false);
case X86::ATOMSUB6432:
- return EmitAtomicBit6432WithCustomInserter(MI, BB,
- X86::SUB32rr, X86::SBB32rr,
- X86::SUB32ri, X86::SBB32ri,
- false);
- case X86::ATOMSWAP6432:
- return EmitAtomicBit6432WithCustomInserter(MI, BB,
- X86::MOV32rr, X86::MOV32rr,
- X86::MOV32ri, X86::MOV32ri,
- false);
+ case X86::ATOMSWAP6432: {
+ bool Invert = false;
+ unsigned RegOpcL, RegOpcH, ImmOpcL, ImmOpcH;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("illegal opcode!");
+ case X86::ATOMAND6432:
+ RegOpcL = RegOpcH = X86::AND32rr;
+ ImmOpcL = ImmOpcH = X86::AND32ri;
+ break;
+ case X86::ATOMOR6432:
+ RegOpcL = RegOpcH = X86::OR32rr;
+ ImmOpcL = ImmOpcH = X86::OR32ri;
+ break;
+ case X86::ATOMXOR6432:
+ RegOpcL = RegOpcH = X86::XOR32rr;
+ ImmOpcL = ImmOpcH = X86::XOR32ri;
+ break;
+ case X86::ATOMNAND6432:
+ RegOpcL = RegOpcH = X86::AND32rr;
+ ImmOpcL = ImmOpcH = X86::AND32ri;
+ Invert = true;
+ break;
+ case X86::ATOMADD6432:
+ RegOpcL = X86::ADD32rr; RegOpcH = X86::ADC32rr;
+ ImmOpcL = X86::ADD32ri; ImmOpcH = X86::ADC32ri;
+ break;
+ case X86::ATOMSUB6432:
+ RegOpcL = X86::SUB32rr; RegOpcH = X86::SBB32rr;
+ ImmOpcL = X86::SUB32ri; ImmOpcH = X86::SBB32ri;
+ break;
+ case X86::ATOMSWAP6432:
+ RegOpcL = RegOpcH = X86::MOV32rr;
+ ImmOpcL = ImmOpcH = X86::MOV32ri;
+ break;
+ }
+ return EmitAtomicBit6432WithCustomInserter(MI, BB, RegOpcL, RegOpcH,
+ ImmOpcL, ImmOpcH, Invert);
+ }
+
case X86::VASTART_SAVE_XMM_REGS:
return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
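In the 64-bit-on-32-bit atomic group above, the low/high opcode pairs differ only for arithmetic: ADD/ADC and SUB/SBB let the carry or borrow from the low half flow into the high half, while the bitwise ops and swap use the same opcode for both halves. A scalar model of that split, illustrative only (the helper name is hypothetical):

#include <cstdint>

// Hypothetical model of how a 64-bit add decomposes on a 32-bit host.
static uint64_t add64Via32(uint32_t ALo, uint32_t AHi,
                           uint32_t BLo, uint32_t BHi) {
  uint32_t Lo = ALo + BLo;              // ADD32rr sets CF on unsigned overflow
  uint32_t Carry = (Lo < ALo) ? 1 : 0;  // CF produced by the low half
  uint32_t Hi = AHi + BHi + Carry;      // ADC32rr consumes CF
  return (static_cast<uint64_t>(Hi) << 32) | Lo;
}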
@@ -13043,7 +13330,7 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
false/*WriteMem*/);
return DAG.getNode(ISD::BITCAST, dl, VT, ResNode);
}
- }
+ }
// Emit a zeroed vector and insert the desired subvector on its
// first half.
@@ -13086,12 +13373,12 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
// Combine 256-bit vector shuffles. This is only profitable when in AVX mode
- if (Subtarget->hasAVX() && VT.getSizeInBits() == 256 &&
+ if (Subtarget->hasAVX() && VT.is256BitVector() &&
N->getOpcode() == ISD::VECTOR_SHUFFLE)
return PerformShuffleCombine256(N, DAG, DCI, Subtarget);
// Only handle 128 wide vector from here on.
- if (VT.getSizeInBits() != 128)
+ if (!VT.is128BitVector())
return SDValue();
// Combine a vector_shuffle that is equal to build_vector load1, load2, load3,
@@ -13109,7 +13396,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
/// a sequence of vector shuffle operations.
/// It is possible when we truncate a 256-bit vector to a 128-bit vector
-SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
+SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
DAGCombinerInfo &DCI) const {
if (!DCI.isBeforeLegalizeOps())
return SDValue();
@@ -13151,8 +13438,9 @@ SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
// PSHUFD
static const int ShufMask1[] = {0, 2, 0, 0};
- OpLo = DAG.getVectorShuffle(VT, dl, OpLo, DAG.getUNDEF(VT), ShufMask1);
- OpHi = DAG.getVectorShuffle(VT, dl, OpHi, DAG.getUNDEF(VT), ShufMask1);
+ SDValue Undef = DAG.getUNDEF(VT);
+ OpLo = DAG.getVectorShuffle(VT, dl, OpLo, Undef, ShufMask1);
+ OpHi = DAG.getVectorShuffle(VT, dl, OpHi, Undef, ShufMask1);
// MOVLHPS
static const int ShufMask2[] = {0, 1, 4, 5};
@@ -13210,10 +13498,9 @@ SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13,
-1, -1, -1, -1, -1, -1, -1, -1};
- OpLo = DAG.getVectorShuffle(MVT::v16i8, dl, OpLo, DAG.getUNDEF(MVT::v16i8),
- ShufMask1);
- OpHi = DAG.getVectorShuffle(MVT::v16i8, dl, OpHi, DAG.getUNDEF(MVT::v16i8),
- ShufMask1);
+ SDValue Undef = DAG.getUNDEF(MVT::v16i8);
+ OpLo = DAG.getVectorShuffle(MVT::v16i8, dl, OpLo, Undef, ShufMask1);
+ OpHi = DAG.getVectorShuffle(MVT::v16i8, dl, OpHi, Undef, ShufMask1);
OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpLo);
OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpHi);
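The shuffle masks above follow the getVectorShuffle convention: index i selects lane i of the first operand, index i+N selects lane i of the second, and -1 is undef. A scalar model for four lanes, illustrative only (the helper name is hypothetical):

#include <array>

// Hypothetical model: mask {0, 2, 0, 0} pulls the even lanes of A into the
// low positions; mask {0, 1, 4, 5} then concatenates the low halves of A
// and B, mirroring the PSHUFD + MOVLHPS sequence above.
static std::array<int, 4> shuffle4(const std::array<int, 4> &A,
                                   const std::array<int, 4> &B,
                                   const std::array<int, 4> &Mask) {
  std::array<int, 4> R{};
  for (int I = 0; I != 4; ++I)
    R[I] = Mask[I] < 0 ? 0 : (Mask[I] < 4 ? A[Mask[I]] : B[Mask[I] - 4]);
  return R;
}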
@@ -13718,6 +14005,88 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// Check whether a boolean test is testing a boolean value generated by
+// X86ISD::SETCC. If so, return the operand of that SETCC and the proper
+// condition code.
+//
+// Simplify the following patterns:
+// (Op (CMP (SETCC Cond EFLAGS) 1) EQ) or
+// (Op (CMP (SETCC Cond EFLAGS) 0) NEQ)
+// to (Op EFLAGS Cond)
+//
+// (Op (CMP (SETCC Cond EFLAGS) 0) EQ) or
+// (Op (CMP (SETCC Cond EFLAGS) 1) NEQ)
+// to (Op EFLAGS !Cond)
+//
+// where Op could be BRCOND or CMOV.
+//
+static SDValue BoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
+ // Quit if not CMP and SUB with its value result used.
+ if (Cmp.getOpcode() != X86ISD::CMP &&
+ (Cmp.getOpcode() != X86ISD::SUB || Cmp.getNode()->hasAnyUseOfValue(0)))
+ return SDValue();
+
+ // Quit if not used as a boolean value.
+ if (CC != X86::COND_E && CC != X86::COND_NE)
+ return SDValue();
+
+ // Check CMP operands. One of them should be 0 or 1 and the other should be
+ // a SetCC or a value extended from it.
+ SDValue Op1 = Cmp.getOperand(0);
+ SDValue Op2 = Cmp.getOperand(1);
+
+ SDValue SetCC;
+ const ConstantSDNode* C = 0;
+ bool needOppositeCond = (CC == X86::COND_E);
+
+ if ((C = dyn_cast<ConstantSDNode>(Op1)))
+ SetCC = Op2;
+ else if ((C = dyn_cast<ConstantSDNode>(Op2)))
+ SetCC = Op1;
+ else // Quit if neither operand is a constant.
+ return SDValue();
+
+ if (C->getZExtValue() == 1)
+ needOppositeCond = !needOppositeCond;
+ else if (C->getZExtValue() != 0)
+ // Quit if the constant is neither 0 nor 1.
+ return SDValue();
+
+ // Skip 'zext' node.
+ if (SetCC.getOpcode() == ISD::ZERO_EXTEND)
+ SetCC = SetCC.getOperand(0);
+
+ // Quit if not SETCC.
+ // FIXME: So far we only handle boolean values generated from SETCC. If
+ // there are other ways to generate boolean values, we need to handle them
+ // here as well.
+ if (SetCC.getOpcode() != X86ISD::SETCC)
+ return SDValue();
+
+ // Set the condition code or opposite one if necessary.
+ CC = X86::CondCode(SetCC.getConstantOperandVal(0));
+ if (needOppositeCond)
+ CC = X86::GetOppositeBranchCondition(CC);
+
+ return SetCC.getOperand(1);
+}
+
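Concretely, (cmp (setcc Cond, EFLAGS), 0) tested with COND_NE returns EFLAGS and rewrites CC to Cond, while the same compare tested with COND_E yields the opposite of Cond. A standalone restatement of the inversion logic, illustrative only (the helper name is hypothetical):

// Hypothetical model of how the compared constant and the outer EQ/NE
// decide whether the inner SETCC condition must be inverted.
static bool computeOppositeCond(bool OuterIsEQ, unsigned Constant) {
  bool Opposite = OuterIsEQ;   // (setcc == 0) under EQ tests the negation
  if (Constant == 1)
    Opposite = !Opposite;      // comparing against 1 flips the sense
  return Opposite;             // assumes Constant is 0 or 1, as checked above
}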
+static bool IsValidFCMOVCondition(X86::CondCode CC) {
+ switch (CC) {
+ default:
+ return false;
+ case X86::COND_B:
+ case X86::COND_BE:
+ case X86::COND_E:
+ case X86::COND_P:
+ case X86::COND_AE:
+ case X86::COND_A:
+ case X86::COND_NE:
+ case X86::COND_NP:
+ return true;
+ }
+}
+
/// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
@@ -13731,6 +14100,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
SDValue TrueOp = N->getOperand(1);
X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2);
SDValue Cond = N->getOperand(3);
+
if (CC == X86::COND_E || CC == X86::COND_NE) {
switch (Cond.getOpcode()) {
default: break;
@@ -13742,6 +14112,18 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
}
}
+ SDValue Flags;
+
+ Flags = BoolTestSetCCCombine(Cond, CC);
+ if (Flags.getNode() &&
+ // Extra check as FCMOV only supports a subset of X86 condition codes.
+ (FalseOp.getValueType() != MVT::f80 || IsValidFCMOVCondition(CC))) {
+ SDValue Ops[] = { FalseOp, TrueOp,
+ DAG.getConstant(CC, MVT::i8), Flags };
+ return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(),
+ Ops, array_lengthof(Ops));
+ }
+
// If this is a select between two integer constants, try to do some
// optimizations. Note that the operands are ordered the opposite of SELECT
// operands.
@@ -14164,7 +14546,7 @@ static bool CanFoldXORWithAllOnes(const SDNode *N) {
// Sometimes the operand may come from a insert_subvector building a 256-bit
// allones vector
- if (VT.getSizeInBits() == 256 &&
+ if (VT.is256BitVector() &&
N->getOpcode() == ISD::INSERT_SUBVECTOR) {
SDValue V1 = N->getOperand(0);
SDValue V2 = N->getOperand(1);
@@ -14609,7 +14991,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
// On Sandy Bridge, 256-bit memory operations are executed by two
// 128-bit ports. However, on Haswell it is better to issue a single 256-bit
// memory operation.
- if (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2() &&
+ if (VT.is256BitVector() && !Subtarget->hasAVX2() &&
StoredVal.getNode()->getOpcode() == ISD::CONCAT_VECTORS &&
StoredVal.getNumOperands() == 2) {
SDValue Value0 = StoredVal.getOperand(0);
@@ -14992,6 +15374,29 @@ static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+/// PerformFMinFMaxCombine - Do target-specific dag combines on X86ISD::FMIN and
+/// X86ISD::FMAX nodes.
+static SDValue PerformFMinFMaxCombine(SDNode *N, SelectionDAG &DAG) {
+ assert(N->getOpcode() == X86ISD::FMIN || N->getOpcode() == X86ISD::FMAX);
+
+ // Only perform the optimization when unsafe FP math is allowed.
+ if (!DAG.getTarget().Options.UnsafeFPMath)
+ return SDValue();
+
+ // In unsafe-math mode, convert the FMAX and FMIN nodes into FMAXC and
+ // FMINC, which are commutative operations.
+ unsigned NewOp = 0;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("unknown opcode");
+ case X86ISD::FMIN: NewOp = X86ISD::FMINC; break;
+ case X86ISD::FMAX: NewOp = X86ISD::FMAXC; break;
+ }
+
+ return DAG.getNode(NewOp, N->getDebugLoc(), N->getValueType(0),
+ N->getOperand(0), N->getOperand(1));
+}
+
+
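The unsafe-math guard matters because SSE MIN/MAX are not commutative: when an operand is NaN (and for signed zeros) the result is the second source operand, so FMIN(x, y) and FMIN(y, x) can disagree. A scalar model of the minss semantics, illustrative only (the helper name is hypothetical):

// Hypothetical model: NaN compares false, so the second operand wins,
// which is why FMIN is only commutative under unsafe FP math.
static float sseMin(float A, float B) {
  return A < B ? A : B;
}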
/// PerformFANDCombine - Do target-specific dag combines on X86ISD::FAND nodes.
static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
// FAND(0.0, x) -> 0.0
@@ -15067,19 +15472,19 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
// concat the vectors to original VT
unsigned NumElems = OpVT.getVectorNumElements();
+ SDValue Undef = DAG.getUNDEF(OpVT);
+
SmallVector<int,8> ShufMask1(NumElems, -1);
for (unsigned i = 0; i != NumElems/2; ++i)
ShufMask1[i] = i;
- SDValue OpLo = DAG.getVectorShuffle(OpVT, dl, Op, DAG.getUNDEF(OpVT),
- &ShufMask1[0]);
+ SDValue OpLo = DAG.getVectorShuffle(OpVT, dl, Op, Undef, &ShufMask1[0]);
SmallVector<int,8> ShufMask2(NumElems, -1);
for (unsigned i = 0; i != NumElems/2; ++i)
ShufMask2[i] = i + NumElems/2;
- SDValue OpHi = DAG.getVectorShuffle(OpVT, dl, Op, DAG.getUNDEF(OpVT),
- &ShufMask2[0]);
+ SDValue OpHi = DAG.getVectorShuffle(OpVT, dl, Op, Undef, &ShufMask2[0]);
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
VT.getVectorNumElements()/2);
@@ -15092,6 +15497,40 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget* Subtarget) {
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ EVT ScalarVT = VT.getScalarType();
+ if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget->hasFMA())
+ return SDValue();
+
+ SDValue A = N->getOperand(0);
+ SDValue B = N->getOperand(1);
+ SDValue C = N->getOperand(2);
+
+ bool NegA = (A.getOpcode() == ISD::FNEG);
+ bool NegB = (B.getOpcode() == ISD::FNEG);
+ bool NegC = (C.getOpcode() == ISD::FNEG);
+
+ // The multiplication is negated when exactly one of NegA and NegB is set.
+ bool NegMul = (NegA != NegB);
+ if (NegA)
+ A = A.getOperand(0);
+ if (NegB)
+ B = B.getOperand(0);
+ if (NegC)
+ C = C.getOperand(0);
+
+ unsigned Opcode;
+ if (!NegMul)
+ Opcode = (!NegC)? X86ISD::FMADD : X86ISD::FMSUB;
+ else
+ Opcode = (!NegC)? X86ISD::FNMADD : X86ISD::FNMSUB;
+ return DAG.getNode(Opcode, dl, VT, A, B, C);
+}
+
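The opcode choice above reduces to two bits: whether the product picks up a sign flip (NegA xor NegB) and whether the addend does. A standalone restatement, illustrative only (the enum and helper are hypothetical):

enum FmaOpcode { FMADD, FMSUB, FNMADD, FNMSUB };

// Hypothetical restatement of the opcode selection in PerformFMACombine.
static FmaOpcode selectFmaOpcode(bool NegA, bool NegB, bool NegC) {
  bool NegMul = NegA != NegB;      // exactly one negated input flips a*b
  if (!NegMul)
    return NegC ? FMSUB : FMADD;   //  a*b - c   /   a*b + c
  return NegC ? FNMSUB : FNMADD;   // -a*b - c   /  -a*b + c
}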
static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
@@ -15164,7 +15603,7 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG) {
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
+ SDValue RHS = N->getOperand(1);
if ((CC == ISD::SETNE || CC == ISD::SETEQ) && LHS.getOpcode() == ISD::SUB)
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(LHS.getOperand(0)))
@@ -15187,19 +15626,50 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG) {
// Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT
static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) {
- unsigned X86CC = N->getConstantOperandVal(0);
- SDValue EFLAG = N->getOperand(1);
DebugLoc DL = N->getDebugLoc();
+ X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(0));
+ SDValue EFLAGS = N->getOperand(1);
// Materialize "setb reg" as "sbb reg,reg", since it can be extended without
// a zext and produces an all-ones bit which is more useful than 0/1 in some
// cases.
- if (X86CC == X86::COND_B)
+ if (CC == X86::COND_B)
return DAG.getNode(ISD::AND, DL, MVT::i8,
DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8,
- DAG.getConstant(X86CC, MVT::i8), EFLAG),
+ DAG.getConstant(CC, MVT::i8), EFLAGS),
DAG.getConstant(1, MVT::i8));
+ SDValue Flags;
+
+ Flags = BoolTestSetCCCombine(EFLAGS, CC);
+ if (Flags.getNode()) {
+ SDValue Cond = DAG.getConstant(CC, MVT::i8);
+ return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags);
+ }
+
+ return SDValue();
+}
+
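The COND_B case earlier in this function relies on a flags trick: sbb r, r computes r - r - CF, which yields 0 or all-ones from the carry flag alone, and the trailing AND recovers the 0/1 that setb would produce. A scalar model, illustrative only (the helper name is hypothetical):

#include <cstdint>

// Hypothetical model of SETCC_CARRY ("sbb reg,reg") followed by AND 1.
static uint8_t setbViaSbb(bool CarryFlag) {
  uint8_t Sbb = CarryFlag ? 0xFF : 0x00;  // 0 or all-ones from CF
  return Sbb & 1;                         // same 0/1 result as setb
}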
+// Optimize branch condition evaluation.
+//
+static SDValue PerformBrCondCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ DebugLoc DL = N->getDebugLoc();
+ SDValue Chain = N->getOperand(0);
+ SDValue Dest = N->getOperand(1);
+ SDValue EFLAGS = N->getOperand(3);
+ X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(2));
+
+ SDValue Flags;
+
+ Flags = BoolTestSetCCCombine(EFLAGS, CC);
+ if (Flags.getNode()) {
+ SDValue Cond = DAG.getConstant(CC, MVT::i8);
+ return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), Chain, Dest, Cond,
+ Flags);
+ }
+
return SDValue();
}
@@ -15408,6 +15878,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget);
case X86ISD::FXOR:
case X86ISD::FOR: return PerformFORCombine(N, DAG);
+ case X86ISD::FMIN:
+ case X86ISD::FMAX: return PerformFMinFMaxCombine(N, DAG);
case X86ISD::FAND: return PerformFANDCombine(N, DAG);
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
@@ -15417,6 +15889,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG, DCI);
case ISD::SETCC: return PerformISDSETCCCombine(N, DAG);
case X86ISD::SETCC: return PerformSETCCCombine(N, DAG);
+ case X86ISD::BRCOND: return PerformBrCondCombine(N, DAG, DCI, Subtarget);
case X86ISD::SHUFP: // Handle all target specific shuffles
case X86ISD::PALIGN:
case X86ISD::UNPCKH:
@@ -15431,6 +15904,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::VPERMILP:
case X86ISD::VPERM2X128:
case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget);
+ case ISD::FMA: return PerformFMACombine(N, DAG, Subtarget);
}
return SDValue();
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 78e4d75..74f5167 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -137,10 +137,6 @@ namespace llvm {
/// relative displacements.
WrapperRIP,
- /// MOVQ2DQ - Copies a 64-bit value from an MMX vector to the low word
- /// of an XMM vector, with the high word zero filled.
- MOVQ2DQ,
-
/// MOVDQ2Q - Copies a 64-bit value from the low word of an XMM vector
/// to an MMX vector. If you think this is too close to the MOVQ2DQ
/// mnemonic, so do I; blame Intel.
@@ -199,6 +195,9 @@ namespace llvm {
///
FMAX, FMIN,
+ /// FMAXC, FMINC - Commutative FMIN and FMAX.
+ FMAXC, FMINC,
+
/// FRSQRT, FRCP - Floating point reciprocal-sqrt and reciprocal
/// approximation. Note that these typically require refinement
/// in order to obtain suitable precision.
@@ -231,6 +230,9 @@ namespace llvm {
// VSEXT_MOVL - Vector move low and sign extend.
VSEXT_MOVL,
+ // VFPEXT - Vector FP extend.
+ VFPEXT,
+
// VSHL, VSRL - 128-bit vector logical left / right shift
VSHLDQ, VSRLDQ,
@@ -294,6 +296,14 @@ namespace llvm {
// PMULUDQ - Vector multiply packed unsigned doubleword integers
PMULUDQ,
+ // FMA nodes
+ FMADD,
+ FNMADD,
+ FMSUB,
+ FNMSUB,
+ FMADDSUB,
+ FMSUBADD,
+
// VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack,
// according to %al. An operator is needed so that this can be expanded
// with control flow.
@@ -325,6 +335,10 @@ namespace llvm {
// RDRAND - Get a random integer and indicate whether it is valid in CF.
RDRAND,
+ // PCMP*STRI
+ PCMPISTRI,
+ PCMPESTRI,
+
// ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG,
// ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG -
// Atomic 64-bit binary operations.
@@ -597,6 +611,12 @@ namespace llvm {
virtual bool isZExtFree(Type *Ty1, Type *Ty2) const;
virtual bool isZExtFree(EVT VT1, EVT VT2) const;
+ /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
+ /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
+ /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
+ /// is expanded to mul + add.
+ virtual bool isFMAFasterThanMulAndAdd(EVT) const { return true; }
+
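A sketch of the contract this hook controls, assuming the generic fmuladd semantics (the helper name is hypothetical):

#include <cmath>

// With the hook returning true and FMA legal, fmuladd becomes one fused,
// singly-rounded operation; otherwise it decays to separate mul and add.
static double fmuladdExpanded(double A, double B, double C, bool UseFMA) {
  return UseFMA ? std::fma(A, B, C) : A * B + C;
}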
/// isNarrowingProfitable - Return true if it's profitable to narrow
/// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
/// from i32 to i8 but not from i32 to i16.
@@ -656,7 +676,8 @@ namespace llvm {
/// createFastISel - This method returns a target specific FastISel object,
/// or null if the target does not support "fast" ISel.
- virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const;
+ virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo) const;
/// getStackCookieLocation - Return true if the target stores stack
/// protector cookies at a fixed offset in some non-standard address
@@ -813,6 +834,8 @@ namespace llvm {
SDValue LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const;
SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const;
+
virtual SDValue
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
@@ -844,9 +867,6 @@ namespace llvm {
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const;
- void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG, unsigned NewOp) const;
-
/// Utility function to emit string processing sse4.2 instructions
/// that return in xmm0.
/// This takes the instruction to expand, the associated machine basic
@@ -933,7 +953,8 @@ namespace llvm {
};
namespace X86 {
- FastISel *createFastISel(FunctionLoweringInfo &funcInfo);
+ FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo);
}
}
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index b6ba68f..f790611 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -1132,8 +1132,10 @@ defm XOR : ArithBinOp_RF<0x30, 0x32, 0x34, "xor", MRM6r, MRM6m,
X86xor_flag, xor, 1, 0>;
defm ADD : ArithBinOp_RF<0x00, 0x02, 0x04, "add", MRM0r, MRM0m,
X86add_flag, add, 1, 1>;
+let isCompare = 1 in {
defm SUB : ArithBinOp_RF<0x28, 0x2A, 0x2C, "sub", MRM5r, MRM5m,
X86sub_flag, sub, 0, 0>;
+}
// Arithmetic.
let Uses = [EFLAGS] in {
diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td
index 0d5490a..2eb454d 100644
--- a/lib/Target/X86/X86InstrExtension.td
+++ b/lib/Target/X86/X86InstrExtension.td
@@ -39,12 +39,15 @@ let neverHasSideEffects = 1 in {
// Sign/Zero extenders
+let neverHasSideEffects = 1 in {
def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
"movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_R8>,
TB, OpSize;
+let mayLoad = 1 in
def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
"movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_M8>,
TB, OpSize;
+} // neverHasSideEffects = 1
def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src),
"movs{bl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (sext GR8:$src))], IIC_MOVSX>, TB;
@@ -59,12 +62,15 @@ def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
[(set GR32:$dst, (sextloadi32i16 addr:$src))], IIC_MOVSX>,
TB;
+let neverHasSideEffects = 1 in {
def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
"movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_R8>,
TB, OpSize;
+let mayLoad = 1 in
def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
"movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_M8>,
TB, OpSize;
+} // neverHasSideEffects = 1
def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (zext GR8:$src))], IIC_MOVZX>, TB;
@@ -82,6 +88,7 @@ def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
// These are the same as the regular MOVZX32rr8 and MOVZX32rm8
// except that they use GR32_NOREX for the output operand register class
// instead of GR32. This allows them to operate on h registers on x86-64.
+let neverHasSideEffects = 1, isCodeGenOnly = 1 in {
def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg,
(outs GR32_NOREX:$dst), (ins GR8_NOREX:$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
@@ -91,6 +98,7 @@ def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
(outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
[], IIC_MOVZX>, TB;
+}
// MOVSX64rr8 always has a REX prefix and it has an 8-bit register
// operand, which makes it a rare instruction with an 8-bit register
diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td
index 8802a2e..95ee7e5 100644
--- a/lib/Target/X86/X86InstrFMA.td
+++ b/lib/Target/X86/X86InstrFMA.td
@@ -16,159 +16,307 @@
//===----------------------------------------------------------------------===//
let Constraints = "$src1 = $dst" in {
-multiclass fma3p_rm<bits<8> opc, string OpcodeStr> {
-let neverHasSideEffects = 1 in {
- def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, VR128:$src3),
- !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- []>;
- let mayLoad = 1 in
- def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, f128mem:$src3),
- !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- []>;
- def rY : FMA3<opc, MRMSrcReg, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, VR256:$src3),
- !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- []>;
- let mayLoad = 1 in
- def mY : FMA3<opc, MRMSrcMem, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, f256mem:$src3),
- !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- []>;
-} // neverHasSideEffects = 1
-}
+multiclass fma3p_rm<bits<8> opc, string OpcodeStr,
+ PatFrag MemFrag128, PatFrag MemFrag256,
+ ValueType OpVT128, ValueType OpVT256,
+ SDPatternOperator Op = null_frag, bit MayLoad = 1> {
+ def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst, (OpVT128 (Op VR128:$src2,
+ VR128:$src1, VR128:$src3)))]>;
-// Intrinsic for 132 pattern
-multiclass fma3p_rm_int<bits<8> opc, string OpcodeStr,
- PatFrag MemFrag128, PatFrag MemFrag256,
- Intrinsic Int128, Intrinsic Int256> {
- def r_Int : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, VR128:$src3),
- !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst, (Int128 VR128:$src1, VR128:$src3, VR128:$src2))]>;
- def m_Int : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, f128mem:$src3),
- !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (Int128 VR128:$src1, (MemFrag128 addr:$src3), VR128:$src2))]>;
- def rY_Int : FMA3<opc, MRMSrcReg, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, VR256:$src3),
- !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR256:$dst, (Int256 VR256:$src1, VR256:$src3, VR256:$src2))]>;
- def mY_Int : FMA3<opc, MRMSrcMem, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, f256mem:$src3),
- !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR256:$dst,
- (Int256 VR256:$src1, (MemFrag256 addr:$src3), VR256:$src2))]>;
+ let mayLoad = MayLoad in
+ def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, f128mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst, (OpVT128 (Op VR128:$src2, VR128:$src1,
+ (MemFrag128 addr:$src3))))]>;
+
+ def rY : FMA3<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, VR256:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR256:$dst, (OpVT256 (Op VR256:$src2, VR256:$src1,
+ VR256:$src3)))]>;
+
+ let mayLoad = MayLoad in
+ def mY : FMA3<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, f256mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR256:$dst,
+ (OpVT256 (Op VR256:$src2, VR256:$src1,
+ (MemFrag256 addr:$src3))))]>;
}
} // Constraints = "$src1 = $dst"
multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
string OpcodeStr, string PackTy,
PatFrag MemFrag128, PatFrag MemFrag256,
- Intrinsic Int128, Intrinsic Int256> {
- defm r132 : fma3p_rm_int <opc132, !strconcat(OpcodeStr,
- !strconcat("132", PackTy)), MemFrag128, MemFrag256,
- Int128, Int256>;
- defm r132 : fma3p_rm <opc132, !strconcat(OpcodeStr, !strconcat("132", PackTy))>;
- defm r213 : fma3p_rm <opc213, !strconcat(OpcodeStr, !strconcat("213", PackTy))>;
- defm r231 : fma3p_rm <opc231, !strconcat(OpcodeStr, !strconcat("231", PackTy))>;
+ SDNode Op, ValueType OpTy128, ValueType OpTy256> {
+ defm r213 : fma3p_rm<opc213,
+ !strconcat(OpcodeStr, !strconcat("213", PackTy)),
+ MemFrag128, MemFrag256, OpTy128, OpTy256, Op, 0>;
+let neverHasSideEffects = 1 in {
+ defm r132 : fma3p_rm<opc132,
+ !strconcat(OpcodeStr, !strconcat("132", PackTy)),
+ MemFrag128, MemFrag256, OpTy128, OpTy256>;
+ defm r231 : fma3p_rm<opc231,
+ !strconcat(OpcodeStr, !strconcat("231", PackTy)),
+ MemFrag128, MemFrag256, OpTy128, OpTy256>;
+} // neverHasSideEffects = 1
}
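In the rewrite above only the 213 form gets selection patterns, matching the (Op src2, src1, src3) operand order in those patterns; the 132 and 231 forms are kept pattern-less for the assembler and for operand-commuting folds. The three encodings differ only in which sources feed the multiply, a sketch assuming the documented Intel semantics (the helper names are hypothetical):

// Illustrative only: the three FMA3 operand orderings (dst is tied to src1).
static float fmadd132(float Dst, float Src2, float Src3) { return Dst * Src3 + Src2; }
static float fmadd213(float Dst, float Src2, float Src3) { return Src2 * Dst + Src3; }
static float fmadd231(float Dst, float Src2, float Src3) { return Src2 * Src3 + Dst; }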
// Fused Multiply-Add
let ExeDomain = SSEPackedSingle in {
defm VFMADDPS : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps", memopv4f32,
- memopv8f32, int_x86_fma_vfmadd_ps, int_x86_fma_vfmadd_ps_256>;
- defm VFMSUBPS : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", memopv4f32,
- memopv8f32, int_x86_fma_vfmsub_ps, int_x86_fma_vfmsub_ps_256>;
+ memopv8f32, X86Fmadd, v4f32, v8f32>;
+ defm VFMSUBPS : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", memopv4f32,
+ memopv8f32, X86Fmsub, v4f32, v8f32>;
defm VFMADDSUBPS : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps",
- memopv4f32, memopv8f32, int_x86_fma_vfmaddsub_ps,
- int_x86_fma_vfmaddsub_ps_256>;
+ memopv4f32, memopv8f32, X86Fmaddsub,
+ v4f32, v8f32>;
defm VFMSUBADDPS : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps",
- memopv4f32, memopv8f32, int_x86_fma_vfmsubadd_ps,
- int_x86_fma_vfmaddsub_ps_256>;
+ memopv4f32, memopv8f32, X86Fmsubadd,
+ v4f32, v8f32>;
}
let ExeDomain = SSEPackedDouble in {
defm VFMADDPD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd", memopv2f64,
- memopv4f64, int_x86_fma_vfmadd_pd, int_x86_fma_vfmadd_pd_256>, VEX_W;
+ memopv4f64, X86Fmadd, v2f64, v4f64>, VEX_W;
defm VFMSUBPD : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", memopv2f64,
- memopv4f64, int_x86_fma_vfmsub_pd, int_x86_fma_vfmsub_pd_256>, VEX_W;
- defm VFMADDSUBPD : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd", memopv2f64,
- memopv4f64, int_x86_fma_vfmaddsub_pd, int_x86_fma_vfmaddsub_pd_256>, VEX_W;
- defm VFMSUBADDPD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd", memopv2f64,
- memopv4f64, int_x86_fma_vfmsubadd_pd, int_x86_fma_vfmsubadd_pd_256>, VEX_W;
+ memopv4f64, X86Fmsub, v2f64, v4f64>, VEX_W;
+ defm VFMADDSUBPD : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd",
+ memopv2f64, memopv4f64, X86Fmaddsub,
+ v2f64, v4f64>, VEX_W;
+ defm VFMSUBADDPD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd",
+ memopv2f64, memopv4f64, X86Fmsubadd,
+ v2f64, v4f64>, VEX_W;
}
// Fused Negative Multiply-Add
let ExeDomain = SSEPackedSingle in {
defm VFNMADDPS : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps", memopv4f32,
- memopv8f32, int_x86_fma_vfnmadd_ps, int_x86_fma_vfnmadd_ps_256>;
+ memopv8f32, X86Fnmadd, v4f32, v8f32>;
defm VFNMSUBPS : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps", memopv4f32,
- memopv8f32, int_x86_fma_vfnmsub_ps, int_x86_fma_vfnmsub_ps_256>;
+ memopv8f32, X86Fnmsub, v4f32, v8f32>;
}
let ExeDomain = SSEPackedDouble in {
defm VFNMADDPD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd", memopv2f64,
- memopv4f64, int_x86_fma_vfnmadd_pd, int_x86_fma_vfnmadd_pd_256>, VEX_W;
- defm VFNMSUBPD : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd", memopv2f64,
- memopv4f64, int_x86_fma_vfnmsub_pd, int_x86_fma_vfnmsub_pd_256>, VEX_W;
+ memopv4f64, X86Fnmadd, v2f64, v4f64>, VEX_W;
+ defm VFNMSUBPD : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd",
+ memopv2f64, memopv4f64, X86Fnmsub, v2f64,
+ v4f64>, VEX_W;
}
+let Predicates = [HasFMA] in {
+ def : Pat<(int_x86_fma_vfmadd_ps VR128:$src2, VR128:$src1, VR128:$src3),
+ (VFMADDPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(int_x86_fma_vfmadd_ps VR128:$src2, VR128:$src1,
+ (memopv4f32 addr:$src3)),
+ (VFMADDPSr213m VR128:$src1, VR128:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfmsub_ps VR128:$src2, VR128:$src1, VR128:$src3),
+ (VFMSUBPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(int_x86_fma_vfmsub_ps VR128:$src2, VR128:$src1,
+ (memopv4f32 addr:$src3)),
+ (VFMSUBPSr213m VR128:$src1, VR128:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfmaddsub_ps VR128:$src2, VR128:$src1, VR128:$src3),
+ (VFMADDSUBPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(int_x86_fma_vfmaddsub_ps VR128:$src2, VR128:$src1,
+ (memopv4f32 addr:$src3)),
+ (VFMADDSUBPSr213m VR128:$src1, VR128:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfmsubadd_ps VR128:$src2, VR128:$src1, VR128:$src3),
+ (VFMSUBADDPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(int_x86_fma_vfmsubadd_ps VR128:$src2, VR128:$src1,
+ (memopv4f32 addr:$src3)),
+ (VFMSUBADDPSr213m VR128:$src1, VR128:$src2, addr:$src3)>;
+
+ def : Pat<(int_x86_fma_vfmadd_ps_256 VR256:$src2, VR256:$src1, VR256:$src3),
+ (VFMADDPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(int_x86_fma_vfmadd_ps_256 VR256:$src2, VR256:$src1,
+ (memopv8f32 addr:$src3)),
+ (VFMADDPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfmsub_ps_256 VR256:$src2, VR256:$src1, VR256:$src3),
+ (VFMSUBPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(int_x86_fma_vfmsub_ps_256 VR256:$src2, VR256:$src1,
+ (memopv8f32 addr:$src3)),
+ (VFMSUBPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfmaddsub_ps_256 VR256:$src2, VR256:$src1, VR256:$src3),
+ (VFMADDSUBPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(int_x86_fma_vfmaddsub_ps_256 VR256:$src2, VR256:$src1,
+ (memopv8f32 addr:$src3)),
+ (VFMADDSUBPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfmsubadd_ps_256 VR256:$src2, VR256:$src1, VR256:$src3),
+ (VFMSUBADDPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(int_x86_fma_vfmsubadd_ps_256 VR256:$src2, VR256:$src1,
+ (memopv8f32 addr:$src3)),
+ (VFMSUBADDPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
+
+ def : Pat<(int_x86_fma_vfmadd_pd VR128:$src2, VR128:$src1, VR128:$src3),
+ (VFMADDPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(int_x86_fma_vfmadd_pd VR128:$src2, VR128:$src1,
+ (memopv2f64 addr:$src3)),
+ (VFMADDPDr213m VR128:$src1, VR128:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfmsub_pd VR128:$src2, VR128:$src1, VR128:$src3),
+ (VFMSUBPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(int_x86_fma_vfmsub_pd VR128:$src2, VR128:$src1,
+ (memopv2f64 addr:$src3)),
+ (VFMSUBPDr213m VR128:$src1, VR128:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfmaddsub_pd VR128:$src2, VR128:$src1, VR128:$src3),
+ (VFMADDSUBPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(int_x86_fma_vfmaddsub_pd VR128:$src2, VR128:$src1,
+ (memopv2f64 addr:$src3)),
+ (VFMADDSUBPDr213m VR128:$src1, VR128:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfmsubadd_pd VR128:$src2, VR128:$src1, VR128:$src3),
+ (VFMSUBADDPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(int_x86_fma_vfmsubadd_pd VR128:$src2, VR128:$src1,
+ (memopv2f64 addr:$src3)),
+ (VFMSUBADDPDr213m VR128:$src1, VR128:$src2, addr:$src3)>;
+
+ def : Pat<(int_x86_fma_vfmadd_pd_256 VR256:$src2, VR256:$src1, VR256:$src3),
+ (VFMADDPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(int_x86_fma_vfmadd_pd_256 VR256:$src2, VR256:$src1,
+ (memopv4f64 addr:$src3)),
+ (VFMADDPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfmsub_pd_256 VR256:$src2, VR256:$src1, VR256:$src3),
+ (VFMSUBPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(int_x86_fma_vfmsub_pd_256 VR256:$src2, VR256:$src1,
+ (memopv4f64 addr:$src3)),
+ (VFMSUBPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfmaddsub_pd_256 VR256:$src2, VR256:$src1, VR256:$src3),
+ (VFMADDSUBPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(int_x86_fma_vfmaddsub_pd_256 VR256:$src2, VR256:$src1,
+ (memopv4f64 addr:$src3)),
+ (VFMADDSUBPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfmsubadd_pd_256 VR256:$src2, VR256:$src1, VR256:$src3),
+ (VFMSUBADDPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(int_x86_fma_vfmsubadd_pd_256 VR256:$src2, VR256:$src1,
+ (memopv4f64 addr:$src3)),
+ (VFMSUBADDPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
+
+ def : Pat<(int_x86_fma_vfnmadd_ps VR128:$src2, VR128:$src1, VR128:$src3),
+ (VFNMADDPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(int_x86_fma_vfnmadd_ps VR128:$src2, VR128:$src1,
+ (memopv4f32 addr:$src3)),
+ (VFNMADDPSr213m VR128:$src1, VR128:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfnmsub_ps VR128:$src2, VR128:$src1, VR128:$src3),
+ (VFNMSUBPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(int_x86_fma_vfnmsub_ps VR128:$src2, VR128:$src1,
+ (memopv4f32 addr:$src3)),
+ (VFNMSUBPSr213m VR128:$src1, VR128:$src2, addr:$src3)>;
+
+ def : Pat<(int_x86_fma_vfnmadd_ps_256 VR256:$src2, VR256:$src1, VR256:$src3),
+ (VFNMADDPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(int_x86_fma_vfnmadd_ps_256 VR256:$src2, VR256:$src1,
+ (memopv8f32 addr:$src3)),
+ (VFNMADDPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfnmsub_ps_256 VR256:$src2, VR256:$src1, VR256:$src3),
+ (VFNMSUBPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(int_x86_fma_vfnmsub_ps_256 VR256:$src2, VR256:$src1,
+ (memopv8f32 addr:$src3)),
+ (VFNMSUBPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
+
+ def : Pat<(int_x86_fma_vfnmadd_pd VR128:$src2, VR128:$src1, VR128:$src3),
+ (VFNMADDPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(int_x86_fma_vfnmadd_pd VR128:$src2, VR128:$src1,
+ (memopv2f64 addr:$src3)),
+ (VFNMADDPDr213m VR128:$src1, VR128:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfnmsub_pd VR128:$src2, VR128:$src1, VR128:$src3),
+ (VFNMSUBPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(int_x86_fma_vfnmsub_pd VR128:$src2, VR128:$src1,
+ (memopv2f64 addr:$src3)),
+ (VFNMSUBPDr213m VR128:$src1, VR128:$src2, addr:$src3)>;
+
+ def : Pat<(int_x86_fma_vfnmadd_pd_256 VR256:$src2, VR256:$src1, VR256:$src3),
+ (VFNMADDPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(int_x86_fma_vfnmadd_pd_256 VR256:$src2, VR256:$src1,
+ (memopv4f64 addr:$src3)),
+ (VFNMADDPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfnmsub_pd_256 VR256:$src2, VR256:$src1, VR256:$src3),
+ (VFNMSUBPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(int_x86_fma_vfnmsub_pd_256 VR256:$src2, VR256:$src1,
+ (memopv4f64 addr:$src3)),
+ (VFNMSUBPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
+
+} // Predicates = [HasFMA]
let Constraints = "$src1 = $dst" in {
multiclass fma3s_rm<bits<8> opc, string OpcodeStr, X86MemOperand x86memop,
- RegisterClass RC> {
-let neverHasSideEffects = 1 in {
- def r : FMA3<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, RC:$src3),
- !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- []>;
- let mayLoad = 1 in
- def m : FMA3<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, x86memop:$src3),
- !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- []>;
-} // neverHasSideEffects = 1
+ RegisterClass RC, ValueType OpVT, PatFrag mem_frag,
+ SDPatternOperator OpNode = null_frag, bit MayLoad = 1> {
+ def r : FMA3<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set RC:$dst,
+ (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>;
+ let mayLoad = MayLoad in
+ def m : FMA3<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, x86memop:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set RC:$dst,
+ (OpVT (OpNode RC:$src2, RC:$src1,
+ (mem_frag addr:$src3))))]>;
}
multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr, Operand memop,
- ComplexPattern mem_cpat, Intrinsic IntId> {
+ ComplexPattern mem_cpat, Intrinsic IntId,
+ RegisterClass RC> {
def r_Int : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, VR128:$src3),
- !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst, (IntId VR128:$src1, VR128:$src3, VR128:$src2))]>;
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst, (IntId VR128:$src2, VR128:$src1,
+ VR128:$src3))]>;
def m_Int : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, memop:$src3),
- !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (IntId VR128:$src1, mem_cpat:$src3, VR128:$src2))]>;
+ (ins VR128:$src1, VR128:$src2, memop:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (IntId VR128:$src2, VR128:$src1, mem_cpat:$src3))]>;
}
} // Constraints = "$src1 = $dst"
multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
- string OpStr, Intrinsic IntF32, Intrinsic IntF64> {
- defm SSr132 : fma3s_rm<opc132, !strconcat(OpStr, "132ss"), f32mem, FR32>;
- defm SSr213 : fma3s_rm<opc213, !strconcat(OpStr, "213ss"), f32mem, FR32>;
- defm SSr231 : fma3s_rm<opc231, !strconcat(OpStr, "231ss"), f32mem, FR32>;
- defm SDr132 : fma3s_rm<opc132, !strconcat(OpStr, "132sd"), f64mem, FR64>, VEX_W;
- defm SDr213 : fma3s_rm<opc213, !strconcat(OpStr, "213sd"), f64mem, FR64>, VEX_W;
- defm SDr231 : fma3s_rm<opc231, !strconcat(OpStr, "231sd"), f64mem, FR64>, VEX_W;
- defm SSr132 : fma3s_rm_int <opc132, !strconcat(OpStr, "132ss"), ssmem,
- sse_load_f32, IntF32>;
- defm SDr132 : fma3s_rm_int <opc132, !strconcat(OpStr, "132sd"), sdmem,
- sse_load_f64, IntF64>;
+ string OpStr, string PackTy, Intrinsic Int,
+ SDNode OpNode, RegisterClass RC, ValueType OpVT,
+ X86MemOperand x86memop, Operand memop, PatFrag mem_frag,
+ ComplexPattern mem_cpat> {
+let neverHasSideEffects = 1 in {
+ defm r132 : fma3s_rm<opc132, !strconcat(OpStr, !strconcat("132", PackTy)),
+ x86memop, RC, OpVT, mem_frag>;
+ defm r231 : fma3s_rm<opc231, !strconcat(OpStr, !strconcat("231", PackTy)),
+ x86memop, RC, OpVT, mem_frag>;
+}
+
+defm r213 : fma3s_rm<opc213, !strconcat(OpStr, !strconcat("213", PackTy)),
+ x86memop, RC, OpVT, mem_frag, OpNode, 0>,
+ fma3s_rm_int<opc213, !strconcat(OpStr, !strconcat("213", PackTy)),
+ memop, mem_cpat, Int, RC>;
+}
+
+multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231,
+ string OpStr, Intrinsic IntF32, Intrinsic IntF64,
+ SDNode OpNode> {
+ defm SS : fma3s_forms<opc132, opc213, opc231, OpStr, "ss", IntF32, OpNode,
+ FR32, f32, f32mem, ssmem, loadf32, sse_load_f32>;
+ defm SD : fma3s_forms<opc132, opc213, opc231, OpStr, "sd", IntF64, OpNode,
+ FR64, f64, f64mem, sdmem, loadf64, sse_load_f64>, VEX_W;
}
-defm VFMADD : fma3s_forms<0x99, 0xA9, 0xB9, "vfmadd", int_x86_fma_vfmadd_ss,
- int_x86_fma_vfmadd_sd>, VEX_LIG;
-defm VFMSUB : fma3s_forms<0x9B, 0xAB, 0xBB, "vfmsub", int_x86_fma_vfmsub_ss,
- int_x86_fma_vfmsub_sd>, VEX_LIG;
+defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", int_x86_fma_vfmadd_ss,
+ int_x86_fma_vfmadd_sd, X86Fmadd>, VEX_LIG;
+defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", int_x86_fma_vfmsub_ss,
+ int_x86_fma_vfmsub_sd, X86Fmsub>, VEX_LIG;
-defm VFNMADD : fma3s_forms<0x9D, 0xAD, 0xBD, "vfnmadd", int_x86_fma_vfnmadd_ss,
- int_x86_fma_vfnmadd_sd>, VEX_LIG;
-defm VFNMSUB : fma3s_forms<0x9F, 0xAF, 0xBF, "vfnmsub", int_x86_fma_vfnmsub_ss,
- int_x86_fma_vfnmsub_sd>, VEX_LIG;
+defm VFNMADD : fma3s<0x9D, 0xAD, 0xBD, "vfnmadd", int_x86_fma_vfnmadd_ss,
+ int_x86_fma_vfnmadd_sd, X86Fnmadd>, VEX_LIG;
+defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", int_x86_fma_vfnmsub_ss,
+ int_x86_fma_vfnmsub_sd, X86Fnmsub>, VEX_LIG;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index a115ab4..81b4f81 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -366,7 +366,7 @@ class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
//
// SDI - SSE2 instructions with XD prefix.
// SDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix.
-// SSDI - SSE2 instructions with XS prefix.
+// S2SI - SSE2 instructions with XS prefix.
-// SSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix.
+// S2SIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix.
// PDI - SSE2 instructions with TB and OpSize prefixes.
// PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes.
@@ -379,10 +379,10 @@ class SDI<bits<8> o, Format F, dag outs, dag ins, string asm,
class SDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>;
-class SSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+class S2SI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasSSE2]>;
-class SSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+class S2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>;
class PDI<bits<8> o, Format F, dag outs, dag ins, string asm,
@@ -397,6 +397,10 @@ class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XD,
Requires<[HasAVX]>;
+class VS2SI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XS,
+ Requires<[HasAVX]>;
class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, itin, SSEPackedDouble>, TB,
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index ec030dd..ee2d3c4 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -29,6 +29,13 @@ def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>,
def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;
+
+// Commutative and Associative FMIN and FMAX.
+def X86fminc : SDNode<"X86ISD::FMINC", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def X86fmaxc : SDNode<"X86ISD::FMAXC", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+
def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]>;
def X86for : SDNode<"X86ISD::FOR", SDTFPBinOp,
@@ -73,14 +80,20 @@ def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
def X86vzmovly : SDNode<"X86ISD::VZEXT_MOVL",
- SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisOpSmallerThanOp<1, 0> ]>>;
def X86vsmovl : SDNode<"X86ISD::VSEXT_MOVL",
- SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisInt<1>, SDTCisInt<0>]>>;
+ SDTypeProfile<1, 1,
+ [SDTCisVec<0>, SDTCisInt<1>, SDTCisInt<0>]>>;
def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+
+def X86vfpext : SDNode<"X86ISD::VFPEXT",
+ SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisFP<0>, SDTCisFP<1>]>>;
+
def X86vshldq : SDNode<"X86ISD::VSHLDQ", SDTIntShiftOp>;
def X86vshrdq : SDNode<"X86ISD::VSRLDQ", SDTIntShiftOp>;
def X86cmpp : SDNode<"X86ISD::CMPP", SDTX86VFCMP>;
@@ -125,7 +138,10 @@ def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
-SDTCisSameAs<1,2>, SDTCisVT<3, i32>]>;
+ SDTCisSameAs<1,2>, SDTCisVT<3, i32>]>;
+
+def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
+ SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>;
@@ -160,9 +176,26 @@ def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
-def X86Blendpw : SDNode<"X86ISD::BLENDPW", SDTBlend>;
-def X86Blendps : SDNode<"X86ISD::BLENDPS", SDTBlend>;
-def X86Blendpd : SDNode<"X86ISD::BLENDPD", SDTBlend>;
+def X86Blendpw : SDNode<"X86ISD::BLENDPW", SDTBlend>;
+def X86Blendps : SDNode<"X86ISD::BLENDPS", SDTBlend>;
+def X86Blendpd : SDNode<"X86ISD::BLENDPD", SDTBlend>;
+def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>;
+def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>;
+def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFma>;
+def X86Fnmsub : SDNode<"X86ISD::FNMSUB", SDTFma>;
+def X86Fmaddsub : SDNode<"X86ISD::FMADDSUB", SDTFma>;
+def X86Fmsubadd : SDNode<"X86ISD::FMSUBADD", SDTFma>;
+
+def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>,
+ SDTCisVT<4, i8>]>;
+def SDT_PCMPESTRI : SDTypeProfile<2, 5, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, v16i8>, SDTCisVT<3, i32>,
+ SDTCisVT<4, v16i8>, SDTCisVT<5, i32>,
+ SDTCisVT<6, i8>]>;
+
+def X86pcmpistri : SDNode<"X86ISD::PCMPISTRI", SDT_PCMPISTRI>;
+def X86pcmpestri : SDNode<"X86ISD::PCMPESTRI", SDT_PCMPESTRI>;
//===----------------------------------------------------------------------===//
// SSE Complex Patterns
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 69493bc..459f01a 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -414,12 +414,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 },
{ X86::CVTSS2SI64rr, X86::CVTSS2SI64rm, 0 },
{ X86::CVTSS2SIrr, X86::CVTSS2SIrm, 0 },
- { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 },
- { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
- { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 },
- { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 },
- { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 },
- { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 },
{ X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, TB_ALIGN_16 },
{ X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, TB_ALIGN_16 },
{ X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 },
@@ -680,6 +674,12 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::IMUL64rr, X86::IMUL64rm, 0 },
{ X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 },
{ X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 },
+ { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 },
+ { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
+ { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 },
+ { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 },
+ { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 },
+ { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 },
{ X86::MAXPDrr, X86::MAXPDrm, TB_ALIGN_16 },
{ X86::MAXPDrr_Int, X86::MAXPDrm_Int, TB_ALIGN_16 },
{ X86::MAXPSrr, X86::MAXPSrm, TB_ALIGN_16 },
@@ -1130,8 +1130,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VFMADDSDr132r, X86::VFMADDSDr132m, 0 },
{ X86::VFMADDSSr213r, X86::VFMADDSSr213m, 0 },
{ X86::VFMADDSDr213r, X86::VFMADDSDr213m, 0 },
- { X86::VFMADDSSr132r_Int, X86::VFMADDSSr132m_Int, 0 },
- { X86::VFMADDSDr132r_Int, X86::VFMADDSDr132m_Int, 0 },
+ { X86::VFMADDSSr213r_Int, X86::VFMADDSSr213m_Int, 0 },
+ { X86::VFMADDSDr213r_Int, X86::VFMADDSDr213m_Int, 0 },
{ X86::VFMADDPSr231r, X86::VFMADDPSr231m, TB_ALIGN_16 },
{ X86::VFMADDPDr231r, X86::VFMADDPDr231m, TB_ALIGN_16 },
@@ -1145,10 +1145,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VFMADDPDr132rY, X86::VFMADDPDr132mY, TB_ALIGN_32 },
{ X86::VFMADDPSr213rY, X86::VFMADDPSr213mY, TB_ALIGN_32 },
{ X86::VFMADDPDr213rY, X86::VFMADDPDr213mY, TB_ALIGN_32 },
- { X86::VFMADDPSr132r_Int, X86::VFMADDPSr132m_Int, TB_ALIGN_16 },
- { X86::VFMADDPDr132r_Int, X86::VFMADDPDr132m_Int, TB_ALIGN_16 },
- { X86::VFMADDPSr132rY_Int, X86::VFMADDPSr132mY_Int, TB_ALIGN_32 },
- { X86::VFMADDPDr132rY_Int, X86::VFMADDPDr132mY_Int, TB_ALIGN_32 },
{ X86::VFNMADDSSr231r, X86::VFNMADDSSr231m, 0 },
{ X86::VFNMADDSDr231r, X86::VFNMADDSDr231m, 0 },
@@ -1156,8 +1152,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VFNMADDSDr132r, X86::VFNMADDSDr132m, 0 },
{ X86::VFNMADDSSr213r, X86::VFNMADDSSr213m, 0 },
{ X86::VFNMADDSDr213r, X86::VFNMADDSDr213m, 0 },
- { X86::VFNMADDSSr132r_Int, X86::VFNMADDSSr132m_Int, 0 },
- { X86::VFNMADDSDr132r_Int, X86::VFNMADDSDr132m_Int, 0 },
+ { X86::VFNMADDSSr213r_Int, X86::VFNMADDSSr213m_Int, 0 },
+ { X86::VFNMADDSDr213r_Int, X86::VFNMADDSDr213m_Int, 0 },
{ X86::VFNMADDPSr231r, X86::VFNMADDPSr231m, TB_ALIGN_16 },
{ X86::VFNMADDPDr231r, X86::VFNMADDPDr231m, TB_ALIGN_16 },
@@ -1171,10 +1167,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VFNMADDPDr132rY, X86::VFNMADDPDr132mY, TB_ALIGN_32 },
{ X86::VFNMADDPSr213rY, X86::VFNMADDPSr213mY, TB_ALIGN_32 },
{ X86::VFNMADDPDr213rY, X86::VFNMADDPDr213mY, TB_ALIGN_32 },
- { X86::VFNMADDPSr132r_Int, X86::VFNMADDPSr132m_Int, TB_ALIGN_16 },
- { X86::VFNMADDPDr132r_Int, X86::VFNMADDPDr132m_Int, TB_ALIGN_16 },
- { X86::VFNMADDPSr132rY_Int, X86::VFNMADDPSr132mY_Int, TB_ALIGN_32 },
- { X86::VFNMADDPDr132rY_Int, X86::VFNMADDPDr132mY_Int, TB_ALIGN_32 },
{ X86::VFMSUBSSr231r, X86::VFMSUBSSr231m, 0 },
{ X86::VFMSUBSDr231r, X86::VFMSUBSDr231m, 0 },
@@ -1182,8 +1174,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VFMSUBSDr132r, X86::VFMSUBSDr132m, 0 },
{ X86::VFMSUBSSr213r, X86::VFMSUBSSr213m, 0 },
{ X86::VFMSUBSDr213r, X86::VFMSUBSDr213m, 0 },
- { X86::VFMSUBSSr132r_Int, X86::VFMSUBSSr132m_Int, 0 },
- { X86::VFMSUBSDr132r_Int, X86::VFMSUBSDr132m_Int, 0 },
+ { X86::VFMSUBSSr213r_Int, X86::VFMSUBSSr213m_Int, 0 },
+ { X86::VFMSUBSDr213r_Int, X86::VFMSUBSDr213m_Int, 0 },
{ X86::VFMSUBPSr231r, X86::VFMSUBPSr231m, TB_ALIGN_16 },
{ X86::VFMSUBPDr231r, X86::VFMSUBPDr231m, TB_ALIGN_16 },
@@ -1197,10 +1189,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VFMSUBPDr132rY, X86::VFMSUBPDr132mY, TB_ALIGN_32 },
{ X86::VFMSUBPSr213rY, X86::VFMSUBPSr213mY, TB_ALIGN_32 },
{ X86::VFMSUBPDr213rY, X86::VFMSUBPDr213mY, TB_ALIGN_32 },
- { X86::VFMSUBPSr132r_Int, X86::VFMSUBPSr132m_Int, TB_ALIGN_16 },
- { X86::VFMSUBPDr132r_Int, X86::VFMSUBPDr132m_Int, TB_ALIGN_16 },
- { X86::VFMSUBPSr132rY_Int, X86::VFMSUBPSr132mY_Int, TB_ALIGN_32 },
- { X86::VFMSUBPDr132rY_Int, X86::VFMSUBPDr132mY_Int, TB_ALIGN_32 },
{ X86::VFNMSUBSSr231r, X86::VFNMSUBSSr231m, 0 },
{ X86::VFNMSUBSDr231r, X86::VFNMSUBSDr231m, 0 },
@@ -1208,8 +1196,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VFNMSUBSDr132r, X86::VFNMSUBSDr132m, 0 },
{ X86::VFNMSUBSSr213r, X86::VFNMSUBSSr213m, 0 },
{ X86::VFNMSUBSDr213r, X86::VFNMSUBSDr213m, 0 },
- { X86::VFNMSUBSSr132r_Int, X86::VFNMSUBSSr132m_Int, 0 },
- { X86::VFNMSUBSDr132r_Int, X86::VFNMSUBSDr132m_Int, 0 },
+ { X86::VFNMSUBSSr213r_Int, X86::VFNMSUBSSr213m_Int, 0 },
+ { X86::VFNMSUBSDr213r_Int, X86::VFNMSUBSDr213m_Int, 0 },
{ X86::VFNMSUBPSr231r, X86::VFNMSUBPSr231m, TB_ALIGN_16 },
{ X86::VFNMSUBPDr231r, X86::VFNMSUBPDr231m, TB_ALIGN_16 },
@@ -1223,10 +1211,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VFNMSUBPDr132rY, X86::VFNMSUBPDr132mY, TB_ALIGN_32 },
{ X86::VFNMSUBPSr213rY, X86::VFNMSUBPSr213mY, TB_ALIGN_32 },
{ X86::VFNMSUBPDr213rY, X86::VFNMSUBPDr213mY, TB_ALIGN_32 },
- { X86::VFNMSUBPSr132r_Int, X86::VFNMSUBPSr132m_Int, TB_ALIGN_16 },
- { X86::VFNMSUBPDr132r_Int, X86::VFNMSUBPDr132m_Int, TB_ALIGN_16 },
- { X86::VFNMSUBPSr132rY_Int, X86::VFNMSUBPSr132mY_Int, TB_ALIGN_32 },
- { X86::VFNMSUBPDr132rY_Int, X86::VFNMSUBPDr132mY_Int, TB_ALIGN_32 },
{ X86::VFMADDSUBPSr231r, X86::VFMADDSUBPSr231m, TB_ALIGN_16 },
{ X86::VFMADDSUBPDr231r, X86::VFMADDSUBPDr231m, TB_ALIGN_16 },
@@ -1240,10 +1224,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VFMADDSUBPDr132rY, X86::VFMADDSUBPDr132mY, TB_ALIGN_32 },
{ X86::VFMADDSUBPSr213rY, X86::VFMADDSUBPSr213mY, TB_ALIGN_32 },
{ X86::VFMADDSUBPDr213rY, X86::VFMADDSUBPDr213mY, TB_ALIGN_32 },
- { X86::VFMADDSUBPSr132r_Int, X86::VFMADDSUBPSr132m_Int, TB_ALIGN_16 },
- { X86::VFMADDSUBPDr132r_Int, X86::VFMADDSUBPDr132m_Int, TB_ALIGN_16 },
- { X86::VFMADDSUBPSr132rY_Int, X86::VFMADDSUBPSr132mY_Int, TB_ALIGN_32 },
- { X86::VFMADDSUBPDr132rY_Int, X86::VFMADDSUBPDr132mY_Int, TB_ALIGN_32 },
{ X86::VFMSUBADDPSr231r, X86::VFMSUBADDPSr231m, TB_ALIGN_16 },
{ X86::VFMSUBADDPDr231r, X86::VFMSUBADDPDr231m, TB_ALIGN_16 },
@@ -1257,10 +1237,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VFMSUBADDPDr132rY, X86::VFMSUBADDPDr132mY, TB_ALIGN_32 },
{ X86::VFMSUBADDPSr213rY, X86::VFMSUBADDPSr213mY, TB_ALIGN_32 },
{ X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr213mY, TB_ALIGN_32 },
- { X86::VFMSUBADDPSr132r_Int, X86::VFMSUBADDPSr132m_Int, TB_ALIGN_16 },
- { X86::VFMSUBADDPDr132r_Int, X86::VFMSUBADDPDr132m_Int, TB_ALIGN_16 },
- { X86::VFMSUBADDPSr132rY_Int, X86::VFMSUBADDPSr132mY_Int, TB_ALIGN_32 },
- { X86::VFMSUBADDPDr132rY_Int, X86::VFMSUBADDPDr132mY_Int, TB_ALIGN_32 },
};
for (unsigned i = 0, e = array_lengthof(OpTbl3); i != e; ++i) {
@@ -1318,8 +1294,7 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
SrcReg = MI.getOperand(1).getReg();
DstReg = MI.getOperand(0).getReg();
switch (MI.getOpcode()) {
- default:
- llvm_unreachable(0);
+ default: llvm_unreachable("Unreachable!");
case X86::MOVSX16rr8:
case X86::MOVZX16rr8:
case X86::MOVSX32rr8:
@@ -1463,6 +1438,9 @@ unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
/// regIsPICBase - Return true if the register is a PIC base (i.e., defined
/// by X86::MOVPC32r).
static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
+ // Don't waste compile time scanning use-def chains of physregs.
+ if (!TargetRegisterInfo::isVirtualRegister(BaseReg))
+ return false;
bool isPICBase = false;
for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg),
E = MRI.def_end(); I != E; ++I) {
@@ -1480,78 +1458,69 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
AliasAnalysis *AA) const {
switch (MI->getOpcode()) {
default: break;
- case X86::MOV8rm:
- case X86::MOV16rm:
- case X86::MOV32rm:
- case X86::MOV64rm:
- case X86::LD_Fp64m:
- case X86::MOVSSrm:
- case X86::MOVSDrm:
- case X86::MOVAPSrm:
- case X86::MOVUPSrm:
- case X86::MOVAPDrm:
- case X86::MOVDQArm:
- case X86::VMOVSSrm:
- case X86::VMOVSDrm:
- case X86::VMOVAPSrm:
- case X86::VMOVUPSrm:
- case X86::VMOVAPDrm:
- case X86::VMOVDQArm:
- case X86::VMOVAPSYrm:
- case X86::VMOVUPSYrm:
- case X86::VMOVAPDYrm:
- case X86::VMOVDQAYrm:
- case X86::MMX_MOVD64rm:
- case X86::MMX_MOVQ64rm:
- case X86::FsVMOVAPSrm:
- case X86::FsVMOVAPDrm:
- case X86::FsMOVAPSrm:
- case X86::FsMOVAPDrm: {
- // Loads from constant pools are trivially rematerializable.
- if (MI->getOperand(1).isReg() &&
- MI->getOperand(2).isImm() &&
- MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
- MI->isInvariantLoad(AA)) {
- unsigned BaseReg = MI->getOperand(1).getReg();
- if (BaseReg == 0 || BaseReg == X86::RIP)
- return true;
- // Allow re-materialization of PIC load.
- if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal())
- return false;
- const MachineFunction &MF = *MI->getParent()->getParent();
- const MachineRegisterInfo &MRI = MF.getRegInfo();
- bool isPICBase = false;
- for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg),
- E = MRI.def_end(); I != E; ++I) {
- MachineInstr *DefMI = I.getOperand().getParent();
- if (DefMI->getOpcode() != X86::MOVPC32r)
- return false;
- assert(!isPICBase && "More than one PIC base?");
- isPICBase = true;
- }
- return isPICBase;
- }
- return false;
+ case X86::MOV8rm:
+ case X86::MOV16rm:
+ case X86::MOV32rm:
+ case X86::MOV64rm:
+ case X86::LD_Fp64m:
+ case X86::MOVSSrm:
+ case X86::MOVSDrm:
+ case X86::MOVAPSrm:
+ case X86::MOVUPSrm:
+ case X86::MOVAPDrm:
+ case X86::MOVDQArm:
+ case X86::VMOVSSrm:
+ case X86::VMOVSDrm:
+ case X86::VMOVAPSrm:
+ case X86::VMOVUPSrm:
+ case X86::VMOVAPDrm:
+ case X86::VMOVDQArm:
+ case X86::VMOVAPSYrm:
+ case X86::VMOVUPSYrm:
+ case X86::VMOVAPDYrm:
+ case X86::VMOVDQAYrm:
+ case X86::MMX_MOVD64rm:
+ case X86::MMX_MOVQ64rm:
+ case X86::FsVMOVAPSrm:
+ case X86::FsVMOVAPDrm:
+ case X86::FsMOVAPSrm:
+ case X86::FsMOVAPDrm: {
+ // Loads from constant pools are trivially rematerializable.
+ if (MI->getOperand(1).isReg() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
+ MI->isInvariantLoad(AA)) {
+ unsigned BaseReg = MI->getOperand(1).getReg();
+ if (BaseReg == 0 || BaseReg == X86::RIP)
+ return true;
+ // Allow re-materialization of PIC load.
+ if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal())
+ return false;
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ return regIsPICBase(BaseReg, MRI);
}
+ return false;
+ }
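+  // (Reference sketch of the X86 memory-operand layout the checks above rely
+  // on: a load such as
+  //    %reg = MOV32rm %base, scale, %index, disp, %segment
+  //  carries its address in operands 1..5, so "operand 3 is register 0"
+  //  means "no index register" and operand 4 is the displacement.)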
- case X86::LEA32r:
- case X86::LEA64r: {
- if (MI->getOperand(2).isImm() &&
- MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
- !MI->getOperand(4).isReg()) {
- // lea fi#, lea GV, etc. are all rematerializable.
- if (!MI->getOperand(1).isReg())
- return true;
- unsigned BaseReg = MI->getOperand(1).getReg();
- if (BaseReg == 0)
- return true;
- // Allow re-materialization of lea PICBase + x.
- const MachineFunction &MF = *MI->getParent()->getParent();
- const MachineRegisterInfo &MRI = MF.getRegInfo();
- return regIsPICBase(BaseReg, MRI);
- }
- return false;
- }
+ case X86::LEA32r:
+ case X86::LEA64r: {
+ if (MI->getOperand(2).isImm() &&
+ MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
+ !MI->getOperand(4).isReg()) {
+ // lea fi#, lea GV, etc. are all rematerializable.
+ if (!MI->getOperand(1).isReg())
+ return true;
+ unsigned BaseReg = MI->getOperand(1).getReg();
+ if (BaseReg == 0)
+ return true;
+ // Allow re-materialization of lea PICBase + x.
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ return regIsPICBase(BaseReg, MRI);
+ }
+ return false;
+ }
}
// All other instructions marked M_REMATERIALIZABLE are always trivially
@@ -1660,7 +1629,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
case X86::MOV64r0: {
if (!isSafeToClobberEFLAGS(MBB, I)) {
switch (Opc) {
- default: break;
+ default: llvm_unreachable("Unreachable!");
case X86::MOV8r0: Opc = X86::MOV8ri; break;
case X86::MOV16r0: Opc = X86::MOV16ri; break;
case X86::MOV32r0: Opc = X86::MOV32ri; break;
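      // (Background note: the MOV*r0 pseudos are normally expanded to
      // "xor reg, reg", which clobbers EFLAGS; with EFLAGS live we must
      // materialize the zero with a mov-immediate instead, hence this
      // remapping.)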
@@ -1733,8 +1702,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(),
get(Opc), leaOutReg);
switch (MIOpc) {
- default:
- llvm_unreachable(0);
+ default: llvm_unreachable("Unreachable!");
case X86::SHL16ri: {
unsigned ShAmt = MI->getOperand(2).getImm();
MIB.addReg(0).addImm(1 << ShAmt)
@@ -2126,57 +2094,25 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
MI->getOperand(3).setImm(Size-Amt);
return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
}
- case X86::CMOVB16rr:
- case X86::CMOVB32rr:
- case X86::CMOVB64rr:
- case X86::CMOVAE16rr:
- case X86::CMOVAE32rr:
- case X86::CMOVAE64rr:
- case X86::CMOVE16rr:
- case X86::CMOVE32rr:
- case X86::CMOVE64rr:
- case X86::CMOVNE16rr:
- case X86::CMOVNE32rr:
- case X86::CMOVNE64rr:
- case X86::CMOVBE16rr:
- case X86::CMOVBE32rr:
- case X86::CMOVBE64rr:
- case X86::CMOVA16rr:
- case X86::CMOVA32rr:
- case X86::CMOVA64rr:
- case X86::CMOVL16rr:
- case X86::CMOVL32rr:
- case X86::CMOVL64rr:
- case X86::CMOVGE16rr:
- case X86::CMOVGE32rr:
- case X86::CMOVGE64rr:
- case X86::CMOVLE16rr:
- case X86::CMOVLE32rr:
- case X86::CMOVLE64rr:
- case X86::CMOVG16rr:
- case X86::CMOVG32rr:
- case X86::CMOVG64rr:
- case X86::CMOVS16rr:
- case X86::CMOVS32rr:
- case X86::CMOVS64rr:
- case X86::CMOVNS16rr:
- case X86::CMOVNS32rr:
- case X86::CMOVNS64rr:
- case X86::CMOVP16rr:
- case X86::CMOVP32rr:
- case X86::CMOVP64rr:
- case X86::CMOVNP16rr:
- case X86::CMOVNP32rr:
- case X86::CMOVNP64rr:
- case X86::CMOVO16rr:
- case X86::CMOVO32rr:
- case X86::CMOVO64rr:
- case X86::CMOVNO16rr:
- case X86::CMOVNO32rr:
- case X86::CMOVNO64rr: {
- unsigned Opc = 0;
+ case X86::CMOVB16rr: case X86::CMOVB32rr: case X86::CMOVB64rr:
+ case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr:
+ case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr:
+ case X86::CMOVNE16rr: case X86::CMOVNE32rr: case X86::CMOVNE64rr:
+ case X86::CMOVBE16rr: case X86::CMOVBE32rr: case X86::CMOVBE64rr:
+ case X86::CMOVA16rr: case X86::CMOVA32rr: case X86::CMOVA64rr:
+ case X86::CMOVL16rr: case X86::CMOVL32rr: case X86::CMOVL64rr:
+ case X86::CMOVGE16rr: case X86::CMOVGE32rr: case X86::CMOVGE64rr:
+ case X86::CMOVLE16rr: case X86::CMOVLE32rr: case X86::CMOVLE64rr:
+ case X86::CMOVG16rr: case X86::CMOVG32rr: case X86::CMOVG64rr:
+ case X86::CMOVS16rr: case X86::CMOVS32rr: case X86::CMOVS64rr:
+ case X86::CMOVNS16rr: case X86::CMOVNS32rr: case X86::CMOVNS64rr:
+ case X86::CMOVP16rr: case X86::CMOVP32rr: case X86::CMOVP64rr:
+ case X86::CMOVNP16rr: case X86::CMOVNP32rr: case X86::CMOVNP64rr:
+ case X86::CMOVO16rr: case X86::CMOVO32rr: case X86::CMOVO64rr:
+ case X86::CMOVNO16rr: case X86::CMOVNO32rr: case X86::CMOVNO64rr: {
+ unsigned Opc;
switch (MI->getOpcode()) {
- default: break;
+ default: llvm_unreachable("Unreachable!");
case X86::CMOVB16rr: Opc = X86::CMOVAE16rr; break;
case X86::CMOVB32rr: Opc = X86::CMOVAE32rr; break;
case X86::CMOVB64rr: Opc = X86::CMOVAE64rr; break;
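    // (Illustrative semantics, operands assumed virtual: commuting the two
    // sources of a conditional move requires inverting the condition, e.g.
    //    %d = CMOVB16rr %a, %b    ; %d = CF ? %b : %a
    // becomes
    //    %d = CMOVAE16rr %b, %a   ; %d = !CF ? %a : %b
    // which yields the same value with the operands swapped.)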
@@ -2408,7 +2344,7 @@ static X86::CondCode getSwappedCondition(X86::CondCode CC) {
/// whether it has memory operand.
static unsigned getSETFromCond(X86::CondCode CC,
bool HasMemoryOperand) {
- static const unsigned Opc[16][2] = {
+ static const uint16_t Opc[16][2] = {
{ X86::SETAr, X86::SETAm },
{ X86::SETAEr, X86::SETAEm },
{ X86::SETBr, X86::SETBm },
@@ -2435,7 +2371,7 @@ static unsigned getSETFromCond(X86::CondCode CC,
/// register size in bytes, and operand type.
static unsigned getCMovFromCond(X86::CondCode CC, unsigned RegBytes,
bool HasMemoryOperand) {
- static const unsigned Opc[32][3] = {
+ static const uint16_t Opc[32][3] = {
{ X86::CMOVA16rr, X86::CMOVA32rr, X86::CMOVA64rr },
{ X86::CMOVAE16rr, X86::CMOVAE32rr, X86::CMOVAE64rr },
{ X86::CMOVB16rr, X86::CMOVB32rr, X86::CMOVB64rr },
@@ -2768,19 +2704,18 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
// SrcReg(GR64) -> DestReg(VR64)
if (X86::GR64RegClass.contains(DestReg)) {
- if (X86::VR128RegClass.contains(SrcReg)) {
+ if (X86::VR128RegClass.contains(SrcReg))
// Copy from a VR128 register to a GR64 register.
return HasAVX ? X86::VMOVPQIto64rr : X86::MOVPQIto64rr;
- } else if (X86::VR64RegClass.contains(SrcReg)) {
+ if (X86::VR64RegClass.contains(SrcReg))
// Copy from a VR64 register to a GR64 register.
return X86::MOVSDto64rr;
- }
} else if (X86::GR64RegClass.contains(SrcReg)) {
// Copy from a GR64 register to a VR128 register.
if (X86::VR128RegClass.contains(DestReg))
return HasAVX ? X86::VMOV64toPQIrr : X86::MOV64toPQIrr;
// Copy from a GR64 register to a VR64 register.
- else if (X86::VR64RegClass.contains(DestReg))
+ if (X86::VR64RegClass.contains(DestReg))
return X86::MOV64toSDrr;
}
@@ -2788,12 +2723,12 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
// SrcReg(GR32) -> DestReg(FR32)
if (X86::GR32RegClass.contains(DestReg) && X86::FR32RegClass.contains(SrcReg))
- // Copy from a FR32 register to a GR32 register.
- return HasAVX ? X86::VMOVSS2DIrr : X86::MOVSS2DIrr;
+ // Copy from a FR32 register to a GR32 register.
+ return HasAVX ? X86::VMOVSS2DIrr : X86::MOVSS2DIrr;
if (X86::FR32RegClass.contains(DestReg) && X86::GR32RegClass.contains(SrcReg))
- // Copy from a GR32 register to a FR32 register.
- return HasAVX ? X86::VMOVDI2SSrr : X86::MOVDI2SSrr;
+ // Copy from a GR32 register to a FR32 register.
+ return HasAVX ? X86::VMOVDI2SSrr : X86::MOVDI2SSrr;
return 0;
}
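// (Hypothetical call-site sketch; the exact integration point is not shown in
// this hunk: copyPhysReg() can defer to this helper for cross-class copies,
//    if (unsigned CopyOpc = CopyToFromAsymmetricReg(DestReg, SrcReg, HasAVX))
//      BuildMI(MBB, MI, DL, get(CopyOpc), DestReg)
//          .addReg(SrcReg, getKillRegState(KillSrc));
//  before considering other strategies.)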
@@ -2804,7 +2739,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
bool KillSrc) const {
// First deal with the normal symmetric copies.
bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
- unsigned Opc = 0;
+ unsigned Opc;
if (X86::GR64RegClass.contains(DestReg, SrcReg))
Opc = X86::MOV64rr;
else if (X86::GR32RegClass.contains(DestReg, SrcReg))
@@ -2843,7 +2778,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
BuildMI(MBB, MI, DL, get(X86::PUSHF64));
BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg);
return;
- } else if (X86::GR32RegClass.contains(DestReg)) {
+ }
+ if (X86::GR32RegClass.contains(DestReg)) {
BuildMI(MBB, MI, DL, get(X86::PUSHF32));
BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg);
return;
@@ -2855,7 +2791,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addReg(SrcReg, getKillRegState(KillSrc));
BuildMI(MBB, MI, DL, get(X86::POPF64));
return;
- } else if (X86::GR32RegClass.contains(SrcReg)) {
+ }
+ if (X86::GR32RegClass.contains(SrcReg)) {
BuildMI(MBB, MI, DL, get(X86::PUSH32r))
.addReg(SrcReg, getKillRegState(KillSrc));
BuildMI(MBB, MI, DL, get(X86::POPF32));
@@ -3037,6 +2974,37 @@ analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2,
CmpMask = ~0;
CmpValue = MI->getOperand(1).getImm();
return true;
+  // A SUB can be used to perform a comparison.
+ case X86::SUB64rm:
+ case X86::SUB32rm:
+ case X86::SUB16rm:
+ case X86::SUB8rm:
+ SrcReg = MI->getOperand(1).getReg();
+ SrcReg2 = 0;
+ CmpMask = ~0;
+ CmpValue = 0;
+ return true;
+ case X86::SUB64rr:
+ case X86::SUB32rr:
+ case X86::SUB16rr:
+ case X86::SUB8rr:
+ SrcReg = MI->getOperand(1).getReg();
+ SrcReg2 = MI->getOperand(2).getReg();
+ CmpMask = ~0;
+ CmpValue = 0;
+ return true;
+ case X86::SUB64ri32:
+ case X86::SUB64ri8:
+ case X86::SUB32ri:
+ case X86::SUB32ri8:
+ case X86::SUB16ri:
+ case X86::SUB16ri8:
+ case X86::SUB8ri:
+ SrcReg = MI->getOperand(1).getReg();
+ SrcReg2 = 0;
+ CmpMask = ~0;
+ CmpValue = MI->getOperand(2).getImm();
+ return true;
case X86::CMP64rr:
case X86::CMP32rr:
case X86::CMP16rr:
@@ -3145,6 +3113,55 @@ bool X86InstrInfo::
optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
int CmpMask, int CmpValue,
const MachineRegisterInfo *MRI) const {
+ // Check whether we can replace SUB with CMP.
+ unsigned NewOpcode = 0;
+ switch (CmpInstr->getOpcode()) {
+ default: break;
+ case X86::SUB64ri32:
+ case X86::SUB64ri8:
+ case X86::SUB32ri:
+ case X86::SUB32ri8:
+ case X86::SUB16ri:
+ case X86::SUB16ri8:
+ case X86::SUB8ri:
+ case X86::SUB64rm:
+ case X86::SUB32rm:
+ case X86::SUB16rm:
+ case X86::SUB8rm:
+ case X86::SUB64rr:
+ case X86::SUB32rr:
+ case X86::SUB16rr:
+ case X86::SUB8rr: {
+ if (!MRI->use_nodbg_empty(CmpInstr->getOperand(0).getReg()))
+ return false;
+    // There is no use of the destination register; we can replace SUB with CMP.
+ switch (CmpInstr->getOpcode()) {
+ default: llvm_unreachable("Unreachable!");
+ case X86::SUB64rm: NewOpcode = X86::CMP64rm; break;
+ case X86::SUB32rm: NewOpcode = X86::CMP32rm; break;
+ case X86::SUB16rm: NewOpcode = X86::CMP16rm; break;
+ case X86::SUB8rm: NewOpcode = X86::CMP8rm; break;
+ case X86::SUB64rr: NewOpcode = X86::CMP64rr; break;
+ case X86::SUB32rr: NewOpcode = X86::CMP32rr; break;
+ case X86::SUB16rr: NewOpcode = X86::CMP16rr; break;
+ case X86::SUB8rr: NewOpcode = X86::CMP8rr; break;
+ case X86::SUB64ri32: NewOpcode = X86::CMP64ri32; break;
+ case X86::SUB64ri8: NewOpcode = X86::CMP64ri8; break;
+ case X86::SUB32ri: NewOpcode = X86::CMP32ri; break;
+ case X86::SUB32ri8: NewOpcode = X86::CMP32ri8; break;
+ case X86::SUB16ri: NewOpcode = X86::CMP16ri; break;
+ case X86::SUB16ri8: NewOpcode = X86::CMP16ri8; break;
+ case X86::SUB8ri: NewOpcode = X86::CMP8ri; break;
+ }
+ CmpInstr->setDesc(get(NewOpcode));
+ CmpInstr->RemoveOperand(0);
+    // Fall through to optimize the new Cmp if it is CMPrr or CMPri.
+ if (NewOpcode == X86::CMP64rm || NewOpcode == X86::CMP32rm ||
+ NewOpcode == X86::CMP16rm || NewOpcode == X86::CMP8rm)
+ return false;
+ }
+ }
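+  // (Sketch of the rewrite above, illustrative only:
+  //    %r = SUB32rr %a, %b   ; %r has no uses, only EFLAGS matters
+  //  becomes
+  //    CMP32rr %a, %b        ; identical EFLAGS, dead def gone.)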
+
// Get the unique definition of SrcReg.
MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
if (!MI) return false;
@@ -3221,12 +3238,15 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
MachineBasicBlock::iterator E = CmpInstr->getParent()->end();
for (++I; I != E; ++I) {
const MachineInstr &Instr = *I;
- if (Instr.modifiesRegister(X86::EFLAGS, TRI)) {
+ bool ModifyEFLAGS = Instr.modifiesRegister(X86::EFLAGS, TRI);
+ bool UseEFLAGS = Instr.readsRegister(X86::EFLAGS, TRI);
+    // If this instruction both reads and writes EFLAGS, we must check how it
+    // uses them before deciding anything.
+ if (!UseEFLAGS && ModifyEFLAGS) {
// It is safe to remove CmpInstr if EFLAGS is updated again.
IsSafe = true;
break;
}
- if (!Instr.readsRegister(X86::EFLAGS, TRI))
+ if (!UseEFLAGS && !ModifyEFLAGS)
continue;
// EFLAGS is used by this instruction.
@@ -3281,7 +3301,8 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
// instructions will be modified.
OpsToUpdate.push_back(std::make_pair(&*I, NewOpc));
}
- if (Instr.killsRegister(X86::EFLAGS, TRI)) {
+ if (ModifyEFLAGS || Instr.killsRegister(X86::EFLAGS, TRI)) {
+ // It is safe to remove CmpInstr if EFLAGS is updated again or killed.
IsSafe = true;
break;
}
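    // (Illustrative scan, assumed MIR: with
    //    ... = SUB32rr %a, %b    ; MI, already defines EFLAGS
    //    CMP32rr %a, %b          ; CmpInstr, candidate for removal
    //    JE ...                  ; reads EFLAGS
    //  the loop above inspects every EFLAGS reader after CmpInstr before
    //  declaring the compare redundant.)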
@@ -3319,6 +3340,81 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
return true;
}
+/// optimizeLoadInstr - Try to remove the load by folding it into a register
+/// operand at the use. We fold the load only if it defines a virtual
+/// register that is used exactly once in the same BB, and the instructions
+/// in between neither load nor store and have no side effects.
+MachineInstr* X86InstrInfo::
+optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI,
+ unsigned &FoldAsLoadDefReg,
+ MachineInstr *&DefMI) const {
+ if (FoldAsLoadDefReg == 0)
+ return 0;
+  // To be conservative, clear the load candidate if another load is seen.
+ if (MI->mayLoad()) {
+ FoldAsLoadDefReg = 0;
+ return 0;
+ }
+
+ // Check whether we can move DefMI here.
+ DefMI = MRI->getVRegDef(FoldAsLoadDefReg);
+ assert(DefMI);
+ bool SawStore = false;
+ if (!DefMI->isSafeToMove(this, 0, SawStore))
+ return 0;
+
+ // We try to commute MI if possible.
+ unsigned IdxEnd = (MI->isCommutable()) ? 2 : 1;
+ for (unsigned Idx = 0; Idx < IdxEnd; Idx++) {
+ // Collect information about virtual register operands of MI.
+ unsigned SrcOperandId = 0;
+ bool FoundSrcOperand = false;
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg != FoldAsLoadDefReg)
+ continue;
+      // Do not fold if we have a subreg use, a def, or multiple uses.
+ if (MO.getSubReg() || MO.isDef() || FoundSrcOperand)
+ return 0;
+
+ SrcOperandId = i;
+ FoundSrcOperand = true;
+ }
+ if (!FoundSrcOperand) return 0;
+
+ // Check whether we can fold the def into SrcOperandId.
+ SmallVector<unsigned, 8> Ops;
+ Ops.push_back(SrcOperandId);
+ MachineInstr *FoldMI = foldMemoryOperand(MI, Ops, DefMI);
+ if (FoldMI) {
+ FoldAsLoadDefReg = 0;
+ return FoldMI;
+ }
+
+ if (Idx == 1) {
+      // MI was changed but it didn't help; commute it back!
+ commuteInstruction(MI, false);
+ return 0;
+ }
+
+ // Check whether we can commute MI and enable folding.
+ if (MI->isCommutable()) {
+ MachineInstr *NewMI = commuteInstruction(MI, false);
+ // Unable to commute.
+ if (!NewMI) return 0;
+ if (NewMI != MI) {
+        // Commuting produced a new instruction; it doesn't need to be kept.
+ NewMI->eraseFromParent();
+ return 0;
+ }
+ }
+ }
+ return 0;
+}
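+// (Sketch of the intended fold, virtual registers and a hypothetical address
+// assumed:
+//    %v1 = MOV32rm <mem>       ; DefMI, %v1 used exactly once below
+//    %v2 = ADD32rr %v0, %v1    ; MI, commutable
+//  becomes, via foldMemoryOperand(),
+//    %v2 = ADD32rm %v0, <mem>
+//  after which the now-dead load can be erased by the caller.)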
+
/// Expand2AddrUndef - Expand a single-def pseudo instruction to a two-addr
/// instruction with two undef reads of the register being defined. This is
/// used for mapping:
@@ -3477,6 +3573,8 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
OpcodeTablePtr = &RegOp2MemOpTable1;
} else if (i == 2) {
OpcodeTablePtr = &RegOp2MemOpTable2;
+ } else if (i == 3) {
+ OpcodeTablePtr = &RegOp2MemOpTable3;
}
// If table selected...
@@ -3947,7 +4045,6 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
getUndefRegState(MO.isUndef()));
}
// Change CMP32ri r, 0 back to TEST32rr r, r, etc.
- unsigned NewOpc = 0;
switch (DataMI->getOpcode()) {
default: break;
case X86::CMP64ri32:
@@ -3960,8 +4057,9 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
MachineOperand &MO0 = DataMI->getOperand(0);
MachineOperand &MO1 = DataMI->getOperand(1);
if (MO1.getImm() == 0) {
+ unsigned NewOpc;
switch (DataMI->getOpcode()) {
- default: break;
+ default: llvm_unreachable("Unreachable!");
case X86::CMP64ri8:
case X86::CMP64ri32: NewOpc = X86::TEST64rr; break;
case X86::CMP32ri8:
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index ec9b2e6..b6f69af 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -387,6 +387,18 @@ public:
unsigned SrcReg2, int CmpMask, int CmpValue,
const MachineRegisterInfo *MRI) const;
+  /// optimizeLoadInstr - Try to remove the load by folding it into a register
+  /// operand at the use. We fold the load if and only if the def and the use
+  /// are in the same BB. We only look at one load and see whether it can be
+  /// folded into MI. FoldAsLoadDefReg is the virtual register defined by the
+  /// load we are trying to fold. DefMI returns the machine instruction that
+  /// defines FoldAsLoadDefReg, and the function returns the machine
+  /// instruction generated by the folding.
+ virtual MachineInstr* optimizeLoadInstr(MachineInstr *MI,
+ const MachineRegisterInfo *MRI,
+ unsigned &FoldAsLoadDefReg,
+ MachineInstr *&DefMI) const;
+
private:
MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
MachineFunction::iterator &MFI,
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index e4edd36..c8f40bb 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -251,7 +251,7 @@ def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
(iPTR 0))))))],
IIC_MMX_MOVQ_RR>;
-def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst),
+def MMX_MOVQ2DQrr : S2SIi8<0xD6, MRMSrcReg, (outs VR128:$dst),
(ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (scalar_to_vector
@@ -259,7 +259,7 @@ def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst),
IIC_MMX_MOVQ_RR>;
let neverHasSideEffects = 1 in
-def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst),
+def MMX_MOVQ2FR64rr: S2SIi8<0xD6, MRMSrcReg, (outs FR64:$dst),
(ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", [],
IIC_MMX_MOVQ_RR>;
@@ -554,20 +554,6 @@ def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src),
(int_x86_mmx_pmovmskb VR64:$src))]>;
-// MMX to XMM for vector types
-def MMX_X86movq2dq : SDNode<"X86ISD::MOVQ2DQ", SDTypeProfile<1, 1,
- [SDTCisVT<0, v2i64>, SDTCisVT<1, x86mmx>]>>;
-
-def : Pat<(v2i64 (MMX_X86movq2dq VR64:$src)),
- (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
-
-def : Pat<(v2i64 (MMX_X86movq2dq (load_mmx addr:$src))),
- (v2i64 (MOVQI2PQIrm addr:$src))>;
-
-def : Pat<(v2i64 (MMX_X86movq2dq
- (x86mmx (scalar_to_vector (loadi32 addr:$src))))),
- (v2i64 (MOVDI2PDIrm addr:$src))>;
-
// Low word of XMM to MMX.
def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1,
[SDTCisVT<0, x86mmx>, SDTCisVT<1, v2i64>]>>;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index c2d169a..220c06d 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -245,9 +245,9 @@ multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
// A vector extract of the first f32/f64 position is a subregister copy
def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
- (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+ (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32)>;
def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
- (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
+ (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64)>;
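+// (Editorial note, our reading of the change: the sub_ss/sub_sd subregister
+// indices are being phased out, so the zero-cost extract is modeled as a
+// register-class copy, which the coalescer can eliminate, rather than as an
+// EXTRACT_SUBREG.)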
// A 128-bit subvector extract from the first 256-bit vector position
// is a subregister copy that needs no instruction.
@@ -283,14 +283,14 @@ def : Pat<(insert_subvector undef, (v16i8 VR128:$src), (i32 0)),
// Implicitly promote a 32-bit scalar to a vector.
def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
+ (COPY_TO_REGCLASS FR32:$src, VR128)>;
def : Pat<(v8f32 (scalar_to_vector FR32:$src)),
- (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
+ (COPY_TO_REGCLASS FR32:$src, VR128)>;
// Implicitly promote a 64-bit scalar to a vector.
def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
- (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
+ (COPY_TO_REGCLASS FR64:$src, VR128)>;
def : Pat<(v4f64 (scalar_to_vector FR64:$src)),
- (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
+ (COPY_TO_REGCLASS FR64:$src, VR128)>;
// Bitcasts between 128-bit vector types. Return the original type since
// no instruction is needed for the conversion
@@ -562,59 +562,57 @@ let Predicates = [HasAVX] in {
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
(VMOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
- (VMOVSSrr (v4f32 (V_SET0)),
- (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>;
+ (VMOVSSrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>;
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
- (VMOVSSrr (v4i32 (V_SET0)),
- (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
+ (VMOVSSrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>;
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
(VMOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
// Move low f32 and clear high bits.
def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
(SUBREG_TO_REG (i32 0),
- (VMOVSSrr (v4f32 (V_SET0)),
- (EXTRACT_SUBREG (v8f32 VR256:$src), sub_ss)), sub_xmm)>;
+ (VMOVSSrr (v4f32 (V_SET0)),
+ (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
(SUBREG_TO_REG (i32 0),
- (VMOVSSrr (v4i32 (V_SET0)),
- (EXTRACT_SUBREG (v8i32 VR256:$src), sub_ss)), sub_xmm)>;
+ (VMOVSSrr (v4i32 (V_SET0)),
+ (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)), sub_xmm)>;
}
let AddedComplexity = 20 in {
// MOVSSrm zeros the high parts of the register; represent this
// with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
- (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
+ (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;
def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
- (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
+ (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;
def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
- (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
+ (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;
// MOVSDrm zeros the high parts of the register; represent this
// with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
- (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
+ (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
- (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
+ (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
- (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
+ (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
- (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
+ (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzload addr:$src)),
- (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
+ (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
// Represent the same patterns above but in the form they appear for
// 256-bit types
def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
(v4i32 (scalar_to_vector (loadi32 addr:$src))), (i32 0)))),
- (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
+ (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
(v4f32 (scalar_to_vector (loadf32 addr:$src))), (i32 0)))),
- (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
+ (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
(v2f64 (scalar_to_vector (loadf64 addr:$src))), (i32 0)))),
- (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_sd)>;
+ (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
}
def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
(v4f32 (scalar_to_vector FR32:$src)), (i32 0)))),
@@ -628,70 +626,68 @@ let Predicates = [HasAVX] in {
sub_xmm)>;
def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
(v2i64 (scalar_to_vector (loadi64 addr:$src))), (i32 0)))),
- (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
+ (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_xmm)>;
// Move low f64 and clear high bits.
def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
(SUBREG_TO_REG (i32 0),
- (VMOVSDrr (v2f64 (V_SET0)),
- (EXTRACT_SUBREG (v4f64 VR256:$src), sub_sd)), sub_xmm)>;
+ (VMOVSDrr (v2f64 (V_SET0)),
+ (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
(SUBREG_TO_REG (i32 0),
- (VMOVSDrr (v2i64 (V_SET0)),
- (EXTRACT_SUBREG (v4i64 VR256:$src), sub_sd)), sub_xmm)>;
+ (VMOVSDrr (v2i64 (V_SET0)),
+ (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)), sub_xmm)>;
// Extract and store.
def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
addr:$dst),
- (VMOVSSmr addr:$dst,
- (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+ (VMOVSSmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32))>;
def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
addr:$dst),
- (VMOVSDmr addr:$dst,
- (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
+ (VMOVSDmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64))>;
// Shuffle with VMOVSS
def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
(VMOVSSrr (v4i32 VR128:$src1),
- (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
+ (COPY_TO_REGCLASS (v4i32 VR128:$src2), FR32))>;
def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
(VMOVSSrr (v4f32 VR128:$src1),
- (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+ (COPY_TO_REGCLASS (v4f32 VR128:$src2), FR32))>;
// 256-bit variants
def : Pat<(v8i32 (X86Movss VR256:$src1, VR256:$src2)),
(SUBREG_TO_REG (i32 0),
- (VMOVSSrr (EXTRACT_SUBREG (v8i32 VR256:$src1), sub_ss),
- (EXTRACT_SUBREG (v8i32 VR256:$src2), sub_ss)), sub_xmm)>;
+ (VMOVSSrr (EXTRACT_SUBREG (v8i32 VR256:$src1), sub_xmm),
+ (EXTRACT_SUBREG (v8i32 VR256:$src2), sub_xmm)),
+ sub_xmm)>;
def : Pat<(v8f32 (X86Movss VR256:$src1, VR256:$src2)),
(SUBREG_TO_REG (i32 0),
- (VMOVSSrr (EXTRACT_SUBREG (v8f32 VR256:$src1), sub_ss),
- (EXTRACT_SUBREG (v8f32 VR256:$src2), sub_ss)), sub_xmm)>;
+ (VMOVSSrr (EXTRACT_SUBREG (v8f32 VR256:$src1), sub_xmm),
+ (EXTRACT_SUBREG (v8f32 VR256:$src2), sub_xmm)),
+ sub_xmm)>;
// Shuffle with VMOVSD
def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
- (VMOVSDrr (v2i64 VR128:$src1),
- (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
+ (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
- (VMOVSDrr (v2f64 VR128:$src1),
- (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
+ (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
- (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),
- sub_sd))>;
+ (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
- (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),
- sub_sd))>;
+ (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
// 256-bit variants
def : Pat<(v4i64 (X86Movsd VR256:$src1, VR256:$src2)),
(SUBREG_TO_REG (i32 0),
- (VMOVSDrr (EXTRACT_SUBREG (v4i64 VR256:$src1), sub_sd),
- (EXTRACT_SUBREG (v4i64 VR256:$src2), sub_sd)), sub_xmm)>;
+ (VMOVSDrr (EXTRACT_SUBREG (v4i64 VR256:$src1), sub_xmm),
+ (EXTRACT_SUBREG (v4i64 VR256:$src2), sub_xmm)),
+ sub_xmm)>;
def : Pat<(v4f64 (X86Movsd VR256:$src1, VR256:$src2)),
(SUBREG_TO_REG (i32 0),
- (VMOVSDrr (EXTRACT_SUBREG (v4f64 VR256:$src1), sub_sd),
- (EXTRACT_SUBREG (v4f64 VR256:$src2), sub_sd)), sub_xmm)>;
+ (VMOVSDrr (EXTRACT_SUBREG (v4f64 VR256:$src1), sub_xmm),
+ (EXTRACT_SUBREG (v4f64 VR256:$src2), sub_xmm)),
+ sub_xmm)>;
// FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
@@ -699,17 +695,13 @@ let Predicates = [HasAVX] in {
// it has two uses through a bitcast. One use disappears at isel time and the
// fold opportunity reappears.
def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)),
- (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2),
- sub_sd))>;
+ (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)),
- (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2i64 VR128:$src2),
- sub_sd))>;
+ (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
- (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),
- sub_sd))>;
+ (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
- (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),
- sub_sd))>;
+ (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
}
let Predicates = [HasSSE1] in {
@@ -719,37 +711,31 @@ let Predicates = [HasSSE1] in {
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
(MOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
- (MOVSSrr (v4f32 (V_SET0)),
- (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>;
+ (MOVSSrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>;
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
- (MOVSSrr (v4i32 (V_SET0)),
- (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
+ (MOVSSrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>;
}
let AddedComplexity = 20 in {
- // MOVSSrm zeros the high parts of the register; represent this
- // with SUBREG_TO_REG.
+ // MOVSSrm already zeros the high parts of the register.
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
- (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
+ (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
- (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
+ (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
- (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
+ (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
}
// Extract and store.
def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
addr:$dst),
- (MOVSSmr addr:$dst,
- (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+ (MOVSSmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR32))>;
// Shuffle with MOVSS
def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
- (MOVSSrr (v4i32 VR128:$src1),
- (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
+ (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR32))>;
def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
- (MOVSSrr (v4f32 VR128:$src1),
- (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+ (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR32))>;
}
let Predicates = [HasSSE2] in {
@@ -761,50 +747,46 @@ let Predicates = [HasSSE2] in {
}
let AddedComplexity = 20 in {
- // MOVSDrm zeros the high parts of the register; represent this
- // with SUBREG_TO_REG.
+ // MOVSDrm already zeros the high parts of the register.
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzload addr:$src)),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
}
// Extract and store.
def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
addr:$dst),
- (MOVSDmr addr:$dst,
- (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
+ (MOVSDmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR64))>;
// Shuffle with MOVSD
def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
- (MOVSDrr (v2i64 VR128:$src1),
- (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
+ (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
- (MOVSDrr (v2f64 VR128:$src1),
- (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
+ (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>;
+ (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>;
+ (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
// FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
// is during lowering, where it's not possible to recognize the fold because
// it has two uses through a bitcast. One use disappears at isel time and the
// fold opportunity reappears.
def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2),sub_sd))>;
+ (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2i64 VR128:$src2),sub_sd))>;
+ (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>;
+ (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>;
+ (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
}
//===----------------------------------------------------------------------===//
@@ -1416,14 +1398,15 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
}
multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
- SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
- string asm, Domain d, OpndItins itins> {
+ X86MemOperand x86memop, string asm, Domain d,
+ OpndItins itins> {
+let neverHasSideEffects = 1 in {
def rr : I<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
- [(set DstRC:$dst, (OpNode SrcRC:$src))],
- itins.rr, d>;
+ [], itins.rr, d>;
+ let mayLoad = 1 in
def rm : I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
- [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))],
- itins.rm, d>;
+ [], itins.rm, d>;
+}
}
multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
@@ -1443,7 +1426,7 @@ defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
SSE_CVT_SS2SI_32>,
XS, VEX, VEX_LIG;
defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
- "cvttss2si\t{$src, $dst|$dst, $src}",
+ "cvttss2si{q}\t{$src, $dst|$dst, $src}",
SSE_CVT_SS2SI_64>,
XS, VEX, VEX_W, VEX_LIG;
defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
@@ -1451,7 +1434,7 @@ defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
SSE_CVT_SD2SI>,
XD, VEX, VEX_LIG;
defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
- "cvttsd2si\t{$src, $dst|$dst, $src}",
+ "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
SSE_CVT_SD2SI>,
XD, VEX, VEX_W, VEX_LIG;
@@ -1465,11 +1448,14 @@ defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">,
XS, VEX_4V, VEX_W, VEX_LIG;
defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">,
XD, VEX_4V, VEX_LIG;
-defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">,
- XD, VEX_4V, VEX_LIG;
defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">,
XD, VEX_4V, VEX_W, VEX_LIG;
+def : InstAlias<"vcvtsi2sd{l}\t{$src, $src1, $dst|$dst, $src1, $src}",
+ (VCVTSI2SDrr FR64:$dst, FR64:$src1, GR32:$src)>;
+def : InstAlias<"vcvtsi2sd{l}\t{$src, $src1, $dst|$dst, $src1, $src}",
+ (VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src)>;
+
let Predicates = [HasAVX], AddedComplexity = 1 in {
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
(VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
@@ -1519,14 +1505,14 @@ defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
// and/or XMM operand(s).
multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
- Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
+ Intrinsic Int, Operand memop, ComplexPattern mem_cpat,
string asm, OpndItins itins> {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
[(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>;
- def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst, (Int (ld_frag addr:$src)))], itins.rm>;
+ [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>;
}
multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
@@ -1548,30 +1534,31 @@ multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
itins.rm>;
}
-defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
- f128mem, load, "cvtsd2si", SSE_CVT_SD2SI>, XD, VEX, VEX_LIG;
+defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32,
+ int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si{l}",
+ SSE_CVT_SD2SI>, XD, VEX, VEX_LIG;
defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
- int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si",
- SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG;
+ int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si{q}",
+ SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG;
defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
- f128mem, load, "cvtsd2si{l}", SSE_CVT_SD2SI>, XD;
+ sdmem, sse_load_f64, "cvtsd2si{l}", SSE_CVT_SD2SI>, XD;
defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
- f128mem, load, "cvtsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W;
+ sdmem, sse_load_f64, "cvtsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W;
defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss",
SSE_CVT_Scalar, 0>, XS, VEX_4V;
defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
- int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss",
+ int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}",
SSE_CVT_Scalar, 0>, XS, VEX_4V,
VEX_W;
defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd",
SSE_CVT_Scalar, 0>, XD, VEX_4V;
defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
- int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd",
+ int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}",
SSE_CVT_Scalar, 0>, XD,
VEX_4V, VEX_W;
@@ -1587,96 +1574,71 @@ let Constraints = "$src1 = $dst" in {
"cvtsi2sd", SSE_CVT_Scalar>, XD;
defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
int_x86_sse2_cvtsi642sd, i64mem, loadi64,
- "cvtsi2sd", SSE_CVT_Scalar>, XD, REX_W;
+ "cvtsi2sd{q}", SSE_CVT_Scalar>, XD, REX_W;
}
/// SSE 1 Only
// Aliases for intrinsics
defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
- f32mem, load, "cvttss2si",
+ ssmem, sse_load_f32, "cvttss2si",
SSE_CVT_SS2SI_32>, XS, VEX;
defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
- int_x86_sse_cvttss2si64, f32mem, load,
- "cvttss2si", SSE_CVT_SS2SI_64>,
- XS, VEX, VEX_W;
+ int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
+ "cvttss2si{q}", SSE_CVT_SS2SI_64>,
+ XS, VEX, VEX_W;
defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
- f128mem, load, "cvttsd2si", SSE_CVT_SD2SI>,
- XD, VEX;
+ sdmem, sse_load_f64, "cvttsd2si",
+ SSE_CVT_SD2SI>, XD, VEX;
defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
- int_x86_sse2_cvttsd2si64, f128mem, load,
- "cvttsd2si", SSE_CVT_SD2SI>,
- XD, VEX, VEX_W;
+ int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
+ "cvttsd2si{q}", SSE_CVT_SD2SI>,
+ XD, VEX, VEX_W;
defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
- f32mem, load, "cvttss2si",
+ ssmem, sse_load_f32, "cvttss2si",
SSE_CVT_SS2SI_32>, XS;
defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
- int_x86_sse_cvttss2si64, f32mem, load,
- "cvttss2si{q}", SSE_CVT_SS2SI_64>,
- XS, REX_W;
+ int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
+ "cvttss2si{q}", SSE_CVT_SS2SI_64>, XS, REX_W;
defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
- f128mem, load, "cvttsd2si", SSE_CVT_SD2SI>,
- XD;
+ sdmem, sse_load_f64, "cvttsd2si",
+ SSE_CVT_SD2SI>, XD;
defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
- int_x86_sse2_cvttsd2si64, f128mem, load,
- "cvttsd2si{q}", SSE_CVT_SD2SI>,
- XD, REX_W;
-
-let Pattern = []<dag>, neverHasSideEffects = 1 in {
-defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
- "cvtss2si{l}\t{$src, $dst|$dst, $src}",
- SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG;
-defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load,
- "cvtss2si\t{$src, $dst|$dst, $src}",
- SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG;
-defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load,
+ int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
+ "cvttsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W;
+
+defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
+ ssmem, sse_load_f32, "cvtss2si{l}",
+ SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG;
+defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
+ ssmem, sse_load_f32, "cvtss2si{q}",
+ SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG;
+
+defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
+ ssmem, sse_load_f32, "cvtss2si{l}",
+ SSE_CVT_SS2SI_32>, XS;
+defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
+ ssmem, sse_load_f32, "cvtss2si{q}",
+ SSE_CVT_SS2SI_64>, XS, REX_W;
+
+defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
"vcvtdq2ps\t{$src, $dst|$dst, $src}",
- SSEPackedSingle, SSE_CVT_PS>, TB, VEX,
- Requires<[HasAVX]>;
-defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, i256mem, load,
+ SSEPackedSingle, SSE_CVT_PS>,
+ TB, VEX, Requires<[HasAVX]>;
+defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, i256mem,
"vcvtdq2ps\t{$src, $dst|$dst, $src}",
- SSEPackedSingle, SSE_CVT_PS>, TB, VEX,
- Requires<[HasAVX]>;
-}
-
-let Pattern = []<dag>, neverHasSideEffects = 1 in {
-defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/,
- "cvtss2si{l}\t{$src, $dst|$dst, $src}",
- SSE_CVT_SS2SI_32>, XS;
-defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load /*dummy*/,
- "cvtss2si{q}\t{$src, $dst|$dst, $src}",
- SSE_CVT_SS2SI_64>, XS, REX_W;
-defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/,
- "cvtdq2ps\t{$src, $dst|$dst, $src}",
- SSEPackedSingle, SSE_CVT_PS>, TB,
- Requires<[HasSSE2]>;
-}
+ SSEPackedSingle, SSE_CVT_PS>,
+ TB, VEX, Requires<[HasAVX]>;
-let Predicates = [HasAVX] in {
- def : Pat<(int_x86_sse_cvtss2si VR128:$src),
- (VCVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
- def : Pat<(int_x86_sse_cvtss2si (load addr:$src)),
- (VCVTSS2SIrm addr:$src)>;
- def : Pat<(int_x86_sse_cvtss2si64 VR128:$src),
- (VCVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
- def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)),
- (VCVTSS2SI64rm addr:$src)>;
-}
-
-let Predicates = [HasSSE1] in {
- def : Pat<(int_x86_sse_cvtss2si VR128:$src),
- (CVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
- def : Pat<(int_x86_sse_cvtss2si (load addr:$src)),
- (CVTSS2SIrm addr:$src)>;
- def : Pat<(int_x86_sse_cvtss2si64 VR128:$src),
- (CVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
- def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)),
- (CVTSS2SI64rm addr:$src)>;
-}
+defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
+ "cvtdq2ps\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle, SSE_CVT_PS>,
+ TB, Requires<[HasSSE2]>;
/// SSE 2 Only
// Convert scalar double to scalar single
+let neverHasSideEffects = 1 in {
def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
(ins FR64:$src1, FR64:$src2),
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
@@ -1687,6 +1649,7 @@ def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], IIC_SSE_CVT_Scalar_RM>,
XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG;
+}
def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
Requires<[HasAVX]>;
@@ -1702,17 +1665,37 @@ def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
XD,
Requires<[HasSSE2, OptForSize]>;
-defm Int_VCVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
- int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss",
- SSE_CVT_Scalar, 0>,
- XS, VEX_4V;
-let Constraints = "$src1 = $dst" in
-defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
- int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss",
- SSE_CVT_Scalar>, XS;
+def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
+ IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>;
+def Int_VCVTSD2SSrm: I<0x5A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
+ "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtsd2ss
+ VR128:$src1, sse_load_f64:$src2))],
+ IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>;
+
+let Constraints = "$src1 = $dst" in {
+def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
+ IIC_SSE_CVT_Scalar_RR>, XD, Requires<[HasSSE2]>;
+def Int_CVTSD2SSrm: I<0x5A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
+ "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtsd2ss
+ VR128:$src1, sse_load_f64:$src2))],
+ IIC_SSE_CVT_Scalar_RM>, XD, Requires<[HasSSE2]>;
+}
// Convert scalar single to scalar double
// SSE2 instructions with XS prefix
+let neverHasSideEffects = 1 in {
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
(ins FR32:$src1, FR32:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -1724,19 +1707,21 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], IIC_SSE_CVT_Scalar_RM>,
XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>;
+}
-let Predicates = [HasAVX] in {
+let AddedComplexity = 1 in { // give AVX priority
def : Pat<(f64 (fextend FR32:$src)),
- (VCVTSS2SDrr FR32:$src, FR32:$src)>;
+ (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[HasAVX]>;
def : Pat<(fextend (loadf32 addr:$src)),
- (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>;
- def : Pat<(extloadf32 addr:$src),
- (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>;
-}
+ (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX]>;
-def : Pat<(extloadf32 addr:$src),
- (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (MOVSSrm addr:$src))>,
- Requires<[HasAVX, OptForSpeed]>;
+ def : Pat<(extloadf32 addr:$src),
+ (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasAVX, OptForSize]>;
+ def : Pat<(extloadf32 addr:$src),
+ (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>,
+ Requires<[HasAVX, OptForSpeed]>;
+} // AddedComplexity = 1
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
@@ -1762,67 +1747,60 @@ def : Pat<(extloadf32 addr:$src),
def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
- VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V,
- Requires<[HasAVX]>;
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
+ IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>;
def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
+ (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
- (load addr:$src2)))],
- IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V,
- Requires<[HasAVX]>;
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
+ IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>;
let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"cvtss2sd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
- VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XS,
- Requires<[HasSSE2]>;
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
+ IIC_SSE_CVT_Scalar_RR>, XS, Requires<[HasSSE2]>;
def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
+ (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"cvtss2sd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
- (load addr:$src2)))],
- IIC_SSE_CVT_Scalar_RM>, XS,
- Requires<[HasSSE2]>;
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
+ IIC_SSE_CVT_Scalar_RM>, XS, Requires<[HasSSE2]>;
}
// Convert packed single/double fp to doubleword
def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", [],
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
IIC_SSE_CVT_PS_RR>, VEX;
def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", [],
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))],
IIC_SSE_CVT_PS_RM>, VEX;
def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", [],
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst,
+ (int_x86_avx_cvt_ps2dq_256 VR256:$src))],
IIC_SSE_CVT_PS_RR>, VEX;
def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", [],
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst,
+ (int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)))],
IIC_SSE_CVT_PS_RM>, VEX;
def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", [],
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
IIC_SSE_CVT_PS_RR>;
def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", [],
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))],
IIC_SSE_CVT_PS_RM>;
-let Predicates = [HasAVX] in {
- def : Pat<(int_x86_sse2_cvtps2dq VR128:$src),
- (VCVTPS2DQrr VR128:$src)>;
- def : Pat<(int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)),
- (VCVTPS2DQrm addr:$src)>;
-}
-
-let Predicates = [HasSSE2] in {
- def : Pat<(int_x86_sse2_cvtps2dq VR128:$src),
- (CVTPS2DQrr VR128:$src)>;
- def : Pat<(int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)),
- (CVTPS2DQrm addr:$src)>;
-}
// Convert Packed Double FP to Packed DW Integers
let Predicates = [HasAVX] in {
@@ -1830,77 +1808,74 @@ let Predicates = [HasAVX] in {
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+ "vcvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
+ VEX;
// XMM only
def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
(VCVTPD2DQrr VR128:$dst, VR128:$src)>;
def VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
+ "vcvtpd2dqx\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))]>, VEX;
// YMM only
def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
- "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", []>, VEX;
+ "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX;
def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
- "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
+ "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)))]>,
+ VEX, VEX_L;
def : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}",
(VCVTPD2DQYrr VR128:$dst, VR256:$src)>;
}
def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtpd2dq\t{$src, $dst|$dst, $src}", [],
+ "cvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>;
def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2dq\t{$src, $dst|$dst, $src}", [],
+ "cvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))],
IIC_SSE_CVT_PD_RR>;
-let Predicates = [HasAVX] in {
- def : Pat<(int_x86_sse2_cvtpd2dq VR128:$src),
- (VCVTPD2DQrr VR128:$src)>;
- def : Pat<(int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)),
- (VCVTPD2DQXrm addr:$src)>;
-}
-
-let Predicates = [HasSSE2] in {
- def : Pat<(int_x86_sse2_cvtpd2dq VR128:$src),
- (CVTPD2DQrr VR128:$src)>;
- def : Pat<(int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)),
- (CVTPD2DQrm addr:$src)>;
-}
-
// Convert with truncation packed single/double fp to doubleword
// SSE2 packed instructions with XS prefix
-def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (int_x86_sse2_cvttps2dq VR128:$src))],
- IIC_SSE_CVT_PS_RR>, VEX;
-def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvttps2dq
- (memopv4f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>, VEX;
-def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR256:$dst,
- (int_x86_avx_cvtt_ps2dq_256 VR256:$src))],
- IIC_SSE_CVT_PS_RR>, VEX;
-def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ [(set VR128:$dst,
+ (int_x86_sse2_cvttps2dq VR128:$src))],
+ IIC_SSE_CVT_PS_RR>, VEX;
+def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256
- (memopv8f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>, VEX;
-
-def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (int_x86_sse2_cvttps2dq VR128:$src))],
- IIC_SSE_CVT_PS_RR>;
-def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>;
+ [(set VR128:$dst, (int_x86_sse2_cvttps2dq
+ (memopv4f32 addr:$src)))],
+ IIC_SSE_CVT_PS_RM>, VEX;
+def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst,
+ (int_x86_avx_cvtt_ps2dq_256 VR256:$src))],
+ IIC_SSE_CVT_PS_RR>, VEX;
+def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256
+ (memopv8f32 addr:$src)))],
+ IIC_SSE_CVT_PS_RM>, VEX;
+
+def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))],
+ IIC_SSE_CVT_PS_RR>;
+def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))],
+ IIC_SSE_CVT_PS_RM>;
let Predicates = [HasAVX] in {
def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
@@ -1952,16 +1927,6 @@ def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
(int_x86_sse2_cvttpd2dq VR128:$src))],
IIC_SSE_CVT_PD_RR>, VEX;
-def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvttpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))],
- IIC_SSE_CVT_PD_RR>;
-def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
- "cvttpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
- (memopv2f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>;
-
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
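// For example, "vcvttpd2dq (%rax), %xmm0" is ambiguous on its own, since the
// memory operand could be either 128 or 256 bits wide; the x/y suffixed
// mnemonics let assembly name the source width explicitly.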
@@ -1977,10 +1942,14 @@ def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
// YMM only
def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
- "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", [],
+ "cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_avx_cvtt_pd2dq_256 VR256:$src))],
IIC_SSE_CVT_PD_RR>, VEX;
def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
- "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", [],
+ "cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>, VEX, VEX_L;
def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}",
(VCVTTPD2DQYrr VR128:$dst, VR256:$src)>;
@@ -1992,82 +1961,82 @@ let Predicates = [HasAVX] in {
(VCVTTPD2DQYrm addr:$src)>;
} // Predicates = [HasAVX]
+def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))],
+ IIC_SSE_CVT_PD_RR>;
+def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
+ (memopv2f64 addr:$src)))],
+ IIC_SSE_CVT_PD_RM>;
+
// Convert packed single to packed double
let Predicates = [HasAVX] in {
// SSE2 instructions without OpSize prefix
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}", [],
+ "vcvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
IIC_SSE_CVT_PD_RR>, TB, VEX;
+let neverHasSideEffects = 1, mayLoad = 1 in
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}", [],
IIC_SSE_CVT_PD_RM>, TB, VEX;
def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}", [],
+ "vcvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst,
+ (int_x86_avx_cvt_ps2_pd_256 VR128:$src))],
IIC_SSE_CVT_PD_RR>, TB, VEX;
def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}", [],
+ "vcvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst,
+ (int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)))],
IIC_SSE_CVT_PD_RM>, TB, VEX;
}
let Predicates = [HasSSE2] in {
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtps2pd\t{$src, $dst|$dst, $src}", [],
+ "cvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
IIC_SSE_CVT_PD_RR>, TB;
+let neverHasSideEffects = 1, mayLoad = 1 in
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}", [],
IIC_SSE_CVT_PD_RM>, TB;
}
-let Predicates = [HasAVX] in {
- def : Pat<(int_x86_sse2_cvtps2pd VR128:$src),
- (VCVTPS2PDrr VR128:$src)>;
-}
-
-let Predicates = [HasSSE2] in {
- def : Pat<(int_x86_sse2_cvtps2pd VR128:$src),
- (CVTPS2PDrr VR128:$src)>;
-}
-
// Convert Packed DW Integers to Packed Double FP
let Predicates = [HasAVX] in {
-def VCVTDQ2PDrm : SSDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
-def VCVTDQ2PDrr : SSDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
-def VCVTDQ2PDYrm : SSDI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
-def VCVTDQ2PDYrr : SSDI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+let neverHasSideEffects = 1, mayLoad = 1 in
+def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+ []>, VEX;
+def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX;
+def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst,
+ (int_x86_avx_cvtdq2_pd_256
+ (bitconvert (memopv2i64 addr:$src))))]>, VEX;
+def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst,
+ (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX;
}
-def CVTDQ2PDrm : SSDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+let neverHasSideEffects = 1, mayLoad = 1 in
+def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}", [],
                       IIC_SSE_CVT_PD_RM>;
-def CVTDQ2PDrr : SSDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtdq2pd\t{$src, $dst|$dst, $src}", [],
+def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))],
                      IIC_SSE_CVT_PD_RR>;
-// 128 bit register conversion intrinsics
-let Predicates = [HasAVX] in
-def : Pat<(int_x86_sse2_cvtdq2pd VR128:$src),
- (VCVTDQ2PDrr VR128:$src)>;
-
-let Predicates = [HasSSE2] in
-def : Pat<(int_x86_sse2_cvtdq2pd VR128:$src),
- (CVTDQ2PDrr VR128:$src)>;
-
// AVX 256-bit register conversion intrinsics
let Predicates = [HasAVX] in {
- def : Pat<(int_x86_avx_cvtdq2_pd_256 VR128:$src),
- (VCVTDQ2PDYrr VR128:$src)>;
- def : Pat<(int_x86_avx_cvtdq2_pd_256 (bitconvert (memopv2i64 addr:$src))),
- (VCVTDQ2PDYrm addr:$src)>;
-
- def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src),
- (VCVTPD2DQYrr VR256:$src)>;
- def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)),
- (VCVTPD2DQYrm addr:$src)>;
-
def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))),
(VCVTDQ2PDYrr VR128:$src)>;
def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
@@ -2079,48 +2048,44 @@ let Predicates = [HasAVX] in {
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2ps\t{$src, $dst|$dst, $src}", [],
+ "cvtpd2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
IIC_SSE_CVT_PD_RR>, VEX;
// XMM only
def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
(VCVTPD2PSrr VR128:$dst, VR128:$src)>;
def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtpd2psx\t{$src, $dst|$dst, $src}", [],
+ "cvtpd2psx\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>, VEX;
// YMM only
def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
- "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", [],
+ "cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_avx_cvt_pd2_ps_256 VR256:$src))],
IIC_SSE_CVT_PD_RR>, VEX;
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
- "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", [],
+ "cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>, VEX, VEX_L;
def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}",
(VCVTPD2PSYrr VR128:$dst, VR256:$src)>;
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2ps\t{$src, $dst|$dst, $src}", [],
+ "cvtpd2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
IIC_SSE_CVT_PD_RR>;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtpd2ps\t{$src, $dst|$dst, $src}", [],
+ "cvtpd2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>;
-let Predicates = [HasAVX] in {
- def : Pat<(int_x86_sse2_cvtpd2ps VR128:$src),
- (VCVTPD2PSrr VR128:$src)>;
- def : Pat<(int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)),
- (VCVTPD2PSXrm addr:$src)>;
-}
-
-let Predicates = [HasSSE2] in {
- def : Pat<(int_x86_sse2_cvtpd2ps VR128:$src),
- (CVTPD2PSrr VR128:$src)>;
- def : Pat<(int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)),
- (CVTPD2PSrm addr:$src)>;
-}
-
// AVX 256-bit register conversion intrinsics
// FIXME: Migrate SSE conversion intrinsics matching to use patterns as below
// whenever possible to avoid declaring two versions of each one.
@@ -2130,38 +2095,26 @@ let Predicates = [HasAVX] in {
def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (memopv4i64 addr:$src))),
(VCVTDQ2PSYrm addr:$src)>;
- def : Pat<(int_x86_avx_cvt_pd2_ps_256 VR256:$src),
- (VCVTPD2PSYrr VR256:$src)>;
- def : Pat<(int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)),
- (VCVTPD2PSYrm addr:$src)>;
-
- def : Pat<(int_x86_avx_cvt_ps2dq_256 VR256:$src),
- (VCVTPS2DQYrr VR256:$src)>;
- def : Pat<(int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)),
- (VCVTPS2DQYrm addr:$src)>;
-
- def : Pat<(int_x86_avx_cvt_ps2_pd_256 VR128:$src),
- (VCVTPS2PDYrr VR128:$src)>;
- def : Pat<(int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)),
- (VCVTPS2PDYrm addr:$src)>;
-
- def : Pat<(int_x86_avx_cvtt_pd2dq_256 VR256:$src),
- (VCVTTPD2DQYrr VR256:$src)>;
- def : Pat<(int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)),
- (VCVTTPD2DQYrm addr:$src)>;
-
// Match fround and fextend for 128/256-bit conversions
def : Pat<(v4f32 (fround (v4f64 VR256:$src))),
(VCVTPD2PSYrr VR256:$src)>;
def : Pat<(v4f32 (fround (loadv4f64 addr:$src))),
(VCVTPD2PSYrm addr:$src)>;
+ def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
+ (VCVTPS2PDrr VR128:$src)>;
def : Pat<(v4f64 (fextend (v4f32 VR128:$src))),
(VCVTPS2PDYrr VR128:$src)>;
def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))),
(VCVTPS2PDYrm addr:$src)>;
}
+let Predicates = [HasSSE2] in {
+  // Match fextend for 128-bit conversions
+ def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
+ (CVTPS2PDrr VR128:$src)>;
+}
+
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Compare Instructions
//===----------------------------------------------------------------------===//
@@ -2593,17 +2546,13 @@ let Predicates = [HasAVX] in {
OpSize, VEX;
def : Pat<(i32 (X86fgetsign FR32:$src)),
- (VMOVMSKPSrr32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
- sub_ss))>;
+ (VMOVMSKPSrr32 (COPY_TO_REGCLASS FR32:$src, VR128))>;
def : Pat<(i64 (X86fgetsign FR32:$src)),
- (VMOVMSKPSrr64 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
- sub_ss))>;
+ (VMOVMSKPSrr64 (COPY_TO_REGCLASS FR32:$src, VR128))>;
def : Pat<(i32 (X86fgetsign FR64:$src)),
- (VMOVMSKPDrr32 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
- sub_sd))>;
+ (VMOVMSKPDrr32 (COPY_TO_REGCLASS FR64:$src, VR128))>;
def : Pat<(i64 (X86fgetsign FR64:$src)),
- (VMOVMSKPDrr64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
- sub_sd))>;
+ (VMOVMSKPDrr64 (COPY_TO_REGCLASS FR64:$src, VR128))>;
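+// COPY_TO_REGCLASS simply re-tags the scalar value as a VR128, which is all
+// MOVMSKP* needs; the old INSERT_SUBREG form also pinned it to a
+// sub_ss/sub_sd subregister index, and those indices are deleted from
+// X86RegisterInfo.td later in this patch.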
// Assembler Only
def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
@@ -2628,17 +2577,17 @@ defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd",
SSEPackedDouble>, TB, OpSize;
def : Pat<(i32 (X86fgetsign FR32:$src)),
- (MOVMSKPSrr32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
- sub_ss))>, Requires<[HasSSE1]>;
+ (MOVMSKPSrr32 (COPY_TO_REGCLASS FR32:$src, VR128))>,
+ Requires<[HasSSE1]>;
def : Pat<(i64 (X86fgetsign FR32:$src)),
- (MOVMSKPSrr64 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
- sub_ss))>, Requires<[HasSSE1]>;
+ (MOVMSKPSrr64 (COPY_TO_REGCLASS FR32:$src, VR128))>,
+ Requires<[HasSSE1]>;
def : Pat<(i32 (X86fgetsign FR64:$src)),
- (MOVMSKPDrr32 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
- sub_sd))>, Requires<[HasSSE2]>;
+ (MOVMSKPDrr32 (COPY_TO_REGCLASS FR64:$src, VR128))>,
+ Requires<[HasSSE2]>;
def : Pat<(i64 (X86fgetsign FR64:$src)),
- (MOVMSKPDrr64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
- sub_sd))>, Requires<[HasSSE2]>;
+ (MOVMSKPDrr64 (COPY_TO_REGCLASS FR64:$src, VR128))>,
+ Requires<[HasSSE2]>;
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Logical Instructions
@@ -2923,7 +2872,8 @@ let isCommutable = 0 in {
basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S, 0>,
VEX_4V, VEX_LIG;
defm VSUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_y<0x5C, "sub", fsub, SSE_ALU_ITINS_P>, VEX_4V;
+ basic_sse12_fp_binop_p_y<0x5C, "sub", fsub, SSE_ALU_ITINS_P>,
+ VEX_4V;
defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S, 0>,
basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S, 0>,
VEX_4V, VEX_LIG;
@@ -2974,6 +2924,23 @@ let Constraints = "$src1 = $dst" in {
}
}
+let isCommutable = 1, isCodeGenOnly = 1 in {
+ defm VMAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S, 0>,
+ VEX_4V, VEX_LIG;
+ defm VMAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P, 0>,
+ basic_sse12_fp_binop_p_y<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>, VEX_4V;
+ defm VMINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S, 0>,
+ VEX_4V, VEX_LIG;
+ defm VMINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P, 0>,
+ basic_sse12_fp_binop_p_y<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>, VEX_4V;
+ let Constraints = "$src1 = $dst" in {
+ defm MAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S>,
+ basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>;
+ defm MINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S>,
+ basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>;
+ }
+}
+
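+// The *C opcodes select X86fmaxc/X86fminc, commutative variants of fmax and
+// fmin that the backend uses when operand order is known not to matter (for
+// example under fast-math); that is what makes the isCommutable flag safe.
+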
/// Unop Arithmetic
/// In addition, we also have a special variant of the scalar form here to
/// represent the associated intrinsic operation. This form is unlike the
@@ -3236,34 +3203,30 @@ def : Pat<(f32 (X86frcp (load addr:$src))),
let Predicates = [HasAVX], AddedComplexity = 1 in {
def : Pat<(int_x86_sse_sqrt_ss VR128:$src),
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
- (VSQRTSSr (f32 (IMPLICIT_DEF)),
- (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)),
- sub_ss)>;
+ (COPY_TO_REGCLASS (VSQRTSSr (f32 (IMPLICIT_DEF)),
+ (COPY_TO_REGCLASS VR128:$src, FR32)),
+ VR128)>;
def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src),
(VSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
def : Pat<(int_x86_sse2_sqrt_sd VR128:$src),
- (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)),
- (VSQRTSDr (f64 (IMPLICIT_DEF)),
- (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd)),
- sub_sd)>;
+ (COPY_TO_REGCLASS (VSQRTSDr (f64 (IMPLICIT_DEF)),
+ (COPY_TO_REGCLASS VR128:$src, FR64)),
+ VR128)>;
def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
(VSQRTSDm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
- (VRSQRTSSr (f32 (IMPLICIT_DEF)),
- (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)),
- sub_ss)>;
+ (COPY_TO_REGCLASS (VRSQRTSSr (f32 (IMPLICIT_DEF)),
+ (COPY_TO_REGCLASS VR128:$src, FR32)),
+ VR128)>;
def : Pat<(int_x86_sse_rsqrt_ss sse_load_f32:$src),
(VRSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
def : Pat<(int_x86_sse_rcp_ss VR128:$src),
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
- (VRCPSSr (f32 (IMPLICIT_DEF)),
- (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)),
- sub_ss)>;
+ (COPY_TO_REGCLASS (VRCPSSr (f32 (IMPLICIT_DEF)),
+ (COPY_TO_REGCLASS VR128:$src, FR32)),
+ VR128)>;
def : Pat<(int_x86_sse_rcp_ss sse_load_f32:$src),
(VRCPSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
}
@@ -4609,7 +4572,7 @@ def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
// Bitcast FR64 <-> GR64
//
let Predicates = [HasAVX] in
-def VMOV64toSDrm : SSDI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
+def VMOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>,
VEX;
@@ -4622,7 +4585,7 @@ def VMOVSDto64mr : VRPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
[(store (i64 (bitconvert FR64:$src)), addr:$dst)],
IIC_SSE_MOVDQ>, VEX;
-def MOV64toSDrm : SSDI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
+def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert (loadi64 addr:$src)))],
IIC_SSE_MOVDQ>;
@@ -5505,16 +5468,14 @@ let usesCustomInserter = 1 in {
def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
[(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>,
Requires<[HasSSE3]>;
-def MWAIT : PseudoI<(outs), (ins GR32:$src1, GR32:$src2),
- [(int_x86_sse3_mwait GR32:$src1, GR32:$src2)]>,
- Requires<[HasSSE3]>;
}
let Uses = [EAX, ECX, EDX] in
def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", [], IIC_SSE_MONITOR>,
TB, Requires<[HasSSE3]>;
let Uses = [ECX, EAX] in
-def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", [], IIC_SSE_MWAIT>,
+def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait",
+ [(int_x86_sse3_mwait ECX, EAX)], IIC_SSE_MWAIT>,
TB, Requires<[HasSSE3]>;
def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>;
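// With ECX and EAX fixed by the Uses list, the intrinsic is now matched
// directly on the real instruction, so the MWAIT pseudo (previously in the
// usesCustomInserter block above) is no longer needed.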
@@ -6906,81 +6867,42 @@ let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
}
// Packed Compare Implicit Length Strings, Return Index
-let Defs = [ECX, EFLAGS] in {
- multiclass SS42AI_pcmpistri<Intrinsic IntId128, string asm = "pcmpistri"> {
+let Defs = [ECX, EFLAGS], neverHasSideEffects = 1 in {
+ multiclass SS42AI_pcmpistri<string asm> {
def rr : SS42AI<0x63, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
- [(set ECX, (IntId128 VR128:$src1, VR128:$src2, imm:$src3)),
- (implicit EFLAGS)]>, OpSize;
+ []>, OpSize;
+ let mayLoad = 1 in
def rm : SS42AI<0x63, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
- [(set ECX, (IntId128 VR128:$src1, (load addr:$src2), imm:$src3)),
- (implicit EFLAGS)]>, OpSize;
+ []>, OpSize;
}
}
-let Predicates = [HasAVX] in {
-defm VPCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128, "vpcmpistri">,
- VEX;
-defm VPCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128, "vpcmpistri">,
- VEX;
-defm VPCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128, "vpcmpistri">,
- VEX;
-defm VPCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128, "vpcmpistri">,
- VEX;
-defm VPCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128, "vpcmpistri">,
- VEX;
-defm VPCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128, "vpcmpistri">,
- VEX;
-}
-
-defm PCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128>;
-defm PCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128>;
-defm PCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128>;
-defm PCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128>;
-defm PCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128>;
-defm PCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128>;
+let Predicates = [HasAVX] in
+defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX;
+defm PCMPISTRI : SS42AI_pcmpistri<"pcmpistri">;
// Packed Compare Explicit Length Strings, Return Index
-let Defs = [ECX, EFLAGS], Uses = [EAX, EDX] in {
- multiclass SS42AI_pcmpestri<Intrinsic IntId128, string asm = "pcmpestri"> {
+let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
+ multiclass SS42AI_pcmpestri<string asm> {
def rr : SS42AI<0x61, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src3, i8imm:$src5),
!strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
- [(set ECX, (IntId128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5)),
- (implicit EFLAGS)]>, OpSize;
+ []>, OpSize;
+ let mayLoad = 1 in
def rm : SS42AI<0x61, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src3, i8imm:$src5),
!strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
- [(set ECX,
- (IntId128 VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5)),
- (implicit EFLAGS)]>, OpSize;
+ []>, OpSize;
}
}
-let Predicates = [HasAVX] in {
-defm VPCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128, "vpcmpestri">,
- VEX;
-defm VPCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128, "vpcmpestri">,
- VEX;
-defm VPCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128, "vpcmpestri">,
- VEX;
-defm VPCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128, "vpcmpestri">,
- VEX;
-defm VPCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128, "vpcmpestri">,
- VEX;
-defm VPCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128, "vpcmpestri">,
- VEX;
-}
-
-defm PCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128>;
-defm PCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128>;
-defm PCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128>;
-defm PCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128>;
-defm PCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128>;
-defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>;
+let Predicates = [HasAVX] in
+defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX;
+defm PCMPESTRI : SS42AI_pcmpestri<"pcmpestri">;
//===----------------------------------------------------------------------===//
// SSE4.2 - CRC Instructions
@@ -7727,24 +7649,18 @@ let Predicates = [HasAVX2] in {
// is used by additional users, which prevents the pattern selection.
let AddedComplexity = 20 in {
def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
- (VBROADCASTSSrr
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss))>;
+ (VBROADCASTSSrr (COPY_TO_REGCLASS FR32:$src, VR128))>;
def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
- (VBROADCASTSSYrr
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss))>;
+ (VBROADCASTSSYrr (COPY_TO_REGCLASS FR32:$src, VR128))>;
def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
- (VBROADCASTSDYrr
- (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd))>;
+ (VBROADCASTSDYrr (COPY_TO_REGCLASS FR64:$src, VR128))>;
def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
- (VBROADCASTSSrr
- (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss))>;
+ (VBROADCASTSSrr (COPY_TO_REGCLASS GR32:$src, VR128))>;
def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
- (VBROADCASTSSYrr
- (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss))>;
+ (VBROADCASTSSYrr (COPY_TO_REGCLASS GR32:$src, VR128))>;
def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
- (VBROADCASTSDYrr
- (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GR64:$src, sub_sd))>;
+ (VBROADCASTSDYrr (COPY_TO_REGCLASS GR64:$src, VR128))>;
}
}
@@ -7768,46 +7684,26 @@ def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
let AddedComplexity = 20 in {
// 128bit broadcasts:
def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
- (VPSHUFDri
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss), 0)>;
+ (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0)>;
def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
(VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
- (VPSHUFDri
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss), 0),
- sub_xmm),
- (VPSHUFDri
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss),
- 0), 1)>;
+ (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), sub_xmm),
+ (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), 1)>;
def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
(VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
- (VPSHUFDri
- (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd),
- 0x44),
- sub_xmm),
- (VPSHUFDri
- (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd),
- 0x44), 1)>;
+ (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), sub_xmm),
+ (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), 1)>;
def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
- (VPSHUFDri
- (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss), 0)>;
+ (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0)>;
def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
(VINSERTF128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
- (VPSHUFDri
- (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss), 0),
- sub_xmm),
- (VPSHUFDri
- (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss),
- 0), 1)>;
+ (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0), sub_xmm),
+ (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0), 1)>;
def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
(VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)),
- (VPSHUFDri
- (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GR64:$src, sub_sd),
- 0x44),
- sub_xmm),
- (VPSHUFDri
- (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GR64:$src, sub_sd),
- 0x44), 1)>;
+ (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), sub_xmm),
+ (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), 1)>;
}
}
@@ -8052,7 +7948,7 @@ multiclass avx2_gather<bits<8> opc, string OpcodeStr, RegisterClass RC256,
[]>, VEX_4VOp3, VEX_L;
}
-let Constraints = "$src1 = $dst, $mask = $mask_wb" in {
+let mayLoad = 1, Constraints = "$src1 = $dst, $mask = $mask_wb" in {
defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", VR256, vx64mem, vx64mem>, VEX_W;
defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", VR256, vx64mem, vy64mem>, VEX_W;
defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", VR256, vx32mem, vy32mem>;
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
index 0168d12..7ac4cec 100644
--- a/lib/Target/X86/X86JITInfo.cpp
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -532,6 +532,15 @@ uintptr_t X86JITInfo::getPICJumpTableEntry(uintptr_t BB, uintptr_t Entry) {
#endif
}
+template<typename T> void addUnaligned(void *Pos, T Delta) {
+ T Value;
+ std::memcpy(reinterpret_cast<char*>(&Value), reinterpret_cast<char*>(Pos),
+ sizeof(T));
+ Value += Delta;
+ std::memcpy(reinterpret_cast<char*>(Pos), reinterpret_cast<char*>(&Value),
+ sizeof(T));
+}
+
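+// A minimal usage sketch, assuming a little-endian host (the buffer and
+// offset are illustrative only):
+//
+//   unsigned char Buf[8] = {};
+//   addUnaligned<unsigned>(Buf + 1, 42);  // safe despite misalignment
+//
+// Incrementing through "*(unsigned*)(Buf + 1)" directly would be undefined
+// behavior on alignment-strict hosts; the memcpy round trip lets the
+// compiler emit whatever unaligned accesses the target actually permits.
+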
/// relocate - Before the JIT can run a block of code that has been emitted,
/// it must rewrite the code to contain the actual addresses of any
/// referenced global symbols.
@@ -545,24 +554,24 @@ void X86JITInfo::relocate(void *Function, MachineRelocation *MR,
// PC relative relocation, add the relocated value to the value already in
// memory, after we adjust it for where the PC is.
ResultPtr = ResultPtr -(intptr_t)RelocPos - 4 - MR->getConstantVal();
- *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+ addUnaligned<unsigned>(RelocPos, ResultPtr);
break;
}
case X86::reloc_picrel_word: {
// PIC base relative relocation, add the relocated value to the value
// already in memory, after we adjust it for where the PIC base is.
ResultPtr = ResultPtr - ((intptr_t)Function + MR->getConstantVal());
- *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+ addUnaligned<unsigned>(RelocPos, ResultPtr);
break;
}
case X86::reloc_absolute_word:
case X86::reloc_absolute_word_sext:
// Absolute relocation, just add the relocated value to the value already
// in memory.
- *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+ addUnaligned<unsigned>(RelocPos, ResultPtr);
break;
case X86::reloc_absolute_dword:
- *((intptr_t*)RelocPos) += ResultPtr;
+ addUnaligned<intptr_t>(RelocPos, ResultPtr);
break;
}
}
diff --git a/lib/Target/X86/X86JITInfo.h b/lib/Target/X86/X86JITInfo.h
index c76d3cc..d7c08df 100644
--- a/lib/Target/X86/X86JITInfo.h
+++ b/lib/Target/X86/X86JITInfo.h
@@ -65,7 +65,7 @@ namespace llvm {
/// referenced global symbols.
virtual void relocate(void *Function, MachineRelocation *MR,
unsigned NumRelocs, unsigned char* GOTBase);
-
+
/// allocateThreadLocalMemory - Each target has its own way of
/// handling thread local variables. This method returns a value only
/// meaningful to the target.
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index df7507c..9c0ce4e 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -46,12 +46,12 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
assert((MO.isGlobal() || MO.isSymbol()) && "Isn't a symbol reference");
SmallString<128> Name;
-
+
if (!MO.isGlobal()) {
assert(MO.isSymbol());
Name += MAI.getGlobalPrefix();
Name += MO.getSymbolName();
- } else {
+ } else {
const GlobalValue *GV = MO.getGlobal();
bool isImplicitlyPrivate = false;
if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB ||
@@ -59,7 +59,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE ||
MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE)
isImplicitlyPrivate = true;
-
+
Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate);
}
@@ -110,7 +110,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
getMachOMMI().getFnStubEntry(Sym);
if (StubSym.getPointer())
return Sym;
-
+
if (MO.isGlobal()) {
StubSym =
MachineModuleInfoImpl::
@@ -135,7 +135,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
// lot of extra uniquing.
const MCExpr *Expr = 0;
MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
-
+
switch (MO.getTargetFlags()) {
default: llvm_unreachable("Unknown target flag on GV operand");
case X86II::MO_NO_FLAG: // No flag.
@@ -144,7 +144,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
case X86II::MO_DLLIMPORT:
case X86II::MO_DARWIN_STUB:
break;
-
+
case X86II::MO_TLVP: RefKind = MCSymbolRefExpr::VK_TLVP; break;
case X86II::MO_TLVP_PIC_BASE:
Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
@@ -173,7 +173,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE:
Expr = MCSymbolRefExpr::Create(Sym, Ctx);
// Subtract the pic base.
- Expr = MCBinaryExpr::CreateSub(Expr,
+ Expr = MCBinaryExpr::CreateSub(Expr,
MCSymbolRefExpr::Create(MF.getPICBaseSymbol(), Ctx),
Ctx);
if (MO.isJTI() && MAI.hasSetDirective()) {
@@ -187,10 +187,10 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
}
break;
}
-
+
if (Expr == 0)
Expr = MCSymbolRefExpr::Create(Sym, RefKind, Ctx);
-
+
if (!MO.isJTI() && MO.getOffset())
Expr = MCBinaryExpr::CreateAdd(Expr,
MCConstantExpr::Create(MO.getOffset(), Ctx),
@@ -211,10 +211,10 @@ static void lower_lea64_32mem(MCInst *MI, unsigned OpNo) {
// Convert registers in the addr mode according to subreg64.
for (unsigned i = 0; i != 4; ++i) {
if (!MI->getOperand(OpNo+i).isReg()) continue;
-
+
unsigned Reg = MI->getOperand(OpNo+i).getReg();
if (Reg == 0) continue;
-
+
MI->getOperand(OpNo+i).setReg(getX86SubSuperRegister(Reg, MVT::i64));
}
}
@@ -280,7 +280,7 @@ static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst,
return;
// Check whether this is an absolute address.
- // FIXME: We know TLVP symbol refs aren't, but there should be a better way
+ // FIXME: We know TLVP symbol refs aren't, but there should be a better way
// to do this here.
bool Absolute = true;
if (Inst.getOperand(AddrOp).isExpr()) {
@@ -289,7 +289,7 @@ static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst,
if (SRE->getKind() == MCSymbolRefExpr::VK_TLVP)
Absolute = false;
}
-
+
if (Absolute &&
(Inst.getOperand(AddrBase + 0).getReg() != 0 ||
Inst.getOperand(AddrBase + 2).getReg() != 0 ||
@@ -306,10 +306,10 @@ static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst,
void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
-
+
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
-
+
MCOperand MCOp;
switch (MO.getType()) {
default:
@@ -345,10 +345,10 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
// Ignore call clobbers.
continue;
}
-
+
OutMI.addOperand(MCOp);
}
-
+
// Handle a few special cases to eliminate operand modifiers.
ReSimplify:
switch (OutMI.getOpcode()) {
@@ -425,7 +425,7 @@ ReSimplify:
case X86::TAILJMPd:
case X86::TAILJMPd64: Opcode = X86::JMP_1; break;
}
-
+
MCOperand Saved = OutMI.getOperand(0);
OutMI = MCInst();
OutMI.setOpcode(Opcode);
@@ -445,7 +445,7 @@ ReSimplify:
case X86::ADD16ri8_DB: OutMI.setOpcode(X86::OR16ri8); goto ReSimplify;
case X86::ADD32ri8_DB: OutMI.setOpcode(X86::OR32ri8); goto ReSimplify;
case X86::ADD64ri8_DB: OutMI.setOpcode(X86::OR64ri8); goto ReSimplify;
-
+
// The assembler backend wants to see branches in their small form and relax
// them to their large form. The JIT can only handle the large form because
// it does not do relaxation. For now, translate the large form to the
@@ -688,7 +688,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
// call "L1$pb"
// "L1$pb":
// popl %esi
-
+
// Emit the call.
MCSymbol *PICBase = MF->getPICBaseSymbol();
TmpInst.setOpcode(X86::CALLpcrel32);
@@ -697,43 +697,43 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::Create(PICBase,
OutContext)));
OutStreamer.EmitInstruction(TmpInst);
-
+
// Emit the label.
OutStreamer.EmitLabel(PICBase);
-
+
// popl $reg
TmpInst.setOpcode(X86::POP32r);
TmpInst.getOperand(0) = MCOperand::CreateReg(MI->getOperand(0).getReg());
OutStreamer.EmitInstruction(TmpInst);
return;
}
-
+
case X86::ADD32ri: {
// Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
break;
-
+
// Okay, we have something like:
// EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL)
-
+
// For this, we want to print something like:
// MYGLOBAL + (. - PICBASE)
// However, we can't generate a ".", so just emit a new label here and refer
// to it.
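// The resulting assembly is, schematically (label names illustrative):
//   Ltmp0:
//     addl $_MYGLOBAL+(Ltmp0-L0$pb), %eax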
MCSymbol *DotSym = OutContext.CreateTempSymbol();
OutStreamer.EmitLabel(DotSym);
-
+
// Now that we have emitted the label, lower the complex operand expression.
MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));
-
+
const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext);
const MCExpr *PICBase =
MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), OutContext);
DotExpr = MCBinaryExpr::CreateSub(DotExpr, PICBase, OutContext);
-
- DotExpr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(OpSym,OutContext),
+
+ DotExpr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(OpSym,OutContext),
DotExpr, OutContext);
-
+
MCInst TmpInst;
TmpInst.setOpcode(X86::ADD32ri);
TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
@@ -743,7 +743,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
}
-
+
MCInst TmpInst;
MCInstLowering.Lower(MI, TmpInst);
OutStreamer.EmitInstruction(TmpInst);
diff --git a/lib/Target/X86/X86MCInstLower.h b/lib/Target/X86/X86MCInstLower.h
index 40df3db..b4d4cfd 100644
--- a/lib/Target/X86/X86MCInstLower.h
+++ b/lib/Target/X86/X86MCInstLower.h
@@ -25,7 +25,7 @@ namespace llvm {
class Mangler;
class TargetMachine;
class X86AsmPrinter;
-
+
/// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst.
class LLVM_LIBRARY_VISIBILITY X86MCInstLower {
MCContext &Ctx;
@@ -37,12 +37,12 @@ class LLVM_LIBRARY_VISIBILITY X86MCInstLower {
public:
X86MCInstLower(Mangler *mang, const MachineFunction &MF,
X86AsmPrinter &asmprinter);
-
+
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
-
+
private:
MachineModuleInfoMachO &getMachOMMI() const;
};
diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h
index f83a525..78d20ce 100644
--- a/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/lib/Target/X86/X86MachineFunctionInfo.h
@@ -24,7 +24,7 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
virtual void anchor();
  /// ForceFramePointer - True if the function is required to use a frame
- /// pointer for reasons other than it containing dynamic allocation or
+ /// pointer for reasons other than it containing dynamic allocation or
  /// that FP elimination is turned off. For example, Cygwin main function
/// contains stack pointer re-alignment code which requires FP.
bool ForceFramePointer;
@@ -83,7 +83,7 @@ public:
VarArgsFPOffset(0),
ArgumentStackSize(0),
NumLocalDynamics(0) {}
-
+
explicit X86MachineFunctionInfo(MachineFunction &MF)
: ForceFramePointer(false),
CalleeSavedFrameSize(0),
@@ -99,7 +99,7 @@ public:
ArgumentStackSize(0),
NumLocalDynamics(0) {}
- bool getForceFramePointer() const { return ForceFramePointer;}
+ bool getForceFramePointer() const { return ForceFramePointer;}
void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index acf53f8..877b8f6 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -72,13 +72,15 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
SlotSize = 8;
StackPtr = X86::RSP;
FramePtr = X86::RBP;
- BasePtr = X86::RBX;
} else {
SlotSize = 4;
StackPtr = X86::ESP;
FramePtr = X86::EBP;
- BasePtr = X86::EBX;
}
+  // Use a callee-saved register as the base pointer. These registers must
+  // not conflict with any ABI requirements. For example, in 32-bit PIC mode
+  // the GOT pointer must live in EBX before any call through the PLT.
+ BasePtr = Is64Bit ? X86::RBX : X86::ESI;
}
/// getCompactUnwindRegNum - This function maps the register to the number for
@@ -366,7 +368,7 @@ bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
if (!EnableBasePointer)
return false;
- // When we need stack realignment and there are dynamic allocas, we can't
+ // When we need stack realignment and there are dynamic allocas, we can't
// reference off of the stack pointer, so we reserve a base pointer.
if (needsStackRealignment(MF) && MFI->hasVarSizedObjects())
return true;
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index ae2d4d0..edc7184 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -23,9 +23,6 @@ let Namespace = "X86" in {
def sub_8bit_hi : SubRegIndex;
def sub_16bit : SubRegIndex;
def sub_32bit : SubRegIndex;
-
- def sub_ss : SubRegIndex;
- def sub_sd : SubRegIndex;
def sub_xmm : SubRegIndex;
@@ -163,8 +160,6 @@ let Namespace = "X86" in {
def FP6 : Register<"fp6">;
// XMM Registers, used by the various SSE instruction set extensions.
- // The sub_ss and sub_sd subregs are the same registers with another regclass.
- let CompositeIndices = [(sub_ss), (sub_sd)] in {
def XMM0: Register<"xmm0">, DwarfRegNum<[17, 21, 21]>;
def XMM1: Register<"xmm1">, DwarfRegNum<[18, 22, 22]>;
def XMM2: Register<"xmm2">, DwarfRegNum<[19, 23, 23]>;
@@ -184,7 +179,7 @@ let Namespace = "X86" in {
def XMM13: Register<"xmm13">, DwarfRegNum<[30, -2, -2]>;
def XMM14: Register<"xmm14">, DwarfRegNum<[31, -2, -2]>;
def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>;
- }}
+ } // CostPerUse
// YMM Registers, used by AVX instructions
let SubRegIndices = [sub_xmm] in {
diff --git a/lib/Target/X86/X86Relocations.h b/lib/Target/X86/X86Relocations.h
index 857becf..0333056 100644
--- a/lib/Target/X86/X86Relocations.h
+++ b/lib/Target/X86/X86Relocations.h
@@ -21,7 +21,7 @@ namespace llvm {
/// RelocationType - An enum for the x86 relocation codes. Note that
/// the terminology here doesn't follow x86 convention - word means
/// 32-bit and dword means 64-bit. The relocations will be treated
- /// by JIT or ObjectCode emitters, this is transparent to the x86 code
+ /// by JIT or ObjectCode emitters, this is transparent to the x86 code
/// emitter but JIT and ObjectCode will treat them differently
enum RelocationType {
/// reloc_pcrel_word - PC relative relocation, add the relocated value to
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index 7c6788f..00edcbc 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -38,7 +38,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
   // If the destination is a segment-relative address space, use the default
   // lowering.
if (DstPtrInfo.getAddrSpace() >= 256)
return SDValue();
-
+
// If not DWORD aligned or size is more than the threshold, call the library.
// The libc version is likely to be faster for these cases. It can use the
// address value and run time information about the CPU.
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index e6e9c56..9087852 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -39,10 +39,10 @@ unsigned char X86Subtarget::
ClassifyBlockAddressReference() const {
if (isPICStyleGOT()) // 32-bit ELF targets.
return X86II::MO_GOTOFF;
-
+
if (isPICStyleStubPIC()) // Darwin/32 in PIC mode.
return X86II::MO_PIC_BASE_OFFSET;
-
+
// Direct static reference to label.
return X86II::MO_NO_FLAG;
}
@@ -69,7 +69,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
// Large model never uses stubs.
if (TM.getCodeModel() == CodeModel::Large)
return X86II::MO_NO_FLAG;
-
+
if (isTargetDarwin()) {
// If symbol visibility is hidden, the extra load is not needed if
// target is x86-64 or the symbol is definitely defined in the current
@@ -87,18 +87,18 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
return X86II::MO_NO_FLAG;
}
-
+
if (isPICStyleGOT()) { // 32-bit ELF targets.
// Extra load is needed for all externally visible.
if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
return X86II::MO_GOTOFF;
return X86II::MO_GOT;
}
-
+
if (isPICStyleStubPIC()) { // Darwin/32 in PIC mode.
// Determine whether we have a stub reference and/or whether the reference
// is relative to the PIC base or not.
-
+
// If this is a strong reference to a definition, it is definitely not
// through a stub.
if (!isDecl && !GV->isWeakForLinker())
@@ -108,26 +108,26 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
// normal $non_lazy_ptr stub because this symbol might be resolved late.
if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
return X86II::MO_DARWIN_NONLAZY_PIC_BASE;
-
+
// If symbol visibility is hidden, we have a stub for common symbol
// references and external declarations.
if (isDecl || GV->hasCommonLinkage()) {
// Hidden $non_lazy_ptr reference.
return X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE;
}
-
+
// Otherwise, no stub.
return X86II::MO_PIC_BASE_OFFSET;
}
-
+
if (isPICStyleStubNoDynamic()) { // Darwin/32 in -mdynamic-no-pic mode.
// Determine whether we have a stub reference.
-
+
// If this is a strong reference to a definition, it is definitely not
// through a stub.
if (!isDecl && !GV->isWeakForLinker())
return X86II::MO_NO_FLAG;
-
+
// Unless we have a symbol with hidden visibility, we have to go through a
// normal $non_lazy_ptr stub because this symbol might be resolved late.
if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
@@ -136,7 +136,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
// Otherwise, no stub.
return X86II::MO_NO_FLAG;
}
-
+
// Direct static reference to global.
return X86II::MO_NO_FLAG;
}
@@ -246,8 +246,11 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
}
// If it's Nehalem, unaligned memory access is fast.
- // FIXME: Nehalem is family 6. Also include Westmere and later processors?
- if (Family == 15 && Model == 26) {
+ // Include Westmere and Sandy Bridge as well.
+ // FIXME: add later processors.
+ if (IsIntel && ((Family == 6 && Model == 26) ||
+ (Family == 6 && Model == 44) ||
+ (Family == 6 && Model == 42))) {
IsUAMemFast = true;
ToggleFeature(X86::FeatureFastUAMem);
}
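// For reference, Family and Model above come from CPUID leaf 1 EAX with the
// extended fields folded in; a sketch of the decoding (helper name
// illustrative):
//
//   static void decodeFamilyModel(unsigned EAX, unsigned &Family,
//                                 unsigned &Model) {
//     Family = (EAX >> 8) & 0xf;
//     Model  = (EAX >> 4) & 0xf;
//     if (Family == 6 || Family == 0xf) {
//       if (Family == 0xf)
//         Family += (EAX >> 20) & 0xff;    // extended family, bits 27:20
//       Model += ((EAX >> 16) & 0xf) << 4; // extended model, bits 19:16
//     }
//   }
//
// Under that decoding Nehalem is family 6, model 26 (0x1A), Westmere is
// model 44 (0x2C), and Sandy Bridge is model 42 (0x2A), matching the check
// above.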
@@ -315,7 +318,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
}
X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS,
+ const std::string &FS,
unsigned StackAlignOverride, bool is64Bit)
: X86GenSubtargetInfo(TT, CPU, FS)
, X86ProcFamily(Others)
@@ -397,10 +400,10 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
}
}
- if (X86ProcFamily == IntelAtom) {
+ if (X86ProcFamily == IntelAtom)
PostRAScheduler = true;
- InstrItins = getInstrItineraryForCPU(CPUName);
- }
+
+ InstrItins = getInstrItineraryForCPU(CPUName);
// It's important to keep the MCSubtargetInfo feature bits in sync with
// target data structure which is shared with MC code emitter, etc.
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 1af585f..6841c5b 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -55,7 +55,7 @@ protected:
/// X86ProcFamily - X86 processor family: Intel Atom, and others
X86ProcFamilyEnum X86ProcFamily;
-
+
/// PICStyle - Which PIC style to use
///
PICStyles::Style PICStyle;
@@ -149,7 +149,7 @@ protected:
/// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple;
-
+
/// Instruction itineraries for scheduling
InstrItineraryData InstrItins;
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
index e4f567f..80b75dc 100644
--- a/lib/Target/X86/X86VZeroUpper.cpp
+++ b/lib/Target/X86/X86VZeroUpper.cpp
@@ -222,7 +222,7 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF,
DebugLoc dl = I->getDebugLoc();
bool isControlFlow = MI->isCall() || MI->isReturn();
- // Shortcut: don't need to check regular instructions in dirty state.
+ // Shortcut: don't need to check regular instructions in dirty state.
if (!isControlFlow && CurState == ST_DIRTY)
continue;
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp
index 3dbc3b9..a4e5647 100644
--- a/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -371,8 +371,3 @@ XCoreFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
false));
}
}
-
-void XCoreFrameLowering::
-processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
-
-}
diff --git a/lib/Target/XCore/XCoreFrameLowering.h b/lib/Target/XCore/XCoreFrameLowering.h
index afa2773..db1bbb6 100644
--- a/lib/Target/XCore/XCoreFrameLowering.h
+++ b/lib/Target/XCore/XCoreFrameLowering.h
@@ -44,8 +44,6 @@ namespace llvm {
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = NULL) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
-
//! Stack slot size (4 bytes)
static int stackSlotSize() {
return 4;