Diffstat (limited to 'lib/Target')
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 349
-rw-r--r--  lib/Target/ARM/ARMCodeEmitter.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMConstantIslandPass.cpp | 5
-rw-r--r--  lib/Target/ARM/ARMExpandPseudoInsts.cpp | 51
-rw-r--r--  lib/Target/ARM/ARMFastISel.cpp | 74
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 32
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 236
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h | 8
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 108
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td | 36
-rw-r--r--  lib/Target/ARM/ARMInstrThumb.td | 1
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td | 90
-rw-r--r--  lib/Target/ARM/ARMJITInfo.cpp | 2
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 198
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 52
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp | 4
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp | 1
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 2
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp | 2
-rw-r--r--  lib/Target/CellSPU/SPUAsmPrinter.cpp | 6
-rw-r--r--  lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 13
-rw-r--r--  lib/Target/Hexagon/CMakeLists.txt | 1
-rw-r--r--  lib/Target/Hexagon/HexagonMachineScheduler.cpp | 952
-rw-r--r--  lib/Target/Hexagon/HexagonMachineScheduler.h | 437
-rw-r--r--  lib/Target/Hexagon/HexagonNewValueJump.cpp | 2
-rw-r--r--  lib/Target/Hexagon/HexagonPeephole.cpp | 35
-rw-r--r--  lib/Target/Hexagon/HexagonRegisterInfo.cpp | 52
-rw-r--r--  lib/Target/Hexagon/HexagonRegisterInfo.h | 5
-rw-r--r--  lib/Target/Hexagon/HexagonSchedule.td | 1
-rw-r--r--  lib/Target/Hexagon/HexagonScheduleV4.td | 1
-rw-r--r--  lib/Target/Hexagon/HexagonTargetMachine.cpp | 21
-rw-r--r--  lib/Target/Hexagon/HexagonVLIWPacketizer.cpp | 4
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp | 2
-rw-r--r--  lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp | 17
-rw-r--r--  lib/Target/MBlaze/MBlazeRegisterInfo.cpp | 2
-rw-r--r--  lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 859
-rw-r--r--  lib/Target/Mips/CMakeLists.txt | 1
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h | 2
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp | 2
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 11
-rw-r--r--  lib/Target/Mips/Mips16InstrInfo.cpp | 2
-rw-r--r--  lib/Target/Mips/Mips16RegisterInfo.cpp | 5
-rw-r--r--  lib/Target/Mips/Mips16RegisterInfo.h | 4
-rw-r--r--  lib/Target/Mips/Mips64InstrInfo.td | 14
-rw-r--r--  lib/Target/Mips/MipsAnalyzeImmediate.cpp | 2
-rw-r--r--  lib/Target/Mips/MipsAsmPrinter.cpp | 41
-rw-r--r--  lib/Target/Mips/MipsCodeEmitter.cpp | 3
-rw-r--r--  lib/Target/Mips/MipsDelaySlotFiller.cpp | 11
-rw-r--r--  lib/Target/Mips/MipsDirectObjLower.cpp | 86
-rw-r--r--  lib/Target/Mips/MipsDirectObjLower.h | 28
-rw-r--r--  lib/Target/Mips/MipsISelDAGToDAG.cpp | 5
-rw-r--r--  lib/Target/Mips/MipsISelLowering.cpp | 6
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.cpp | 43
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.h | 12
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.td | 27
-rw-r--r--  lib/Target/Mips/MipsLongBranch.cpp | 234
-rw-r--r--  lib/Target/Mips/MipsMCInstLower.cpp | 29
-rw-r--r--  lib/Target/Mips/MipsMCInstLower.h | 1
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.cpp | 14
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.h | 4
-rw-r--r--  lib/Target/Mips/MipsSEInstrInfo.cpp | 47
-rw-r--r--  lib/Target/Mips/MipsSEInstrInfo.h | 8
-rw-r--r--  lib/Target/Mips/MipsSERegisterInfo.cpp | 13
-rw-r--r--  lib/Target/Mips/MipsSERegisterInfo.h | 5
-rw-r--r--  lib/Target/Mips/MipsSubtarget.cpp | 6
-rw-r--r--  lib/Target/Mips/MipsSubtarget.h | 6
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.cpp | 2
-rw-r--r--  lib/Target/Mips/MipsTargetObjectFile.cpp | 6
-rw-r--r--  lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp | 2
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 1
-rw-r--r--  lib/Target/PowerPC/PPC.td | 10
-rw-r--r--  lib/Target/PowerPC/PPCAsmPrinter.cpp | 48
-rw-r--r--  lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 9
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 74
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.h | 4
-rw-r--r--  lib/Target/PowerPC/PPCInstr64Bit.td | 59
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp | 6
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.td | 93
-rw-r--r--  lib/Target/PowerPC/PPCSchedule.td | 88
-rw-r--r--  lib/Target/PowerPC/PPCSchedule440.td | 60
-rw-r--r--  lib/Target/PowerPC/PPCScheduleA2.td | 81
-rw-r--r--  lib/Target/PowerPC/PPCScheduleE500mc.td | 265
-rw-r--r--  lib/Target/PowerPC/PPCScheduleE5500.td | 309
-rw-r--r--  lib/Target/PowerPC/PPCScheduleG3.td | 7
-rw-r--r--  lib/Target/PowerPC/PPCScheduleG4.td | 7
-rw-r--r--  lib/Target/PowerPC/PPCScheduleG4Plus.td | 8
-rw-r--r--  lib/Target/PowerPC/PPCScheduleG5.td | 10
-rw-r--r--  lib/Target/PowerPC/PPCSubtarget.h | 2
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.td | 2
-rw-r--r--  lib/Target/TargetLibraryInfo.cpp | 57
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp | 58
-rw-r--r--  lib/Target/X86/Disassembler/X86Disassembler.cpp | 12
-rw-r--r--  lib/Target/X86/Disassembler/X86DisassemblerDecoder.c | 8
-rw-r--r--  lib/Target/X86/Disassembler/X86DisassemblerDecoder.h | 10
-rw-r--r--  lib/Target/X86/README-SSE.txt | 12
-rw-r--r--  lib/Target/X86/X86.td | 6
-rw-r--r--  lib/Target/X86/X86AsmPrinter.cpp | 8
-rw-r--r--  lib/Target/X86/X86AsmPrinter.h | 2
-rw-r--r--  lib/Target/X86/X86CodeEmitter.cpp | 4
-rw-r--r--  lib/Target/X86/X86FastISel.cpp | 10
-rw-r--r--  lib/Target/X86/X86FloatingPoint.cpp | 2
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp | 7
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 488
-rw-r--r--  lib/Target/X86/X86InstrControl.td | 7
-rw-r--r--  lib/Target/X86/X86InstrFMA.td | 319
-rw-r--r--  lib/Target/X86/X86InstrFormats.td | 74
-rw-r--r--  lib/Target/X86/X86InstrFragmentsSIMD.td | 8
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 223
-rw-r--r--  lib/Target/X86/X86InstrInfo.td | 9
-rw-r--r--  lib/Target/X86/X86InstrMMX.td | 65
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 670
-rw-r--r--  lib/Target/X86/X86JITInfo.cpp | 2
-rw-r--r--  lib/Target/X86/X86MCInstLower.cpp | 6
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp | 2
-rw-r--r--  lib/Target/X86/X86Subtarget.cpp | 1
-rw-r--r--  lib/Target/X86/X86Subtarget.h | 9
-rw-r--r--  lib/Target/X86/X86VZeroUpper.cpp | 10
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.td | 4
-rw-r--r--  lib/Target/XCore/XCoreRegisterInfo.cpp | 2
119 files changed, 5857 insertions(+), 1726 deletions(-)
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 29033e5..e2f0d7d 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -683,7 +683,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Handle register classes that require multiple instructions.
unsigned BeginIdx = 0;
unsigned SubRegs = 0;
- unsigned Spacing = 1;
+ int Spacing = 1;
// Use VORRq when possible.
if (ARM::QQPRRegClass.contains(DestReg, SrcReg))
@@ -705,27 +705,38 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg))
Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4, Spacing = 2;
- if (Opc) {
- const TargetRegisterInfo *TRI = &getRegisterInfo();
- MachineInstrBuilder Mov;
- for (unsigned i = 0; i != SubRegs; ++i) {
- unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i*Spacing);
- unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i*Spacing);
- assert(Dst && Src && "Bad sub-register");
- Mov = AddDefaultPred(BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst)
- .addReg(Src));
- // VORR takes two source operands.
- if (Opc == ARM::VORRq)
- Mov.addReg(Src);
- }
- // Add implicit super-register defs and kills to the last instruction.
- Mov->addRegisterDefined(DestReg, TRI);
- if (KillSrc)
- Mov->addRegisterKilled(SrcReg, TRI);
- return;
- }
+ assert(Opc && "Impossible reg-to-reg copy");
- llvm_unreachable("Impossible reg-to-reg copy");
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ MachineInstrBuilder Mov;
+
+ // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
+ if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
+ BeginIdx = BeginIdx + ((SubRegs-1)*Spacing);
+ Spacing = -Spacing;
+ }
+#ifndef NDEBUG
+ SmallSet<unsigned, 4> DstRegs;
+#endif
+ for (unsigned i = 0; i != SubRegs; ++i) {
+ unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i*Spacing);
+ unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i*Spacing);
+ assert(Dst && Src && "Bad sub-register");
+#ifndef NDEBUG
+ assert(!DstRegs.count(Src) && "destructive vector copy");
+ DstRegs.insert(Dst);
+#endif
+ Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst)
+ .addReg(Src);
+ // VORR takes two source operands.
+ if (Opc == ARM::VORRq)
+ Mov.addReg(Src);
+ Mov = AddDefaultPred(Mov);
+ }
+ // Add implicit super-register defs and kills to the last instruction.
+ Mov->addRegisterDefined(DestReg, TRI);
+ if (KillSrc)
+ Mov->addRegisterKilled(SrcReg, TRI);
}
static const
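The backward walk in the hunk above is easiest to see outside the MachineInstr machinery. A minimal sketch of the index arithmetic, with plain integers standing in for sub-register indices (illustrative only, not the LLVM API):

#include <cassert>

// Model of the copy loop above: copy SubRegs sub-registers starting at
// BeginIdx with stride Spacing; if the destination's first sub-register
// would clobber a source read by a later iteration, walk backward instead.
void copyTuple(int *Dst, const int *Src, unsigned SubRegs, bool Overlap) {
  int BeginIdx = 0, Spacing = 1;          // Spacing must be signed to negate
  if (Overlap) {                          // mirror of TRI->regsOverlap(...)
    BeginIdx += (int)(SubRegs - 1) * Spacing;
    Spacing = -Spacing;
  }
  for (unsigned i = 0; i != SubRegs; ++i) {
    int Idx = BeginIdx + (int)i * Spacing;
    assert(Idx >= 0 && "bad sub-register index");
    Dst[Idx] = Src[Idx];                  // one VMOVD/VORRq per sub-register
  }
}

This is also why Spacing changed from unsigned to int at the top of the function: the reversed walk needs a negative stride.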
@@ -1569,16 +1580,20 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
}
/// Identify instructions that can be folded into a MOVCC instruction, and
-/// return the corresponding opcode for the predicated pseudo-instruction.
-static unsigned canFoldIntoMOVCC(unsigned Reg, MachineInstr *&MI,
- const MachineRegisterInfo &MRI) {
+/// return the defining instruction.
+static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
+ const MachineRegisterInfo &MRI,
+ const TargetInstrInfo *TII) {
if (!TargetRegisterInfo::isVirtualRegister(Reg))
return 0;
if (!MRI.hasOneNonDBGUse(Reg))
return 0;
- MI = MRI.getVRegDef(Reg);
+ MachineInstr *MI = MRI.getVRegDef(Reg);
if (!MI)
return 0;
+ // MI is folded into the MOVCC by predicating it.
+ if (!MI->isPredicable())
+ return 0;
// Check if MI has any non-dead defs or physreg uses. This also detects
// predicated instructions which will be reading CPSR.
for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
@@ -1588,55 +1603,18 @@ static unsigned canFoldIntoMOVCC(unsigned Reg, MachineInstr *&MI,
return 0;
if (!MO.isReg())
continue;
+ // MI can't have any tied operands; that would conflict with predication.
+ if (MO.isTied())
+ return 0;
if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
return 0;
if (MO.isDef() && !MO.isDead())
return 0;
}
- switch (MI->getOpcode()) {
- default: return 0;
- case ARM::ANDri: return ARM::ANDCCri;
- case ARM::ANDrr: return ARM::ANDCCrr;
- case ARM::ANDrsi: return ARM::ANDCCrsi;
- case ARM::ANDrsr: return ARM::ANDCCrsr;
- case ARM::t2ANDri: return ARM::t2ANDCCri;
- case ARM::t2ANDrr: return ARM::t2ANDCCrr;
- case ARM::t2ANDrs: return ARM::t2ANDCCrs;
- case ARM::EORri: return ARM::EORCCri;
- case ARM::EORrr: return ARM::EORCCrr;
- case ARM::EORrsi: return ARM::EORCCrsi;
- case ARM::EORrsr: return ARM::EORCCrsr;
- case ARM::t2EORri: return ARM::t2EORCCri;
- case ARM::t2EORrr: return ARM::t2EORCCrr;
- case ARM::t2EORrs: return ARM::t2EORCCrs;
- case ARM::ORRri: return ARM::ORRCCri;
- case ARM::ORRrr: return ARM::ORRCCrr;
- case ARM::ORRrsi: return ARM::ORRCCrsi;
- case ARM::ORRrsr: return ARM::ORRCCrsr;
- case ARM::t2ORRri: return ARM::t2ORRCCri;
- case ARM::t2ORRrr: return ARM::t2ORRCCrr;
- case ARM::t2ORRrs: return ARM::t2ORRCCrs;
-
- // ARM ADD/SUB
- case ARM::ADDri: return ARM::ADDCCri;
- case ARM::ADDrr: return ARM::ADDCCrr;
- case ARM::ADDrsi: return ARM::ADDCCrsi;
- case ARM::ADDrsr: return ARM::ADDCCrsr;
- case ARM::SUBri: return ARM::SUBCCri;
- case ARM::SUBrr: return ARM::SUBCCrr;
- case ARM::SUBrsi: return ARM::SUBCCrsi;
- case ARM::SUBrsr: return ARM::SUBCCrsr;
-
- // Thumb2 ADD/SUB
- case ARM::t2ADDri: return ARM::t2ADDCCri;
- case ARM::t2ADDri12: return ARM::t2ADDCCri12;
- case ARM::t2ADDrr: return ARM::t2ADDCCrr;
- case ARM::t2ADDrs: return ARM::t2ADDCCrs;
- case ARM::t2SUBri: return ARM::t2SUBCCri;
- case ARM::t2SUBri12: return ARM::t2SUBCCri12;
- case ARM::t2SUBrr: return ARM::t2SUBCCrr;
- case ARM::t2SUBrs: return ARM::t2SUBCCrs;
- }
+ bool DontMoveAcrossStores = true;
+ if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ 0, DontMoveAcrossStores))
+ return 0;
+ return MI;
}
bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI,
@@ -1665,19 +1643,18 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
"Unknown select instruction");
const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
- MachineInstr *DefMI = 0;
- unsigned Opc = canFoldIntoMOVCC(MI->getOperand(2).getReg(), DefMI, MRI);
- bool Invert = !Opc;
- if (!Opc)
- Opc = canFoldIntoMOVCC(MI->getOperand(1).getReg(), DefMI, MRI);
- if (!Opc)
+ MachineInstr *DefMI = canFoldIntoMOVCC(MI->getOperand(2).getReg(), MRI, this);
+ bool Invert = !DefMI;
+ if (!DefMI)
+ DefMI = canFoldIntoMOVCC(MI->getOperand(1).getReg(), MRI, this);
+ if (!DefMI)
return 0;
// Create a new predicated version of DefMI.
// Rfalse is the first use.
MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
- get(Opc), MI->getOperand(0).getReg())
- .addOperand(MI->getOperand(Invert ? 2 : 1));
+ DefMI->getDesc(),
+ MI->getOperand(0).getReg());
// Copy all the DefMI operands, excluding its (null) predicate.
const MCInstrDesc &DefDesc = DefMI->getDesc();
@@ -1696,6 +1673,15 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
if (NewMI->hasOptionalDef())
AddDefaultCC(NewMI);
+ // The output register value when the predicate is false is an implicit
+ // register operand tied to the first def.
+ // The tie makes the register allocator ensure the FalseReg is allocated the
+ // same register as operand 0.
+ MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1);
+ FalseReg.setImplicit();
+ NewMI->addOperand(FalseReg);
+ NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
+
// The caller will erase MI, but not DefMI.
DefMI->eraseFromParent();
return NewMI;
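Semantically, optimizeSelect now folds a single-use ALU result into the select by predicating it. A hedged scalar model of the result (plain C++, not the backend API; an add stands in for whichever predicable opcode gets folded):

#include <cstdint>

// dst = cond ? (a + b) : fallback becomes one predicated add whose result
// defaults to fallback when the predicate is false. That is why the new
// instruction ties the implicit fallback operand to its def: both must be
// allocated the same physical register.
uint32_t foldedSelect(bool cond, uint32_t fallback, uint32_t a, uint32_t b) {
  uint32_t dst = fallback;   // value kept when the predicate fails
  if (cond)
    dst = a + b;             // the folded, now-predicated operation
  return dst;
}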
@@ -2042,13 +2028,14 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
// Masked compares sometimes use the same register as the corresponding 'and'.
if (CmpMask != ~0) {
- if (!isSuitableForMask(MI, SrcReg, CmpMask, false)) {
+ if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(MI)) {
MI = 0;
for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg),
UE = MRI->use_end(); UI != UE; ++UI) {
if (UI->getParent() != CmpInstr->getParent()) continue;
MachineInstr *PotentialAND = &*UI;
- if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true))
+ if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
+ isPredicated(PotentialAND))
continue;
MI = PotentialAND;
break;
@@ -2114,6 +2101,10 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
// The single candidate is called MI.
if (!MI) MI = Sub;
+ // We can't use a predicated instruction - it doesn't always write the flags.
+ if (isPredicated(MI))
+ return false;
+
switch (MI->getOpcode()) {
default: break;
case ARM::RSBrr:
@@ -2220,6 +2211,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
// Toggle the optional operand to CPSR.
MI->getOperand(5).setReg(ARM::CPSR);
MI->getOperand(5).setIsDef(true);
+ assert(!isPredicated(MI) && "Can't use flags from predicated instruction");
CmpInstr->eraseFromParent();
// Modify the condition code of operands in OperandsToUpdate.
@@ -3366,7 +3358,8 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
// converted.
if (Subtarget.isCortexA9() && !isPredicated(MI) &&
(MI->getOpcode() == ARM::VMOVRS ||
- MI->getOpcode() == ARM::VMOVSR))
+ MI->getOpcode() == ARM::VMOVSR ||
+ MI->getOpcode() == ARM::VMOVS))
return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
// No other instructions can be swizzled, so just determine their domain.
@@ -3386,13 +3379,28 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
return std::make_pair(ExeGeneric, 0);
}
+static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
+ unsigned SReg, unsigned &Lane) {
+ unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
+ Lane = 0;
+
+ if (DReg != ARM::NoRegister)
+ return DReg;
+
+ Lane = 1;
+ DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
+
+ assert(DReg && "S-register with no D super-register?");
+ return DReg;
+}
+
+
void
ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
unsigned DstReg, SrcReg, DReg;
unsigned Lane;
MachineInstrBuilder MIB(MI);
const TargetRegisterInfo *TRI = &getRegisterInfo();
- bool isKill;
switch (MI->getOpcode()) {
default:
llvm_unreachable("cannot handle opcode!");
@@ -3403,78 +3411,175 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
// Zap the predicate operands.
assert(!isPredicated(MI) && "Cannot predicate a VORRd");
- MI->RemoveOperand(3);
- MI->RemoveOperand(2);
- // Change to a VORRd which requires two identical use operands.
- MI->setDesc(get(ARM::VORRd));
+ // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
- // Add the extra source operand and new predicates.
- // This will go before any implicit ops.
- AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1)));
+ for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
+ MI->RemoveOperand(i-1);
+
+ // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
+ MI->setDesc(get(ARM::VORRd));
+ AddDefaultPred(MIB.addReg(DstReg, RegState::Define)
+ .addReg(SrcReg)
+ .addReg(SrcReg));
break;
case ARM::VMOVRS:
if (Domain != ExeNEON)
break;
assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
+ // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
DstReg = MI->getOperand(0).getReg();
SrcReg = MI->getOperand(1).getReg();
- DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_0, &ARM::DPRRegClass);
- Lane = 0;
- if (DReg == ARM::NoRegister) {
- DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_1, &ARM::DPRRegClass);
- Lane = 1;
- assert(DReg && "S-register with no D super-register?");
- }
+ for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
+ MI->RemoveOperand(i-1);
- MI->RemoveOperand(3);
- MI->RemoveOperand(2);
- MI->RemoveOperand(1);
+ DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
+ // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
+ // Note that DSrc has been widened and the other lane may be undef, which
+ // contaminates the entire register.
MI->setDesc(get(ARM::VGETLNi32));
- MIB.addReg(DReg);
- MIB.addImm(Lane);
+ AddDefaultPred(MIB.addReg(DstReg, RegState::Define)
+ .addReg(DReg, RegState::Undef)
+ .addImm(Lane));
- MIB->getOperand(1).setIsUndef();
+ // The old source should be an implicit use; otherwise we might think it
+ // was dead before this point.
MIB.addReg(SrcReg, RegState::Implicit);
-
- AddDefaultPred(MIB);
break;
case ARM::VMOVSR:
if (Domain != ExeNEON)
break;
assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
+ // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
DstReg = MI->getOperand(0).getReg();
SrcReg = MI->getOperand(1).getReg();
- DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_0, &ARM::DPRRegClass);
- Lane = 0;
- if (DReg == ARM::NoRegister) {
- DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_1, &ARM::DPRRegClass);
- Lane = 1;
- assert(DReg && "S-register with no D super-register?");
- }
- isKill = MI->getOperand(0).isKill();
- MI->RemoveOperand(3);
- MI->RemoveOperand(2);
- MI->RemoveOperand(1);
- MI->RemoveOperand(0);
+ DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
+ // If we insert both a novel <def> and an <undef> on the DReg, we break
+ // any existing dependency chain on the unused lane. Either already being
+ // present means this instruction is in that chain anyway, so we can make
+ // the transformation.
+ if (!MI->definesRegister(DReg, TRI) && !MI->readsRegister(DReg, TRI))
+ break;
+
+ for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
+ MI->RemoveOperand(i-1);
+
+ // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
+ // Again DDst may be undefined at the beginning of this instruction.
MI->setDesc(get(ARM::VSETLNi32));
- MIB.addReg(DReg, RegState::Define);
- MIB.addReg(DReg, RegState::Undef);
- MIB.addReg(SrcReg);
- MIB.addImm(Lane);
+ MIB.addReg(DReg, RegState::Define)
+ .addReg(DReg, getUndefRegState(!MI->readsRegister(DReg, TRI)))
+ .addReg(SrcReg)
+ .addImm(Lane);
+ AddDefaultPred(MIB);
- if (isKill)
- MIB->addRegisterKilled(DstReg, TRI, true);
- MIB->addRegisterDefined(DstReg, TRI);
+ // The narrower destination must be marked as set to keep previous chains
+ // in place.
+ MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
+ break;
+ case ARM::VMOVS: {
+ if (Domain != ExeNEON)
+ break;
+ // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+
+ unsigned DstLane = 0, SrcLane = 0, DDst, DSrc;
+ DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
+ DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
+
+ // If we insert both a novel <def> and an <undef> on the DReg, we break
+ // any existing dependency chain on the unused lane. Either already being
+ // present means this instruction is in that chain anyway, so we can make
+ // the transformation.
+ if (!MI->definesRegister(DDst, TRI) && !MI->readsRegister(DDst, TRI))
+ break;
+
+ for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
+ MI->RemoveOperand(i-1);
+
+ if (DSrc == DDst) {
+ // Destination can be:
+ // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
+ MI->setDesc(get(ARM::VDUPLN32d));
+ MIB.addReg(DDst, RegState::Define)
+ .addReg(DDst, getUndefRegState(!MI->readsRegister(DDst, TRI)))
+ .addImm(SrcLane);
+ AddDefaultPred(MIB);
+
+ // Neither the source nor the destination is naturally represented any
+ // more, so add them in manually.
+ MIB.addReg(DstReg, RegState::Implicit | RegState::Define);
+ MIB.addReg(SrcReg, RegState::Implicit);
+ break;
+ }
+
+ // In general there's no single instruction that can perform an S <-> S
+ // move in NEON space, but a pair of VEXT instructions *can* do the
+ // job. It turns out that the VEXTs needed will only use DSrc once, with
+ // the position based purely on the combination of lane-0 and lane-1
+ // involved. For example
+ // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1
+ // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1
+ // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1
+ // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1
+ //
+ // Pattern of the MachineInstrs is:
+ // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
+ MachineInstrBuilder NewMIB;
+ NewMIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ get(ARM::VEXTd32), DDst);
+
+ // On the first instruction, both DSrc and DDst may be <undef> if present,
+ // specifically when the original instruction didn't have them as an
+ // <imp-use>.
+ unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
+ bool CurUndef = !MI->readsRegister(CurReg, TRI);
+ NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
+
+ CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
+ CurUndef = !MI->readsRegister(CurReg, TRI);
+ NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
+
+ NewMIB.addImm(1);
+ AddDefaultPred(NewMIB);
+
+ if (SrcLane == DstLane)
+ NewMIB.addReg(SrcReg, RegState::Implicit);
+
+ MI->setDesc(get(ARM::VEXTd32));
+ MIB.addReg(DDst, RegState::Define);
+
+ // On the second instruction, DDst has definitely been defined above, so
+ // it is not <undef>. DSrc, if present, can be <undef> as above.
+ CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
+ CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI);
+ MIB.addReg(CurReg, getUndefRegState(CurUndef));
+
+ CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
+ CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI);
+ MIB.addReg(CurReg, getUndefRegState(CurUndef));
+
+ MIB.addImm(1);
AddDefaultPred(MIB);
+
+ if (SrcLane != DstLane)
+ MIB.addReg(SrcReg, RegState::Implicit);
+
+ // As before, the original destination is no longer represented, add it
+ // implicitly.
+ MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
break;
+ }
}
}
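The VEXT pairing used for the VMOVS conversion above can be checked with a small value-level model (a sketch under the assumption that lanes are plain 32-bit words; not the LLVM API):

#include <cstdint>

struct DReg { uint32_t lane[2]; };   // one D register = two S lanes

// vext.32 Dd, Dn, Dm, #1: lane 1 of Dn followed by lane 0 of Dm.
DReg vext32_1(DReg n, DReg m) { return {{n.lane[1], m.lane[0]}}; }

// First example from the comment table: vmov s0, s2, where d0 = {s0, s1}
// and d1 = {s2, s3}.
void demo() {
  DReg d0 = {{10, 11}}, d1 = {{12, 13}};  // s0..s3 = 10, 11, 12, 13
  d0 = vext32_1(d0, d1);                  // {11, 12}
  d0 = vext32_1(d0, d0);                  // {12, 11}: s0 = old s2, s1 intact
}

As the comment says, DSrc appears in exactly one operand slot of the pair, chosen purely by which lanes are involved.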
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 132b81f..89cbc84 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -410,7 +410,7 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
do {
DEBUG(errs() << "JITTing function '"
- << MF.getFunction()->getName() << "'\n");
+ << MF.getName() << "'\n");
MCE.startFunction(MF);
for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
MBB != E; ++MBB) {
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index a953985..dd05f0c 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -1388,10 +1388,9 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
// If the original WaterList entry was "new water" on this iteration,
// propagate that to the new island. This is just keeping NewWaterList
// updated to match the WaterList, which will be updated below.
- if (NewWaterList.count(WaterBB)) {
- NewWaterList.erase(WaterBB);
+ if (NewWaterList.erase(WaterBB))
NewWaterList.insert(NewIsland);
- }
+
// The new CPE goes before the following block (NewMBB).
NewMBB = llvm::next(MachineFunction::iterator(WaterBB));
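The simplification above leans on the return value of erase(). A minimal illustration, with std::set standing in for the pass's set type (an assumption for the sketch; only the count-then-erase collapse matters):

#include <set>

// erase(key) returns the number of elements removed, so the separate
// count() + erase() pair collapses into one call that also tests membership.
void moveWaterMark(std::set<int> &NewWater, int OldBB, int NewIsland) {
  if (NewWater.erase(OldBB))     // non-zero iff OldBB was present
    NewWater.insert(NewIsland);
}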
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 15bb32e..8ed6b75 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -1208,6 +1208,57 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
ExpandLaneOp(MBBI);
return true;
+ case ARM::VSETLNi8Q:
+ case ARM::VSETLNi16Q: {
+ // Expand VSETLNs acting on a Q register to equivalent VSETLNs acting
+ // on the respective D register.
+
+ unsigned QReg = MI.getOperand(1).getReg();
+ unsigned QLane = MI.getOperand(3).getImm();
+
+ unsigned NewOpcode, DLane, DSubReg;
+ switch (Opcode) {
+ default: llvm_unreachable("Invalid opcode!");
+ case ARM::VSETLNi8Q:
+ // 8 possible 8-bit lanes per DPR:
+ NewOpcode = ARM::VSETLNi8;
+ DLane = QLane % 8;
+ DSubReg = (QLane / 8) ? ARM::dsub_1 : ARM::dsub_0;
+ break;
+ case ARM::VSETLNi16Q:
+ // 4 possible 16-bit lanes per DPR.
+ NewOpcode = ARM::VSETLNi16;
+ DLane = QLane % 4;
+ DSubReg = (QLane / 4) ? ARM::dsub_1 : ARM::dsub_0;
+ break;
+ }
+
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpcode));
+
+ unsigned DReg = TRI->getSubReg(QReg, DSubReg);
+
+ MIB.addReg(DReg, RegState::Define); // Output DPR
+ MIB.addReg(DReg); // Input DPR
+ MIB.addOperand(MI.getOperand(2)); // Input GPR
+ MIB.addImm(DLane); // Lane
+
+ // Add the predicate operands.
+ MIB.addOperand(MI.getOperand(4));
+ MIB.addOperand(MI.getOperand(5));
+
+ if (MI.getOperand(1).isKill()) // Add an implicit kill for the Q register.
+ MIB->addRegisterKilled(QReg, TRI, true);
+ // And an implicit def of the output register (which should always be the
+ // same as the input register).
+ MIB->addRegisterDefined(QReg, TRI);
+
+ TransferImpOps(MI, MIB, MIB);
+
+ MI.eraseFromParent();
+ return true;
+ }
+
case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true;
case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true;
case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true;
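The lane arithmetic in the VSETLN expansion above follows from a Q register being a pair of D registers. A hedged sketch of the mapping (names are illustrative, not the backend API):

// A 64-bit D register holds 64/ElemBits lanes, so a Q-register lane index
// splits into "which D half" and "which lane inside it".
struct DLane { bool UpperD; unsigned Lane; };

DLane mapQLaneToD(unsigned QLane, unsigned ElemBits) {
  unsigned LanesPerD = 64 / ElemBits;     // 8 for i8, 4 for i16
  return { QLane / LanesPerD != 0,        // dsub_1 when in the upper half
           QLane % LanesPerD };           // lane within that D register
}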
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 5a5ca1b..045d904 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -617,10 +617,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
if (VT != MVT::i32) return 0;
Reloc::Model RelocM = TM.getRelocationModel();
-
- // TODO: Need more magic for ARM PIC.
- if (!isThumb2 && (RelocM == Reloc::PIC_)) return 0;
-
+ bool IsIndirect = Subtarget->GVIsIndirectSymbol(GV, RelocM);
unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
// Use movw+movt when possible, it avoids constant pool entries.
@@ -668,17 +665,30 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
.addConstantPoolIndex(Idx);
if (RelocM == Reloc::PIC_)
MIB.addImm(Id);
+ AddOptionalDefs(MIB);
} else {
// The extra immediate is for addrmode2.
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp),
DestReg)
.addConstantPoolIndex(Idx)
.addImm(0);
+ AddOptionalDefs(MIB);
+
+ if (RelocM == Reloc::PIC_) {
+ unsigned Opc = IsIndirect ? ARM::PICLDR : ARM::PICADD;
+ unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
+
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(Opc), NewDestReg)
+ .addReg(DestReg)
+ .addImm(Id);
+ AddOptionalDefs(MIB);
+ return NewDestReg;
+ }
}
- AddOptionalDefs(MIB);
}
- if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) {
+ if (IsIndirect) {
MachineInstrBuilder MIB;
unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
if (isThumb2)
@@ -2212,25 +2222,17 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
unsigned CallOpc = ARMSelectCallOp(EnableARMLongCalls);
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
DL, TII.get(CallOpc));
- if (isThumb2) {
- // Explicitly adding the predicate here.
+ // BL / BLX don't take a predicate, but tBL / tBLX do.
+ if (isThumb2)
AddDefaultPred(MIB);
- if (EnableARMLongCalls)
- MIB.addReg(CalleeReg);
- else
- MIB.addExternalSymbol(TLI.getLibcallName(Call));
- } else {
- if (EnableARMLongCalls)
- MIB.addReg(CalleeReg);
- else
- MIB.addExternalSymbol(TLI.getLibcallName(Call));
+ if (EnableARMLongCalls)
+ MIB.addReg(CalleeReg);
+ else
+ MIB.addExternalSymbol(TLI.getLibcallName(Call));
- // Explicitly adding the predicate here.
- AddDefaultPred(MIB);
- }
// Add implicit physical register uses to the call.
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
- MIB.addReg(RegArgs[i]);
+ MIB.addReg(RegArgs[i], RegState::Implicit);
// Add a register mask with the call-preserved registers.
// Proper defs for return values will be added by setPhysRegsDeadExcept().
@@ -2358,30 +2360,20 @@ bool ARMFastISel::SelectCall(const Instruction *I,
unsigned CallOpc = ARMSelectCallOp(UseReg);
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
DL, TII.get(CallOpc));
- if(isThumb2) {
- // Explicitly adding the predicate here.
- AddDefaultPred(MIB);
- if (UseReg)
- MIB.addReg(CalleeReg);
- else if (!IntrMemName)
- MIB.addGlobalAddress(GV, 0, 0);
- else
- MIB.addExternalSymbol(IntrMemName, 0);
- } else {
- if (UseReg)
- MIB.addReg(CalleeReg);
- else if (!IntrMemName)
- MIB.addGlobalAddress(GV, 0, 0);
- else
- MIB.addExternalSymbol(IntrMemName, 0);
- // Explicitly adding the predicate here.
+ // ARM calls don't take a predicate, but tBL / tBLX do.
+ if (isThumb2)
AddDefaultPred(MIB);
- }
+ if (UseReg)
+ MIB.addReg(CalleeReg);
+ else if (!IntrMemName)
+ MIB.addGlobalAddress(GV, 0, 0);
+ else
+ MIB.addExternalSymbol(IntrMemName, 0);
// Add implicit physical register uses to the call.
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
- MIB.addReg(RegArgs[i]);
+ MIB.addReg(RegArgs[i], RegState::Implicit);
// Add a register mask with the call-preserved registers.
// Proper defs for return values will be added by setPhysRegsDeadExcept().
@@ -2650,7 +2642,7 @@ bool ARMFastISel::SelectShift(const Instruction *I,
unsigned Reg1 = getRegForValue(Src1Value);
if (Reg1 == 0) return false;
- unsigned Reg2;
+ unsigned Reg2 = 0;
if (Opc == ARM::MOVsr) {
Reg2 = getRegForValue(Src2Value);
if (Reg2 == 0) return false;
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index c6f9d15..1406620 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -2637,6 +2637,38 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
dl, MVT::i32, MVT::i32, Ops, 5);
}
}
+ case ARMISD::UMLAL:{
+ if (Subtarget->isThumb()) {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32)};
+ return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops, 6);
+ } else {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
+ ARM::UMLAL : ARM::UMLALv5,
+ dl, MVT::i32, MVT::i32, Ops, 7);
+ }
+ }
+ case ARMISD::SMLAL:{
+ if (Subtarget->isThumb()) {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32)};
+ return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops, 6);
+ } else {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
+ ARM::SMLAL : ARM::SMLALv5,
+ dl, MVT::i32, MVT::i32, Ops, 7);
+ }
+ }
case ISD::LOAD: {
SDNode *ResNode = 0;
if (Subtarget->isThumb() && Subtarget->hasThumb2())
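For reference, what the new ARMISD::UMLAL/SMLAL nodes compute, as a plain C++ model (a sketch of the architectural semantics, not compiler code); the read-modify-write of RdLo/RdHi is also why the instruction definitions later tie the accumulator inputs to the outputs:

#include <cstdint>

// UMLAL RdLo, RdHi, Rn, Rm: 64-bit accumulator in RdHi:RdLo plus the
// unsigned 64-bit product of two 32-bit operands. This is exactly the
// UMUL_LOHI + ADDC + ADDE chain the DAG combine below matches.
void umlal(uint32_t &RdLo, uint32_t &RdHi, uint32_t Rn, uint32_t Rm) {
  uint64_t Acc = ((uint64_t)RdHi << 32) | RdLo;
  Acc += (uint64_t)Rn * (uint64_t)Rm;
  RdLo = (uint32_t)Acc;
  RdHi = (uint32_t)(Acc >> 32);
}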
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index df4039b..e51315e 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -514,6 +514,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);
// Neon does not support some operations on v1i64 and v2i64 types.
setOperationAction(ISD::MUL, MVT::v1i64, Expand);
@@ -566,6 +567,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
}
}
+ // ARM and Thumb2 support UMLAL/SMLAL.
+ if (!Subtarget->isThumb1Only())
+ setTargetDAGCombine(ISD::ADDC);
+
computeRegisterProperties();
// ARM does not have f32 extending load.
@@ -791,12 +797,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::MUL);
-
- if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON()) {
- setTargetDAGCombine(ISD::AND);
- setTargetDAGCombine(ISD::OR);
- setTargetDAGCombine(ISD::XOR);
- }
+ setTargetDAGCombine(ISD::AND);
+ setTargetDAGCombine(ISD::OR);
+ setTargetDAGCombine(ISD::XOR);
if (Subtarget->hasV6Ops())
setTargetDAGCombine(ISD::SRL);
@@ -981,6 +984,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VTBL2: return "ARMISD::VTBL2";
case ARMISD::VMULLs: return "ARMISD::VMULLs";
case ARMISD::VMULLu: return "ARMISD::VMULLu";
+ case ARMISD::UMLAL: return "ARMISD::UMLAL";
+ case ARMISD::SMLAL: return "ARMISD::SMLAL";
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
case ARMISD::FMAX: return "ARMISD::FMAX";
case ARMISD::FMIN: return "ARMISD::FMIN";
@@ -4154,10 +4159,21 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
}
// Scan through the operands to see if only one value is used.
+ //
+ // As an optimisation, even if more than one value is used it may be more
+ // profitable to splat with one value and then change some lanes.
+ //
+ // Heuristically we decide to do this if the vector has a "dominant" value,
+ // defined as splatted to more than half of the lanes.
unsigned NumElts = VT.getVectorNumElements();
bool isOnlyLowElement = true;
bool usesOnlyOneValue = true;
+ bool hasDominantValue = false;
bool isConstant = true;
+
+ // Map of the number of times a particular SDValue appears in the
+ // element list.
+ DenseMap<SDValue, unsigned> ValueCounts;
SDValue Value;
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
@@ -4168,13 +4184,21 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
isConstant = false;
- if (!Value.getNode())
+ ValueCounts.insert(std::make_pair(V, 0));
+ unsigned &Count = ValueCounts[V];
+
+ // Is this value dominant? (takes up more than half of the lanes)
+ if (++Count > (NumElts / 2)) {
+ hasDominantValue = true;
Value = V;
- else if (V != Value)
- usesOnlyOneValue = false;
+ }
}
+ if (ValueCounts.size() != 1)
+ usesOnlyOneValue = false;
+ if (!Value.getNode() && ValueCounts.size() > 0)
+ Value = ValueCounts.begin()->first;
- if (!Value.getNode())
+ if (ValueCounts.size() == 0)
return DAG.getUNDEF(VT);
if (isOnlyLowElement)
@@ -4184,9 +4208,34 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// Use VDUP for non-constant splats. For f32 constant splats, reduce to
// i32 and try again.
- if (usesOnlyOneValue && EltSize <= 32) {
- if (!isConstant)
- return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+ if (hasDominantValue && EltSize <= 32) {
+ if (!isConstant) {
+ SDValue N;
+
+ // If we are VDUPing a value that comes directly from a vector, that will
+ // cause an unnecessary move to and from a GPR, where instead we could
+ // just use VDUPLANE.
+ if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT)
+ N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
+ Value->getOperand(0), Value->getOperand(1));
+ else
+ N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+
+ if (!usesOnlyOneValue) {
+ // The dominant value was splatted as 'N', but we now have to insert
+ // all differing elements.
+ for (unsigned I = 0; I < NumElts; ++I) {
+ if (Op.getOperand(I) == Value)
+ continue;
+ SmallVector<SDValue, 3> Ops;
+ Ops.push_back(N);
+ Ops.push_back(Op.getOperand(I));
+ Ops.push_back(DAG.getConstant(I, MVT::i32));
+ N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, &Ops[0], 3);
+ }
+ }
+ return N;
+ }
if (VT.getVectorElementType().isFloatingPoint()) {
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumElts; ++i)
@@ -4198,9 +4247,11 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
if (Val.getNode())
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
}
- SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
- if (Val.getNode())
- return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
+ if (usesOnlyOneValue) {
+ SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
+ if (isConstant && Val.getNode())
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
+ }
}
// If all elements are constants and the case above didn't get hit, fall back
@@ -5418,7 +5469,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
const TargetRegisterClass *TRC = isThumb2 ?
- (const TargetRegisterClass*)&ARM::tGPRRegClass :
+ (const TargetRegisterClass*)&ARM::rGPRRegClass :
(const TargetRegisterClass*)&ARM::GPRRegClass;
unsigned scratch = MRI.createVirtualRegister(TRC);
unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC);
@@ -5529,7 +5580,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
const TargetRegisterClass *TRC = isThumb2 ?
- (const TargetRegisterClass*)&ARM::tGPRRegClass :
+ (const TargetRegisterClass*)&ARM::rGPRRegClass :
(const TargetRegisterClass*)&ARM::GPRRegClass;
unsigned scratch = MRI.createVirtualRegister(TRC);
unsigned scratch2 = MRI.createVirtualRegister(TRC);
@@ -7193,6 +7244,154 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, tmp);
}
+static SDValue findMUL_LOHI(SDValue V) {
+ if (V->getOpcode() == ISD::UMUL_LOHI ||
+ V->getOpcode() == ISD::SMUL_LOHI)
+ return V;
+ return SDValue();
+}
+
+static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+
+ if (Subtarget->isThumb1Only()) return SDValue();
+
+ // Only perform the checks after legalize when the pattern is available.
+ if (DCI.isBeforeLegalize()) return SDValue();
+
+ // Look for multiply add opportunities.
+ // The pattern is an ISD::UMUL_LOHI followed by two add nodes, where
+ // each add node consumes a value from ISD::UMUL_LOHI and there is
+ // a glue link from the first add to the second add.
+ // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by
+ // a S/UMLAL instruction.
+ //          loAdd   UMUL_LOHI
+ //            \    / :lo    \ :hi
+ //             \  /          \          [no multiline comment]
+ //              ADDC         |  hiAdd
+ //                 \ :glue  /  /
+ //                  \      /  /
+ //                    ADDE
+ //
+ assert(AddcNode->getOpcode() == ISD::ADDC && "Expect an ADDC");
+ SDValue AddcOp0 = AddcNode->getOperand(0);
+ SDValue AddcOp1 = AddcNode->getOperand(1);
+
+ // Check if the two operands are from the same mul_lohi node.
+ if (AddcOp0.getNode() == AddcOp1.getNode())
+ return SDValue();
+
+ assert(AddcNode->getNumValues() == 2 &&
+ AddcNode->getValueType(0) == MVT::i32 &&
+ AddcNode->getValueType(1) == MVT::Glue &&
+ "Expect ADDC with two result values: i32, glue");
+
+ // Check that the ADDC adds the low result of the S/UMUL_LOHI.
+ if (AddcOp0->getOpcode() != ISD::UMUL_LOHI &&
+ AddcOp0->getOpcode() != ISD::SMUL_LOHI &&
+ AddcOp1->getOpcode() != ISD::UMUL_LOHI &&
+ AddcOp1->getOpcode() != ISD::SMUL_LOHI)
+ return SDValue();
+
+ // Look for the glued ADDE.
+ SDNode* AddeNode = AddcNode->getGluedUser();
+ if (AddeNode == NULL)
+ return SDValue();
+
+ // Make sure it is really an ADDE.
+ if (AddeNode->getOpcode() != ISD::ADDE)
+ return SDValue();
+
+ assert(AddeNode->getNumOperands() == 3 &&
+ AddeNode->getOperand(2).getValueType() == MVT::Glue &&
+ "ADDE node has the wrong inputs");
+
+ // Check for the triangle shape.
+ SDValue AddeOp0 = AddeNode->getOperand(0);
+ SDValue AddeOp1 = AddeNode->getOperand(1);
+
+ // Make sure that the ADDE operands are not coming from the same node.
+ if (AddeOp0.getNode() == AddeOp1.getNode())
+ return SDValue();
+
+ // Find the MUL_LOHI node walking up ADDE's operands.
+ bool IsLeftOperandMUL = false;
+ SDValue MULOp = findMUL_LOHI(AddeOp0);
+ if (MULOp == SDValue())
+ MULOp = findMUL_LOHI(AddeOp1);
+ else
+ IsLeftOperandMUL = true;
+ if (MULOp == SDValue())
+ return SDValue();
+
+ // Figure out the right opcode.
+ unsigned Opc = MULOp->getOpcode();
+ unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;
+
+ // Figure out the high and low input values to the MLAL node.
+ SDValue* HiMul = &MULOp;
+ SDValue* HiAdd = NULL;
+ SDValue* LoMul = NULL;
+ SDValue* LowAdd = NULL;
+
+ if (IsLeftOperandMUL)
+ HiAdd = &AddeOp1;
+ else
+ HiAdd = &AddeOp0;
+
+
+ if (AddcOp0->getOpcode() == Opc) {
+ LoMul = &AddcOp0;
+ LowAdd = &AddcOp1;
+ }
+ if (AddcOp1->getOpcode() == Opc) {
+ LoMul = &AddcOp1;
+ LowAdd = &AddcOp0;
+ }
+
+ if (LoMul == NULL)
+ return SDValue();
+
+ if (LoMul->getNode() != HiMul->getNode())
+ return SDValue();
+
+ // Create the merged node.
+ SelectionDAG &DAG = DCI.DAG;
+
+ // Build operand list.
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(LoMul->getOperand(0));
+ Ops.push_back(LoMul->getOperand(1));
+ Ops.push_back(*LowAdd);
+ Ops.push_back(*HiAdd);
+
+ SDValue MLALNode = DAG.getNode(FinalOpc, AddcNode->getDebugLoc(),
+ DAG.getVTList(MVT::i32, MVT::i32),
+ &Ops[0], Ops.size());
+
+ // Replace the ADD nodes' uses with the MLAL node's values.
+ SDValue HiMLALResult(MLALNode.getNode(), 1);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);
+
+ SDValue LoMLALResult(MLALNode.getNode(), 0);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
+
+ // Return original node to notify the driver to stop replacing.
+ SDValue resNode(AddcNode, 0);
+ return resNode;
+}
+
+/// PerformADDCCombine - Target-specific DAG combine transform from
+/// ISD::ADDC, ISD::ADDE, and ISD::[US]MUL_LOHI to ARMISD::[US]MLAL.
+static SDValue PerformADDCCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+
+ return AddCombineTo64bitMLAL(N, DCI, Subtarget);
+
+}
+
/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
/// operands N0 and N1. This is a helper for PerformADDCombine that is
/// called with the default operands, and if that fails, with commuted
@@ -8764,6 +8963,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
switch (N->getOpcode()) {
default: break;
+ case ISD::ADDC: return PerformADDCCombine(N, DCI, Subtarget);
case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
case ISD::SUB: return PerformSUBCombine(N, DCI);
case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
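The dominant-value test added to LowerBUILD_VECTOR above reduces to a counting pass. A hedged, container-level model (std::map instead of DenseMap, plain values instead of SDValues; illustrative only):

#include <map>
#include <utility>
#include <vector>

// Returns {true, V} for the first value V seen in more than half the lanes;
// splatting V and patching the differing lanes with inserts then beats a
// full element-by-element build.
template <typename T>
std::pair<bool, T> findDominant(const std::vector<T> &Lanes) {
  std::map<T, unsigned> Counts;
  for (const T &V : Lanes)
    if (++Counts[V] > Lanes.size() / 2)
      return {true, V};
  return {false, T{}};
}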
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 13b83de..2b8f382 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -173,6 +173,9 @@ namespace llvm {
VMULLs, // ...signed
VMULLu, // ...unsigned
+ UMLAL, // 64-bit Unsigned Multiply-Accumulate Long
+ SMLAL, // 64-bit Signed Multiply-Accumulate Long
+
// Operands of the standard BUILD_VECTOR node are not legalized, which
// is fine if BUILD_VECTORs are always lowered to shuffles or other
// operations, but for ARM some BUILD_VECTORs are legal as-is and their
@@ -257,6 +260,11 @@ namespace llvm {
virtual const char *getTargetNodeName(unsigned Opcode) const;
+ virtual bool isSelectSupported(SelectSupportKind Kind) const {
+ // ARM does not support scalar condition selects on vectors.
+ return (Kind != ScalarCondVectorVal);
+ }
+
/// getSetCCResultType - Return the value type to use for ISD::SETCC.
virtual EVT getSetCCResultType(EVT VT) const;
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 992aba5..e23989e 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -83,6 +83,13 @@ def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
SDTCisInt<0>,
SDTCisVT<1, i32>,
SDTCisVT<4, i32>]>;
+
+def SDT_ARM64bitmlal : SDTypeProfile<2,4, [ SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>,
+ SDTCisVT<4, i32>, SDTCisVT<5, i32> ] >;
+def ARMUmlal : SDNode<"ARMISD::UMLAL", SDT_ARM64bitmlal>;
+def ARMSmlal : SDNode<"ARMISD::SMLAL", SDT_ARM64bitmlal>;
+
// Node definitions.
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
def ARMWrapperDYN : SDNode<"ARMISD::WrapperDYN", SDTIntUnaryOp>;
@@ -90,9 +97,10 @@ def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>;
def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>;
def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart,
- [SDNPHasChain, SDNPOutGlue]>;
+ [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>;
def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeqEnd,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+ [SDNPHasChain, SDNPSideEffect,
+ SDNPOptInGlue, SDNPOutGlue]>;
def ARMcopystructbyval : SDNode<"ARMISD::COPY_STRUCT_BYVAL" ,
SDT_ARMStructByVal,
[SDNPHasChain, SDNPInGlue, SDNPOutGlue,
@@ -148,14 +156,16 @@ def ARMsube : SDNode<"ARMISD::SUBE", SDTBinaryArithWithFlagsInOut>;
def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>;
def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP",
- SDT_ARMEH_SJLJ_Setjmp, [SDNPHasChain]>;
+ SDT_ARMEH_SJLJ_Setjmp,
+ [SDNPHasChain, SDNPSideEffect]>;
def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP",
- SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain]>;
+ SDT_ARMEH_SJLJ_Longjmp,
+ [SDNPHasChain, SDNPSideEffect]>;
def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER,
- [SDNPHasChain]>;
+ [SDNPHasChain, SDNPSideEffect]>;
def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER,
- [SDNPHasChain]>;
+ [SDNPHasChain, SDNPSideEffect]>;
def ARMPreload : SDNode<"ARMISD::PRELOAD", SDT_ARMPREFETCH,
[SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
@@ -275,7 +285,7 @@ def imm16_31 : ImmLeaf<i32, [{
def so_imm_neg_asmoperand : AsmOperandClass { let Name = "ARMSOImmNeg"; }
def so_imm_neg : Operand<i32>, PatLeaf<(imm), [{
- int64_t Value = -(int)N->getZExtValue();
+ unsigned Value = -(unsigned)N->getZExtValue();
return Value && ARM_AM::getSOImmVal(Value) != -1;
}], imm_neg_XFORM> {
let ParserMatchClass = so_imm_neg_asmoperand;
@@ -1791,12 +1801,15 @@ def ADR : AI1<{0,?,?,0}, (outs GPR:$Rd), (ins adrlabel:$label),
let Inst{15-12} = Rd;
let Inst{11-0} = label{11-0};
}
+
+let hasSideEffects = 1 in {
def LEApcrel : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, pred:$p),
4, IIC_iALUi, []>;
def LEApcrelJT : ARMPseudoInst<(outs GPR:$Rd),
(ins i32imm:$label, nohash_imm:$id, pred:$p),
4, IIC_iALUi, []>;
+}
//===----------------------------------------------------------------------===//
// Control Flow Instructions.
@@ -3399,6 +3412,18 @@ class AsMul1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
let Inst{11-8} = Rm;
let Inst{3-0} = Rn;
}
+class AsMla1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AsMul1I<opcod, oops, iops, itin, opc, asm, pattern> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{19-16} = RdHi;
+ let Inst{15-12} = RdLo;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
// FIXME: The v5 pseudos are only necessary for the additional Constraint
// property. Remove them when it's possible to add those properties
@@ -3481,14 +3506,14 @@ def UMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
}
// Multiply + accumulate
-def SMLAL : AsMul1I64<0b0000111, (outs GPR:$RdLo, GPR:$RdHi),
- (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
+def SMLAL : AsMla1I64<0b0000111, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), IIC_iMAC64,
"smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- Requires<[IsARM, HasV6]>;
-def UMLAL : AsMul1I64<0b0000101, (outs GPR:$RdLo, GPR:$RdHi),
- (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>;
+def UMLAL : AsMla1I64<0b0000101, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), IIC_iMAC64,
"umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- Requires<[IsARM, HasV6]>;
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>;
def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi),
(ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
@@ -3504,17 +3529,22 @@ def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi),
let Inst{3-0} = Rn;
}
-let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
+let Constraints = "$RLo = $RdLo,$RHi = $RdHi" in {
def SMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
- (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, pred:$p, cc_out:$s),
4, IIC_iMAC64, [],
- (SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ (SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi,
+ pred:$p, cc_out:$s)>,
Requires<[IsARM, NoV6]>;
def UMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
- (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, pred:$p, cc_out:$s),
4, IIC_iMAC64, [],
- (UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ (UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi,
+ pred:$p, cc_out:$s)>,
Requires<[IsARM, NoV6]>;
+}
+
+let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
def UMAALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
(ins GPR:$Rn, GPR:$Rm, pred:$p),
4, IIC_iMAC64, [],
@@ -3986,48 +4016,6 @@ def MVNCCi : ARMPseudoInst<(outs GPR:$Rd),
[/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $Rd">;
-// Conditional instructions
-multiclass AsI1_bincc_irs<Instruction iri, Instruction irr, Instruction irsi,
- Instruction irsr,
- InstrItinClass iii, InstrItinClass iir,
- InstrItinClass iis> {
- def ri : ARMPseudoExpand<(outs GPR:$Rd),
- (ins GPR:$Rfalse, GPR:$Rn, so_imm:$imm,
- pred:$p, cc_out:$s),
- 4, iii, [],
- (iri GPR:$Rd, GPR:$Rn, so_imm:$imm, pred:$p, cc_out:$s)>,
- RegConstraint<"$Rfalse = $Rd">;
- def rr : ARMPseudoExpand<(outs GPR:$Rd),
- (ins GPR:$Rfalse, GPR:$Rn, GPR:$Rm,
- pred:$p, cc_out:$s),
- 4, iir, [],
- (irr GPR:$Rd, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
- RegConstraint<"$Rfalse = $Rd">;
- def rsi : ARMPseudoExpand<(outs GPR:$Rd),
- (ins GPR:$Rfalse, GPR:$Rn, so_reg_imm:$shift,
- pred:$p, cc_out:$s),
- 4, iis, [],
- (irsi GPR:$Rd, GPR:$Rn, so_reg_imm:$shift, pred:$p, cc_out:$s)>,
- RegConstraint<"$Rfalse = $Rd">;
- def rsr : ARMPseudoExpand<(outs GPRnopc:$Rd),
- (ins GPRnopc:$Rfalse, GPRnopc:$Rn, so_reg_reg:$shift,
- pred:$p, cc_out:$s),
- 4, iis, [],
- (irsr GPR:$Rd, GPR:$Rn, so_reg_reg:$shift, pred:$p, cc_out:$s)>,
- RegConstraint<"$Rfalse = $Rd">;
-}
-
-defm ANDCC : AsI1_bincc_irs<ANDri, ANDrr, ANDrsi, ANDrsr,
- IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
-defm ORRCC : AsI1_bincc_irs<ORRri, ORRrr, ORRrsi, ORRrsr,
- IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
-defm EORCC : AsI1_bincc_irs<EORri, EORrr, EORrsi, EORrsr,
- IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
-defm ADDCC : AsI1_bincc_irs<ADDri, ADDrr, ADDrsi, ADDrsr,
- IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
-defm SUBCC : AsI1_bincc_irs<SUBri, SUBrr, SUBrsi, SUBrsr,
- IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
-
} // neverHasSideEffects
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 048d340..8158a11 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -1980,7 +1980,7 @@ def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
NEONvgetlaneu, addrmode6> {
let Inst{7-6} = lane{1-0};
- let Inst{4} = Rn{5};
+ let Inst{4} = Rn{4};
}
def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
@@ -2023,7 +2023,7 @@ def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
NEONvgetlaneu, addrmode6> {
let Inst{7-6} = lane{1-0};
- let Inst{4} = Rn{5};
+ let Inst{4} = Rn{4};
}
def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
extractelt, addrmode6oneL32> {
@@ -5045,25 +5045,23 @@ def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
GPR:$R, imm:$lane))]> {
let Inst{21} = lane{0};
}
+
+def VSETLNi8Q : PseudoNeonI<(outs QPR:$V),
+ (ins QPR:$src1, GPR:$R, VectorIndex8:$lane),
+ IIC_VMOVISL, "",
+ [(set QPR:$V, (vector_insert (v16i8 QPR:$src1),
+ GPR:$R, imm:$lane))]>;
+def VSETLNi16Q : PseudoNeonI<(outs QPR:$V),
+ (ins QPR:$src1, GPR:$R, VectorIndex16:$lane),
+ IIC_VMOVISL, "",
+ [(set QPR:$V, (vector_insert (v8i16 QPR:$src1),
+ GPR:$R, imm:$lane))]>;
}
-def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
- (v16i8 (INSERT_SUBREG QPR:$src1,
- (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
- (DSubReg_i8_reg imm:$lane))),
- GPR:$src2, (SubReg_i8_lane imm:$lane))),
- (DSubReg_i8_reg imm:$lane)))>;
-def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
- (v8i16 (INSERT_SUBREG QPR:$src1,
- (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
- (DSubReg_i16_reg imm:$lane))),
- GPR:$src2, (SubReg_i16_lane imm:$lane))),
- (DSubReg_i16_reg imm:$lane)))>;
+
def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
- (v4i32 (INSERT_SUBREG QPR:$src1,
- (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
- (DSubReg_i32_reg imm:$lane))),
- GPR:$src2, (SubReg_i32_lane imm:$lane))),
- (DSubReg_i32_reg imm:$lane)))>;
+ (v4i32 (INSERT_SUBREG QPR:$src1,
+ GPR:$src2,
+ (SSubReg_f32_reg imm:$lane)))>;
def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
(INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
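The simplified v4i32 insert pattern above works because each 32-bit lane of a Q register aliases exactly one S register. A value-level sketch (plain arrays as stand-ins for the register file; not TableGen semantics):

#include <cstdint>

struct QReg { uint32_t s[4]; };  // Q0 aliases S0..S3, one S per 32-bit lane

// insertelt (v4i32 Q), GPR, lane: with 32-bit elements, no D-register
// extract/insert round trip is needed; writing the matching S sub-register
// (SSubReg_f32_reg in the pattern) updates the lane in place.
void insertLane32(QReg &Q, unsigned Lane, uint32_t V) {
  Q.s[Lane] = V;
}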
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 554f6d9..e171f8b 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -1200,6 +1200,7 @@ let neverHasSideEffects = 1, isReMaterializable = 1 in
def tLEApcrel : tPseudoInst<(outs tGPR:$Rd), (ins i32imm:$label, pred:$p),
2, IIC_iALUi, []>;
+let hasSideEffects = 1 in
def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd),
(ins i32imm:$label, nohash_imm:$id, pred:$p),
2, IIC_iALUi, []>;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 8ecf009..f1a6cce 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -523,6 +523,23 @@ class T2MulLong<bits<3> opc22_20, bits<4> opc7_4,
let Inst{7-4} = opc7_4;
let Inst{3-0} = Rm;
}
+class T2MlaLong<bits<3> opc22_20, bits<4> opc7_4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{31-23} = 0b111110111;
+ let Inst{22-20} = opc22_20;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = RdLo;
+ let Inst{11-8} = RdHi;
+ let Inst{7-4} = opc7_4;
+ let Inst{3-0} = Rm;
+}
/// T2I_bin_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
@@ -757,33 +774,6 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
let Inst{24} = 1;
let Inst{23-21} = op23_21;
}
-
- // Predicated versions.
- def CCri : t2PseudoExpand<(outs GPRnopc:$Rd),
- (ins GPRnopc:$Rfalse, GPRnopc:$Rn, t2_so_imm:$imm,
- pred:$p, cc_out:$s), 4, IIC_iALUi, [],
- (!cast<Instruction>(NAME#ri) GPRnopc:$Rd,
- GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>,
- RegConstraint<"$Rfalse = $Rd">;
- def CCri12 : t2PseudoExpand<(outs GPRnopc:$Rd),
- (ins GPRnopc:$Rfalse, GPR:$Rn, imm0_4095:$imm,
- pred:$p),
- 4, IIC_iALUi, [],
- (!cast<Instruction>(NAME#ri12) GPRnopc:$Rd,
- GPR:$Rn, imm0_4095:$imm, pred:$p)>,
- RegConstraint<"$Rfalse = $Rd">;
- def CCrr : t2PseudoExpand<(outs GPRnopc:$Rd),
- (ins GPRnopc:$Rfalse, GPRnopc:$Rn, rGPR:$Rm,
- pred:$p, cc_out:$s), 4, IIC_iALUr, [],
- (!cast<Instruction>(NAME#rr) GPRnopc:$Rd,
- GPRnopc:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>,
- RegConstraint<"$Rfalse = $Rd">;
- def CCrs : t2PseudoExpand<(outs GPRnopc:$Rd),
- (ins GPRnopc:$Rfalse, GPRnopc:$Rn, t2_so_reg:$Rm,
- pred:$p, cc_out:$s), 4, IIC_iALUsi, [],
- (!cast<Instruction>(NAME#rs) GPRnopc:$Rd,
- GPRnopc:$Rn, t2_so_reg:$Rm, pred:$p, cc_out:$s)>,
- RegConstraint<"$Rfalse = $Rd">;
}
/// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns
@@ -1200,6 +1190,7 @@ def t2ADR : T2PCOneRegImm<(outs rGPR:$Rd),
let neverHasSideEffects = 1, isReMaterializable = 1 in
def t2LEApcrel : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, pred:$p),
4, IIC_iALUi, []>;
+let hasSideEffects = 1 in
def t2LEApcrelJT : t2PseudoInst<(outs rGPR:$Rd),
(ins i32imm:$label, nohash_imm:$id, pred:$p),
4, IIC_iALUi,
@@ -2437,15 +2428,17 @@ def t2UMULL : T2MulLong<0b010, 0b0000,
} // isCommutable
// Multiply + accumulate
-def t2SMLAL : T2MulLong<0b100, 0b0000,
+def t2SMLAL : T2MlaLong<0b100, 0b0000,
(outs rGPR:$RdLo, rGPR:$RdHi),
- (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
- "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64,
+ "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">;
-def t2UMLAL : T2MulLong<0b110, 0b0000,
+def t2UMLAL : T2MlaLong<0b110, 0b0000,
(outs rGPR:$RdLo, rGPR:$RdHi),
- (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
- "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64,
+ "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">;
def t2UMAAL : T2MulLong<0b110, 0b0110,
(outs rGPR:$RdLo, rGPR:$RdHi),
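
The move from T2MulLong to T2MlaLong above models the fact that SMLAL and
UMLAL read the 64-bit accumulator they write: $RLo/$RHi are added as inputs
tied to $RdLo/$RdHi via RegConstraint, so the register allocator keeps the
incoming accumulator values live into the instruction. A rough C++ model of
the accumulate semantics (a sketch, not LLVM code):

    #include <cstdint>

    // {RdHi:RdLo} += Rn * Rm, a signed 32x32->64 multiply-accumulate.
    void smlal(uint32_t &RdLo, uint32_t &RdHi, uint32_t Rn, uint32_t Rm) {
      uint64_t acc = ((uint64_t)RdHi << 32) | RdLo;
      acc += (uint64_t)((int64_t)(int32_t)Rn * (int32_t)Rm);
      RdLo = (uint32_t)acc;
      RdHi = (uint32_t)(acc >> 32);
    }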
@@ -3049,37 +3042,6 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd),
RegConstraint<"$false = $Rd">;
} // isCodeGenOnly = 1
-multiclass T2I_bincc_irs<Instruction iri, Instruction irr, Instruction irs,
- InstrItinClass iii, InstrItinClass iir, InstrItinClass iis> {
- // shifted imm
- def ri : t2PseudoExpand<(outs rGPR:$Rd),
- (ins rGPR:$Rfalse, rGPR:$Rn, t2_so_imm:$imm,
- pred:$p, cc_out:$s),
- 4, iii, [],
- (iri rGPR:$Rd, rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>,
- RegConstraint<"$Rfalse = $Rd">;
- // register
- def rr : t2PseudoExpand<(outs rGPR:$Rd),
- (ins rGPR:$Rfalse, rGPR:$Rn, rGPR:$Rm,
- pred:$p, cc_out:$s),
- 4, iir, [],
- (irr rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>,
- RegConstraint<"$Rfalse = $Rd">;
- // shifted register
- def rs : t2PseudoExpand<(outs rGPR:$Rd),
- (ins rGPR:$Rfalse, rGPR:$Rn, t2_so_reg:$ShiftedRm,
- pred:$p, cc_out:$s),
- 4, iis, [],
- (irs rGPR:$Rd, rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>,
- RegConstraint<"$Rfalse = $Rd">;
-} // T2I_bincc_irs
-
-defm t2ANDCC : T2I_bincc_irs<t2ANDri, t2ANDrr, t2ANDrs,
- IIC_iBITi, IIC_iBITr, IIC_iBITsi>;
-defm t2ORRCC : T2I_bincc_irs<t2ORRri, t2ORRrr, t2ORRrs,
- IIC_iBITi, IIC_iBITr, IIC_iBITsi>;
-defm t2EORCC : T2I_bincc_irs<t2EORri, t2EORrr, t2EORrs,
- IIC_iBITi, IIC_iBITr, IIC_iBITsi>;
} // neverHasSideEffects
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
index a812e21..2dc49a9 100644
--- a/lib/Target/ARM/ARMJITInfo.cpp
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -169,7 +169,7 @@ void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn,
intptr_t LazyPtr = getIndirectSymAddr(Fn);
if (!LazyPtr) {
// In PIC mode, the function stub is loading a lazy-ptr.
- LazyPtr= (intptr_t)emitGlobalValueIndirectSym((GlobalValue*)F, Fn, JCE);
+ LazyPtr= (intptr_t)emitGlobalValueIndirectSym((const GlobalValue*)F, Fn, JCE);
DEBUG(if (F)
errs() << "JIT: Indirect symbol emitted at [" << LazyPtr
<< "] for GV '" << F->getName() << "'\n";
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 3a5957b..e1e2f6e 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -181,49 +181,44 @@ class ARMAsmParser : public MCTargetAsmParser {
OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index);
// Asm Match Converter Methods
- bool cvtT2LdrdPre(MCInst &Inst, unsigned Opcode,
- const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtT2StrdPre(MCInst &Inst, unsigned Opcode,
- const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode,
+ void cvtT2LdrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &);
+ void cvtT2StrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &);
+ void cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode,
+ void cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+ void cvtLdWriteBackRegAddrMode2(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode,
+ void cvtLdWriteBackRegAddrModeImm12(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtStWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode,
+ void cvtStWriteBackRegAddrModeImm12(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+ void cvtStWriteBackRegAddrMode2(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+ void cvtStWriteBackRegAddrMode3(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtLdExtTWriteBackImm(MCInst &Inst, unsigned Opcode,
+ void cvtLdExtTWriteBackImm(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtLdExtTWriteBackReg(MCInst &Inst, unsigned Opcode,
+ void cvtLdExtTWriteBackReg(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtStExtTWriteBackImm(MCInst &Inst, unsigned Opcode,
+ void cvtStExtTWriteBackImm(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtStExtTWriteBackReg(MCInst &Inst, unsigned Opcode,
+ void cvtStExtTWriteBackReg(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtLdrdPre(MCInst &Inst, unsigned Opcode,
- const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtStrdPre(MCInst &Inst, unsigned Opcode,
- const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+ void cvtLdrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &);
+ void cvtStrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &);
+ void cvtLdWriteBackRegAddrMode3(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtThumbMultiply(MCInst &Inst, unsigned Opcode,
+ void cvtThumbMultiply(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtVLDwbFixed(MCInst &Inst, unsigned Opcode,
+ void cvtVLDwbFixed(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtVLDwbRegister(MCInst &Inst, unsigned Opcode,
+ void cvtVLDwbRegister(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtVSTwbFixed(MCInst &Inst, unsigned Opcode,
+ void cvtVSTwbFixed(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &);
- bool cvtVSTwbRegister(MCInst &Inst, unsigned Opcode,
+ void cvtVSTwbRegister(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &);
-
bool validateInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
bool processInstruction(MCInst &Inst,
@@ -267,6 +262,12 @@ public:
bool MatchAndEmitInstruction(SMLoc IDLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCStreamer &Out);
+
+ unsigned getMCInstOperandNum(unsigned Kind, MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ unsigned OperandNum, unsigned &NumMCOperands) {
+ return getMCInstOperandNumImpl(Kind, Inst, Operands, OperandNum, NumMCOperands);
+ }
};
} // end anonymous namespace
@@ -3880,8 +3881,8 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
/// cvtT2LdrdPre - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't handle properly tied operands
/// when they refer multiple MIOperands inside a single one.
-bool ARMAsmParser::
-cvtT2LdrdPre(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtT2LdrdPre(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Rt, Rt2
((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
@@ -3892,14 +3893,13 @@ cvtT2LdrdPre(MCInst &Inst, unsigned Opcode,
((ARMOperand*)Operands[4])->addMemImm8s4OffsetOperands(Inst, 2);
// pred
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
/// cvtT2StrdPre - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't handle properly tied operands
/// when they refer multiple MIOperands inside a single one.
-bool ARMAsmParser::
-cvtT2StrdPre(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtT2StrdPre(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Create a writeback register dummy placeholder.
Inst.addOperand(MCOperand::CreateReg(0));
@@ -3910,14 +3910,13 @@ cvtT2StrdPre(MCInst &Inst, unsigned Opcode,
((ARMOperand*)Operands[4])->addMemImm8s4OffsetOperands(Inst, 2);
// pred
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
/// cvtLdWriteBackRegT2AddrModeImm8 - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't handle properly tied operands
/// when they refer multiple MIOperands inside a single one.
-bool ARMAsmParser::
-cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
@@ -3926,28 +3925,26 @@ cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode,
((ARMOperand*)Operands[3])->addMemImm8OffsetOperands(Inst, 2);
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
/// cvtStWriteBackRegT2AddrModeImm8 - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't handle properly tied operands
/// when they refer multiple MIOperands inside a single one.
-bool ARMAsmParser::
-cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Create a writeback register dummy placeholder.
Inst.addOperand(MCOperand::CreateImm(0));
((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
((ARMOperand*)Operands[3])->addMemImm8OffsetOperands(Inst, 2);
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
/// cvtLdWriteBackRegAddrMode2 - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't handle properly tied operands
/// when they refer multiple MIOperands inside a single one.
-bool ARMAsmParser::
-cvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtLdWriteBackRegAddrMode2(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
@@ -3956,14 +3953,13 @@ cvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
((ARMOperand*)Operands[3])->addAddrMode2Operands(Inst, 3);
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
/// cvtLdWriteBackRegAddrModeImm12 - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't handle properly tied operands
/// when they refer multiple MIOperands inside a single one.
-bool ARMAsmParser::
-cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtLdWriteBackRegAddrModeImm12(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
@@ -3972,57 +3968,53 @@ cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode,
((ARMOperand*)Operands[3])->addMemImm12OffsetOperands(Inst, 2);
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
/// cvtStWriteBackRegAddrModeImm12 - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't handle properly tied operands
/// when they refer multiple MIOperands inside a single one.
-bool ARMAsmParser::
-cvtStWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtStWriteBackRegAddrModeImm12(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Create a writeback register dummy placeholder.
Inst.addOperand(MCOperand::CreateImm(0));
((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
((ARMOperand*)Operands[3])->addMemImm12OffsetOperands(Inst, 2);
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
/// cvtStWriteBackRegAddrMode2 - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't handle properly tied operands
/// when they refer multiple MIOperands inside a single one.
-bool ARMAsmParser::
-cvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtStWriteBackRegAddrMode2(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Create a writeback register dummy placeholder.
Inst.addOperand(MCOperand::CreateImm(0));
((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
((ARMOperand*)Operands[3])->addAddrMode2Operands(Inst, 3);
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
/// cvtStWriteBackRegAddrMode3 - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't handle properly tied operands
/// when they refer multiple MIOperands inside a single one.
-bool ARMAsmParser::
-cvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtStWriteBackRegAddrMode3(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Create a writeback register dummy placeholder.
Inst.addOperand(MCOperand::CreateImm(0));
((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
((ARMOperand*)Operands[3])->addAddrMode3Operands(Inst, 3);
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
/// cvtLdExtTWriteBackImm - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't handle properly tied operands
/// when they refer multiple MIOperands inside a single one.
-bool ARMAsmParser::
-cvtLdExtTWriteBackImm(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtLdExtTWriteBackImm(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Rt
((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
@@ -4034,14 +4026,13 @@ cvtLdExtTWriteBackImm(MCInst &Inst, unsigned Opcode,
((ARMOperand*)Operands[4])->addPostIdxImm8Operands(Inst, 1);
// pred
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
/// cvtLdExtTWriteBackReg - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't handle properly tied operands
/// when they refer multiple MIOperands inside a single one.
-bool ARMAsmParser::
-cvtLdExtTWriteBackReg(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtLdExtTWriteBackReg(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Rt
((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
@@ -4053,14 +4044,13 @@ cvtLdExtTWriteBackReg(MCInst &Inst, unsigned Opcode,
((ARMOperand*)Operands[4])->addPostIdxRegOperands(Inst, 2);
// pred
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
/// cvtStExtTWriteBackImm - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't handle properly tied operands
/// when they refer multiple MIOperands inside a single one.
-bool ARMAsmParser::
-cvtStExtTWriteBackImm(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtStExtTWriteBackImm(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Create a writeback register dummy placeholder.
Inst.addOperand(MCOperand::CreateImm(0));
@@ -4072,14 +4062,13 @@ cvtStExtTWriteBackImm(MCInst &Inst, unsigned Opcode,
((ARMOperand*)Operands[4])->addPostIdxImm8Operands(Inst, 1);
// pred
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
/// cvtStExtTWriteBackReg - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't handle properly tied operands
/// when they refer multiple MIOperands inside a single one.
-bool ARMAsmParser::
-cvtStExtTWriteBackReg(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtStExtTWriteBackReg(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Create a writeback register dummy placeholder.
Inst.addOperand(MCOperand::CreateImm(0));
@@ -4091,14 +4080,13 @@ cvtStExtTWriteBackReg(MCInst &Inst, unsigned Opcode,
((ARMOperand*)Operands[4])->addPostIdxRegOperands(Inst, 2);
// pred
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
/// cvtLdrdPre - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't handle properly tied operands
/// when they refer multiple MIOperands inside a single one.
-bool ARMAsmParser::
-cvtLdrdPre(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtLdrdPre(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Rt, Rt2
((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
@@ -4109,14 +4097,13 @@ cvtLdrdPre(MCInst &Inst, unsigned Opcode,
((ARMOperand*)Operands[4])->addAddrMode3Operands(Inst, 3);
// pred
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
/// cvtStrdPre - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't handle properly tied operands
/// when they refer multiple MIOperands inside a single one.
-bool ARMAsmParser::
-cvtStrdPre(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtStrdPre(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Create a writeback register dummy placeholder.
Inst.addOperand(MCOperand::CreateImm(0));
@@ -4127,40 +4114,27 @@ cvtStrdPre(MCInst &Inst, unsigned Opcode,
((ARMOperand*)Operands[4])->addAddrMode3Operands(Inst, 3);
// pred
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
/// cvtLdWriteBackRegAddrMode3 - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't handle properly tied operands
/// when they refer multiple MIOperands inside a single one.
-bool ARMAsmParser::
-cvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtLdWriteBackRegAddrMode3(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
// Create a writeback register dummy placeholder.
Inst.addOperand(MCOperand::CreateImm(0));
((ARMOperand*)Operands[3])->addAddrMode3Operands(Inst, 3);
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
-/// cvtThumbMultiple- Convert parsed operands to MCInst.
+/// cvtThumbMultiply - Convert parsed operands to MCInst.
/// Needed here because the Asm Gen Matcher can't handle properly tied operands
/// when they refer multiple MIOperands inside a single one.
-bool ARMAsmParser::
-cvtThumbMultiply(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtThumbMultiply(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- // The second source operand must be the same register as the destination
- // operand.
- if (Operands.size() == 6 &&
- (((ARMOperand*)Operands[3])->getReg() !=
- ((ARMOperand*)Operands[5])->getReg()) &&
- (((ARMOperand*)Operands[3])->getReg() !=
- ((ARMOperand*)Operands[4])->getReg())) {
- Error(Operands[3]->getStartLoc(),
- "destination register must match source register");
- return false;
- }
((ARMOperand*)Operands[3])->addRegOperands(Inst, 1);
((ARMOperand*)Operands[1])->addCCOutOperands(Inst, 1);
// If we have a three-operand form, make sure to set Rn to be the operand
@@ -4173,12 +4147,10 @@ cvtThumbMultiply(MCInst &Inst, unsigned Opcode,
((ARMOperand*)Operands[RegOp])->addRegOperands(Inst, 1);
Inst.addOperand(Inst.getOperand(0));
((ARMOperand*)Operands[2])->addCondCodeOperands(Inst, 2);
-
- return true;
}
-bool ARMAsmParser::
-cvtVLDwbFixed(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtVLDwbFixed(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Vd
((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
@@ -4188,11 +4160,10 @@ cvtVLDwbFixed(MCInst &Inst, unsigned Opcode,
((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2);
// pred
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
-bool ARMAsmParser::
-cvtVLDwbRegister(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtVLDwbRegister(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Vd
((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
@@ -4204,11 +4175,10 @@ cvtVLDwbRegister(MCInst &Inst, unsigned Opcode,
((ARMOperand*)Operands[5])->addRegOperands(Inst, 1);
// pred
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
-bool ARMAsmParser::
-cvtVSTwbFixed(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtVSTwbFixed(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Create a writeback register dummy placeholder.
Inst.addOperand(MCOperand::CreateImm(0));
@@ -4218,11 +4188,10 @@ cvtVSTwbFixed(MCInst &Inst, unsigned Opcode,
((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
// pred
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
-bool ARMAsmParser::
-cvtVSTwbRegister(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtVSTwbRegister(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Create a writeback register dummy placeholder.
Inst.addOperand(MCOperand::CreateImm(0));
@@ -4234,7 +4203,6 @@ cvtVSTwbRegister(MCInst &Inst, unsigned Opcode,
((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
// pred
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
- return true;
}
/// Parse an ARM memory expression, return false if successful else return true
@@ -5377,6 +5345,25 @@ validateInstruction(MCInst &Inst,
"in register list");
break;
}
+ case ARM::tMUL: {
+ // The second source operand must be the same register as the destination
+ // operand.
+ //
+ // In this case, we must directly check the parsed operands because the
+ // cvtThumbMultiply() function is written in such a way that it guarantees
+ // this first statement is always true for the new Inst. Essentially, the
+ // destination is unconditionally copied into the second source operand
+ // without checking to see if it matches what we actually parsed.
+ if (Operands.size() == 6 &&
+ (((ARMOperand*)Operands[3])->getReg() !=
+ ((ARMOperand*)Operands[5])->getReg()) &&
+ (((ARMOperand*)Operands[3])->getReg() !=
+ ((ARMOperand*)Operands[4])->getReg())) {
+ return Error(Operands[3]->getStartLoc(),
+ "destination register must match source register");
+ }
+ break;
+ }
// Like for ldm/stm, push and pop have hi-reg handling version in Thumb2,
// so only issue a diagnostic for thumb1. The instructions will be
// switched to the t2 encodings in processInstruction() if necessary.
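
The tMUL case added above enforces an encoding restriction: the 16-bit Thumb1
MULS has only two register fields, so the destination must equal one of the
sources. A standalone sketch of the constraint (hypothetical helper, for
illustration only):

    // Valid:   muls r0, r1, r0   (Rd == Rm)
    // Invalid: muls r0, r1, r2   (Rd matches neither source)
    bool isValidThumb1Mul(unsigned Rd, unsigned Rn, unsigned Rm) {
      return Rd == Rn || Rd == Rm;
    }

The check moves out of the converter and into validateInstruction() because,
as the comment notes, cvtThumbMultiply() unconditionally copies the
destination into the tied source operand, so the converted MCInst itself
would always look valid.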
@@ -7475,9 +7462,11 @@ MatchAndEmitInstruction(SMLoc IDLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCStreamer &Out) {
MCInst Inst;
+ unsigned Kind;
unsigned ErrorInfo;
unsigned MatchResult;
- MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo);
+
+ MatchResult = MatchInstructionImpl(Operands, Kind, Inst, ErrorInfo);
switch (MatchResult) {
default: break;
case Match_Success:
@@ -7540,9 +7529,6 @@ MatchAndEmitInstruction(SMLoc IDLoc,
case Match_MnemonicFail:
return Error(IDLoc, "invalid instruction",
((ARMOperand*)Operands[0])->getLocRange());
- case Match_ConversionFail:
- // The converter function will have already emitted a diagnostic.
- return true;
case Match_RequiresNotITBlock:
return Error(IDLoc, "flag setting instruction only valid outside IT block");
case Match_RequiresITBlock:
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index c90751d..57642e1 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -2701,6 +2701,8 @@ static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Insn,
unsigned align = fieldFromInstruction(Insn, 4, 1);
unsigned size = fieldFromInstruction(Insn, 6, 2);
+ if (size == 0 && align == 1)
+ return MCDisassembler::Fail;
align *= (1 << size);
switch (Inst.getOpcode()) {
@@ -2831,6 +2833,8 @@ static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn,
unsigned align = fieldFromInstruction(Insn, 4, 1);
if (size == 0x3) {
+ if (align == 0)
+ return MCDisassembler::Fail;
size = 4;
align = 16;
} else {
@@ -3170,7 +3174,7 @@ static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val,
int imm = Val & 0xFF;
if (!(Val & 0x100)) imm *= -1;
- Inst.addOperand(MCOperand::CreateImm(imm << 2));
+ Inst.addOperand(MCOperand::CreateImm(imm * 4));
}
return MCDisassembler::Success;
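
The switch from `imm << 2` to `imm * 4` above sidesteps undefined behavior:
left-shifting a negative signed integer is undefined in C++, while the
multiplication is well-defined whenever the result fits. In isolation
(hypothetical helper, illustration only):

    #include <cassert>

    int scaleImm8S4(int imm8) {       // decoded immediate in [-255, 255]
      assert(imm8 >= -255 && imm8 <= 255);
      return imm8 * 4;                // defined; imm8 << 2 would be UB for imm8 < 0
    }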
@@ -3710,8 +3714,16 @@ static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn,
if (fieldFromInstruction(Insn, 6, 1))
return MCDisassembler::Fail; // UNDEFINED
index = fieldFromInstruction(Insn, 7, 1);
- if (fieldFromInstruction(Insn, 4, 2) != 0)
- align = 4;
+
+ switch (fieldFromInstruction(Insn, 4, 2)) {
+ case 0:
+ align = 0; break;
+ case 3:
+ align = 4; break;
+ default:
+ return MCDisassembler::Fail;
+ }
+ break;
}
if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
@@ -3769,8 +3781,16 @@ static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn,
if (fieldFromInstruction(Insn, 6, 1))
return MCDisassembler::Fail; // UNDEFINED
index = fieldFromInstruction(Insn, 7, 1);
- if (fieldFromInstruction(Insn, 4, 2) != 0)
- align = 4;
+
+ switch (fieldFromInstruction(Insn, 4, 2)) {
+ case 0:
+ align = 0; break;
+ case 3:
+ align = 4; break;
+ default:
+ return MCDisassembler::Fail;
+ }
+ break;
}
if (Rm != 0xF) { // Writeback
@@ -4090,8 +4110,15 @@ static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn,
inc = 2;
break;
case 2:
- if (fieldFromInstruction(Insn, 4, 2))
- align = 4 << fieldFromInstruction(Insn, 4, 2);
+ switch (fieldFromInstruction(Insn, 4, 2)) {
+ case 0:
+ align = 0; break;
+ case 3:
+ return MCDisassembler::Fail;
+ default:
+ align = 4 << fieldFromInstruction(Insn, 4, 2); break;
+ }
+
index = fieldFromInstruction(Insn, 7, 1);
if (fieldFromInstruction(Insn, 6, 1))
inc = 2;
@@ -4164,8 +4191,15 @@ static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn,
inc = 2;
break;
case 2:
- if (fieldFromInstruction(Insn, 4, 2))
- align = 4 << fieldFromInstruction(Insn, 4, 2);
+ switch (fieldFromInstruction(Insn, 4, 2)) {
+ case 0:
+ align = 0; break;
+ case 3:
+ return MCDisassembler::Fail;
+ default:
+ align = 4 << fieldFromInstruction(Insn, 4, 2); break;
+ }
+
index = fieldFromInstruction(Insn, 7, 1);
if (fieldFromInstruction(Insn, 6, 1))
inc = 2;
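
The two switches above make the alignment table for the 32-bit-element lane
forms explicit instead of accepting any nonzero field. Reading the cases, the
mapping for Insn[5:4] is: 0 -> no alignment, 1 -> 8 bytes, 2 -> 16 bytes,
3 -> UNDEFINED (decode fails). The same table as a helper (hypothetical,
illustration only):

    // Alignment for VLD4/VST4 lane forms with 32-bit elements; callers must
    // reject field == 3 (UNDEFINED) before calling.
    unsigned vld4LaneAlign32(unsigned field) {
      return field == 0 ? 0 : 4u << field;  // 1 -> 8, 2 -> 16
    }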
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index 7d6acbc..b53da3b 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -194,6 +194,10 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case ARM::fixup_arm_uncondbranch:
Type = ELF::R_ARM_JUMP24;
break;
+ case ARM::fixup_t2_condbranch:
+ case ARM::fixup_t2_uncondbranch:
+ Type = ELF::R_ARM_THM_JUMP24;
+ break;
case ARM::fixup_arm_movt_hi16:
case ARM::fixup_arm_movt_hi16_pcrel:
Type = ELF::R_ARM_MOVT_PREL;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index d32805e..c1aab9c 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -50,7 +50,6 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo() {
Code32Directive = ".code\t32";
WeakRefDirective = "\t.weak\t";
- LCOMMDirectiveType = LCOMM::NoAlignment;
HasLEB128 = true;
SupportsDebugInformation = true;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 94f1082..1917564 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -783,7 +783,7 @@ getT2Imm8s4OpValue(const MCInst &MI, unsigned OpIdx,
// Immediate is always encoded as positive. The 'U' bit controls add vs sub.
if (Imm8 < 0)
- Imm8 = -Imm8;
+ Imm8 = -(uint32_t)Imm8;
// Scaled by 4.
Imm8 /= 4;
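
The cast through uint32_t above is the usual idiom for taking a magnitude
without signed-overflow UB: `-Imm8` overflows when Imm8 == INT_MIN, whereas
unsigned negation wraps modulo 2^32 and is always defined. In isolation
(hypothetical helper, illustration only):

    #include <cstdint>

    uint32_t magnitude(int32_t v) {
      return v < 0 ? -(uint32_t)v : (uint32_t)v;  // defined even for INT32_MIN
    }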
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index a51e0fa..95640f7 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -410,7 +410,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
if (Type == macho::RIT_ARM_Half) {
// The other-half value only gets populated for the movt and movw
// relocation entries.
- uint32_t Value = 0;;
+ uint32_t Value = 0;
switch ((unsigned)Fixup.getKind()) {
default: break;
case ARM::fixup_arm_movw_lo16:
diff --git a/lib/Target/CellSPU/SPUAsmPrinter.cpp b/lib/Target/CellSPU/SPUAsmPrinter.cpp
index 03d5a9a..3396e8b 100644
--- a/lib/Target/CellSPU/SPUAsmPrinter.cpp
+++ b/lib/Target/CellSPU/SPUAsmPrinter.cpp
@@ -130,8 +130,7 @@ namespace {
void
printS10ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
{
- short value = (short) (((int) MI->getOperand(OpNo).getImm() << 16)
- >> 16);
+ short value = MI->getOperand(OpNo).getImm();
assert((value >= -(1 << 9) && value <= (1 << 9) - 1)
&& "Invalid s10 argument");
O << value;
@@ -140,8 +139,7 @@ namespace {
void
printU10ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
{
- short value = (short) (((int) MI->getOperand(OpNo).getImm() << 16)
- >> 16);
+ short value = MI->getOperand(OpNo).getImm();
assert((value <= (1 << 10) - 1) && "Invalid u10 argument");
O << value;
}
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index c27caea..425371d 100644
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -83,12 +83,10 @@ namespace {
return true;
} else if (vt == MVT::i32) {
int32_t i_val = (int32_t) CN->getZExtValue();
- short s_val = (short) i_val;
- return i_val == s_val;
+ return i_val == SignExtend32<16>(i_val);
} else {
int64_t i_val = (int64_t) CN->getZExtValue();
- short s_val = (short) i_val;
- return i_val == s_val;
+ return i_val == SignExtend64<16>(i_val);
}
}
@@ -99,9 +97,10 @@ namespace {
EVT vt = FPN->getValueType(0);
if (vt == MVT::f32) {
int val = FloatToBits(FPN->getValueAPF().convertToFloat());
- int sval = (int) ((val << 16) >> 16);
- Imm = (short) val;
- return val == sval;
+ if (val == SignExtend32<16>(val)) {
+ Imm = (short) val;
+ return true;
+ }
}
return false;
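
Both SPU changes drop the hand-rolled `(x << 16) >> 16` sign-extension idiom,
which shifts signed values and therefore leans on implementation-defined
behavior, in favor of LLVM's SignExtend32/SignExtend64 helpers. A portable
sketch of what SignExtend32<16> computes (illustration only; the real helper
lives in llvm/Support/MathExtras.h):

    #include <cstdint>

    int32_t signExtend16(uint32_t x) {
      int32_t v = (int32_t)(x & 0xFFFF);  // 0 .. 65535
      return (v ^ 0x8000) - 0x8000;       // fold bit 15 into the sign
    }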
diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt
index 1f2d8ac..306084b 100644
--- a/lib/Target/Hexagon/CMakeLists.txt
+++ b/lib/Target/Hexagon/CMakeLists.txt
@@ -16,6 +16,7 @@ add_llvm_target(HexagonCodeGen
HexagonExpandPredSpillCode.cpp
HexagonFrameLowering.cpp
HexagonHardwareLoops.cpp
+ HexagonMachineScheduler.cpp
HexagonMCInstLower.cpp
HexagonInstrInfo.cpp
HexagonISelDAGToDAG.cpp
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
new file mode 100644
index 0000000..b131a8f
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -0,0 +1,952 @@
+//===- HexagonMachineScheduler.cpp - MI Scheduler for Hexagon -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// MachineScheduler schedules machine instructions after phi elimination. It
+// preserves LiveIntervals so it can be invoked before register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "misched"
+
+#include "HexagonMachineScheduler.h"
+
+#include <queue>
+
+using namespace llvm;
+
+static cl::opt<bool> ForceTopDown("vliw-misched-topdown", cl::Hidden,
+ cl::desc("Force top-down list scheduling"));
+static cl::opt<bool> ForceBottomUp("vliw-misched-bottomup", cl::Hidden,
+ cl::desc("Force bottom-up list scheduling"));
+
+#ifndef NDEBUG
+static cl::opt<bool> ViewMISchedDAGs("vliw-view-misched-dags", cl::Hidden,
+ cl::desc("Pop up a window to show MISched dags after they are processed"));
+
+static cl::opt<unsigned> MISchedCutoff("vliw-misched-cutoff", cl::Hidden,
+ cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
+#else
+static bool ViewMISchedDAGs = false;
+#endif // NDEBUG
+
+/// Decrement this iterator until reaching the top or a non-debug instr.
+static MachineBasicBlock::iterator
+priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) {
+ assert(I != Beg && "reached the top of the region, cannot decrement");
+ while (--I != Beg) {
+ if (!I->isDebugValue())
+ break;
+ }
+ return I;
+}
+
+/// If this iterator is a debug value, increment until reaching the End or a
+/// non-debug instruction.
+static MachineBasicBlock::iterator
+nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) {
+ for(; I != End; ++I) {
+ if (!I->isDebugValue())
+ break;
+ }
+ return I;
+}
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
+/// NumPredsLeft reaches zero, release the successor node.
+///
+/// FIXME: Adjust SuccSU height based on MinLatency.
+void VLIWMachineScheduler::releaseSucc(SUnit *SU, SDep *SuccEdge) {
+ SUnit *SuccSU = SuccEdge->getSUnit();
+
+#ifndef NDEBUG
+ if (SuccSU->NumPredsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ SuccSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(0);
+ }
+#endif
+ --SuccSU->NumPredsLeft;
+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
+ SchedImpl->releaseTopNode(SuccSU);
+}
+
+/// releaseSuccessors - Call releaseSucc on each of SU's successors.
+void VLIWMachineScheduler::releaseSuccessors(SUnit *SU) {
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ releaseSucc(SU, &*I);
+ }
+}
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
+/// NumSuccsLeft reaches zero, release the predecessor node.
+///
+/// FIXME: Adjust PredSU height based on MinLatency.
+void VLIWMachineScheduler::releasePred(SUnit *SU, SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+
+#ifndef NDEBUG
+ if (PredSU->NumSuccsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ PredSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(0);
+ }
+#endif
+ --PredSU->NumSuccsLeft;
+ if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
+ SchedImpl->releaseBottomNode(PredSU);
+}
+
+/// releasePredecessors - Call releasePred on each of SU's predecessors.
+void VLIWMachineScheduler::releasePredecessors(SUnit *SU) {
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ releasePred(SU, &*I);
+ }
+}
+
+void VLIWMachineScheduler::moveInstruction(MachineInstr *MI,
+ MachineBasicBlock::iterator InsertPos) {
+ // Advance RegionBegin if the first instruction moves down.
+ if (&*RegionBegin == MI)
+ ++RegionBegin;
+
+ // Update the instruction stream.
+ BB->splice(InsertPos, BB, MI);
+
+ // Update LiveIntervals
+ LIS->handleMove(MI);
+
+ // Recede RegionBegin if an instruction moves above the first.
+ if (RegionBegin == InsertPos)
+ RegionBegin = MI;
+}
+
+bool VLIWMachineScheduler::checkSchedLimit() {
+#ifndef NDEBUG
+ if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
+ CurrentTop = CurrentBottom;
+ return false;
+ }
+ ++NumInstrsScheduled;
+#endif
+ return true;
+}
+
+/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
+/// crossing a scheduling boundary. [begin, end) includes all instructions in
+/// the region, including the boundary itself and single-instruction regions
+/// that don't get scheduled.
+void VLIWMachineScheduler::enterRegion(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned endcount)
+{
+ ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);
+
+ // For convenience remember the end of the liveness region.
+ LiveRegionEnd =
+ (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd);
+}
+
+// Set up the register pressure trackers for the top and bottom scheduled
+// regions.
+void VLIWMachineScheduler::initRegPressure() {
+ TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
+ BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
+
+ // Close the RPTracker to finalize live ins.
+ RPTracker.closeRegion();
+
+ DEBUG(RPTracker.getPressure().dump(TRI));
+
+ // Initialize the live ins and live outs.
+ TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
+ BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
+
+ // Close one end of the tracker so we can call
+ // getMaxUpward/DownwardPressureDelta before advancing across any
+ // instructions. This converts currently live regs into live ins/outs.
+ TopRPTracker.closeTop();
+ BotRPTracker.closeBottom();
+
+ // Account for liveness generated by the region boundary.
+ if (LiveRegionEnd != RegionEnd)
+ BotRPTracker.recede();
+
+ assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
+
+ // Cache the list of excess pressure sets in this region. This will also track
+ // the max pressure in the scheduled code for these sets.
+ RegionCriticalPSets.clear();
+ std::vector<unsigned> RegionPressure = RPTracker.getPressure().MaxSetPressure;
+ for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
+ unsigned Limit = TRI->getRegPressureSetLimit(i);
+ DEBUG(dbgs() << TRI->getRegPressureSetName(i)
+ << " Limit " << Limit
+ << " Actual " << RegionPressure[i] << "\n");
+ if (RegionPressure[i] > Limit)
+ RegionCriticalPSets.push_back(PressureElement(i, 0));
+ }
+ DEBUG(dbgs() << "Excess PSets: ";
+ for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
+ dbgs() << TRI->getRegPressureSetName(
+ RegionCriticalPSets[i].PSetID) << " ";
+ dbgs() << "\n");
+
+ TotalPackets = 0;
+}
+
+// FIXME: When the pressure tracker deals in pressure differences then we won't
+// iterate over all RegionCriticalPSets[i].
+void VLIWMachineScheduler::
+updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {
+ for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) {
+ unsigned ID = RegionCriticalPSets[i].PSetID;
+ int &MaxUnits = RegionCriticalPSets[i].UnitIncrease;
+ if ((int)NewMaxPressure[ID] > MaxUnits)
+ MaxUnits = NewMaxPressure[ID];
+ }
+}
+
+/// Check if scheduling of this SU is possible
+/// in the current packet.
+/// It is _not_ precise (stateful); it is more like
+/// another heuristic. Many corner cases were
+/// determined empirically.
+bool VLIWResourceModel::isResourceAvailable(SUnit *SU) {
+ if (!SU || !SU->getInstr())
+ return false;
+
+ // First see if the pipeline could receive this instruction
+ // in the current cycle.
+ switch (SU->getInstr()->getOpcode()) {
+ default:
+ if (!ResourcesModel->canReserveResources(SU->getInstr()))
+ return false;
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::COPY:
+ case TargetOpcode::INLINEASM:
+ break;
+ }
+
+ // Now see if there are no other dependencies to instructions already
+ // in the packet.
+ for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
+ if (Packet[i]->Succs.size() == 0)
+ continue;
+ for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(),
+ E = Packet[i]->Succs.end(); I != E; ++I) {
+ // Since we do not add pseudos to packets, might as well
+ // ignore order dependencies.
+ if (I->isCtrl())
+ continue;
+
+ if (I->getSUnit() == SU)
+ return false;
+ }
+ }
+ return true;
+}
+
+/// Keep track of available resources.
+bool VLIWResourceModel::reserveResources(SUnit *SU) {
+ bool startNewCycle = false;
+ // If this SU does not fit in the packet
+ // start a new one.
+ if (!isResourceAvailable(SU)) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ TotalPackets++;
+ startNewCycle = true;
+ }
+
+ switch (SU->getInstr()->getOpcode()) {
+ default:
+ ResourcesModel->reserveResources(SU->getInstr());
+ break;
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::COPY:
+ case TargetOpcode::INLINEASM:
+ break;
+ }
+ Packet.push_back(SU);
+
+#ifndef NDEBUG
+ DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
+ for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
+ DEBUG(dbgs() << "\t[" << i << "] SU(");
+ DEBUG(dbgs() << Packet[i]->NodeNum << ")\t");
+ DEBUG(Packet[i]->getInstr()->dump());
+ }
+#endif
+
+ // If packet is now full, reset the state so in the next cycle
+ // we start fresh.
+ if (Packet.size() >= InstrItins->SchedModel->IssueWidth) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ TotalPackets++;
+ startNewCycle = true;
+ }
+
+ return startNewCycle;
+}
+
+// Release all DAG roots for scheduling.
+void VLIWMachineScheduler::releaseRoots() {
+ SmallVector<SUnit*, 16> BotRoots;
+
+ for (std::vector<SUnit>::iterator
+ I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
+ // A SUnit is ready to top schedule if it has no predecessors.
+ if (I->Preds.empty())
+ SchedImpl->releaseTopNode(&(*I));
+ // A SUnit is ready to bottom schedule if it has no successors.
+ if (I->Succs.empty())
+ BotRoots.push_back(&(*I));
+ }
+ // Release bottom roots in reverse order so the higher priority nodes appear
+ // first. This is more natural and slightly more efficient.
+ for (SmallVectorImpl<SUnit*>::const_reverse_iterator
+ I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I)
+ SchedImpl->releaseBottomNode(*I);
+}
+
+/// schedule - Called back from MachineScheduler::runOnMachineFunction
+/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
+/// only includes instructions that have DAG nodes, not scheduling boundaries.
+void VLIWMachineScheduler::schedule() {
+ DEBUG(dbgs()
+ << "********** MI Converging Scheduling VLIW BB#" << BB->getNumber()
+ << " " << BB->getName()
+ << " in_func " << BB->getParent()->getFunction()->getName()
+ << " at loop depth " << MLI->getLoopDepth(BB)
+ << " \n");
+
+ // Initialize the register pressure tracker used by buildSchedGraph.
+ RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
+
+ // Account for liveness generated by the region boundary.
+ if (LiveRegionEnd != RegionEnd)
+ RPTracker.recede();
+
+ // Build the DAG, and compute current register pressure.
+ buildSchedGraph(AA, &RPTracker);
+
+ // Initialize top/bottom trackers after computing region pressure.
+ initRegPressure();
+
+ // To view Height/Depth correctly, they should be accessed at least once.
+ DEBUG(unsigned maxH = 0;
+ for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ if (SUnits[su].getHeight() > maxH)
+ maxH = SUnits[su].getHeight();
+ dbgs() << "Max Height " << maxH << "\n";);
+ DEBUG(unsigned maxD = 0;
+ for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ if (SUnits[su].getDepth() > maxD)
+ maxD = SUnits[su].getDepth();
+ dbgs() << "Max Depth " << maxD << "\n";);
+ DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(this));
+
+ if (ViewMISchedDAGs) viewGraph();
+
+ SchedImpl->initialize(this);
+
+ // Release edges from the special Entry node or to the special Exit node.
+ releaseSuccessors(&EntrySU);
+ releasePredecessors(&ExitSU);
+
+ // Release all DAG roots for scheduling.
+ releaseRoots();
+
+ CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
+ CurrentBottom = RegionEnd;
+ bool IsTopNode = false;
+ while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
+ if (!checkSchedLimit())
+ break;
+
+ // Move the instruction to its new location in the instruction stream.
+ MachineInstr *MI = SU->getInstr();
+
+ if (IsTopNode) {
+ assert(SU->isTopReady() && "node still has unscheduled dependencies");
+ if (&*CurrentTop == MI)
+ CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
+ else {
+ moveInstruction(MI, CurrentTop);
+ TopRPTracker.setPos(MI);
+ }
+
+ // Update top scheduled pressure.
+ TopRPTracker.advance();
+ assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
+ updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);
+
+ // Release dependent instructions for scheduling.
+ releaseSuccessors(SU);
+ } else {
+ assert(SU->isBottomReady() && "node still has unscheduled dependencies");
+ MachineBasicBlock::iterator priorII =
+ priorNonDebug(CurrentBottom, CurrentTop);
+ if (&*priorII == MI)
+ CurrentBottom = priorII;
+ else {
+ if (&*CurrentTop == MI) {
+ CurrentTop = nextIfDebug(++CurrentTop, priorII);
+ TopRPTracker.setPos(CurrentTop);
+ }
+ moveInstruction(MI, CurrentBottom);
+ CurrentBottom = MI;
+ }
+ // Update bottom scheduled pressure.
+ BotRPTracker.recede();
+ assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
+ updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);
+
+ // Release dependent instructions for scheduling.
+ releasePredecessors(SU);
+ }
+ SU->isScheduled = true;
+ SchedImpl->schedNode(SU, IsTopNode);
+ }
+ assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
+
+ placeDebugValues();
+}
+
+/// Reinsert any remaining debug_values, just like the PostRA scheduler.
+void VLIWMachineScheduler::placeDebugValues() {
+ // If first instruction was a DBG_VALUE then put it back.
+ if (FirstDbgValue) {
+ BB->splice(RegionBegin, BB, FirstDbgValue);
+ RegionBegin = FirstDbgValue;
+ }
+
+ for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
+ DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
+ std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
+ MachineInstr *DbgValue = P.first;
+ MachineBasicBlock::iterator OrigPrevMI = P.second;
+ BB->splice(++OrigPrevMI, BB, DbgValue);
+ if (OrigPrevMI == llvm::prior(RegionEnd))
+ RegionEnd = DbgValue;
+ }
+ DbgValues.clear();
+ FirstDbgValue = NULL;
+}
+
+void ConvergingVLIWScheduler::initialize(VLIWMachineScheduler *dag) {
+ DAG = dag;
+ TRI = DAG->TRI;
+ Top.DAG = dag;
+ Bot.DAG = dag;
+
+ // Initialize the HazardRecognizers.
+ const TargetMachine &TM = DAG->MF.getTarget();
+ const InstrItineraryData *Itin = TM.getInstrItineraryData();
+ Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
+ Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
+
+ Top.ResourceModel = new VLIWResourceModel(TM);
+ Bot.ResourceModel = new VLIWResourceModel(TM);
+
+ assert((!ForceTopDown || !ForceBottomUp) &&
+ "-vliw-misched-topdown incompatible with -vliw-misched-bottomup");
+}
+
+void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {
+ if (SU->isScheduled)
+ return;
+
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
+ unsigned MinLatency = I->getMinLatency();
+#ifndef NDEBUG
+ Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
+#endif
+ if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
+ SU->TopReadyCycle = PredReadyCycle + MinLatency;
+ }
+ Top.releaseNode(SU, SU->TopReadyCycle);
+}
+
+void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) {
+ if (SU->isScheduled)
+ return;
+
+ assert(SU->getInstr() && "Scheduled SUnit must have instr");
+
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
+ unsigned MinLatency = I->getMinLatency();
+#ifndef NDEBUG
+ Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
+#endif
+ if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
+ SU->BotReadyCycle = SuccReadyCycle + MinLatency;
+ }
+ Bot.releaseNode(SU, SU->BotReadyCycle);
+}
+
+/// Does this SU have a hazard within the current instruction group.
+///
+/// The scheduler supports two modes of hazard recognition. The first is the
+/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
+/// supports highly complicated in-order reservation tables
+/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
+///
+/// The second is a streamlined mechanism that checks for hazards based on
+/// simple counters that the scheduler itself maintains. It explicitly checks
+/// for instruction dispatch limitations, including the number of micro-ops that
+/// can dispatch per cycle.
+///
+/// TODO: Also check whether the SU must start a new group.
+bool ConvergingVLIWScheduler::SchedBoundary::checkHazard(SUnit *SU) {
+ if (HazardRec->isEnabled())
+ return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
+
+ if (IssueCount + DAG->getNumMicroOps(SU->getInstr()) > DAG->getIssueWidth())
+ return true;
+
+ return false;
+}
+
+void ConvergingVLIWScheduler::SchedBoundary::releaseNode(SUnit *SU,
+ unsigned ReadyCycle) {
+ if (ReadyCycle < MinReadyCycle)
+ MinReadyCycle = ReadyCycle;
+
+ // Check for interlocks first. For the purpose of other heuristics, an
+ // instruction that cannot issue appears as if it's not in the ReadyQueue.
+ if (ReadyCycle > CurrCycle || checkHazard(SU))
+ Pending.push(SU);
+ else
+ Available.push(SU);
+}
+
+/// Move the boundary of scheduled code by one cycle.
+void ConvergingVLIWScheduler::SchedBoundary::bumpCycle() {
+ unsigned Width = DAG->getIssueWidth();
+ IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
+
+ assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
+ unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);
+
+ if (!HazardRec->isEnabled()) {
+ // Bypass HazardRec virtual calls.
+ CurrCycle = NextCycle;
+ } else {
+ // Bypass getHazardType calls in case of long latency.
+ for (; CurrCycle != NextCycle; ++CurrCycle) {
+ if (isTop())
+ HazardRec->AdvanceCycle();
+ else
+ HazardRec->RecedeCycle();
+ }
+ }
+ CheckPending = true;
+
+ DEBUG(dbgs() << "*** " << Available.getName() << " cycle "
+ << CurrCycle << '\n');
+}
+
+/// Move the boundary of scheduled code by one SUnit.
+void ConvergingVLIWScheduler::SchedBoundary::bumpNode(SUnit *SU) {
+ bool startNewCycle = false;
+
+ // Update the reservation table.
+ if (HazardRec->isEnabled()) {
+ if (!isTop() && SU->isCall) {
+ // Calls are scheduled with their preceding instructions. For bottom-up
+ // scheduling, clear the pipeline state before emitting.
+ HazardRec->Reset();
+ }
+ HazardRec->EmitInstruction(SU);
+ }
+
+ // Update DFA model.
+ startNewCycle = ResourceModel->reserveResources(SU);
+
+ // Check the instruction group dispatch limit.
+ // TODO: Check if this SU must end a dispatch group.
+ IssueCount += DAG->getNumMicroOps(SU->getInstr());
+ if (startNewCycle) {
+ DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
+ bumpCycle();
+ }
+ else
+ DEBUG(dbgs() << "*** IssueCount " << IssueCount
+ << " at cycle " << CurrCycle << '\n');
+}
+
+/// Release pending ready nodes into the available queue. This makes them
+/// visible to heuristics.
+void ConvergingVLIWScheduler::SchedBoundary::releasePending() {
+ // If the available queue is empty, it is safe to reset MinReadyCycle.
+ if (Available.empty())
+ MinReadyCycle = UINT_MAX;
+
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
+ SUnit *SU = *(Pending.begin()+i);
+ unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
+
+ if (ReadyCycle < MinReadyCycle)
+ MinReadyCycle = ReadyCycle;
+
+ if (ReadyCycle > CurrCycle)
+ continue;
+
+ if (checkHazard(SU))
+ continue;
+
+ Available.push(SU);
+ Pending.remove(Pending.begin()+i);
+ --i; --e;
+ }
+ CheckPending = false;
+}
+
+/// Remove SU from the ready set for this boundary.
+void ConvergingVLIWScheduler::SchedBoundary::removeReady(SUnit *SU) {
+ if (Available.isInQueue(SU))
+ Available.remove(Available.find(SU));
+ else {
+ assert(Pending.isInQueue(SU) && "bad ready count");
+ Pending.remove(Pending.find(SU));
+ }
+}
+
+/// If this queue only has one ready candidate, return it. As a side effect,
+/// advance the cycle until at least one node is ready. If multiple instructions
+/// are ready, return NULL.
+SUnit *ConvergingVLIWScheduler::SchedBoundary::pickOnlyChoice() {
+ if (CheckPending)
+ releasePending();
+
+ for (unsigned i = 0; Available.empty(); ++i) {
+ assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
+ "permanent hazard"); (void)i;
+ bumpCycle();
+ releasePending();
+ }
+ if (Available.size() == 1)
+ return *Available.begin();
+ return NULL;
+}
+
+#ifndef NDEBUG
+void ConvergingVLIWScheduler::traceCandidate(const char *Label,
+ const ReadyQueue &Q,
+ SUnit *SU, PressureElement P) {
+ dbgs() << Label << " " << Q.getName() << " ";
+ if (P.isValid())
+ dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
+ << " ";
+ else
+ dbgs() << " ";
+ SU->dump(DAG);
+}
+#endif
+
+/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
+/// of SU, return it, otherwise return null.
+static SUnit *getSingleUnscheduledPred(SUnit *SU) {
+ SUnit *OnlyAvailablePred = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ SUnit &Pred = *I->getSUnit();
+ if (!Pred.isScheduled) {
+ // We found an available, but not scheduled, predecessor. If it's the
+ // only one we have found, keep track of it... otherwise give up.
+ if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
+ return 0;
+ OnlyAvailablePred = &Pred;
+ }
+ }
+ return OnlyAvailablePred;
+}
+
+/// getSingleUnscheduledSucc - If there is exactly one unscheduled successor
+/// of SU, return it, otherwise return null.
+static SUnit *getSingleUnscheduledSucc(SUnit *SU) {
+ SUnit *OnlyAvailableSucc = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ SUnit &Succ = *I->getSUnit();
+ if (!Succ.isScheduled) {
+ // We found an available, but not scheduled, successor. If it's the
+ // only one we have found, keep track of it... otherwise give up.
+ if (OnlyAvailableSucc && OnlyAvailableSucc != &Succ)
+ return 0;
+ OnlyAvailableSucc = &Succ;
+ }
+ }
+ return OnlyAvailableSucc;
+}
+
+// Constants used to denote relative importance of
+// heuristic components for cost computation.
+static const unsigned PriorityOne = 200;
+static const unsigned PriorityTwo = 100;
+static const unsigned PriorityThree = 50;
+static const unsigned PriorityFour = 20;
+static const unsigned ScaleTwo = 10;
+static const unsigned FactorOne = 2;
+
+/// Single point to compute overall scheduling cost.
+/// TODO: More heuristics will be used soon.
+int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
+ SchedCandidate &Candidate,
+ RegPressureDelta &Delta,
+ bool verbose) {
+ // Initial trivial priority.
+ int ResCount = 1;
+
+ // Do not waste time on a node that is already scheduled.
+ if (!SU || SU->isScheduled)
+ return ResCount;
+
+ // Forced priority is high.
+ if (SU->isScheduleHigh)
+ ResCount += PriorityOne;
+
+ // Critical path first.
+ if (Q.getID() == TopQID) {
+ ResCount += (SU->getHeight() * ScaleTwo);
+
+ // If resources are available for it, multiply the
+ // chance of scheduling.
+ if (Top.ResourceModel->isResourceAvailable(SU))
+ ResCount <<= FactorOne;
+ } else {
+ ResCount += (SU->getDepth() * ScaleTwo);
+
+ // If resources are available for it, multiply the
+ // chance of scheduling.
+ if (Bot.ResourceModel->isResourceAvailable(SU))
+ ResCount <<= FactorOne;
+ }
+
+ unsigned NumNodesBlocking = 0;
+ if (Q.getID() == TopQID) {
+ // How many SUs does it block from scheduling?
+ // Look at all of the successors of this node.
+ // Count the number of nodes that
+ // this node is the sole unscheduled node for.
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I)
+ if (getSingleUnscheduledPred(I->getSUnit()) == SU)
+ ++NumNodesBlocking;
+ } else {
+ // How many unscheduled predecessors block this node?
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I)
+ if (getSingleUnscheduledSucc(I->getSUnit()) == SU)
+ ++NumNodesBlocking;
+ }
+ ResCount += (NumNodesBlocking * ScaleTwo);
+
+ // Factor in reg pressure as a heuristic.
+ ResCount -= (Delta.Excess.UnitIncrease*PriorityThree);
+ ResCount -= (Delta.CriticalMax.UnitIncrease*PriorityThree);
+
+ DEBUG(if (verbose) dbgs() << " Total(" << ResCount << ")");
+
+ return ResCount;
+}
+
+/// Pick the best candidate from the given ready queue.
+///
+/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
+/// DAG building. To adjust for the current scheduling location we need to
+/// maintain the number of vreg uses remaining to be top-scheduled.
+ConvergingVLIWScheduler::CandResult ConvergingVLIWScheduler::
+pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker,
+ SchedCandidate &Candidate) {
+ DEBUG(Q.dump());
+
+ // getMaxPressureDelta temporarily modifies the tracker.
+ RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
+
+  // Candidate.SU remains NULL if no candidate beats the existing candidate.
+ CandResult FoundCandidate = NoCand;
+ for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
+ RegPressureDelta RPDelta;
+ TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
+ DAG->getRegionCriticalPSets(),
+ DAG->getRegPressure().MaxSetPressure);
+
+ int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false);
+
+ // Initialize the candidate if needed.
+ if (!Candidate.SU) {
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ Candidate.SCost = CurrentCost;
+ FoundCandidate = NodeOrder;
+ continue;
+ }
+
+ // Best cost.
+ if (CurrentCost > Candidate.SCost) {
+ DEBUG(traceCandidate("CCAND", Q, *I));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ Candidate.SCost = CurrentCost;
+ FoundCandidate = BestCost;
+ continue;
+ }
+
+ // Fall through to original instruction order.
+ // Only consider node order if Candidate was chosen from this Q.
+ if (FoundCandidate == NoCand)
+ continue;
+ }
+ return FoundCandidate;
+}
+
+/// Pick the best candidate node from either the top or bottom queue.
+SUnit *ConvergingVLIWScheduler::pickNodeBidirectional(bool &IsTopNode) {
+ // Schedule as far as possible in the direction of no choice. This is most
+ // efficient, but also provides the best heuristics for CriticalPSets.
+ if (SUnit *SU = Bot.pickOnlyChoice()) {
+ IsTopNode = false;
+ return SU;
+ }
+ if (SUnit *SU = Top.pickOnlyChoice()) {
+ IsTopNode = true;
+ return SU;
+ }
+ SchedCandidate BotCand;
+ // Prefer bottom scheduling when heuristics are silent.
+ CandResult BotResult = pickNodeFromQueue(Bot.Available,
+ DAG->getBotRPTracker(), BotCand);
+ assert(BotResult != NoCand && "failed to find the first candidate");
+
+ // If either Q has a single candidate that provides the least increase in
+ // Excess pressure, we can immediately schedule from that Q.
+ //
+ // RegionCriticalPSets summarizes the pressure within the scheduled region and
+ // affects picking from either Q. If scheduling in one direction must
+ // increase pressure for one of the excess PSets, then schedule in that
+ // direction first to provide more freedom in the other direction.
+ if (BotResult == SingleExcess || BotResult == SingleCritical) {
+ IsTopNode = false;
+ return BotCand.SU;
+ }
+ // Check if the top Q has a better candidate.
+ SchedCandidate TopCand;
+ CandResult TopResult = pickNodeFromQueue(Top.Available,
+ DAG->getTopRPTracker(), TopCand);
+ assert(TopResult != NoCand && "failed to find the first candidate");
+
+ if (TopResult == SingleExcess || TopResult == SingleCritical) {
+ IsTopNode = true;
+ return TopCand.SU;
+ }
+ // If either Q has a single candidate that minimizes pressure above the
+ // original region's pressure pick it.
+ if (BotResult == SingleMax) {
+ IsTopNode = false;
+ return BotCand.SU;
+ }
+ if (TopResult == SingleMax) {
+ IsTopNode = true;
+ return TopCand.SU;
+ }
+ if (TopCand.SCost > BotCand.SCost) {
+ IsTopNode = true;
+ return TopCand.SU;
+ }
+ // Otherwise prefer the bottom candidate in node order.
+ IsTopNode = false;
+ return BotCand.SU;
+}
+
+/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
+SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
+ if (DAG->top() == DAG->bottom()) {
+ assert(Top.Available.empty() && Top.Pending.empty() &&
+ Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
+ return NULL;
+ }
+ SUnit *SU;
+ if (ForceTopDown) {
+ SU = Top.pickOnlyChoice();
+ if (!SU) {
+ SchedCandidate TopCand;
+ CandResult TopResult =
+ pickNodeFromQueue(Top.Available, DAG->getTopRPTracker(), TopCand);
+ assert(TopResult != NoCand && "failed to find the first candidate");
+ (void)TopResult;
+ SU = TopCand.SU;
+ }
+ IsTopNode = true;
+ } else if (ForceBottomUp) {
+ SU = Bot.pickOnlyChoice();
+ if (!SU) {
+ SchedCandidate BotCand;
+ CandResult BotResult =
+ pickNodeFromQueue(Bot.Available, DAG->getBotRPTracker(), BotCand);
+ assert(BotResult != NoCand && "failed to find the first candidate");
+ (void)BotResult;
+ SU = BotCand.SU;
+ }
+ IsTopNode = false;
+ } else {
+    SU = pickNodeBidirectional(IsTopNode);
+ }
+ if (SU->isTopReady())
+ Top.removeReady(SU);
+ if (SU->isBottomReady())
+ Bot.removeReady(SU);
+
+ DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
+ << " Scheduling Instruction in cycle "
+ << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
+ SU->dump(DAG));
+ return SU;
+}
+
+/// Update the scheduler's state after scheduling a node. This is the same node
+/// that was just returned by pickNode(). However, VLIWMachineScheduler needs
+/// to update its state based on the current cycle before MachineSchedStrategy
+/// does.
+void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
+ if (IsTopNode) {
+ SU->TopReadyCycle = Top.CurrCycle;
+ Top.bumpNode(SU);
+ } else {
+ SU->BotReadyCycle = Bot.CurrCycle;
+ Bot.bumpNode(SU);
+ }
+}
+
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.h b/lib/Target/Hexagon/HexagonMachineScheduler.h
new file mode 100644
index 0000000..f3643d6
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.h
@@ -0,0 +1,437 @@
+//===-- HexagonMachineScheduler.h - Custom Hexagon MI scheduler. ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Custom Hexagon MI scheduler.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONMACHINESCHEDULER_H
+#define HEXAGONMACHINESCHEDULER_H
+
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/ResourcePriorityQueue.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/PriorityQueue.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MachineSchedStrategy - Interface to a machine scheduling algorithm.
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+class VLIWMachineScheduler;
+
+/// MachineSchedStrategy - Interface used by VLIWMachineScheduler to drive
+/// the selected scheduling algorithm.
+///
+/// TODO: Move this to ScheduleDAGInstrs.h
+class MachineSchedStrategy {
+public:
+ virtual ~MachineSchedStrategy() {}
+
+ /// Initialize the strategy after building the DAG for a new region.
+ virtual void initialize(VLIWMachineScheduler *DAG) = 0;
+
+ /// Pick the next node to schedule, or return NULL. Set IsTopNode to true to
+ /// schedule the node at the top of the unscheduled region. Otherwise it will
+ /// be scheduled at the bottom.
+ virtual SUnit *pickNode(bool &IsTopNode) = 0;
+
+ /// Notify MachineSchedStrategy that VLIWMachineScheduler has
+ /// scheduled a node.
+ virtual void schedNode(SUnit *SU, bool IsTopNode) = 0;
+
+ /// When all predecessor dependencies have been resolved, free this node for
+ /// top-down scheduling.
+ virtual void releaseTopNode(SUnit *SU) = 0;
+ /// When all successor dependencies have been resolved, free this node for
+ /// bottom-up scheduling.
+ virtual void releaseBottomNode(SUnit *SU) = 0;
+};
+
+//===----------------------------------------------------------------------===//
+// ConvergingVLIWScheduler - Implementation of the standard
+// MachineSchedStrategy.
+//===----------------------------------------------------------------------===//
+
+/// ReadyQueue encapsulates a vector of "ready" SUnits with basic convenience
+/// methods for pushing and removing nodes. ReadyQueues are uniquely identified
+/// by an ID. SUnit::NodeQueueId is a mask of the ReadyQueues the SUnit is in.
+class ReadyQueue {
+ unsigned ID;
+ std::string Name;
+ std::vector<SUnit*> Queue;
+
+public:
+ ReadyQueue(unsigned id, const Twine &name): ID(id), Name(name.str()) {}
+
+ unsigned getID() const { return ID; }
+
+ StringRef getName() const { return Name; }
+
+  // SU is in this queue if its NodeQueueId is a superset of this ID.
+ bool isInQueue(SUnit *SU) const { return (SU->NodeQueueId & ID); }
+
+ bool empty() const { return Queue.empty(); }
+
+ unsigned size() const { return Queue.size(); }
+
+ typedef std::vector<SUnit*>::iterator iterator;
+
+ iterator begin() { return Queue.begin(); }
+
+ iterator end() { return Queue.end(); }
+
+ iterator find(SUnit *SU) {
+ return std::find(Queue.begin(), Queue.end(), SU);
+ }
+
+ void push(SUnit *SU) {
+ Queue.push_back(SU);
+ SU->NodeQueueId |= ID;
+ }
+
+ void remove(iterator I) {
+ (*I)->NodeQueueId &= ~ID;
+ *I = Queue.back();
+ Queue.pop_back();
+ }
+
+ void dump() {
+ dbgs() << Name << ": ";
+ for (unsigned i = 0, e = Queue.size(); i < e; ++i)
+ dbgs() << Queue[i]->NodeNum << " ";
+ dbgs() << "\n";
+ }
+};
+
+class VLIWResourceModel {
+ /// ResourcesModel - Represents VLIW state.
+  /// Not limited to VLIW targets per se, but assumes
+  /// a DFA is defined by the target.
+ DFAPacketizer *ResourcesModel;
+
+ const InstrItineraryData *InstrItins;
+
+ /// Local packet/bundle model. Purely
+  /// internal to the MI scheduler at this time.
+ std::vector<SUnit*> Packet;
+
+ /// Total packets created.
+ unsigned TotalPackets;
+
+public:
+ VLIWResourceModel(MachineSchedContext *C, const InstrItineraryData *IID) :
+ InstrItins(IID), TotalPackets(0) {
+ const TargetMachine &TM = C->MF->getTarget();
+    ResourcesModel = TM.getInstrInfo()->CreateTargetScheduleState(&TM, NULL);
+
+ // This hard requirement could be relaxed,
+ // but for now do not let it proceed.
+ assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
+
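+    // resize() followed by clear() pre-allocates storage for a full packet
+    // while starting with an empty one.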
+ Packet.resize(InstrItins->SchedModel->IssueWidth);
+ Packet.clear();
+ ResourcesModel->clearResources();
+ }
+
+ VLIWResourceModel(const TargetMachine &TM) :
+ InstrItins(TM.getInstrItineraryData()), TotalPackets(0) {
+    ResourcesModel = TM.getInstrInfo()->CreateTargetScheduleState(&TM, NULL);
+
+ // This hard requirement could be relaxed,
+ // but for now do not let it proceed.
+ assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
+
+ Packet.resize(InstrItins->SchedModel->IssueWidth);
+ Packet.clear();
+ ResourcesModel->clearResources();
+ }
+
+ ~VLIWResourceModel() {
+ delete ResourcesModel;
+ }
+
+ void resetPacketState() {
+ Packet.clear();
+ }
+
+ void resetDFA() {
+ ResourcesModel->clearResources();
+ }
+
+ void reset() {
+ Packet.clear();
+ ResourcesModel->clearResources();
+ }
+
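+  /// Return true if the current packet and DFA state have room to issue SU.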
+ bool isResourceAvailable(SUnit *SU);
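+  /// Account for SU in the current packet, starting a new packet when full.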
+ bool reserveResources(SUnit *SU);
+ unsigned getTotalPackets() const { return TotalPackets; }
+};
+
+class VLIWMachineScheduler : public ScheduleDAGInstrs {
+ /// AA - AliasAnalysis for making memory reference queries.
+ AliasAnalysis *AA;
+
+ RegisterClassInfo *RegClassInfo;
+ MachineSchedStrategy *SchedImpl;
+
+ MachineBasicBlock::iterator LiveRegionEnd;
+
+ /// Register pressure in this region computed by buildSchedGraph.
+ IntervalPressure RegPressure;
+ RegPressureTracker RPTracker;
+
+ /// List of pressure sets that exceed the target's pressure limit before
+ /// scheduling, listed in increasing set ID order. Each pressure set is paired
+ /// with its max pressure in the currently scheduled regions.
+ std::vector<PressureElement> RegionCriticalPSets;
+
+ /// The top of the unscheduled zone.
+ MachineBasicBlock::iterator CurrentTop;
+ IntervalPressure TopPressure;
+ RegPressureTracker TopRPTracker;
+
+ /// The bottom of the unscheduled zone.
+ MachineBasicBlock::iterator CurrentBottom;
+ IntervalPressure BotPressure;
+ RegPressureTracker BotRPTracker;
+
+#ifndef NDEBUG
+ /// The number of instructions scheduled so far. Used to cut off the
+ /// scheduler at the point determined by misched-cutoff.
+ unsigned NumInstrsScheduled;
+#endif
+
+ /// Total packets in the region.
+ unsigned TotalPackets;
+
+ const MachineLoopInfo *MLI;
+public:
+ VLIWMachineScheduler(MachineSchedContext *C, MachineSchedStrategy *S):
+ ScheduleDAGInstrs(*C->MF, *C->MLI, *C->MDT, /*IsPostRA=*/false, C->LIS),
+ AA(C->AA), RegClassInfo(C->RegClassInfo), SchedImpl(S),
+ RPTracker(RegPressure), CurrentTop(), TopRPTracker(TopPressure),
+ CurrentBottom(), BotRPTracker(BotPressure), MLI(C->MLI) {
+#ifndef NDEBUG
+ NumInstrsScheduled = 0;
+#endif
+ TotalPackets = 0;
+ }
+
+ virtual ~VLIWMachineScheduler() {
+ delete SchedImpl;
+ }
+
+ MachineBasicBlock::iterator top() const { return CurrentTop; }
+ MachineBasicBlock::iterator bottom() const { return CurrentBottom; }
+
+ /// Implement the ScheduleDAGInstrs interface for handling the next scheduling
+ /// region. This covers all instructions in a block, while schedule() may only
+ /// cover a subset.
+ void enterRegion(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned endcount);
+
+ /// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's
+ /// time to do some work.
+ void schedule();
+
+ unsigned CurCycle;
+
+ /// Get current register pressure for the top scheduled instructions.
+ const IntervalPressure &getTopPressure() const { return TopPressure; }
+ const RegPressureTracker &getTopRPTracker() const { return TopRPTracker; }
+
+ /// Get current register pressure for the bottom scheduled instructions.
+ const IntervalPressure &getBotPressure() const { return BotPressure; }
+ const RegPressureTracker &getBotRPTracker() const { return BotRPTracker; }
+
+ /// Get register pressure for the entire scheduling region before scheduling.
+ const IntervalPressure &getRegPressure() const { return RegPressure; }
+
+ const std::vector<PressureElement> &getRegionCriticalPSets() const {
+ return RegionCriticalPSets;
+ }
+
+ /// getIssueWidth - Return the max instructions per scheduling group.
+ unsigned getIssueWidth() const {
+ return (InstrItins && InstrItins->SchedModel)
+ ? InstrItins->SchedModel->IssueWidth : 1;
+ }
+
+ /// getNumMicroOps - Return the number of issue slots required for this MI.
+ unsigned getNumMicroOps(MachineInstr *MI) const {
+ return 1;
+ //if (!InstrItins) return 1;
+ //int UOps = InstrItins->getNumMicroOps(MI->getDesc().getSchedClass());
+ //return (UOps >= 0) ? UOps : TII->getNumMicroOps(InstrItins, MI);
+ }
+
+private:
+ void scheduleNodeTopDown(SUnit *SU);
+ void listScheduleTopDown();
+
+ void initRegPressure();
+ void updateScheduledPressure(std::vector<unsigned> NewMaxPressure);
+
+ void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);
+ bool checkSchedLimit();
+
+ void releaseRoots();
+
+ void releaseSucc(SUnit *SU, SDep *SuccEdge);
+ void releaseSuccessors(SUnit *SU);
+ void releasePred(SUnit *SU, SDep *PredEdge);
+ void releasePredecessors(SUnit *SU);
+
+ void placeDebugValues();
+};
+
+/// ConvergingVLIWScheduler shrinks the unscheduled zone using heuristics
+/// to balance the schedule.
+class ConvergingVLIWScheduler : public MachineSchedStrategy {
+
+ /// Store the state used by ConvergingVLIWScheduler heuristics, required
+ /// for the lifetime of one invocation of pickNode().
+ struct SchedCandidate {
+ // The best SUnit candidate.
+ SUnit *SU;
+
+ // Register pressure values for the best candidate.
+ RegPressureDelta RPDelta;
+
+ // Best scheduling cost.
+ int SCost;
+
+ SchedCandidate(): SU(NULL), SCost(0) {}
+ };
+ /// Represent the type of SchedCandidate found within a single queue.
+ enum CandResult {
+ NoCand, NodeOrder, SingleExcess, SingleCritical, SingleMax, MultiPressure,
+ BestCost};
+
+  /// Each scheduling boundary is associated with ready queues. It tracks the
+  /// current cycle in whichever direction it has moved, and maintains the state
+ /// of "hazards" and other interlocks at the current cycle.
+ struct SchedBoundary {
+ VLIWMachineScheduler *DAG;
+
+ ReadyQueue Available;
+ ReadyQueue Pending;
+ bool CheckPending;
+
+ ScheduleHazardRecognizer *HazardRec;
+ VLIWResourceModel *ResourceModel;
+
+ unsigned CurrCycle;
+ unsigned IssueCount;
+
+ /// MinReadyCycle - Cycle of the soonest available instruction.
+ unsigned MinReadyCycle;
+
+ // Remember the greatest min operand latency.
+ unsigned MaxMinLatency;
+
+ /// Pending queues extend the ready queues with the same ID and the
+ /// PendingFlag set.
+ SchedBoundary(unsigned ID, const Twine &Name):
+ DAG(0), Available(ID, Name+".A"),
+ Pending(ID << ConvergingVLIWScheduler::LogMaxQID, Name+".P"),
+ CheckPending(false), HazardRec(0), ResourceModel(0),
+ CurrCycle(0), IssueCount(0),
+ MinReadyCycle(UINT_MAX), MaxMinLatency(0) {}
+
+ ~SchedBoundary() {
+ delete ResourceModel;
+ delete HazardRec;
+ }
+
+ bool isTop() const {
+ return Available.getID() == ConvergingVLIWScheduler::TopQID;
+ }
+
+ bool checkHazard(SUnit *SU);
+
+ void releaseNode(SUnit *SU, unsigned ReadyCycle);
+
+ void bumpCycle();
+
+ void bumpNode(SUnit *SU);
+
+ void releasePending();
+
+ void removeReady(SUnit *SU);
+
+ SUnit *pickOnlyChoice();
+ };
+
+ VLIWMachineScheduler *DAG;
+ const TargetRegisterInfo *TRI;
+
+ // State of the top and bottom scheduled instruction boundaries.
+ SchedBoundary Top;
+ SchedBoundary Bot;
+
+public:
+ /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
+ enum {
+ TopQID = 1,
+ BotQID = 2,
+ LogMaxQID = 2
+ };
+
+ ConvergingVLIWScheduler():
+ DAG(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
+
+ virtual void initialize(VLIWMachineScheduler *dag);
+
+ virtual SUnit *pickNode(bool &IsTopNode);
+
+ virtual void schedNode(SUnit *SU, bool IsTopNode);
+
+ virtual void releaseTopNode(SUnit *SU);
+
+ virtual void releaseBottomNode(SUnit *SU);
+
+protected:
+  SUnit *pickNodeBidirectional(bool &IsTopNode);
+
+ int SchedulingCost(ReadyQueue &Q,
+ SUnit *SU, SchedCandidate &Candidate,
+ RegPressureDelta &Delta, bool verbose);
+
+ CandResult pickNodeFromQueue(ReadyQueue &Q,
+ const RegPressureTracker &RPTracker,
+ SchedCandidate &Candidate);
+#ifndef NDEBUG
+ void traceCandidate(const char *Label, const ReadyQueue &Q, SUnit *SU,
+ PressureElement P = PressureElement());
+#endif
+};
+
+} // namespace llvm
+
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp
index 7ece408..1e91c39 100644
--- a/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -337,7 +337,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "********** Hexagon New Value Jump **********\n"
<< "********** Function: "
- << MF.getFunction()->getName() << "\n");
+ << MF.getName() << "\n");
#if 0
// for now disable this, if we move NewValueJump before register
diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp
index 55cbc09..a295015 100644
--- a/lib/Target/Hexagon/HexagonPeephole.cpp
+++ b/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -109,6 +109,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
DenseMap<unsigned, unsigned> PeepholeMap;
+ DenseMap<unsigned, std::pair<unsigned, unsigned> > PeepholeDoubleRegsMap;
if (DisableHexagonPeephole) return false;
@@ -117,6 +118,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
MBBb != MBBe; ++MBBb) {
MachineBasicBlock* MBB = MBBb;
PeepholeMap.clear();
+ PeepholeDoubleRegsMap.clear();
// Traverse the basic block.
for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
@@ -140,6 +142,24 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
}
}
+ // Look for this sequence below
+ // %vregDoubleReg1 = LSRd_ri %vregDoubleReg0, 32
+ // %vregIntReg = COPY %vregDoubleReg1:subreg_loreg.
+ // and convert into
+ // %vregIntReg = COPY %vregDoubleReg0:subreg_hireg.
+ if (MI->getOpcode() == Hexagon::LSRd_ri) {
+ assert(MI->getNumOperands() == 3);
+ MachineOperand &Dst = MI->getOperand(0);
+ MachineOperand &Src1 = MI->getOperand(1);
+ MachineOperand &Src2 = MI->getOperand(2);
+ if (Src2.getImm() != 32)
+ continue;
+ unsigned DstReg = Dst.getReg();
+ unsigned SrcReg = Src1.getReg();
+ PeepholeDoubleRegsMap[DstReg] =
+ std::make_pair(*&SrcReg, 1/*Hexagon::subreg_hireg*/);
+ }
+
// Look for P=NOT(P).
if (!DisablePNotP &&
(MI->getOpcode() == Hexagon::NOT_p)) {
@@ -178,6 +198,21 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
// Change the 1st operand.
MI->RemoveOperand(1);
MI->addOperand(MachineOperand::CreateReg(PeepholeSrc, false));
+ } else {
+ DenseMap<unsigned, std::pair<unsigned, unsigned> >::iterator DI =
+ PeepholeDoubleRegsMap.find(SrcReg);
+ if (DI != PeepholeDoubleRegsMap.end()) {
+ std::pair<unsigned,unsigned> PeepholeSrc = DI->second;
+ MI->RemoveOperand(1);
+ MI->addOperand(MachineOperand::CreateReg(PeepholeSrc.first,
+ false /*isDef*/,
+ false /*isImp*/,
+ false /*isKill*/,
+ false /*isDead*/,
+ false /*isUndef*/,
+ false /*isEarlyClobber*/,
+ PeepholeSrc.second));
+ }
}
}
}
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index 2c23674..3742486 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -310,6 +310,58 @@ void HexagonRegisterInfo::getInitialFrameState(std::vector<MachineMove>
Moves.push_back(MachineMove(0, Dst, Src));
}
+// Get the weight in units of pressure for this register class.
+const RegClassWeight &
+HexagonRegisterInfo::getRegClassWeight(const TargetRegisterClass *RC) const {
+ // Each TargetRegisterClass has a per register weight, and weight
+ // limit which must be less than the limits of its pressure sets.
+ static const RegClassWeight RCWeightTable[] = {
+ {1, 32}, // IntRegs
+ {1, 8}, // CRRegs
+ {1, 4}, // PredRegs
+ {2, 16}, // DoubleRegs
+ {0, 0} };
+ return RCWeightTable[RC->getID()];
+}
+
+/// Get the number of dimensions of register pressure.
+unsigned HexagonRegisterInfo::getNumRegPressureSets() const {
+ return 4;
+}
+
+/// Get the name of this register unit pressure set.
+const char *HexagonRegisterInfo::getRegPressureSetName(unsigned Idx) const {
+ static const char *const RegPressureSetName[] = {
+ "IntRegsRegSet",
+ "CRRegsRegSet",
+ "PredRegsRegSet",
+ "DoubleRegsRegSet"
+ };
+ assert((Idx < 4) && "Index out of bounds");
+ return RegPressureSetName[Idx];
+}
+
+/// Get the register unit pressure limit for this dimension.
+/// This limit must be adjusted dynamically for reserved registers.
+unsigned HexagonRegisterInfo::getRegPressureSetLimit(unsigned Idx) const {
+  static const int RegPressureLimit[] = { 16, 4, 2, 8 };
+ assert((Idx < 4) && "Index out of bounds");
+ return RegPressureLimit[Idx];
+}
+
+const int*
+HexagonRegisterInfo::getRegClassPressureSets(const TargetRegisterClass *RC)
+ const {
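+  // RCSetsTable holds a -1 terminated run of pressure set IDs for each
+  // register class; RCSetStartTable gives the run's start offset per class.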
+ static const int RCSetsTable[] = {
+ 0, -1, // IntRegs
+ 1, -1, // CRRegs
+ 2, -1, // PredRegs
+ 0, -1, // DoubleRegs
+ -1 };
+ static const unsigned RCSetStartTable[] = { 0, 2, 4, 6, 0 };
+ unsigned SetListStart = RCSetStartTable[RC->getID()];
+ return &RCSetsTable[SetListStart];
+}
unsigned HexagonRegisterInfo::getEHExceptionRegister() const {
llvm_unreachable("What is the exception register");
}
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h
index 85355ae..8820d13 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.h
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -87,6 +87,11 @@ struct HexagonRegisterInfo : public HexagonGenRegisterInfo {
// Exception handling queries.
unsigned getEHExceptionRegister() const;
unsigned getEHHandlerRegister() const;
+ const RegClassWeight &getRegClassWeight(const TargetRegisterClass *RC) const;
+ unsigned getNumRegPressureSets() const;
+ const char *getRegPressureSetName(unsigned Idx) const;
+ unsigned getRegPressureSetLimit(unsigned Idx) const;
+ const int* getRegClassPressureSets(const TargetRegisterClass *RC) const;
};
} // end namespace llvm
diff --git a/lib/Target/Hexagon/HexagonSchedule.td b/lib/Target/Hexagon/HexagonSchedule.td
index d1076b8..b5ff69a 100644
--- a/lib/Target/Hexagon/HexagonSchedule.td
+++ b/lib/Target/Hexagon/HexagonSchedule.td
@@ -47,6 +47,7 @@ def HexagonModel : SchedMachineModel {
// Max issue per cycle == bundle width.
let IssueWidth = 4;
let Itineraries = HexagonItineraries;
+ let LoadLatency = 1;
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonScheduleV4.td b/lib/Target/Hexagon/HexagonScheduleV4.td
index 9b41126..5668ae8 100644
--- a/lib/Target/Hexagon/HexagonScheduleV4.td
+++ b/lib/Target/Hexagon/HexagonScheduleV4.td
@@ -58,6 +58,7 @@ def HexagonModelV4 : SchedMachineModel {
// Max issue per cycle == bundle width.
let IssueWidth = 4;
let Itineraries = HexagonItinerariesV4;
+ let LoadLatency = 1;
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index a7b291f..5688e9c 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -14,6 +14,7 @@
#include "HexagonTargetMachine.h"
#include "Hexagon.h"
#include "HexagonISelLowering.h"
+#include "HexagonMachineScheduler.h"
#include "llvm/Module.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/PassManager.h"
@@ -29,6 +30,11 @@ opt<bool> DisableHardwareLoops(
"disable-hexagon-hwloops", cl::Hidden,
cl::desc("Disable Hardware Loops for Hexagon target"));
+static cl::
+opt<bool> DisableHexagonMISched("disable-hexagon-misched",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable Hexagon MI Scheduling"));
+
/// HexagonTargetMachineModule - Note that this is used on hosts that
/// cannot link in a library unless there are references into the
/// library. In particular, it seems that it is not possible to get
@@ -42,6 +48,13 @@ extern "C" void LLVMInitializeHexagonTarget() {
RegisterTargetMachine<HexagonTargetMachine> X(TheHexagonTarget);
}
+static ScheduleDAGInstrs *createVLIWMachineSched(MachineSchedContext *C) {
+ return new VLIWMachineScheduler(C, new ConvergingVLIWScheduler());
+}
+
+static MachineSchedRegistry
+SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler",
+ createVLIWMachineSched);
/// HexagonTargetMachine ctor - Create an ILP32 architecture model.
///
@@ -83,7 +96,13 @@ namespace {
class HexagonPassConfig : public TargetPassConfig {
public:
HexagonPassConfig(HexagonTargetMachine *TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {}
+ : TargetPassConfig(TM, PM) {
+ // Enable MI scheduler.
+ if (!DisableHexagonMISched) {
+ enablePass(&MachineSchedulerID);
+ MachineSchedRegistry::setDefault(createVLIWMachineSched);
+ }
+ }
HexagonTargetMachine &getHexagonTargetMachine() const {
return getTM<HexagonTargetMachine>();
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index a03ed03..3d5f685 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -3474,8 +3474,8 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
// 1. Two loads unless they are volatile.
// 2. Two stores in V4 unless they are volatile.
else if ((DepType == SDep::Order) &&
- !I->hasVolatileMemoryRef() &&
- !J->hasVolatileMemoryRef()) {
+ !I->hasOrderedMemoryRef() &&
+ !J->hasOrderedMemoryRef()) {
if (QRI->Subtarget.hasV4TOps() &&
// hexagonv4 allows dual store.
MCIDI.mayStore() && MCIDJ.mayStore()) {
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
index d6e6c36..86f75d1 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
@@ -24,7 +24,7 @@ HexagonMCAsmInfo::HexagonMCAsmInfo(const Target &T, StringRef TT) {
HasLEB128 = true;
PrivateGlobalPrefix = ".L";
- LCOMMDirectiveType = LCOMM::ByteAlignment;
+ LCOMMDirectiveAlignmentType = LCOMM::ByteAlignment;
InlineAsmStart = "# InlineAsm Start";
InlineAsmEnd = "# InlineAsm End";
ZeroDirective = "\t.space\t";
diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
index 38fb0e8..4059403 100644
--- a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
+++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
@@ -56,6 +56,12 @@ class MBlazeAsmParser : public MCTargetAsmParser {
/// }
+ unsigned getMCInstOperandNum(unsigned Kind, MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ unsigned OperandNum, unsigned &NumMCOperands) {
+ return getMCInstOperandNumImpl(Kind, Inst, Operands, OperandNum,
+ NumMCOperands);
+ }
public:
MBlazeAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
@@ -317,10 +323,10 @@ MatchAndEmitInstruction(SMLoc IDLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCStreamer &Out) {
MCInst Inst;
- SMLoc ErrorLoc;
+ unsigned Kind;
unsigned ErrorInfo;
- switch (MatchInstructionImpl(Operands, Inst, ErrorInfo)) {
+ switch (MatchInstructionImpl(Operands, Kind, Inst, ErrorInfo)) {
default: break;
case Match_Success:
Out.EmitInstruction(Inst);
@@ -329,10 +335,8 @@ MatchAndEmitInstruction(SMLoc IDLoc,
return Error(IDLoc, "instruction use requires an option to be enabled");
case Match_MnemonicFail:
return Error(IDLoc, "unrecognized instruction mnemonic");
- case Match_ConversionFail:
- return Error(IDLoc, "unable to convert operands to instruction");
- case Match_InvalidOperand:
- ErrorLoc = IDLoc;
+ case Match_InvalidOperand: {
+ SMLoc ErrorLoc = IDLoc;
if (ErrorInfo != ~0U) {
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
@@ -343,6 +347,7 @@ MatchAndEmitInstruction(SMLoc IDLoc,
return Error(ErrorLoc, "invalid operand for instruction");
}
+ }
llvm_unreachable("Implement any new match types added!");
}
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
index 46f5207..daa76e8 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -140,7 +140,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
unsigned oi = i == 2 ? 1 : 2;
- DEBUG(dbgs() << "\nFunction : " << MF.getFunction()->getName() << "\n";
+ DEBUG(dbgs() << "\nFunction : " << MF.getName() << "\n";
dbgs() << "<--------->\n" << MI);
int FrameIndex = MI.getOperand(i).getIndex();
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 43bd345..8418b75 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -8,20 +8,36 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/MipsMCTargetDesc.h"
+#include "MipsRegisterInfo.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
-#include "llvm/MC/MCTargetAsmParser.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCTargetAsmParser.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
namespace {
+
class MipsAsmParser : public MCTargetAsmParser {
+ enum FpFormatTy {
+ FP_FORMAT_NONE = -1,
+ FP_FORMAT_S,
+ FP_FORMAT_D,
+ FP_FORMAT_L,
+ FP_FORMAT_W
+ } FpFormat;
+
+ MCSubtargetInfo &STI;
+ MCAsmParser &Parser;
+
#define GET_ASSEMBLER_HEADER
#include "MipsGenAsmMatcher.inc"
@@ -34,14 +50,67 @@ class MipsAsmParser : public MCTargetAsmParser {
bool ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ bool parseMathOperation(StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
bool ParseDirective(AsmToken DirectiveID);
- OperandMatchResultTy parseMemOperand(SmallVectorImpl<MCParsedAsmOperand*>&);
+ MipsAsmParser::OperandMatchResultTy
+ parseMemOperand(SmallVectorImpl<MCParsedAsmOperand*>&);
+
+ unsigned
+ getMCInstOperandNum(unsigned Kind, MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ unsigned OperandNum, unsigned &NumMCOperands);
+
+ bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &,
+ StringRef Mnemonic);
+
+ int tryParseRegister(StringRef Mnemonic);
+
+ bool tryParseRegisterOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ StringRef Mnemonic);
+
+ bool parseMemOffset(const MCExpr *&Res);
+ bool parseRelocOperand(const MCExpr *&Res);
+ MCSymbolRefExpr::VariantKind getVariantKind(StringRef Symbol);
+
+ bool isMips64() const {
+ return (STI.getFeatureBits() & Mips::FeatureMips64) != 0;
+ }
+
+ bool isFP64() const {
+ return (STI.getFeatureBits() & Mips::FeatureFP64Bit) != 0;
+ }
+
+ int matchRegisterName(StringRef Symbol);
+
+ int matchRegisterByNumber(unsigned RegNum, StringRef Mnemonic);
+
+ void setFpFormat(FpFormatTy Format) {
+ FpFormat = Format;
+ }
+
+ void setDefaultFpFormat();
+
+ void setFpFormat(StringRef Format);
+
+  FpFormatTy getFpFormat() { return FpFormat; }
+
+ bool requestsDoubleOperand(StringRef Mnemonic);
+
+  unsigned getReg(int RC, int RegNo);
+
public:
MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
- : MCTargetAsmParser() {
+ : MCTargetAsmParser(), STI(sti), Parser(parser) {
+ // Initialize the set of available features.
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
}
+ MCAsmParser &getParser() const { return Parser; }
+ MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+
};
}
@@ -50,6 +119,7 @@ namespace {
/// MipsOperand - Instances of this class represent a parsed Mips machine
/// instruction.
class MipsOperand : public MCParsedAsmOperand {
+
enum KindTy {
k_CondCode,
k_CoprocNum,
@@ -61,18 +131,58 @@ class MipsOperand : public MCParsedAsmOperand {
} Kind;
MipsOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+
+ union {
+ struct {
+ const char *Data;
+ unsigned Length;
+ } Tok;
+
+ struct {
+ unsigned RegNum;
+ } Reg;
+
+ struct {
+ const MCExpr *Val;
+ } Imm;
+
+ struct {
+ unsigned Base;
+ const MCExpr *Off;
+ } Mem;
+ };
+
+ SMLoc StartLoc, EndLoc;
+
public:
void addRegOperands(MCInst &Inst, unsigned N) const {
- llvm_unreachable("unimplemented!");
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getReg()));
}
+
void addExpr(MCInst &Inst, const MCExpr *Expr) const{
- llvm_unreachable("unimplemented!");
+ // Add as immediate when possible. Null MCExpr = 0.
+ if (Expr == 0)
+ Inst.addOperand(MCOperand::CreateImm(0));
+ else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::CreateExpr(Expr));
}
+
void addImmOperands(MCInst &Inst, unsigned N) const {
- llvm_unreachable("unimplemented!");
+ assert(N == 1 && "Invalid number of operands!");
+ const MCExpr *Expr = getImm();
+    addExpr(Inst, Expr);
}
+
void addMemOperands(MCInst &Inst, unsigned N) const {
- llvm_unreachable("unimplemented!");
+ assert(N == 2 && "Invalid number of operands!");
+
+ Inst.addOperand(MCOperand::CreateReg(getMemBase()));
+
+ const MCExpr *Expr = getMemOff();
+    addExpr(Inst, Expr);
}
bool isReg() const { return Kind == k_Register; }
@@ -82,46 +192,751 @@ public:
StringRef getToken() const {
assert(Kind == k_Token && "Invalid access!");
- return "";
+ return StringRef(Tok.Data, Tok.Length);
}
unsigned getReg() const {
assert((Kind == k_Register) && "Invalid access!");
- return 0;
+ return Reg.RegNum;
+ }
+
+ const MCExpr *getImm() const {
+ assert((Kind == k_Immediate) && "Invalid access!");
+ return Imm.Val;
}
+ unsigned getMemBase() const {
+ assert((Kind == k_Memory) && "Invalid access!");
+ return Mem.Base;
+ }
+
+ const MCExpr *getMemOff() const {
+ assert((Kind == k_Memory) && "Invalid access!");
+ return Mem.Off;
+ }
+
+ static MipsOperand *CreateToken(StringRef Str, SMLoc S) {
+ MipsOperand *Op = new MipsOperand(k_Token);
+ Op->Tok.Data = Str.data();
+ Op->Tok.Length = Str.size();
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static MipsOperand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) {
+ MipsOperand *Op = new MipsOperand(k_Register);
+ Op->Reg.RegNum = RegNum;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static MipsOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) {
+ MipsOperand *Op = new MipsOperand(k_Immediate);
+ Op->Imm.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static MipsOperand *CreateMem(unsigned Base, const MCExpr *Off,
+ SMLoc S, SMLoc E) {
+ MipsOperand *Op = new MipsOperand(k_Memory);
+ Op->Mem.Base = Base;
+ Op->Mem.Off = Off;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ /// getStartLoc - Get the location of the first token of this operand.
+ SMLoc getStartLoc() const { return StartLoc; }
+ /// getEndLoc - Get the location of the last token of this operand.
+ SMLoc getEndLoc() const { return EndLoc; }
+
virtual void print(raw_ostream &OS) const {
llvm_unreachable("unimplemented!");
}
};
}
+unsigned MipsAsmParser::
+getMCInstOperandNum(unsigned Kind, MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ unsigned OperandNum, unsigned &NumMCOperands) {
+  assert(0 && "getMCInstOperandNum() not supported by the Mips target.");
+  // The Mips backend doesn't currently include the matcher implementation,
+  // so getMCInstOperandNumImpl() is undefined. This is a temporary
+  // workaround.
+ NumMCOperands = 0;
+ return 0;
+}
+
bool MipsAsmParser::
MatchAndEmitInstruction(SMLoc IDLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCStreamer &Out) {
+ MCInst Inst;
+ unsigned ErrorInfo;
+ unsigned Kind;
+ unsigned MatchResult = MatchInstructionImpl(Operands, Kind, Inst, ErrorInfo);
+
+ switch (MatchResult) {
+ default: break;
+ case Match_Success: {
+ Inst.setLoc(IDLoc);
+ Out.EmitInstruction(Inst);
+ return false;
+ }
+ case Match_MissingFeature:
+ Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ return true;
+ case Match_InvalidOperand: {
+ SMLoc ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0U) {
+ if (ErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
+
+ ErrorLoc = ((MipsOperand*)Operands[ErrorInfo])->getStartLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ }
+
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+ case Match_MnemonicFail:
+ return Error(IDLoc, "invalid instruction");
+ }
return true;
}
+int MipsAsmParser::matchRegisterName(StringRef Name) {
+
+ int CC = StringSwitch<unsigned>(Name)
+ .Case("zero", Mips::ZERO)
+ .Case("a0", Mips::A0)
+ .Case("a1", Mips::A1)
+ .Case("a2", Mips::A2)
+ .Case("a3", Mips::A3)
+ .Case("v0", Mips::V0)
+ .Case("v1", Mips::V1)
+ .Case("s0", Mips::S0)
+ .Case("s1", Mips::S1)
+ .Case("s2", Mips::S2)
+ .Case("s3", Mips::S3)
+ .Case("s4", Mips::S4)
+ .Case("s5", Mips::S5)
+ .Case("s6", Mips::S6)
+ .Case("s7", Mips::S7)
+ .Case("k0", Mips::K0)
+ .Case("k1", Mips::K1)
+ .Case("sp", Mips::SP)
+ .Case("fp", Mips::FP)
+ .Case("gp", Mips::GP)
+ .Case("ra", Mips::RA)
+ .Case("t0", Mips::T0)
+ .Case("t1", Mips::T1)
+ .Case("t2", Mips::T2)
+ .Case("t3", Mips::T3)
+ .Case("t4", Mips::T4)
+ .Case("t5", Mips::T5)
+ .Case("t6", Mips::T6)
+ .Case("t7", Mips::T7)
+ .Case("t8", Mips::T8)
+ .Case("t9", Mips::T9)
+ .Case("at", Mips::AT)
+ .Case("fcc0", Mips::FCC0)
+ .Default(-1);
+
+ if (CC != -1) {
+    // 64-bit register enum values directly follow their 32-bit definitions.
+ if (isMips64())
+ CC++;
+ return CC;
+ }
+
+ if (Name[0] == 'f') {
+ StringRef NumString = Name.substr(1);
+ unsigned IntVal;
+    if (NumString.getAsInteger(10, IntVal))
+      return -1; // not an integer
+ if (IntVal > 31)
+ return -1;
+
+ FpFormatTy Format = getFpFormat();
+
+ if (Format == FP_FORMAT_S || Format == FP_FORMAT_W)
+ return getReg(Mips::FGR32RegClassID, IntVal);
+ if (Format == FP_FORMAT_D) {
+      if (isFP64()) {
+ return getReg(Mips::FGR64RegClassID, IntVal);
+ }
+      // Only even numbers are available as register pairs.
+      if ((IntVal > 31) || (IntVal % 2 != 0))
+ return -1;
+ return getReg(Mips::AFGR64RegClassID, IntVal/2);
+ }
+ }
+
+ return -1;
+}
+void MipsAsmParser::setDefaultFpFormat() {
+
+ if (isMips64() || isFP64())
+ FpFormat = FP_FORMAT_D;
+ else
+ FpFormat = FP_FORMAT_S;
+}
+
+bool MipsAsmParser::requestsDoubleOperand(StringRef Mnemonic) {
+
+ bool IsDouble = StringSwitch<bool>(Mnemonic.lower())
+ .Case("ldxc1", true)
+ .Case("ldc1", true)
+ .Case("sdxc1", true)
+ .Case("sdc1", true)
+ .Default(false);
+
+ return IsDouble;
+}
+void MipsAsmParser::setFpFormat(StringRef Format) {
+
+ FpFormat = StringSwitch<FpFormatTy>(Format.lower())
+ .Case(".s", FP_FORMAT_S)
+ .Case(".d", FP_FORMAT_D)
+ .Case(".l", FP_FORMAT_L)
+ .Case(".w", FP_FORMAT_W)
+ .Default(FP_FORMAT_NONE);
+}
+
+unsigned MipsAsmParser::getReg(int RC, int RegNo) {
+ return *(getContext().getRegisterInfo().getRegClass(RC).begin() + RegNo);
+}
+
+int MipsAsmParser::matchRegisterByNumber(unsigned RegNum, StringRef Mnemonic) {
+
+ if (Mnemonic.lower() == "rdhwr") {
+    // At the moment only hwreg29 is supported.
+ if (RegNum != 29)
+ return -1;
+ return Mips::HWR29;
+ }
+
+ if (RegNum > 31)
+ return -1;
+
+  return getReg(Mips::CPURegsRegClassID, RegNum);
+}
+
+int MipsAsmParser::tryParseRegister(StringRef Mnemonic) {
+ const AsmToken &Tok = Parser.getTok();
+ int RegNum = -1;
+
+ if (Tok.is(AsmToken::Identifier)) {
+ std::string lowerCase = Tok.getString().lower();
+ RegNum = matchRegisterName(lowerCase);
+ } else if (Tok.is(AsmToken::Integer))
+    RegNum = matchRegisterByNumber(static_cast<unsigned>(Tok.getIntVal()),
+ Mnemonic.lower());
+ else
+    return RegNum; // error
+  // 64-bit div operations require Mips::ZERO instead of Mips::ZERO_64.
+ if (isMips64() && RegNum == Mips::ZERO_64) {
+ if (Mnemonic.find("ddiv") != StringRef::npos)
+ RegNum = Mips::ZERO;
+ }
+ return RegNum;
+}
+
bool MipsAsmParser::
-ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) {
+ tryParseRegisterOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+                         StringRef Mnemonic) {
+
+ SMLoc S = Parser.getTok().getLoc();
+ int RegNo = -1;
+
+  // FIXME: we should make a more generic method for CCR.
+  if ((Mnemonic == "cfc1" || Mnemonic == "ctc1")
+      && Operands.size() == 2 && Parser.getTok().is(AsmToken::Integer)) {
+    RegNo = Parser.getTok().getIntVal(); // Get the integer value.
+    // At the moment only fcc0 is supported.
+ if (RegNo == 0)
+ RegNo = Mips::FCC0;
+ } else
+ RegNo = tryParseRegister(Mnemonic);
+ if (RegNo == -1)
+ return true;
+
+ Operands.push_back(MipsOperand::CreateReg(RegNo, S,
+ Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat register token.
+ return false;
+}
+
+bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ StringRef Mnemonic) {
+  // Check if the current operand has a custom associated parser; if so, try
+  // to custom parse the operand. Otherwise fall back to the general approach.
+ OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
+ if (ResTy == MatchOperand_Success)
+ return false;
+ // If there wasn't a custom match, try the generic matcher below. Otherwise,
+ // there was a match, but an error occurred, in which case, just return that
+ // the operand parsing failed.
+ if (ResTy == MatchOperand_ParseFail)
+ return true;
+
+ switch (getLexer().getKind()) {
+ default:
+ Error(Parser.getTok().getLoc(), "unexpected token in operand");
+ return true;
+ case AsmToken::Dollar: {
+    // Parse a register.
+    SMLoc S = Parser.getTok().getLoc();
+    Parser.Lex(); // Eat dollar token.
+    // Parse the register operand.
+    if (!tryParseRegisterOperand(Operands, Mnemonic)) {
+ if (getLexer().is(AsmToken::LParen)) {
+        // Check if it is an indexed addressing operand.
+        Operands.push_back(MipsOperand::CreateToken("(", S));
+        Parser.Lex(); // Eat the parenthesis.
+        if (getLexer().isNot(AsmToken::Dollar))
+          return true;
+
+        Parser.Lex(); // Eat the dollar.
+        if (tryParseRegisterOperand(Operands, Mnemonic))
+ return true;
+
+ if (!getLexer().is(AsmToken::RParen))
+ return true;
+
+ S = Parser.getTok().getLoc();
+ Operands.push_back(MipsOperand::CreateToken(")", S));
+ Parser.Lex();
+ }
+ return false;
+ }
+    // Maybe it is a symbol reference.
+ StringRef Identifier;
+ if (Parser.ParseIdentifier(Identifier))
+ return true;
+
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
+ MCSymbol *Sym = getContext().GetOrCreateSymbol("$" + Identifier);
+
+ // Otherwise create a symbol ref.
+ const MCExpr *Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None,
+ getContext());
+
+ Operands.push_back(MipsOperand::CreateImm(Res, S, E));
+ return false;
+ }
+ case AsmToken::Identifier:
+ case AsmToken::LParen:
+ case AsmToken::Minus:
+ case AsmToken::Plus:
+ case AsmToken::Integer:
+ case AsmToken::String: {
+ // quoted label names
+ const MCExpr *IdVal;
+ SMLoc S = Parser.getTok().getLoc();
+ if (getParser().ParseExpression(IdVal))
+ return true;
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(MipsOperand::CreateImm(IdVal, S, E));
+ return false;
+ }
+ case AsmToken::Percent: {
+    // It is a symbol reference or a constant expression.
+    const MCExpr *IdVal;
+    SMLoc S = Parser.getTok().getLoc(); // Start location of the operand.
+ if (parseRelocOperand(IdVal))
+ return true;
+
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
+ Operands.push_back(MipsOperand::CreateImm(IdVal, S, E));
+ return false;
+  } // case AsmToken::Percent
+  } // switch (getLexer().getKind())
+ return true;
+}
+
+bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) {
+
+  Parser.Lex(); // Eat the % token.
+  const AsmToken &Tok = Parser.getTok(); // Get the next token, the operation.
+ if (Tok.isNot(AsmToken::Identifier))
+ return true;
+
+ std::string Str = Tok.getIdentifier().str();
+
+  Parser.Lex(); // Eat the identifier.
+  // Now make an expression from the rest of the operand.
+ const MCExpr *IdVal;
+ SMLoc EndLoc;
+
+ if (getLexer().getKind() == AsmToken::LParen) {
+ while (1) {
+      Parser.Lex(); // Eat the '(' token.
+      if (getLexer().getKind() == AsmToken::Percent) {
+        Parser.Lex(); // Eat the % token.
+ const AsmToken &nextTok = Parser.getTok();
+ if (nextTok.isNot(AsmToken::Identifier))
+ return true;
+ Str += "(%";
+ Str += nextTok.getIdentifier();
+        Parser.Lex(); // Eat the identifier.
+ if (getLexer().getKind() != AsmToken::LParen)
+ return true;
+ } else
+ break;
+ }
+ if (getParser().ParseParenExpression(IdVal,EndLoc))
+ return true;
+
+ while (getLexer().getKind() == AsmToken::RParen)
+      Parser.Lex(); // Eat the ')' token.
+
+ } else
+    return true; // Parentheses must follow the reloc operand.
+
+  // Check the type of the expression.
+  if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(IdVal)) {
+    // It's a constant; evaluate the lo or hi value.
+ int Val = MCE->getValue();
+ if (Str == "lo") {
+ Val = Val & 0xffff;
+ } else if (Str == "hi") {
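+      // Take the upper halfword; note that no adjustment is made here for
+      // sign extension of the lower halfword.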
+ Val = (Val & 0xffff0000) >> 16;
+ }
+ Res = MCConstantExpr::Create(Val, getContext());
+ return false;
+ }
+
+ if (const MCSymbolRefExpr *MSRE = dyn_cast<MCSymbolRefExpr>(IdVal)) {
+    // It's a symbol; create a symbolic expression from the symbol.
+ StringRef Symbol = MSRE->getSymbol().getName();
+ MCSymbolRefExpr::VariantKind VK = getVariantKind(Str);
+    Res = MCSymbolRefExpr::Create(Symbol, VK, getContext());
+ return false;
+ }
return true;
}
+bool MipsAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
+
+ StartLoc = Parser.getTok().getLoc();
+ RegNo = tryParseRegister("");
+ EndLoc = Parser.getTok().getLoc();
+ return (RegNo == (unsigned)-1);
+}
+
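+/// Parse the offset part of a memory operand. A leading '(' means the
+/// offset is implicitly zero.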
+bool MipsAsmParser::parseMemOffset(const MCExpr *&Res) {
+
+  switch (getLexer().getKind()) {
+ default:
+ return true;
+ case AsmToken::Integer:
+ case AsmToken::Minus:
+ case AsmToken::Plus:
+ return (getParser().ParseExpression(Res));
+ case AsmToken::Percent:
+ return parseRelocOperand(Res);
+ case AsmToken::LParen:
+    return false; // A bare '(' implies an offset of 0.
+ }
+ return true;
+}
+
+MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
+    SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+
+ const MCExpr *IdVal = 0;
+ SMLoc S;
+  // The first operand is the offset.
+ S = Parser.getTok().getLoc();
+
+ if (parseMemOffset(IdVal))
+ return MatchOperand_ParseFail;
+
+  const AsmToken &Tok = Parser.getTok(); // Get the next token.
+ if (Tok.isNot(AsmToken::LParen)) {
+ Error(Parser.getTok().getLoc(), "'(' expected");
+ return MatchOperand_ParseFail;
+ }
+
+ Parser.Lex(); // Eat '(' token.
+
+  const AsmToken &Tok1 = Parser.getTok(); // Get the next token.
+ if (Tok1.is(AsmToken::Dollar)) {
+ Parser.Lex(); // Eat '$' token.
+    if (tryParseRegisterOperand(Operands, "")) {
+ Error(Parser.getTok().getLoc(), "unexpected token in operand");
+ return MatchOperand_ParseFail;
+ }
+
+ } else {
+    Error(Parser.getTok().getLoc(), "unexpected token in operand");
+ return MatchOperand_ParseFail;
+ }
+
+  const AsmToken &Tok2 = Parser.getTok(); // Get the next token.
+ if (Tok2.isNot(AsmToken::RParen)) {
+ Error(Parser.getTok().getLoc(), "')' expected");
+ return MatchOperand_ParseFail;
+ }
+
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
+ Parser.Lex(); // Eat ')' token.
+
+ if (IdVal == 0)
+ IdVal = MCConstantExpr::Create(0, getContext());
+
+  // Now replace the register operand with the memory operand.
+  MipsOperand* op = static_cast<MipsOperand*>(Operands.back());
+  int RegNo = op->getReg();
+  // Remove the register from the operands,
+  Operands.pop_back();
+  // and add the memory operand.
+ Operands.push_back(MipsOperand::CreateMem(RegNo, IdVal, S, E));
+ delete op;
+ return MatchOperand_Success;
+}
+
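+/// Map a %reloc-style operation name to the corresponding MCSymbolRefExpr
+/// variant kind; unknown names yield VK_None.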
+MCSymbolRefExpr::VariantKind MipsAsmParser::getVariantKind(StringRef Symbol) {
+
+ MCSymbolRefExpr::VariantKind VK
+ = StringSwitch<MCSymbolRefExpr::VariantKind>(Symbol)
+ .Case("hi", MCSymbolRefExpr::VK_Mips_ABS_HI)
+ .Case("lo", MCSymbolRefExpr::VK_Mips_ABS_LO)
+ .Case("gp_rel", MCSymbolRefExpr::VK_Mips_GPREL)
+ .Case("call16", MCSymbolRefExpr::VK_Mips_GOT_CALL)
+ .Case("got", MCSymbolRefExpr::VK_Mips_GOT)
+ .Case("tlsgd", MCSymbolRefExpr::VK_Mips_TLSGD)
+ .Case("tlsldm", MCSymbolRefExpr::VK_Mips_TLSLDM)
+ .Case("dtprel_hi", MCSymbolRefExpr::VK_Mips_DTPREL_HI)
+ .Case("dtprel_lo", MCSymbolRefExpr::VK_Mips_DTPREL_LO)
+ .Case("gottprel", MCSymbolRefExpr::VK_Mips_GOTTPREL)
+ .Case("tprel_hi", MCSymbolRefExpr::VK_Mips_TPREL_HI)
+ .Case("tprel_lo", MCSymbolRefExpr::VK_Mips_TPREL_LO)
+ .Case("got_disp", MCSymbolRefExpr::VK_Mips_GOT_DISP)
+ .Case("got_page", MCSymbolRefExpr::VK_Mips_GOT_PAGE)
+ .Case("got_ofst", MCSymbolRefExpr::VK_Mips_GOT_OFST)
+ .Case("hi(%neg(%gp_rel", MCSymbolRefExpr::VK_Mips_GPOFF_HI)
+ .Case("lo(%neg(%gp_rel", MCSymbolRefExpr::VK_Mips_GPOFF_LO)
+ .Default(MCSymbolRefExpr::VK_None);
+
+ return VK;
+}
+
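+/// Map a floating point condition suffix (".eq", ".lt", ...) to its Mips
+/// condition code encoding, or -1 for an unknown suffix.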
+static int ConvertCcString(StringRef CondString) {
+ int CC = StringSwitch<unsigned>(CondString)
+ .Case(".f", 0)
+ .Case(".un", 1)
+ .Case(".eq", 2)
+ .Case(".ueq", 3)
+ .Case(".olt", 4)
+ .Case(".ult", 5)
+ .Case(".ole", 6)
+ .Case(".ule", 7)
+ .Case(".sf", 8)
+ .Case(".ngle", 9)
+ .Case(".seq", 10)
+ .Case(".ngl", 11)
+ .Case(".lt", 12)
+ .Case(".nge", 13)
+ .Case(".le", 14)
+ .Case(".ngt", 15)
+ .Default(-1);
+
+ return CC;
+}
+
+bool MipsAsmParser::
+parseMathOperation(StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  // Split out the two format suffixes.
+  size_t Start = Name.find('.'), Next = Name.rfind('.');
+  StringRef Format1 = Name.slice(Start, Next);
+  // Add the first format to the operands.
+  Operands.push_back(MipsOperand::CreateToken(Format1, NameLoc));
+  // Now the second format.
+  StringRef Format2 = Name.slice(Next, StringRef::npos);
+  Operands.push_back(MipsOperand::CreateToken(Format2, NameLoc));
+
+  // Set the format for the first register.
+ setFpFormat(Format1);
+
+ // Read the remaining operands.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ // Read the first operand.
+ if (ParseOperand(Operands, Name)) {
+ SMLoc Loc = getLexer().getLoc();
+ Parser.EatToEndOfStatement();
+ return Error(Loc, "unexpected token in argument list");
+ }
+
+ if (getLexer().isNot(AsmToken::Comma)) {
+ SMLoc Loc = getLexer().getLoc();
+ Parser.EatToEndOfStatement();
+ return Error(Loc, "unexpected token in argument list");
+    }
+ Parser.Lex(); // Eat the comma.
+
+    // Set the format for the second register.
+ setFpFormat(Format2);
+
+ // Parse and remember the operand.
+ if (ParseOperand(Operands, Name)) {
+ SMLoc Loc = getLexer().getLoc();
+ Parser.EatToEndOfStatement();
+ return Error(Loc, "unexpected token in argument list");
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ SMLoc Loc = getLexer().getLoc();
+ Parser.EatToEndOfStatement();
+ return Error(Loc, "unexpected token in argument list");
+ }
+
+ Parser.Lex(); // Consume the EndOfStatement
+ return false;
+}
+
bool MipsAsmParser::
ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- return true;
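+  // A mnemonic may carry one or two '.fmt' suffixes, and "c.<cond>.<fmt>"
+  // compares additionally encode a condition code; these are split off into
+  // separate operand tokens before the argument list is read.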
+  // Floating point instructions: should the register be treated as double?
+ if (requestsDoubleOperand(Name)) {
+ setFpFormat(FP_FORMAT_D);
+ Operands.push_back(MipsOperand::CreateToken(Name, NameLoc));
+  } else {
+ setDefaultFpFormat();
+ // Create the leading tokens for the mnemonic, split by '.' characters.
+ size_t Start = 0, Next = Name.find('.');
+ StringRef Mnemonic = Name.slice(Start, Next);
+
+ Operands.push_back(MipsOperand::CreateToken(Mnemonic, NameLoc));
+
+ if (Next != StringRef::npos) {
+      // There is a format token in the mnemonic.
+ size_t Dot = Name.find('.', Next+1);
+ StringRef Format = Name.slice(Next, Dot);
+      if (Dot == StringRef::npos) // Only one '.' in the string; it's a format.
+ Operands.push_back(MipsOperand::CreateToken(Format, NameLoc));
+ else {
+        if (Name.startswith("c.")) {
+          // Floating point compare: add '.' and an immediate representing
+          // the condition code.
+ Operands.push_back(MipsOperand::CreateToken(".", NameLoc));
+ int Cc = ConvertCcString(Format);
+ if (Cc == -1) {
+            return Error(NameLoc, "invalid condition code");
+ }
+ SMLoc E = SMLoc::getFromPointer(
+              Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(MipsOperand::CreateImm(
+ MCConstantExpr::Create(Cc, getContext()), NameLoc, E));
+ } else {
+          // trunc, ceil, floor ...
+ return parseMathOperation(Name, NameLoc, Operands);
+ }
+
+        // The rest is a format.
+ Format = Name.slice(Dot, StringRef::npos);
+ Operands.push_back(MipsOperand::CreateToken(Format, NameLoc));
+ }
+
+ setFpFormat(Format);
+ }
+ }
+
+ // Read the remaining operands.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ // Read the first operand.
+ if (ParseOperand(Operands, Name)) {
+ SMLoc Loc = getLexer().getLoc();
+ Parser.EatToEndOfStatement();
+ return Error(Loc, "unexpected token in argument list");
+ }
+
+    while (getLexer().is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the comma.
+
+ // Parse and remember the operand.
+ if (ParseOperand(Operands, Name)) {
+ SMLoc Loc = getLexer().getLoc();
+ Parser.EatToEndOfStatement();
+ return Error(Loc, "unexpected token in argument list");
+ }
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ SMLoc Loc = getLexer().getLoc();
+ Parser.EatToEndOfStatement();
+ return Error(Loc, "unexpected token in argument list");
+ }
+
+ Parser.Lex(); // Consume the EndOfStatement
+ return false;
}
bool MipsAsmParser::
ParseDirective(AsmToken DirectiveID) {
- return true;
-}
-MipsAsmParser::OperandMatchResultTy MipsAsmParser::
- parseMemOperand(SmallVectorImpl<MCParsedAsmOperand*>&) {
- return MatchOperand_ParseFail;
+ if (DirectiveID.getString() == ".ent") {
+ // Ignore this directive for now.
+ Parser.Lex();
+ return false;
+ }
+
+ if (DirectiveID.getString() == ".end") {
+ // Ignore this directive for now.
+ Parser.Lex();
+ return false;
+ }
+
+ if (DirectiveID.getString() == ".frame") {
+ // Ignore this directive for now.
+ Parser.EatToEndOfStatement();
+ return false;
+ }
+
+ if (DirectiveID.getString() == ".set") {
+ // Ignore this directive for now.
+ Parser.EatToEndOfStatement();
+ return false;
+ }
+
+ if (DirectiveID.getString() == ".fmask") {
+ // Ignore this directive for now.
+ Parser.EatToEndOfStatement();
+ return false;
+ }
+
+ if (DirectiveID.getString() == ".mask") {
+ // Ignore this directive for now.
+ Parser.EatToEndOfStatement();
+ return false;
+ }
+
+ if (DirectiveID.getString() == ".gpword") {
+ // Ignore this directive for now.
+ Parser.EatToEndOfStatement();
+ return false;
+ }
+
+ return true;
}
extern "C" void LLVMInitializeMipsAsmParser() {
@@ -130,3 +945,7 @@ extern "C" void LLVMInitializeMipsAsmParser() {
RegisterMCAsmParser<MipsAsmParser> A(TheMips64Target);
RegisterMCAsmParser<MipsAsmParser> B(TheMips64elTarget);
}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "MipsGenAsmMatcher.inc"
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index f535c50..0f84358 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -21,6 +21,7 @@ add_llvm_target(MipsCodeGen
MipsAsmPrinter.cpp
MipsCodeEmitter.cpp
MipsDelaySlotFiller.cpp
+ MipsDirectObjLower.cpp
MipsELFWriterInfo.cpp
MipsJITInfo.cpp
MipsInstrInfo.cpp
diff --git a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
index 234455e..9603327 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
@@ -122,7 +122,7 @@ inline static unsigned getMipsRegisterNumbering(unsigned RegEnum)
{
switch (RegEnum) {
case Mips::ZERO: case Mips::ZERO_64: case Mips::F0: case Mips::D0_64:
- case Mips::D0:
+ case Mips::D0: case Mips::FCC0:
return 0;
case Mips::AT: case Mips::AT_64: case Mips::F1: case Mips::D1_64:
return 1;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index b8489ca..5d240fe 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -56,7 +56,7 @@ namespace {
MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI,
bool _isN64, bool IsLittleEndian)
: MCELFObjectTargetWriter(_is64Bit, OSABI, ELF::EM_MIPS,
- /*HasRelocationAddend*/ false,
+ /*HasRelocationAddend*/ _isN64,
/*IsN64*/ _isN64) {}
MipsELFObjectWriter::~MipsELFObjectWriter() {}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index 8dab62d..1d7370a 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -143,7 +143,11 @@ getBranchTargetOpValue(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const {
const MCOperand &MO = MI.getOperand(OpNo);
- assert(MO.isExpr() && "getBranchTargetOpValue expects only expressions");
+
+ // If the destination is an immediate, we have nothing to do.
+ if (MO.isImm()) return MO.getImm();
+ assert(MO.isExpr() &&
+ "getBranchTargetOpValue expects only expressions or immediates");
const MCExpr *Expr = MO.getExpr();
Fixups.push_back(MCFixup::Create(0, Expr,
@@ -159,7 +163,10 @@ getJumpTargetOpValue(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const {
const MCOperand &MO = MI.getOperand(OpNo);
- assert(MO.isExpr() && "getJumpTargetOpValue expects only expressions");
+ // If the destination is an immediate, we have nothing to do.
+ if (MO.isImm()) return MO.getImm();
+ assert(MO.isExpr() &&
+ "getJumpTargetOpValue expects only expressions or an immediate");
const MCExpr *Expr = MO.getExpr();
Fixups.push_back(MCFixup::Create(0, Expr,
diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp
index 2bc286b..ec84ad8 100644
--- a/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -26,7 +26,7 @@ using namespace llvm;
Mips16InstrInfo::Mips16InstrInfo(MipsTargetMachine &tm)
: MipsInstrInfo(tm, /* FIXME: set mips16 unconditional br */ 0),
- RI(*tm.getSubtargetImpl(), *this) {}
+ RI(*tm.getSubtargetImpl()) {}
const MipsRegisterInfo &Mips16InstrInfo::getRegisterInfo() const {
return RI;
diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp
index c15d1bf..106e82f 100644
--- a/lib/Target/Mips/Mips16RegisterInfo.cpp
+++ b/lib/Target/Mips/Mips16RegisterInfo.cpp
@@ -38,9 +38,8 @@
using namespace llvm;
-Mips16RegisterInfo::Mips16RegisterInfo(const MipsSubtarget &ST,
- const TargetInstrInfo &TII)
- : MipsRegisterInfo(ST, TII) {}
+Mips16RegisterInfo::Mips16RegisterInfo(const MipsSubtarget &ST)
+ : MipsRegisterInfo(ST) {}
// This function eliminate ADJCALLSTACKDOWN,
// ADJCALLSTACKUP pseudo instructions
diff --git a/lib/Target/Mips/Mips16RegisterInfo.h b/lib/Target/Mips/Mips16RegisterInfo.h
index 3f4b3a7..c702a15 100644
--- a/lib/Target/Mips/Mips16RegisterInfo.h
+++ b/lib/Target/Mips/Mips16RegisterInfo.h
@@ -17,11 +17,11 @@
#include "MipsRegisterInfo.h"
namespace llvm {
+class Mips16InstrInfo;
class Mips16RegisterInfo : public MipsRegisterInfo {
public:
- Mips16RegisterInfo(const MipsSubtarget &Subtarget,
- const TargetInstrInfo &TII);
+ Mips16RegisterInfo(const MipsSubtarget &Subtarget);
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index 20fc178..147be5d 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -110,9 +110,9 @@ def DSLLV : shift_rotate_reg<0x14, 0x00, "dsllv", shl, CPU64Regs>;
def DSRLV : shift_rotate_reg<0x16, 0x00, "dsrlv", srl, CPU64Regs>;
def DSRAV : shift_rotate_reg<0x17, 0x00, "dsrav", sra, CPU64Regs>;
let Pattern = []<dag> in {
-def DSLL32 : shift_rotate_imm64<0x3c, 0x00, "dsll32", shl>;
-def DSRL32 : shift_rotate_imm64<0x3e, 0x00, "dsrl32", srl>;
-def DSRA32 : shift_rotate_imm64<0x3f, 0x00, "dsra32", sra>;
+ def DSLL32 : shift_rotate_imm64<0x3c, 0x00, "dsll32", shl>;
+ def DSRL32 : shift_rotate_imm64<0x3e, 0x00, "dsrl32", srl>;
+ def DSRA32 : shift_rotate_imm64<0x3f, 0x00, "dsra32", sra>;
}
}
// Rotate Instructions
@@ -217,7 +217,15 @@ let DecoderNamespace = "Mips64" in {
def RDHWR64 : ReadHardware<CPU64Regs, HWRegs64>;
def DEXT : ExtBase<3, "dext", CPU64Regs>;
+let Pattern = []<dag> in {
+ def DEXTU : ExtBase<2, "dextu", CPU64Regs>;
+ def DEXTM : ExtBase<1, "dextm", CPU64Regs>;
+}
def DINS : InsBase<7, "dins", CPU64Regs>;
+let Pattern = []<dag> in {
+ def DINSU : InsBase<6, "dinsu", CPU64Regs>;
+ def DINSM : InsBase<5, "dinsm", CPU64Regs>;
+}
let isCodeGenOnly = 1, rs = 0, shamt = 0 in {
def DSLL64_32 : FR<0x00, 0x3c, (outs CPU64Regs:$rd), (ins CPURegs:$rt),
diff --git a/lib/Target/Mips/MipsAnalyzeImmediate.cpp b/lib/Target/Mips/MipsAnalyzeImmediate.cpp
index dc8fbd0..99b163e 100644
--- a/lib/Target/Mips/MipsAnalyzeImmediate.cpp
+++ b/lib/Target/Mips/MipsAnalyzeImmediate.cpp
@@ -91,7 +91,7 @@ void MipsAnalyzeImmediate::ReplaceADDiuSLLWithLUi(InstSeq &Seq) {
// Sign-extend and shift operand of ADDiu and see if it still fits in 16-bit.
int64_t Imm = SignExtend64<16>(Seq[0].ImmOpnd);
- int64_t ShiftedImm = Imm << (Seq[1].ImmOpnd - 16);
+ int64_t ShiftedImm = (uint64_t)Imm << (Seq[1].ImmOpnd - 16);
if (!isInt<16>(ShiftedImm))
return;
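
The cast matters here: left-shifting a negative signed value is undefined behavior in C++, so the shift is done on the unsigned bit pattern and only then interpreted as signed (well-defined on the two's-complement targets LLVM supports). A small self-contained illustration:

    #include <cstdint>
    #include <iostream>

    int main() {
      int64_t Imm = -0x8000;  // a sign-extended 16-bit ADDiu operand
      unsigned Shift = 16;
      // UB if written as (Imm << Shift) with Imm negative; fine this way.
      int64_t ShiftedImm = (int64_t)((uint64_t)Imm << Shift);
      std::cout << std::hex << ShiftedImm << "\n";  // ffffffff80000000
      return 0;
    }
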
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 00ff754..e780134 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -15,6 +15,7 @@
#define DEBUG_TYPE "mips-asm-printer"
#include "Mips.h"
#include "MipsAsmPrinter.h"
+#include "MipsDirectObjLower.h"
#include "MipsInstrInfo.h"
#include "MipsMCInstLower.h"
#include "InstPrinter/MipsInstPrinter.h"
@@ -58,33 +59,31 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
- // Direct object specific instruction lowering
- if (!OutStreamer.hasRawTextSupport())
- switch (MI->getOpcode()) {
- case Mips::DSLL:
- case Mips::DSRL:
- case Mips::DSRA:
- assert(MI->getNumOperands() == 3 &&
- "Invalid no. of machine operands for shift!");
- assert(MI->getOperand(2).isImm());
- int64_t Shift = MI->getOperand(2).getImm();
- if (Shift > 31) {
- MCInst TmpInst0;
- MCInstLowering.LowerLargeShift(MI, TmpInst0, Shift - 32);
- OutStreamer.EmitInstruction(TmpInst0);
- return;
- }
- break;
- }
-
MachineBasicBlock::const_instr_iterator I = MI;
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
do {
MCInst TmpInst0;
MCInstLowering.Lower(I++, TmpInst0);
+
+ // Direct object specific instruction lowering
+ if (!OutStreamer.hasRawTextSupport()) {
+ switch (TmpInst0.getOpcode()) {
+ // If the shift amount is >= 32, the instruction needs to be lowered further.
+ case Mips::DSLL:
+ case Mips::DSRL:
+ case Mips::DSRA:
+ Mips::LowerLargeShift(TmpInst0);
+ break;
+ // The DEXT/DINS variant is chosen by the pos and size operands.
+ case Mips::DEXT:
+ case Mips::DINS:
+ Mips::LowerDextDins(TmpInst0);
+ }
+ }
+
OutStreamer.EmitInstruction(TmpInst0);
- } while ((I != E) && I->isInsideBundle());
+ } while ((I != E) && I->isInsideBundle()); // Delay slot check
}
//===----------------------------------------------------------------------===//
@@ -214,7 +213,7 @@ const char *MipsAsmPrinter::getCurrentABIString() const {
case MipsSubtarget::N32: return "abiN32";
case MipsSubtarget::N64: return "abi64";
case MipsSubtarget::EABI: return "eabi32"; // TODO: handle eabi64
- default: llvm_unreachable("Unknown Mips ABI");;
+ default: llvm_unreachable("Unknown Mips ABI");
}
}
diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp
index cb7022b..5433295 100644
--- a/lib/Target/Mips/MipsCodeEmitter.cpp
+++ b/lib/Target/Mips/MipsCodeEmitter.cpp
@@ -30,7 +30,6 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
#include "llvm/PassManager.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -139,7 +138,7 @@ bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
do {
DEBUG(errs() << "JITTing function '"
- << MF.getFunction()->getName() << "'\n");
+ << MF.getName() << "'\n");
MCE.startFunction(MF);
for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index 2bba8a3..e3c8ed7 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -30,10 +30,11 @@ STATISTIC(FilledSlots, "Number of delay slots filled");
STATISTIC(UsefulSlots, "Number of delay slots filled with instructions that"
" are not NOP.");
-static cl::opt<bool> EnableDelaySlotFiller(
- "enable-mips-delay-filler",
+static cl::opt<bool> DisableDelaySlotFiller(
+ "disable-mips-delay-filler",
cl::init(false),
- cl::desc("Fill the Mips delay slots useful instructions."),
+ cl::desc("Disable the delay slot filler, which attempts to fill the Mips"
+ "delay slots with useful instructions."),
cl::Hidden);
// This option can be used to silence complaints by machine verifier passes.
@@ -114,7 +115,9 @@ runOnMachineBasicBlock(MachineBasicBlock &MBB) {
InstrIter D;
- if (EnableDelaySlotFiller && findDelayInstr(MBB, I, D)) {
+ // Delay slot filling is disabled at -O0.
+ if (!DisableDelaySlotFiller && (TM.getOptLevel() != CodeGenOpt::None) &&
+ findDelayInstr(MBB, I, D)) {
MBB.splice(llvm::next(I), &MBB, D);
++UsefulSlots;
} else
diff --git a/lib/Target/Mips/MipsDirectObjLower.cpp b/lib/Target/Mips/MipsDirectObjLower.cpp
new file mode 100644
index 0000000..0d74db8
--- /dev/null
+++ b/lib/Target/Mips/MipsDirectObjLower.cpp
@@ -0,0 +1,86 @@
+//===-- MipsDirectObjLower.cpp - Mips LLVM direct object lowering -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower Mips MCInst records that are normally
+// left to the assembler to lower, such as large shifts.
+//
+//===----------------------------------------------------------------------===//
+#include "MipsDirectObjLower.h"
+#include "MipsInstrInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+
+using namespace llvm;
+
+// If the D<shift> instruction has a shift amount that is greater
+// than 31 (checked in calling routine), lower it to a D<shift>32 instruction
+void Mips::LowerLargeShift(MCInst &Inst) {
+ assert(Inst.getNumOperands() == 3 && "Invalid no. of operands for shift!");
+ assert(Inst.getOperand(2).isImm());
+
+ bool isLarge = false;
+ int64_t Shift = Inst.getOperand(2).getImm();
+ if (Shift > 31) {
+ Shift -= 32;
+ isLarge = true;
+ }
+
+ // Set the shift amount (sa, minus 32 when lowering to the 32 variant).
+ Inst.getOperand(2).setImm(Shift);
+
+ if (isLarge)
+ switch (Inst.getOpcode()) {
+ default:
+ // The calling routine should have screened out non-shift opcodes.
+ llvm_unreachable("Unexpected shift instruction");
+ case Mips::DSLL:
+ Inst.setOpcode(Mips::DSLL32);
+ return;
+ case Mips::DSRL:
+ Inst.setOpcode(Mips::DSRL32);
+ return;
+ case Mips::DSRA:
+ Inst.setOpcode(Mips::DSRA32);
+ return;
+ }
+}
+
+// Pick a DEXT or DINS instruction variant based on the pos and size operands
+void Mips::LowerDextDins(MCInst &InstIn) {
+ int Opcode = InstIn.getOpcode();
+
+ if (Opcode == Mips::DEXT)
+ assert(InstIn.getNumOperands() == 4 &&
+ "Invalid no. of machine operands for DEXT!");
+ else // Only DEXT and DINS are possible
+ assert(InstIn.getNumOperands() == 5 &&
+ "Invalid no. of machine operands for DINS!");
+
+ assert(InstIn.getOperand(2).isImm());
+ int64_t Pos = InstIn.getOperand(2).getImm();
+ assert(InstIn.getOperand(3).isImm());
+ int64_t Size = InstIn.getOperand(3).getImm();
+
+ if (Size <= 32) {
+ if (Pos < 32) { // DEXT/DINS, do nothing
+ return;
+ } else { // DEXTU/DINSU
+ InstIn.getOperand(2).setImm(Pos - 32);
+ InstIn.setOpcode((Opcode == Mips::DEXT) ? Mips::DEXTU : Mips::DINSU);
+ return;
+ }
+ } else { // DEXTM/DINSM
+ assert(Pos < 32 && "DEXT/DINS cannot have both size and pos > 32");
+ InstIn.getOperand(3).setImm(Size - 32);
+ InstIn.setOpcode((Opcode == Mips::DEXT) ? Mips::DEXTM : Mips::DINSM);
+ return;
+ }
+}
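
The variant selection in LowerDextDins is a pure function of the pos and size immediates, which makes it easy to spot-check. A standalone sketch applying the same rules (the helper name is illustrative, not an LLVM API):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Same selection rules as LowerDextDins: adjusts Pos/Size for the chosen
    // variant and returns its mnemonic.
    const char *pickDextVariant(int64_t &Pos, int64_t &Size) {
      if (Size <= 32) {
        if (Pos < 32) return "dext";  // both fields fit the base encoding
        Pos -= 32;                    // dextu encodes pos - 32
        return "dextu";
      }
      assert(Pos < 32 && "cannot have both size and pos > 32");
      Size -= 32;                     // dextm encodes size - 32
      return "dextm";
    }

    int main() {
      int64_t P = 36, S = 4;
      printf("%s pos=%lld size=%lld\n", pickDextVariant(P, S),
             (long long)P, (long long)S);  // dextu pos=4 size=4
      P = 10; S = 40;
      printf("%s pos=%lld size=%lld\n", pickDextVariant(P, S),
             (long long)P, (long long)S);  // dextm pos=10 size=8
      return 0;
    }
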
diff --git a/lib/Target/Mips/MipsDirectObjLower.h b/lib/Target/Mips/MipsDirectObjLower.h
new file mode 100644
index 0000000..8813cc9
--- /dev/null
+++ b/lib/Target/Mips/MipsDirectObjLower.h
@@ -0,0 +1,28 @@
+//===-- MipsDirectObjLower.h - Mips LLVM direct object lowering *- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSDIRECTOBJLOWER_H
+#define MIPSDIRECTOBJLOWER_H
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+ class MCInst;
+ class MCStreamer;
+
+ namespace Mips {
+ /// MipsDirectObjLower - This namespace is used to lower MCInsts in cases
+ /// where the assembler usually finishes the lowering,
+ /// such as large shifts.
+ void LowerLargeShift(MCInst &Inst);
+ void LowerDextDins(MCInst &Inst);
+ }
+}
+
+#endif
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 5a97c17..4205223 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -337,8 +337,9 @@ SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) {
// Generate:
// lui $2, %hi($CPI1_0)
// lwc1 $f0, %lo($CPI1_0)($2)
- if (Addr.getOperand(1).getOpcode() == MipsISD::Lo) {
- SDValue LoVal = Addr.getOperand(1), Opnd0 = LoVal.getOperand(0);
+ if (Addr.getOperand(1).getOpcode() == MipsISD::Lo ||
+ Addr.getOperand(1).getOpcode() == MipsISD::GPRel) {
+ SDValue Opnd0 = Addr.getOperand(1).getOperand(0);
if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) ||
isa<JumpTableSDNode>(Opnd0)) {
Base = Addr.getOperand(0);
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index c5207c6..aa7b459 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -1571,15 +1571,15 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) {
SDVTList VTs = DAG.getVTList(MVT::i32);
- MipsTargetObjectFile &TLOF = (MipsTargetObjectFile&)getObjFileLowering();
+ const MipsTargetObjectFile &TLOF =
+ (const MipsTargetObjectFile&)getObjFileLowering();
// %gp_rel relocation
if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) {
SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
MipsII::MO_GPREL);
SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, dl, VTs, &GA, 1);
- SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32);
- return DAG.getNode(ISD::ADD, dl, MVT::i32, GOT, GPRelNode);
+ SDValue GPReg = DAG.getRegister(Mips::GP, MVT::i32);
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, GPReg, GPRelNode);
}
// %hi/%lo relocation
SDValue GAHi = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index 50e3eb5..8ade891 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -262,46 +262,3 @@ unsigned MipsInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
}
}
}
-
-unsigned
-llvm::Mips::loadImmediate(int64_t Imm, bool IsN64, const TargetInstrInfo &TII,
- MachineBasicBlock& MBB,
- MachineBasicBlock::iterator II, DebugLoc DL,
- bool LastInstrIsADDiu,
- MipsAnalyzeImmediate::Inst *LastInst) {
- MipsAnalyzeImmediate AnalyzeImm;
- unsigned Size = IsN64 ? 64 : 32;
- unsigned LUi = IsN64 ? Mips::LUi64 : Mips::LUi;
- unsigned ZEROReg = IsN64 ? Mips::ZERO_64 : Mips::ZERO;
- unsigned ATReg = IsN64 ? Mips::AT_64 : Mips::AT;
-
- const MipsAnalyzeImmediate::InstSeq &Seq =
- AnalyzeImm.Analyze(Imm, Size, LastInstrIsADDiu);
- MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin();
-
- if (LastInst && (Seq.size() == 1)) {
- *LastInst = *Inst;
- return 0;
- }
-
- // The first instruction can be a LUi, which is different from other
- // instructions (ADDiu, ORI and SLL) in that it does not have a register
- // operand.
- if (Inst->Opc == LUi)
- BuildMI(MBB, II, DL, TII.get(LUi), ATReg)
- .addImm(SignExtend64<16>(Inst->ImmOpnd));
- else
- BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ZEROReg)
- .addImm(SignExtend64<16>(Inst->ImmOpnd));
-
- // Build the remaining instructions in Seq. Skip the last instruction if
- // LastInst is not 0.
- for (++Inst; Inst != Seq.end() - !!LastInst; ++Inst)
- BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ATReg)
- .addImm(SignExtend64<16>(Inst->ImmOpnd));
-
- if (LastInst)
- *LastInst = *Inst;
-
- return Seq.size() - !!LastInst;
-}
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index 7d56259..aca2bc7 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -88,18 +88,6 @@ private:
const SmallVectorImpl<MachineOperand>& Cond) const;
};
-namespace Mips {
- /// Emit a series of instructions to load an immediate. All instructions
- /// except for the last one are emitted. The function returns the number of
- /// MachineInstrs generated. The opcode-immediate pair of the last
- /// instruction is returned in LastInst, if it is not 0.
- unsigned
- loadImmediate(int64_t Imm, bool IsN64, const TargetInstrInfo &TII,
- MachineBasicBlock& MBB, MachineBasicBlock::iterator II,
- DebugLoc DL, bool LastInstrIsADDiu,
- MipsAnalyzeImmediate::Inst *LastInst);
-}
-
/// Create MipsInstrInfo objects.
const MipsInstrInfo *createMips16InstrInfo(MipsTargetMachine &TM);
const MipsInstrInfo *createMipsSEInstrInfo(MipsTargetMachine &TM);
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index fd952ef..6b6005f 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -74,9 +74,10 @@ def MipsRet : SDNode<"MipsISD::Ret", SDTNone, [SDNPHasChain, SDNPOptInGlue]>;
// These are target-independent nodes, but have target-specific formats.
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MipsCallSeqStart,
- [SDNPHasChain, SDNPOutGlue]>;
+ [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>;
def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_MipsCallSeqEnd,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+ [SDNPHasChain, SDNPSideEffect,
+ SDNPOptInGlue, SDNPOutGlue]>;
// MAdd*/MSub* nodes
def MipsMAdd : SDNode<"MipsISD::MAdd", SDT_MipsMAddMSub,
@@ -110,7 +111,7 @@ def MipsWrapper : SDNode<"MipsISD::Wrapper", SDTIntBinOp>;
def MipsDynAlloc : SDNode<"MipsISD::DynAlloc", SDT_MipsDynAlloc,
[SDNPHasChain, SDNPInGlue]>;
-def MipsSync : SDNode<"MipsISD::Sync", SDT_Sync, [SDNPHasChain]>;
+def MipsSync : SDNode<"MipsISD::Sync", SDT_Sync, [SDNPHasChain,SDNPSideEffect]>;
def MipsExt : SDNode<"MipsISD::Ext", SDT_Ext>;
def MipsIns : SDNode<"MipsISD::Ins", SDT_Ins>;
@@ -1079,6 +1080,26 @@ def EXT : ExtBase<0, "ext", CPURegs>;
def INS : InsBase<4, "ins", CPURegs>;
//===----------------------------------------------------------------------===//
+// Instruction aliases
+//===----------------------------------------------------------------------===//
+def : InstAlias<"move $dst,$src", (ADD CPURegs:$dst,CPURegs:$src,ZERO)>;
+def : InstAlias<"bal $offset", (BGEZAL RA,brtarget:$offset)>;
+def : InstAlias<"addu $rs,$rt,$imm",
+ (ADDiu CPURegs:$rs,CPURegs:$rt,simm16:$imm)>;
+def : InstAlias<"add $rs,$rt,$imm",
+ (ADDi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>;
+def : InstAlias<"and $rs,$rt,$imm",
+ (ANDi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>;
+def : InstAlias<"j $rs", (JR CPURegs:$rs)>;
+def : InstAlias<"not $rt,$rs", (NOR CPURegs:$rt,CPURegs:$rs,ZERO)>;
+def : InstAlias<"neg $rt,$rs", (SUB CPURegs:$rt,ZERO,CPURegs:$rs)>;
+def : InstAlias<"negu $rt,$rs", (SUBu CPURegs:$rt,ZERO,CPURegs:$rs)>;
+def : InstAlias<"slt $rs,$rt,$imm",
+ (SLTi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>;
+def : InstAlias<"xor $rs,$rt,$imm",
+ (XORi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>;
+
+//===----------------------------------------------------------------------===//
// Arbitrary patterns that map to one or more instructions
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp
index f78203f..b9dbd52 100644
--- a/lib/Target/Mips/MipsLongBranch.cpp
+++ b/lib/Target/Mips/MipsLongBranch.cpp
@@ -10,6 +10,10 @@
// This pass expands a branch or jump instruction into a long branch if its
// offset is too large to fit into its immediate field.
//
+// FIXME:
+// 1. Fix pc-region jump instructions which cross 256MB segment boundaries.
+// 2. If program has inline assembly statements whose size cannot be
+// determined accurately, load branch target addresses from the GOT.
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mips-long-branch"
@@ -48,7 +52,7 @@ namespace {
typedef MachineBasicBlock::reverse_iterator ReverseIter;
struct MBBInfo {
- uint64_t Size;
+ uint64_t Size, Address;
bool HasLongBranch;
MachineInstr *Br;
@@ -61,7 +65,10 @@ namespace {
static char ID;
MipsLongBranch(TargetMachine &tm)
: MachineFunctionPass(ID), TM(tm),
- TII(static_cast<const MipsInstrInfo*>(tm.getInstrInfo())) {}
+ TII(static_cast<const MipsInstrInfo*>(tm.getInstrInfo())),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_),
+ ABI(TM.getSubtarget<MipsSubtarget>().getTargetABI()),
+ LongBranchSeqSize(!IsPIC ? 2 : (ABI == MipsSubtarget::N64 ? 13 : 9)) {}
virtual const char *getPassName() const {
return "Mips Long Branch";
@@ -81,6 +88,9 @@ namespace {
const MipsInstrInfo *TII;
MachineFunction *MF;
SmallVector<MBBInfo, 16> MBBInfos;
+ bool IsPIC;
+ unsigned ABI;
+ unsigned LongBranchSeqSize;
};
char MipsLongBranch::ID = 0;
@@ -230,12 +240,6 @@ void MipsLongBranch::replaceBranch(MachineBasicBlock &MBB, Iter Br,
// Expand branch instructions to long branches.
void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
- I.HasLongBranch = true;
-
- bool IsPIC = TM.getRelocationModel() == Reloc::PIC_;
- unsigned ABI = TM.getSubtarget<MipsSubtarget>().getTargetABI();
- bool N64 = ABI == MipsSubtarget::N64;
-
MachineBasicBlock::iterator Pos;
MachineBasicBlock *MBB = I.Br->getParent(), *TgtMBB = getTargetMBB(*I.Br);
DebugLoc DL = I.Br->getDebugLoc();
@@ -248,101 +252,105 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
MBB->addSuccessor(LongBrMBB);
if (IsPIC) {
- // $longbr:
- // addiu $sp, $sp, -regsize * 2
- // sw $ra, 0($sp)
- // bal $baltgt
- // sw $a3, regsize($sp)
- // $baltgt:
- // lui $a3, %hi($baltgt)
- // lui $at, %hi($tgt)
- // addiu $a3, $a3, %lo($baltgt)
- // addiu $at, $at, %lo($tgt)
- // subu $at, $at, $a3
- // addu $at, $ra, $at
- //
- // if n64:
- // lui $a3, %highest($baltgt)
- // lui $ra, %highest($tgt)
- // addiu $a3, $a3, %higher($baltgt)
- // addiu $ra, $ra, %higher($tgt)
- // dsll $a3, $a3, 32
- // dsll $ra, $ra, 32
- // subu $at, $at, $a3
- // addu $at, $at, $ra
- //
- // lw $ra, 0($sp)
- // lw $a3, regsize($sp)
- // jr $at
- // addiu $sp, $sp, regsize * 2
- // $fallthrough:
- //
- MF->getInfo<MipsFunctionInfo>()->setEmitNOAT();
MachineBasicBlock *BalTgtMBB = MF->CreateMachineBasicBlock(BB);
MF->insert(FallThroughMBB, BalTgtMBB);
LongBrMBB->addSuccessor(BalTgtMBB);
BalTgtMBB->addSuccessor(TgtMBB);
- int RegSize = N64 ? 8 : 4;
- unsigned AT = N64 ? Mips::AT_64 : Mips::AT;
- unsigned A3 = N64 ? Mips::A3_64 : Mips::A3;
- unsigned SP = N64 ? Mips::SP_64 : Mips::SP;
- unsigned RA = N64 ? Mips::RA_64 : Mips::RA;
- unsigned Load = N64 ? Mips::LD_P8 : Mips::LW;
- unsigned Store = N64 ? Mips::SD_P8 : Mips::SW;
- unsigned LUi = N64 ? Mips::LUi64 : Mips::LUi;
- unsigned ADDiu = N64 ? Mips::DADDiu : Mips::ADDiu;
- unsigned ADDu = N64 ? Mips::DADDu : Mips::ADDu;
- unsigned SUBu = N64 ? Mips::SUBu : Mips::SUBu;
- unsigned JR = N64 ? Mips::JR64 : Mips::JR;
-
- Pos = LongBrMBB->begin();
-
- BuildMI(*LongBrMBB, Pos, DL, TII->get(ADDiu), SP).addReg(SP)
- .addImm(-RegSize * 2);
- BuildMI(*LongBrMBB, Pos, DL, TII->get(Store)).addReg(RA).addReg(SP)
- .addImm(0);
- BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::BAL_BR)).addMBB(BalTgtMBB);
- BuildMI(*LongBrMBB, Pos, DL, TII->get(Store)).addReg(A3).addReg(SP)
- .addImm(RegSize)->setIsInsideBundle();
-
- Pos = BalTgtMBB->begin();
-
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(LUi), A3)
- .addMBB(BalTgtMBB, MipsII::MO_ABS_HI);
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(LUi), AT)
- .addMBB(TgtMBB, MipsII::MO_ABS_HI);
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDiu), A3).addReg(A3)
- .addMBB(BalTgtMBB, MipsII::MO_ABS_LO);
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDiu), AT).addReg(AT)
- .addMBB(TgtMBB, MipsII::MO_ABS_LO);
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(SUBu), AT).addReg(AT).addReg(A3);
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDu), AT).addReg(RA).addReg(AT);
-
- if (N64) {
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(LUi), A3)
- .addMBB(BalTgtMBB, MipsII::MO_HIGHEST);
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(LUi), RA)
- .addMBB(TgtMBB, MipsII::MO_HIGHEST);
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDiu), A3).addReg(A3)
- .addMBB(BalTgtMBB, MipsII::MO_HIGHER);
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDiu), RA).addReg(RA)
- .addMBB(TgtMBB, MipsII::MO_HIGHER);
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DSLL), A3).addReg(A3)
- .addImm(32);
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DSLL), RA).addReg(RA)
- .addImm(32);
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(SUBu), AT).addReg(AT).addReg(A3);
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDu), AT).addReg(AT).addReg(RA);
- I.Size += 4 * 8;
+ int64_t TgtAddress = MBBInfos[TgtMBB->getNumber()].Address;
+ int64_t Offset = TgtAddress - (I.Address + I.Size - 20);
+ int64_t Lo = SignExtend64<16>(Offset & 0xffff);
+ int64_t Hi = SignExtend64<16>(((Offset + 0x8000) >> 16) & 0xffff);
+
+ if (ABI != MipsSubtarget::N64) {
+ // $longbr:
+ // addiu $sp, $sp, -8
+ // sw $ra, 0($sp)
+ // bal $baltgt
+ // lui $at, %hi($tgt - $baltgt)
+ // $baltgt:
+ // addiu $at, $at, %lo($tgt - $baltgt)
+ // addu $at, $ra, $at
+ // lw $ra, 0($sp)
+ // jr $at
+ // addiu $sp, $sp, 8
+ // $fallthrough:
+ //
+
+ Pos = LongBrMBB->begin();
+
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::SP)
+ .addReg(Mips::SP).addImm(-8);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::SW)).addReg(Mips::RA)
+ .addReg(Mips::SP).addImm(0);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::BAL_BR)).addMBB(BalTgtMBB);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LUi), Mips::AT).addImm(Hi)
+ ->setIsInsideBundle();
+
+ Pos = BalTgtMBB->begin();
+
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::AT)
+ .addReg(Mips::AT).addImm(Lo);
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDu), Mips::AT)
+ .addReg(Mips::RA).addReg(Mips::AT);
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::LW), Mips::RA)
+ .addReg(Mips::SP).addImm(0);
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JR)).addReg(Mips::AT);
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::SP)
+ .addReg(Mips::SP).addImm(8)->setIsInsideBundle();
+ } else {
+ // $longbr:
+ // daddiu $sp, $sp, -16
+ // sd $ra, 0($sp)
+ // lui64 $at, %highest($tgt - $baltgt)
+ // daddiu $at, $at, %higher($tgt - $baltgt)
+ // dsll $at, $at, 16
+ // daddiu $at, $at, %hi($tgt - $baltgt)
+ // bal $baltgt
+ // dsll $at, $at, 16
+ // $baltgt:
+ // daddiu $at, $at, %lo($tgt - $baltgt)
+ // daddu $at, $ra, $at
+ // ld $ra, 0($sp)
+ // jr64 $at
+ // daddiu $sp, $sp, 16
+ // $fallthrough:
+ //
+
+ int64_t Higher = SignExtend64<16>(((Offset + 0x80008000) >> 32) & 0xffff);
+ int64_t Highest =
+ SignExtend64<16>(((Offset + 0x800080008000LL) >> 48) & 0xffff);
+
+ Pos = LongBrMBB->begin();
+
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::SP_64)
+ .addReg(Mips::SP_64).addImm(-16);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::SD)).addReg(Mips::RA_64)
+ .addReg(Mips::SP_64).addImm(0);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LUi64), Mips::AT_64)
+ .addImm(Highest);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::AT_64)
+ .addReg(Mips::AT_64).addImm(Higher);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DSLL), Mips::AT_64)
+ .addReg(Mips::AT_64).addImm(16);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::AT_64)
+ .addReg(Mips::AT_64).addImm(Hi);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::BAL_BR)).addMBB(BalTgtMBB);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DSLL), Mips::AT_64)
+ .addReg(Mips::AT_64).addImm(16)->setIsInsideBundle();
+
+ Pos = BalTgtMBB->begin();
+
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::AT_64)
+ .addReg(Mips::AT_64).addImm(Lo);
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDu), Mips::AT_64)
+ .addReg(Mips::RA_64).addReg(Mips::AT_64);
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::LD), Mips::RA_64)
+ .addReg(Mips::SP_64).addImm(0);
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JR64)).addReg(Mips::AT_64);
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::SP_64)
+ .addReg(Mips::SP_64).addImm(16)->setIsInsideBundle();
}
-
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(Load), RA).addReg(SP).addImm(0);
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(Load), A3).addReg(SP).addImm(RegSize);
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(JR)).addReg(AT);
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDiu), SP).addReg(SP)
- .addImm(RegSize * 2)->setIsInsideBundle();
- I.Size += 4 * 14;
} else {
// $longbr:
// j $tgt
@@ -353,7 +361,6 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
LongBrMBB->addSuccessor(TgtMBB);
BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::J)).addMBB(TgtMBB);
BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::NOP))->setIsInsideBundle();
- I.Size += 4 * 2;
}
if (I.Br->isUnconditionalBranch()) {
@@ -401,19 +408,36 @@ bool MipsLongBranch::runOnMachineFunction(MachineFunction &F) {
if (!I->Br || I->HasLongBranch)
continue;
- if (!ForceLongBranch)
- // Check if offset fits into 16-bit immediate field of branches.
- if (isInt<16>(computeOffset(I->Br) / 4))
- continue;
+ // Check if offset fits into 16-bit immediate field of branches.
+ if (!ForceLongBranch && isInt<16>(computeOffset(I->Br) / 4))
+ continue;
- expandToLongBranch(*I);
+ I->HasLongBranch = true;
+ I->Size += LongBranchSeqSize * 4;
++LongBranches;
EverMadeChange = MadeChange = true;
}
}
- if (EverMadeChange)
- MF->RenumberBlocks();
+ if (!EverMadeChange)
+ return true;
+
+ // Compute basic block addresses.
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ MF->getInfo<MipsFunctionInfo>()->setEmitNOAT();
+
+ uint64_t Address = 0;
+
+ for (I = MBBInfos.begin(); I != E; Address += I->Size, ++I)
+ I->Address = Address;
+ }
+
+ // Do the expansion.
+ for (I = MBBInfos.begin(); I != E; ++I)
+ if (I->HasLongBranch)
+ expandToLongBranch(*I);
+
+ MF->RenumberBlocks();
return true;
}
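
The Hi/Lo split above is the usual %hi/%lo trick: because addiu sign-extends its 16-bit operand, the high half is rounded with +0x8000 so that Hi * 0x10000 + Lo reassembles the original offset. A small check, with a reimplementation written to match SignExtend64's documented contract:

    #include <cstdint>
    #include <cstdio>

    // Reimplementation of LLVM's SignExtend64<N>: sign-extend the low N bits.
    template <unsigned N> int64_t signExtend64(uint64_t X) {
      return (int64_t)(X << (64 - N)) >> (64 - N);
    }

    int main() {
      int64_t Offset = -0x12348;  // an example ($tgt - $baltgt) displacement
      int64_t Lo = signExtend64<16>(Offset & 0xffff);
      int64_t Hi =
          signExtend64<16>(((uint64_t)(Offset + 0x8000) >> 16) & 0xffff);
      // Hi * 0x10000 + Lo reconstructs the offset for any 32-bit offset.
      printf("Hi=%lld Lo=%lld sum=%lld\n", (long long)Hi, (long long)Lo,
             (long long)(Hi * 0x10000 + Lo));  // sum == -74568 == Offset
      return 0;
    }
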
diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp
index d4c5e6d..5fa6339 100644
--- a/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/lib/Target/Mips/MipsMCInstLower.cpp
@@ -11,7 +11,6 @@
// MCInst records.
//
//===----------------------------------------------------------------------===//
-
#include "MipsMCInstLower.h"
#include "MipsAsmPrinter.h"
#include "MipsInstrInfo.h"
@@ -161,31 +160,3 @@ void MipsMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
}
}
-// If the D<shift> instruction has a shift amount that is greater
-// than 31 (checked in calling routine), lower it to a D<shift>32 instruction
-void MipsMCInstLower::LowerLargeShift(const MachineInstr *MI,
- MCInst& Inst,
- int64_t Shift) {
- // rt
- Inst.addOperand(LowerOperand(MI->getOperand(0)));
- // rd
- Inst.addOperand(LowerOperand(MI->getOperand(1)));
- // saminus32
- Inst.addOperand(MCOperand::CreateImm(Shift));
-
- switch (MI->getOpcode()) {
- default:
- // Calling function is not synchronized
- llvm_unreachable("Unexpected shift instruction");
- break;
- case Mips::DSLL:
- Inst.setOpcode(Mips::DSLL32);
- break;
- case Mips::DSRL:
- Inst.setOpcode(Mips::DSRL32);
- break;
- case Mips::DSRA:
- Inst.setOpcode(Mips::DSRA32);
- break;
- }
-}
diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h
index 0abb996..3eab5a4 100644
--- a/lib/Target/Mips/MipsMCInstLower.h
+++ b/lib/Target/Mips/MipsMCInstLower.h
@@ -33,7 +33,6 @@ public:
MipsMCInstLower(MipsAsmPrinter &asmprinter);
void Initialize(Mangler *mang, MCContext *C);
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
- void LowerLargeShift(const MachineInstr *MI, MCInst &Inst, int64_t Shift);
private:
MCOperand LowerSymbolOperand(const MachineOperand &MO,
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index ae6ae3a..79a142a 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -22,7 +22,6 @@
#include "llvm/Constants.h"
#include "llvm/DebugInfo.h"
#include "llvm/Type.h"
-#include "llvm/Function.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -43,9 +42,8 @@
using namespace llvm;
-MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST,
- const TargetInstrInfo &tii)
- : MipsGenRegisterInfo(Mips::RA), Subtarget(ST), TII(tii) {}
+MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST)
+ : MipsGenRegisterInfo(Mips::RA), Subtarget(ST) {}
unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; }
@@ -131,6 +129,12 @@ getReservedRegs(const MachineFunction &MF) const {
Reserved.set(Mips::RA_64);
}
+ // Reserve GP if small section is used.
+ if (Subtarget.useSmallSection()) {
+ Reserved.set(Mips::GP);
+ Reserved.set(Mips::GP_64);
+ }
+
return Reserved;
}
@@ -160,7 +164,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
"Instr doesn't have FrameIndex operand!");
}
- DEBUG(errs() << "\nFunction : " << MF.getFunction()->getName() << "\n";
+ DEBUG(errs() << "\nFunction : " << MF.getName() << "\n";
errs() << "<--------->\n" << MI);
int FrameIndex = MI.getOperand(i).getIndex();
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index 9a05e94..78adf7f 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -22,16 +22,14 @@
namespace llvm {
class MipsSubtarget;
-class TargetInstrInfo;
class Type;
class MipsRegisterInfo : public MipsGenRegisterInfo {
protected:
const MipsSubtarget &Subtarget;
- const TargetInstrInfo &TII;
public:
- MipsRegisterInfo(const MipsSubtarget &Subtarget, const TargetInstrInfo &tii);
+ MipsRegisterInfo(const MipsSubtarget &Subtarget);
/// getRegisterNumbering - Given the enum value for some register, e.g.
/// Mips::RA, return the number that it corresponds to (e.g. 31).
diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp
index eeb1de3..e4b44ef 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -260,14 +260,53 @@ void MipsSEInstrInfo::adjustStackPtr(unsigned SP, int64_t Amount,
if (isInt<16>(Amount))// addi sp, sp, amount
BuildMI(MBB, I, DL, get(ADDiu), SP).addReg(SP).addImm(Amount);
else { // Expand immediate that doesn't fit in 16-bit.
- unsigned ATReg = STI.isABI_N64() ? Mips::AT_64 : Mips::AT;
-
MBB.getParent()->getInfo<MipsFunctionInfo>()->setEmitNOAT();
- Mips::loadImmediate(Amount, STI.isABI_N64(), *this, MBB, I, DL, false, 0);
- BuildMI(MBB, I, DL, get(ADDu), SP).addReg(SP).addReg(ATReg);
+ unsigned Reg = loadImmediate(Amount, MBB, I, DL, 0);
+ BuildMI(MBB, I, DL, get(ADDu), SP).addReg(SP).addReg(Reg);
}
}
+/// Emit a series of instructions to load an immediate into a register and
+/// return that register. If NewImm is non-NULL, the last instruction is not
+/// emitted; its immediate operand is returned in NewImm instead.
+unsigned
+MipsSEInstrInfo::loadImmediate(int64_t Imm, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator II, DebugLoc DL,
+ unsigned *NewImm) const {
+ MipsAnalyzeImmediate AnalyzeImm;
+ const MipsSubtarget &STI = TM.getSubtarget<MipsSubtarget>();
+ unsigned Size = STI.isABI_N64() ? 64 : 32;
+ unsigned LUi = STI.isABI_N64() ? Mips::LUi64 : Mips::LUi;
+ unsigned ZEROReg = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
+ unsigned ATReg = STI.isABI_N64() ? Mips::AT_64 : Mips::AT;
+ bool LastInstrIsADDiu = NewImm != 0;
+
+ const MipsAnalyzeImmediate::InstSeq &Seq =
+ AnalyzeImm.Analyze(Imm, Size, LastInstrIsADDiu);
+ MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin();
+
+ assert(Seq.size() && (!LastInstrIsADDiu || (Seq.size() > 1)));
+
+ // The first instruction can be a LUi, which is different from other
+ // instructions (ADDiu, ORI and SLL) in that it does not have a register
+ // operand.
+ if (Inst->Opc == LUi)
+ BuildMI(MBB, II, DL, get(LUi), ATReg)
+ .addImm(SignExtend64<16>(Inst->ImmOpnd));
+ else
+ BuildMI(MBB, II, DL, get(Inst->Opc), ATReg).addReg(ZEROReg)
+ .addImm(SignExtend64<16>(Inst->ImmOpnd));
+
+ // Build the remaining instructions in Seq.
+ for (++Inst; Inst != Seq.end() - LastInstrIsADDiu; ++Inst)
+ BuildMI(MBB, II, DL, get(Inst->Opc), ATReg).addReg(ATReg)
+ .addImm(SignExtend64<16>(Inst->ImmOpnd));
+
+ if (LastInstrIsADDiu)
+ *NewImm = Inst->ImmOpnd;
+
+ return ATReg;
+}
+
unsigned MipsSEInstrInfo::GetAnalyzableBrOpc(unsigned Opc) const {
return (Opc == Mips::BEQ || Opc == Mips::BNE || Opc == Mips::BGTZ ||
Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ ||
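
In the common case the sequence behind loadImmediate is the classic two-instruction lui/ori pair. A sketch of the split, assuming a 32-bit immediate whose halves need no sign-extension handling (MipsAnalyzeImmediate can produce ADDiu/SLL sequences for other values):

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Materializing 0x12345678 into $at:
      //   lui $at, 0x1234
      //   ori $at, $at, 0x5678
      uint32_t Imm = 0x12345678;
      uint16_t HiPart = Imm >> 16;     // loaded by LUi
      uint16_t LoPart = Imm & 0xffff;  // merged in by ORI
      printf("lui $at, 0x%x\nori $at, $at, 0x%x\n", HiPart, LoPart);
      return 0;
    }
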
diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h
index 346e74d..55b78b2 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.h
+++ b/lib/Target/Mips/MipsSEInstrInfo.h
@@ -15,7 +15,6 @@
#define MIPSSEINSTRUCTIONINFO_H
#include "MipsInstrInfo.h"
-#include "MipsAnalyzeImmediate.h"
#include "MipsSERegisterInfo.h"
namespace llvm {
@@ -70,6 +69,13 @@ public:
void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
+ /// Emit a series of instructions to load an immediate. If NewImm is a
+ /// non-NULL parameter, the last instruction is not emitted, but instead
+ /// its immediate operand is returned in NewImm.
+ unsigned loadImmediate(int64_t Imm, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator II, DebugLoc DL,
+ unsigned *NewImm) const;
+
private:
virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const;
diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp
index 043a1ef..d868f73 100644
--- a/lib/Target/Mips/MipsSERegisterInfo.cpp
+++ b/lib/Target/Mips/MipsSERegisterInfo.cpp
@@ -40,8 +40,8 @@
using namespace llvm;
MipsSERegisterInfo::MipsSERegisterInfo(const MipsSubtarget &ST,
- const TargetInstrInfo &TII)
- : MipsRegisterInfo(ST, TII) {}
+ const MipsSEInstrInfo &I)
+ : MipsRegisterInfo(ST), TII(I) {}
// This function eliminate ADJCALLSTACKDOWN,
// ADJCALLSTACKUP pseudo instructions
@@ -122,15 +122,14 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
DebugLoc DL = II->getDebugLoc();
unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu;
unsigned ATReg = Subtarget.isABI_N64() ? Mips::AT_64 : Mips::AT;
- MipsAnalyzeImmediate::Inst LastInst(0, 0);
+ unsigned NewImm;
MipsFI->setEmitNOAT();
- Mips::loadImmediate(Offset, Subtarget.isABI_N64(), TII, MBB, II, DL, true,
- &LastInst);
- BuildMI(MBB, II, DL, TII.get(ADDu), ATReg).addReg(FrameReg).addReg(ATReg);
+ unsigned Reg = TII.loadImmediate(Offset, MBB, II, DL, &NewImm);
+ BuildMI(MBB, II, DL, TII.get(ADDu), ATReg).addReg(FrameReg).addReg(Reg);
FrameReg = ATReg;
- Offset = SignExtend64<16>(LastInst.ImmOpnd);
+ Offset = SignExtend64<16>(NewImm);
}
MI.getOperand(OpNo).ChangeToRegister(FrameReg, false);
diff --git a/lib/Target/Mips/MipsSERegisterInfo.h b/lib/Target/Mips/MipsSERegisterInfo.h
index 4b17b33..b4eab65 100644
--- a/lib/Target/Mips/MipsSERegisterInfo.h
+++ b/lib/Target/Mips/MipsSERegisterInfo.h
@@ -18,11 +18,14 @@
#include "MipsRegisterInfo.h"
namespace llvm {
+class MipsSEInstrInfo;
class MipsSERegisterInfo : public MipsRegisterInfo {
+ const MipsSEInstrInfo &TII;
+
public:
MipsSERegisterInfo(const MipsSubtarget &Subtarget,
- const TargetInstrInfo &TII);
+ const MipsSEInstrInfo &TII);
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index 11ff809..ac83d83 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -25,7 +25,8 @@ using namespace llvm;
void MipsSubtarget::anchor() { }
MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, bool little) :
+ const std::string &FS, bool little,
+ Reloc::Model RM) :
MipsGenSubtargetInfo(TT, CPU, FS),
MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little),
IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false),
@@ -54,6 +55,9 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
// Is the target system Linux ?
if (TT.find("linux") == std::string::npos)
IsLinux = false;
+
+ // Set UseSmallSection.
+ UseSmallSection = !IsLinux && (RM == Reloc::Static);
}
bool
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index ba15362..0595e8d 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -65,6 +65,9 @@ protected:
// isLinux - Target system is Linux. Is false we consider ELFOS for now.
bool IsLinux;
+ // UseSmallSection - Small section is used.
+ bool UseSmallSection;
+
/// Features related to the presence of specific instructions.
// HasSEInReg - SEB and SEH (signext in register) instructions.
@@ -109,7 +112,7 @@ public:
/// This constructor initializes the data members to match that
/// of the specified triple.
MipsSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, bool little);
+ const std::string &FS, bool little, Reloc::Model RM);
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
@@ -133,6 +136,7 @@ public:
bool inMips16Mode() const { return InMips16Mode; }
bool isAndroid() const { return IsAndroid; }
bool isLinux() const { return IsLinux; }
+ bool useSmallSection() const { return UseSmallSection; }
bool hasStandardEncoding() const { return !inMips16Mode(); }
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 2928a73..b70542b 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -42,7 +42,7 @@ MipsTargetMachine(const Target &T, StringRef TT,
CodeGenOpt::Level OL,
bool isLittle)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS, isLittle),
+ Subtarget(TT, CPU, FS, isLittle, RM),
DataLayout(isLittle ?
(Subtarget.isABI_N64() ?
"e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" :
diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp
index 04dc60a..1f5e34f 100644
--- a/lib/Target/Mips/MipsTargetObjectFile.cpp
+++ b/lib/Target/Mips/MipsTargetObjectFile.cpp
@@ -26,6 +26,7 @@ SSThreshold("mips-ssection-threshold", cl::Hidden,
void MipsTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+ InitializeELF(TM.Options.UseInitArray);
SmallDataSection =
getContext().getELFSection(".sdata", ELF::SHT_PROGBITS,
@@ -60,9 +61,10 @@ bool MipsTargetObjectFile::
IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
SectionKind Kind) const {
- // Only use small section for non linux targets.
const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
- if (Subtarget.isLinux())
+
+ // Return if small section is not available.
+ if (!Subtarget.useSmallSection())
return false;
// Only global variables, not functions.
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index d175e3e..413142e 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -137,7 +137,7 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
char Value = MI->getOperand(OpNo).getImm();
- Value = (Value << (32-5)) >> (32-5);
+ Value = SignExtend32<5>(Value);
O << (int)Value;
}
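
The switch to SignExtend32<5> is more than cosmetic: the old (Value << 27) >> 27 idiom invokes undefined behavior whenever bit 4 of the promoted value is set, since the left shift overflows a signed int. A reimplementation under SignExtend32's contract, shown on the two interesting cases:

    #include <cstdint>
    #include <cstdio>

    // Same contract as LLVM's SignExtend32<B>: sign-extend the low B bits,
    // doing the left shift in the unsigned domain to avoid signed overflow.
    template <unsigned B> int32_t signExtend32(uint32_t X) {
      return (int32_t)(X << (32 - B)) >> (32 - B);
    }

    int main() {
      printf("%d\n", signExtend32<5>(0x1f));  // -1 (bit 4 set)
      printf("%d\n", signExtend32<5>(0x0f));  // 15 (bit 4 clear)
      return 0;
    }
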
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index 245b457..b9ea8b5 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -64,7 +64,6 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) {
ZeroDirective = "\t.space\t";
Data64bitsDirective = is64Bit ? "\t.quad\t" : 0;
- LCOMMDirectiveType = LCOMM::NoAlignment;
AssemblerDialect = 0; // Old-Style mnemonics.
}
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index b7f1688..cb15dad 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -35,6 +35,10 @@ def Directive970 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_970", "">;
def Directive32 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_32", "">;
def Directive64 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_64", "">;
def DirectiveA2 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_A2", "">;
+def DirectiveE500mc : SubtargetFeature<"", "DarwinDirective",
+ "PPC::DIR_E500mc", "">;
+def DirectiveE5500 : SubtargetFeature<"", "DarwinDirective",
+ "PPC::DIR_E5500", "">;
def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">;
def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">;
@@ -94,6 +98,12 @@ def : Processor<"g5", G5Itineraries,
[Directive970, FeatureAltivec,
FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
Feature64Bit /*, Feature64BitRegs */]>;
+def : ProcessorModel<"e500mc", PPCE500mcModel,
+ [DirectiveE500mc, FeatureMFOCRF,
+ FeatureSTFIWX, FeatureBookE, FeatureISEL]>;
+def : ProcessorModel<"e5500", PPCE5500Model,
+ [DirectiveE5500, FeatureMFOCRF, Feature64Bit,
+ FeatureSTFIWX, FeatureBookE, FeatureISEL]>;
def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE,
FeatureMFOCRF, FeatureFSqrt,
FeatureSTFIWX, FeatureISEL,
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index f76b89c..6e0e8bb 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -109,6 +109,8 @@ namespace {
bool doFinalization(Module &M);
virtual void EmitFunctionEntryLabel();
+
+ void EmitFunctionBodyEnd();
};
/// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac
@@ -345,23 +347,32 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutStreamer.EmitLabel(PICBase);
return;
}
+ case PPC::LDtocJTI:
+ case PPC::LDtocCPT:
case PPC::LDtoc: {
// Transform %X3 = LDtoc <ga:@min1>, %X2
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
-
+
// Change the opcode to LD, and the global address operand to be a
// reference to the TOC entry we will synthesize later.
TmpInst.setOpcode(PPC::LD);
const MachineOperand &MO = MI->getOperand(1);
- assert(MO.isGlobal());
-
- // Map symbol -> label of TOC entry.
- MCSymbol *&TOCEntry = TOC[Mang->getSymbol(MO.getGlobal())];
+
+ // Map symbol -> label of TOC entry
+ assert(MO.isGlobal() || MO.isCPI() || MO.isJTI());
+ MCSymbol *MOSymbol = 0;
+ if (MO.isGlobal())
+ MOSymbol = Mang->getSymbol(MO.getGlobal());
+ else if (MO.isCPI())
+ MOSymbol = GetCPISymbol(MO.getIndex());
+ else if (MO.isJTI())
+ MOSymbol = GetJTISymbol(MO.getIndex());
+ MCSymbol *&TOCEntry = TOC[MOSymbol];
if (TOCEntry == 0)
TOCEntry = GetTempSymbol("C", TOCLabelID++);
-
+
const MCExpr *Exp =
- MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC,
+ MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC_ENTRY,
OutContext);
TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
OutStreamer.EmitInstruction(TmpInst);
@@ -406,9 +417,9 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
OutContext.GetOrCreateSymbol(".L." + Twine(CurrentFnSym->getName()));
MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC.@tocbase"));
OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext),
- Subtarget.isPPC64() ? 8 : 4/*size*/, 0/*addrspace*/);
+ 8/*size*/, 0/*addrspace*/);
OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2, OutContext),
- Subtarget.isPPC64() ? 8 : 4/*size*/, 0/*addrspace*/);
+ 8/*size*/, 0/*addrspace*/);
OutStreamer.SwitchSection(Current);
MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol(
@@ -441,6 +452,23 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
return AsmPrinter::doFinalization(M);
}
+/// EmitFunctionBodyEnd - Print the traceback table before the .size
+/// directive.
+///
+void PPCLinuxAsmPrinter::EmitFunctionBodyEnd() {
+ // Only the 64-bit target requires a traceback table. For now,
+ // we only emit the word of zeroes that GDB requires to find
+ // the end of the function, and zeroes for the eight-byte
+ // mandatory fields.
+ // FIXME: We should fill in the eight-byte mandatory fields as described in
+ // the PPC64 ELF ABI (this is a low-priority item because GDB does not
+ // currently make use of these fields).
+ if (Subtarget.isPPC64()) {
+ OutStreamer.EmitIntValue(0, 4/*size*/);
+ OutStreamer.EmitIntValue(0, 8/*size*/);
+ }
+}
+
void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
static const char *const CPUDirectives[] = {
"",
@@ -453,6 +481,8 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
"ppc750",
"ppc970",
"ppcA2",
+ "ppce500mc",
+ "ppce5500",
"power6",
"power7",
"ppc64"
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index a00f686..e8f4d16 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -975,6 +975,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
case ISD::AND: {
unsigned Imm, Imm2, SH, MB, ME;
+ uint64_t Imm64;
// If this is an and of a value rotated between 0 and 31 bits and then and'd
// with a mask, emit rlwinm
@@ -993,6 +994,14 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB), getI32Imm(ME) };
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
}
+ // If this is a 64-bit zero-extension mask, emit rldicl.
+ if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
+ isMask_64(Imm64)) {
+ SDValue Val = N->getOperand(0);
+ MB = 64 - CountTrailingOnes_64(Imm64);
+ SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB) };
+ return CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops, 3);
+ }
// AND X, 0 -> 0, not "rlwinm 32".
if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) {
ReplaceUses(SDValue(N, 0), N->getOperand(1));
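
The new rldicl pattern fires for AND masks of the form 0...01...1: rotate by zero and clear everything left of the mask, with the mask-begin field MB set to 64 minus the number of trailing ones. A standalone sketch of that computation (the helpers mirror what I understand isMask_64 and CountTrailingOnes_64 to do):

    #include <cstdint>
    #include <cstdio>

    // Mirrors LLVM's isMask_64: true for 0...01...1 values (at least one 1).
    static bool isMask64(uint64_t V) { return V && ((V + 1) & V) == 0; }

    // Mirrors LLVM's CountTrailingOnes_64.
    static unsigned countTrailingOnes64(uint64_t V) {
      unsigned N = 0;
      for (; V & 1; V >>= 1) ++N;
      return N;
    }

    int main() {
      uint64_t Imm64 = 0xffff;  // a 16-bit zero-extension mask
      if (isMask64(Imm64)) {
        unsigned MB = 64 - countTrailingOnes64(Imm64);
        printf("rldicl r3, r4, 0, %u\n", MB);  // rldicl r3, r4, 0, 48
      }
      return 0;
    }
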
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 61d44c5..dbb3b14 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -449,6 +449,21 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setSchedulingPreference(Sched::Hybrid);
computeRegisterProperties();
+
+ // The Freescale cores do better with aggressive inlining of memcpy and
+ // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
+ if (Subtarget->getDarwinDirective() == PPC::DIR_E500mc ||
+ Subtarget->getDarwinDirective() == PPC::DIR_E5500) {
+ maxStoresPerMemset = 32;
+ maxStoresPerMemsetOptSize = 16;
+ maxStoresPerMemcpy = 32;
+ maxStoresPerMemcpyOptSize = 8;
+ maxStoresPerMemmove = 32;
+ maxStoresPerMemmoveOptSize = 8;
+
+ setPrefFunctionAlignment(4);
+ benefitFromCodePlacementOpt = true;
+ }
}
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
@@ -517,6 +532,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
case PPCISD::MTFSF: return "PPCISD::MTFSF";
case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
+ case PPCISD::CR6SET: return "PPCISD::CR6SET";
+ case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
}
}
@@ -811,14 +828,13 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
}
// Properly sign extend the value.
- int ShAmt = (4-ByteSize)*8;
- int MaskVal = ((int)Value << ShAmt) >> ShAmt;
+ int MaskVal = SignExtend32(Value, ByteSize * 8);
// If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
if (MaskVal == 0) return SDValue();
// Finally, if this value fits in a 5 bit sext field, return it
- if (((MaskVal << (32-5)) >> (32-5)) == MaskVal)
+ if (SignExtend32<5>(MaskVal) == MaskVal)
return DAG.getTargetConstant(MaskVal, MVT::i32);
return SDValue();
}
@@ -1204,6 +1220,14 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
const Constant *C = CP->getConstVal();
+ // 64-bit SVR4 ABI code is always position-independent.
+ // The actual address of the constant pool entry is stored in the TOC.
+ if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
+ SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
+ return DAG.getNode(PPCISD::TOC_ENTRY, CP->getDebugLoc(), MVT::i64, GA,
+ DAG.getRegister(PPC::X2, MVT::i64));
+ }
+
unsigned MOHiFlag, MOLoFlag;
bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
SDValue CPIHi =
@@ -1217,6 +1241,14 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ // 64-bit SVR4 ABI code is always position-independent.
+ // The actual address of the jump table entry is stored in the TOC.
+ if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
+ SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
+ return DAG.getNode(PPCISD::TOC_ENTRY, JT->getDebugLoc(), MVT::i64, GA,
+ DAG.getRegister(PPC::X2, MVT::i64));
+ }
+
unsigned MOHiFlag, MOLoFlag;
bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
@@ -1441,7 +1473,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
MachinePointerInfo(),
MVT::i32, false, false, 0);
- return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(),
+ return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(),
false, false, false, 0);
}
@@ -2408,7 +2440,7 @@ static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
int Addr = C->getZExtValue();
if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
- (Addr << 6 >> 6) != Addr)
+ SignExtend32<26>(Addr) != Addr)
return 0; // Top 6 bits have to be sext of immediate.
return DAG.getConstant((int)C->getZExtValue() >> 2,
@@ -2819,6 +2851,10 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
isTailCall, RegsToPass, Ops, NodeTys,
PPCSubTarget);
+ // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
+ if (isVarArg && PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64())
+ Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
+
// When performing tail call optimization the callee pops its arguments off
// the stack. Account for this here so these bytes can be pushed back on in
// PPCRegisterInfo::eliminateCallFramePseudoInstr.
@@ -3116,14 +3152,6 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOpChains[0], MemOpChains.size());
- // Set CR6 to true if this is a vararg call with floating args passed in
- // registers.
- if (isVarArg) {
- SDValue SetCR(DAG.getMachineNode(seenFloatArg ? PPC::CRSET : PPC::CRUNSET,
- dl, MVT::i32), 0);
- RegsToPass.push_back(std::make_pair(unsigned(PPC::CR1EQ), SetCR));
- }
-
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
SDValue InFlag;
@@ -3133,6 +3161,18 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
InFlag = Chain.getValue(1);
}
+ // Set CR bit 6 to true if this is a vararg call with floating args passed in
+ // registers.
+ if (isVarArg) {
+ SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Ops[] = { Chain, InFlag };
+
+ Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
+ dl, VTs, Ops, InFlag.getNode() ? 2 : 1);
+
+ InFlag = Chain.getValue(1);
+ }
+
if (isTailCall)
PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
false, TailCallArguments);
@@ -4126,7 +4166,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
unsigned TypeShiftAmt = i & (SplatBitSize-1);
// vsplti + shl self.
- if (SextVal == (i << (int)TypeShiftAmt)) {
+ if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
static const unsigned IIDs[] = { // Intrinsic to use for each size.
Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
@@ -4171,17 +4211,17 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
}
// t = vsplti c, result = vsldoi t, t, 1
- if (SextVal == ((i << 8) | (i < 0 ? 0xFF : 0))) {
+ if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl);
}
// t = vsplti c, result = vsldoi t, t, 2
- if (SextVal == ((i << 16) | (i < 0 ? 0xFFFF : 0))) {
+ if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl);
}
// t = vsplti c, result = vsldoi t, t, 3
- if (SextVal == ((i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
+ if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl);
}
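
The (unsigned) casts added in these comparisons are not cosmetic: left-shifting a negative signed value is undefined behavior in C++, so the shift is performed in unsigned arithmetic and only the result is viewed as signed again. A standalone illustration:

  #include <cstdint>

  // UB-free "i << N" for possibly-negative i: shift as unsigned, then
  // convert back (wraps modulo 2^32 on two's-complement targets).
  int32_t shiftedSplat(int32_t i, unsigned N) {
    return int32_t(uint32_t(i) << N);
  }
  // e.g. shiftedSplat(-1, 8) == int32_t(0xFFFFFF00)
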
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index b0a013b..902b188 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -174,6 +174,10 @@ namespace llvm {
/// operand #3 optional in flag
TC_RETURN,
+ /// ch, gl = CR6[UN]SET ch, inglue - Set/unset CR bit 6 for SVR4 vararg calls
+ CR6SET,
+ CR6UNSET,
+
/// STD_32 - This is the STD instruction for use with "32-bit" registers.
STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE,
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 39778a5..cfe71d17 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -29,6 +29,9 @@ def symbolLo64 : Operand<i64> {
let PrintMethod = "printSymbolLo";
let EncoderMethod = "getLO16Encoding";
}
+def tocentry : Operand<iPTR> {
+ let MIOperandInfo = (ops i32imm:$imm);
+}
//===----------------------------------------------------------------------===//
// 64-bit transformation functions.
@@ -296,12 +299,14 @@ def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs G8RC:$rT), (ins),
let PPC970_Unit = 1 in { // FXU Operations.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
def LI8 : DForm_2_r0<14, (outs G8RC:$rD), (ins symbolLo64:$imm),
"li $rD, $imm", IntSimple,
[(set G8RC:$rD, immSExt16:$imm)]>;
def LIS8 : DForm_2_r0<15, (outs G8RC:$rD), (ins symbolHi64:$imm),
"lis $rD, $imm", IntSimple,
[(set G8RC:$rD, imm16ShiftedSExt:$imm)]>;
+}
// Logical ops.
def NAND8: XForm_6<31, 476, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
@@ -459,7 +464,7 @@ def EXTSW_32_64 : XForm_11<31, 986, (outs G8RC:$rA), (ins GPRC:$rS),
let Defs = [CARRY] in {
def SRADI : XSForm_1<31, 413, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH),
- "sradi $rA, $rS, $SH", IntRotateD,
+ "sradi $rA, $rS, $SH", IntRotateDI,
[(set G8RC:$rA, (sra G8RC:$rS, (i32 imm:$SH)))]>, isPPC64;
}
def CNTLZD : XForm_11<31, 58, (outs G8RC:$rA), (ins G8RC:$rS),
@@ -482,7 +487,7 @@ def MULLD : XOForm_1<31, 233, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
let isCommutable = 1 in {
def RLDIMI : MDForm_1<30, 3,
(outs G8RC:$rA), (ins G8RC:$rSi, G8RC:$rS, u6imm:$SH, u6imm:$MB),
- "rldimi $rA, $rS, $SH, $MB", IntRotateD,
+ "rldimi $rA, $rS, $SH, $MB", IntRotateDI,
[]>, isPPC64, RegConstraint<"$rSi = $rA">,
NoEncode<"$rSi">;
}
@@ -494,11 +499,11 @@ def RLDCL : MDForm_1<30, 0,
[]>, isPPC64;
def RLDICL : MDForm_1<30, 0,
(outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MB),
- "rldicl $rA, $rS, $SH, $MB", IntRotateD,
+ "rldicl $rA, $rS, $SH, $MB", IntRotateDI,
[]>, isPPC64;
def RLDICR : MDForm_1<30, 1,
(outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$ME),
- "rldicr $rA, $rS, $SH, $ME", IntRotateD,
+ "rldicr $rA, $rS, $SH, $ME", IntRotateDI,
[]>, isPPC64;
def RLWINM8 : MForm_2<21,
@@ -541,19 +546,19 @@ def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src),
let mayLoad = 1 in
def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp,
ptr_rc:$rA),
- "lhau $rD, $disp($rA)", LdStLoad,
+ "lhau $rD, $disp($rA)", LdStLHAU,
[]>, RegConstraint<"$rA = $ea_result">,
NoEncode<"$ea_result">;
// NO LWAU!
def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
- "lhaux $rD, $addr", LdStLoad,
+ "lhaux $rD, $addr", LdStLHAU,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
def LWAUX : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
- "lwaux $rD, $addr", LdStLoad,
+ "lwaux $rD, $addr", LdStLHAU,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">, isPPC64;
}
@@ -584,31 +589,31 @@ def LWZX8 : XForm_1<31, 23, (outs G8RC:$rD), (ins memrr:$src),
// Update forms.
let mayLoad = 1 in {
def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lbzu $rD, $addr", LdStLoad,
+ "lbzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lhzu $rD, $addr", LdStLoad,
+ "lhzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lwzu $rD, $addr", LdStLoad,
+ "lwzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
- "lbzux $rD, $addr", LdStLoad,
+ "lbzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
def LHZUX8 : XForm_1<31, 331, (outs G8RC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
- "lhzux $rD, $addr", LdStLoad,
+ "lhzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
- "lwzux $rD, $addr", LdStLoad,
+ "lwzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
}
@@ -624,6 +629,14 @@ def LDtoc: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
"",
[(set G8RC:$rD,
(PPCtoc_entry tglobaladdr:$disp, G8RC:$reg))]>, isPPC64;
+def LDtocJTI: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
+ "",
+ [(set G8RC:$rD,
+ (PPCtoc_entry tjumptable:$disp, G8RC:$reg))]>, isPPC64;
+def LDtocCPT: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
+ "",
+ [(set G8RC:$rD,
+ (PPCtoc_entry tconstpool:$disp, G8RC:$reg))]>, isPPC64;
let hasSideEffects = 1 in {
let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo.
@@ -642,13 +655,13 @@ def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src),
let mayLoad = 1 in
def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr),
- "ldu $rD, $addr", LdStLD,
+ "ldu $rD, $addr", LdStLDU,
[]>, RegConstraint<"$addr.reg = $ea_result">, isPPC64,
NoEncode<"$ea_result">;
def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
- "ldux $rD, $addr", LdStLoad,
+ "ldux $rD, $addr", LdStLDU,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">, isPPC64;
}
@@ -695,14 +708,14 @@ let PPC970_Unit = 2 in {
def STBU8 : DForm_1a<38, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stbu $rS, $ptroff($ptrreg)", LdStStore,
+ "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd,
[(set ptr_rc:$ea_res,
(pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
- "sthu $rS, $ptroff($ptrreg)", LdStStore,
+ "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd,
[(set ptr_rc:$ea_res,
(pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
@@ -710,7 +723,7 @@ def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
def STWU8 : DForm_1a<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stwu $rS, $ptroff($ptrreg)", LdStStore,
+ "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd,
[(set ptr_rc:$ea_res,
(pre_truncsti32 G8RC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
@@ -718,7 +731,7 @@ def STWU8 : DForm_1a<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
s16immX4:$ptroff, ptr_rc:$ptrreg),
- "stdu $rS, $ptroff($ptrreg)", LdStSTD,
+ "stdu $rS, $ptroff($ptrreg)", LdStSTDU,
[(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">,
@@ -727,7 +740,7 @@ def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
def STBUX8 : XForm_8<31, 247, (outs ptr_rc:$ea_res),
(ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stbux $rS, $ptroff, $ptrreg", LdStStore,
+ "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd,
[(set ptr_rc:$ea_res,
(pre_truncsti8 G8RC:$rS,
ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
@@ -736,7 +749,7 @@ def STBUX8 : XForm_8<31, 247, (outs ptr_rc:$ea_res),
def STHUX8 : XForm_8<31, 439, (outs ptr_rc:$ea_res),
(ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "sthux $rS, $ptroff, $ptrreg", LdStStore,
+ "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd,
[(set ptr_rc:$ea_res,
(pre_truncsti16 G8RC:$rS,
ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
@@ -745,7 +758,7 @@ def STHUX8 : XForm_8<31, 439, (outs ptr_rc:$ea_res),
def STWUX8 : XForm_8<31, 183, (outs ptr_rc:$ea_res),
(ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stwux $rS, $ptroff, $ptrreg", LdStStore,
+ "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd,
[(set ptr_rc:$ea_res,
(pre_truncsti32 G8RC:$rS,
ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
@@ -754,7 +767,7 @@ def STWUX8 : XForm_8<31, 183, (outs ptr_rc:$ea_res),
def STDUX : XForm_8<31, 181, (outs ptr_rc:$ea_res),
(ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stdux $rS, $ptroff, $ptrreg", LdStStore,
+ "stdux $rS, $ptroff, $ptrreg", LdStSTDU,
[(set ptr_rc:$ea_res,
(pre_store G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 47f09dc..d2df664 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -54,7 +54,8 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer(
const TargetMachine *TM,
const ScheduleDAG *DAG) const {
unsigned Directive = TM->getSubtarget<PPCSubtarget>().getDarwinDirective();
- if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2) {
+ if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2 ||
+ Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) {
const InstrItineraryData *II = TM->getInstrItineraryData();
return new PPCScoreboardHazardRecognizer(II, DAG);
}
@@ -70,7 +71,8 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer(
unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
// Most subtargets use a PPC970 recognizer.
- if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2) {
+ if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2 &&
+ Directive != PPC::DIR_E500mc && Directive != PPC::DIR_E5500) {
const TargetInstrInfo *TII = TM.getInstrInfo();
assert(TII && "No InstrInfo?");
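
Routing the e500mc and e5500 through PPCScoreboardHazardRecognizer (instead of the PPC970-style recognizer) means structural hazards are derived directly from the itinerary data: a functional unit stays reserved for an instruction's repeat rate. A toy model of that bookkeeping (assumed names, not the LLVM class):

  #include <array>

  // Toy scoreboard: each functional unit records the cycle it frees up.
  struct Scoreboard {
    std::array<int, 16> FreeAt{}; // one slot per functional unit
    int Cycle = 0;
    bool hasHazard(unsigned Unit) const { return FreeAt[Unit] > Cycle; }
    void issue(unsigned Unit, int RepeatRate) {
      FreeAt[Unit] = Cycle + RepeatRate;
    }
    void advance() { ++Cycle; }
  };
  // After issue(FPU, 38) for fdivs on e500mc, hasHazard(FPU) holds for
  // the next 38 cycles.
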
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index f57f0c9..a503908 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -123,9 +123,11 @@ def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInGlue, SDNPOutGlue]>;
def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>,
- [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+ [SDNPHasChain, SDNPSideEffect,
+ SDNPInGlue, SDNPOutGlue]>;
def PPCtoc_restore : SDNode<"PPCISD::TOC_RESTORE", SDTypeProfile<0, 0, []>,
- [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+ [SDNPHasChain, SDNPSideEffect,
+ SDNPInGlue, SDNPOutGlue]>;
def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def PPCbctrl_Darwin : SDNode<"PPCISD::BCTRL_Darwin", SDTNone,
@@ -153,6 +155,12 @@ def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx,
def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx,
[SDNPHasChain, SDNPMayStore]>;
+// Instructions to set/unset CR bit 6 for SVR4 vararg calls
+def PPCcr6set : SDNode<"PPCISD::CR6SET", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def PPCcr6unset : SDNode<"PPCISD::CR6UNSET", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
// Instructions to support atomic operations
def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx,
[SDNPHasChain, SDNPMayLoad]>;
@@ -330,9 +338,6 @@ def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits.
let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg);
let EncoderMethod = "getMemRIXEncoding";
}
-def tocentry : Operand<iPTR> {
- let MIOperandInfo = (ops i32imm:$imm);
-}
// PowerPC Predicate operand. 20 = (0<<5)|20 = always, CR0 is a dummy reg
// that doesn't matter.
@@ -673,7 +678,7 @@ def LWZ : DForm_1<32, (outs GPRC:$rD), (ins memri:$src),
[(set GPRC:$rD, (load iaddr:$src))]>;
def LFS : DForm_1<48, (outs F4RC:$rD), (ins memri:$src),
- "lfs $rD, $src", LdStLFDU,
+ "lfs $rD, $src", LdStLFD,
[(set F4RC:$rD, (load iaddr:$src))]>;
def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src),
"lfd $rD, $src", LdStLFD,
@@ -683,32 +688,32 @@ def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src),
// Unindexed (r+i) Loads with Update (preinc).
let mayLoad = 1 in {
def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lbzu $rD, $addr", LdStLoad,
+ "lbzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lhau $rD, $addr", LdStLoad,
+ "lhau $rD, $addr", LdStLHAU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lhzu $rD, $addr", LdStLoad,
+ "lhzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lwzu $rD, $addr", LdStLoad,
+ "lwzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lfs $rD, $addr", LdStLFDU,
+ "lfsu $rD, $addr", LdStLFDU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lfd $rD, $addr", LdStLFD,
+ "lfdu $rD, $addr", LdStLFDU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
@@ -716,37 +721,37 @@ def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
// Indexed (r+r) Loads with Update (preinc).
def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
- "lbzux $rD, $addr", LdStLoad,
+ "lbzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
- "lhaux $rD, $addr", LdStLoad,
+ "lhaux $rD, $addr", LdStLHAU,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
def LHZUX : XForm_1<31, 331, (outs GPRC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
- "lhzux $rD, $addr", LdStLoad,
+ "lhzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
- "lwzux $rD, $addr", LdStLoad,
+ "lwzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
- "lfsux $rD, $addr", LdStLoad,
+ "lfsux $rD, $addr", LdStLFDU,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
- "lfdux $rD, $addr", LdStLoad,
+ "lfdux $rD, $addr", LdStLFDU,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
}
@@ -778,10 +783,10 @@ def LWBRX : XForm_1<31, 534, (outs GPRC:$rD), (ins memrr:$src),
[(set GPRC:$rD, (PPClbrx xoaddr:$src, i32))]>;
def LFSX : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src),
- "lfsx $frD, $src", LdStLFDU,
+ "lfsx $frD, $src", LdStLFD,
[(set F4RC:$frD, (load xaddr:$src))]>;
def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src),
- "lfdx $frD, $src", LdStLFDU,
+ "lfdx $frD, $src", LdStLFD,
[(set F8RC:$frD, (load xaddr:$src))]>;
}
@@ -801,10 +806,10 @@ def STW : DForm_1<36, (outs), (ins GPRC:$rS, memri:$src),
"stw $rS, $src", LdStStore,
[(store GPRC:$rS, iaddr:$src)]>;
def STFS : DForm_1<52, (outs), (ins F4RC:$rS, memri:$dst),
- "stfs $rS, $dst", LdStUX,
+ "stfs $rS, $dst", LdStSTFD,
[(store F4RC:$rS, iaddr:$dst)]>;
def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst),
- "stfd $rS, $dst", LdStUX,
+ "stfd $rS, $dst", LdStSTFD,
[(store F8RC:$rS, iaddr:$dst)]>;
}
@@ -812,33 +817,33 @@ def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst),
let PPC970_Unit = 2 in {
def STBU : DForm_1a<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stbu $rS, $ptroff($ptrreg)", LdStStore,
+ "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd,
[(set ptr_rc:$ea_res,
(pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
def STHU : DForm_1a<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
- "sthu $rS, $ptroff($ptrreg)", LdStStore,
+ "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd,
[(set ptr_rc:$ea_res,
(pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
def STWU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stwu $rS, $ptroff($ptrreg)", LdStStore,
+ "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd,
[(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
def STFSU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F4RC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stfsu $rS, $ptroff($ptrreg)", LdStStore,
+ "stfsu $rS, $ptroff($ptrreg)", LdStSTFDU,
[(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
def STFDU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stfdu $rS, $ptroff($ptrreg)", LdStStore,
+ "stfdu $rS, $ptroff($ptrreg)", LdStSTFDU,
[(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
@@ -863,7 +868,7 @@ def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst),
def STBUX : XForm_8<31, 247, (outs ptr_rc:$ea_res),
(ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stbux $rS, $ptroff, $ptrreg", LdStStore,
+ "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd,
[(set ptr_rc:$ea_res,
(pre_truncsti8 GPRC:$rS,
ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
@@ -872,7 +877,7 @@ def STBUX : XForm_8<31, 247, (outs ptr_rc:$ea_res),
def STHUX : XForm_8<31, 439, (outs ptr_rc:$ea_res),
(ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "sthux $rS, $ptroff, $ptrreg", LdStStore,
+ "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd,
[(set ptr_rc:$ea_res,
(pre_truncsti16 GPRC:$rS,
ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
@@ -881,7 +886,7 @@ def STHUX : XForm_8<31, 439, (outs ptr_rc:$ea_res),
def STWUX : XForm_8<31, 183, (outs ptr_rc:$ea_res),
(ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stwux $rS, $ptroff, $ptrreg", LdStStore,
+ "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd,
[(set ptr_rc:$ea_res,
(pre_store GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
@@ -889,7 +894,7 @@ def STWUX : XForm_8<31, 183, (outs ptr_rc:$ea_res),
def STFSUX : XForm_8<31, 695, (outs ptr_rc:$ea_res),
(ins F4RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stfsux $rS, $ptroff, $ptrreg", LdStStore,
+ "stfsux $rS, $ptroff, $ptrreg", LdStSTFDU,
[(set ptr_rc:$ea_res,
(pre_store F4RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
@@ -897,7 +902,7 @@ def STFSUX : XForm_8<31, 695, (outs ptr_rc:$ea_res),
def STFDUX : XForm_8<31, 759, (outs ptr_rc:$ea_res),
(ins F8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stfdux $rS, $ptroff, $ptrreg", LdStStore,
+ "stfdux $rS, $ptroff, $ptrreg", LdStSTFDU,
[(set ptr_rc:$ea_res,
(pre_store F8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
@@ -913,14 +918,14 @@ def STWBRX: XForm_8<31, 662, (outs), (ins GPRC:$rS, memrr:$dst),
PPC970_DGroup_Cracked;
def STFIWX: XForm_28<31, 983, (outs), (ins F8RC:$frS, memrr:$dst),
- "stfiwx $frS, $dst", LdStUX,
+ "stfiwx $frS, $dst", LdStSTFD,
[(PPCstfiwx F8RC:$frS, xoaddr:$dst)]>;
def STFSX : XForm_28<31, 663, (outs), (ins F4RC:$frS, memrr:$dst),
- "stfsx $frS, $dst", LdStUX,
+ "stfsx $frS, $dst", LdStSTFD,
[(store F4RC:$frS, xaddr:$dst)]>;
def STFDX : XForm_28<31, 727, (outs), (ins F8RC:$frS, memrr:$dst),
- "stfdx $frS, $dst", LdStUX,
+ "stfdx $frS, $dst", LdStSTFD,
[(store F8RC:$frS, xaddr:$dst)]>;
}
@@ -964,7 +969,7 @@ def SUBFIC : DForm_2< 8, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
[(set GPRC:$rD, (subc immSExt16:$imm, GPRC:$rA))]>;
}
-let isReMaterializable = 1 in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
def LI : DForm_2_r0<14, (outs GPRC:$rD), (ins symbolLo:$imm),
"li $rD, $imm", IntSimple,
[(set GPRC:$rD, immSExt16:$imm)]>;
@@ -1143,6 +1148,16 @@ def CRUNSET: XLForm_1_ext<19, 193, (outs CRBITRC:$dst), (ins),
"crxor $dst, $dst, $dst", BrCR,
[]>;
+let Defs = [CR1EQ], CRD = 6 in {
+def CR6SET : XLForm_1_ext<19, 289, (outs), (ins),
+ "creqv 6, 6, 6", BrCR,
+ [(PPCcr6set)]>;
+
+def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins),
+ "crxor 6, 6, 6", BrCR,
+ [(PPCcr6unset)]>;
+}
+
// XFX-Form instructions. Instructions that deal with SPRs.
//
let Uses = [CTR] in {
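
The asm strings above lean on CR-logical identities rather than needing a scratch bit: creqv with all three operands equal computes b XNOR b, which is always 1, while crxor computes b XOR b, which is always 0. A quick check of the identities:

  #include <cassert>

  int main() {
    for (unsigned b = 0; b <= 1; ++b) {
      assert(((~(b ^ b)) & 1u) == 1u); // creqv 6, 6, 6 -> CR bit 6 set
      assert((b ^ b) == 0u);           // crxor 6, 6, 6 -> CR bit 6 clear
    }
    return 0;
  }
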
@@ -1233,7 +1248,7 @@ let Uses = [RM] in {
PPC970_DGroup_Single, PPC970_Unit_FPU;
def FADDrtz: AForm_2<63, 21,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
- "fadd $FRT, $FRA, $FRB", FPGeneral,
+ "fadd $FRT, $FRA, $FRB", FPAddSub,
[(set F8RC:$FRT, (PPCfaddrtz F8RC:$FRA, F8RC:$FRB))]>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
}
@@ -1364,7 +1379,7 @@ def FSELS : AForm_1<63, 23,
let Uses = [RM] in {
def FADD : AForm_2<63, 21,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
- "fadd $FRT, $FRA, $FRB", FPGeneral,
+ "fadd $FRT, $FRA, $FRB", FPAddSub,
[(set F8RC:$FRT, (fadd F8RC:$FRA, F8RC:$FRB))]>;
def FADDS : AForm_2<59, 21,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
@@ -1388,7 +1403,7 @@ let Uses = [RM] in {
[(set F4RC:$FRT, (fmul F4RC:$FRA, F4RC:$FRB))]>;
def FSUB : AForm_2<63, 20,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
- "fsub $FRT, $FRA, $FRB", FPGeneral,
+ "fsub $FRT, $FRA, $FRB", FPAddSub,
[(set F8RC:$FRT, (fsub F8RC:$FRA, F8RC:$FRB))]>;
def FSUBS : AForm_2<59, 20,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td
index 6a6ccb9..660c0c3 100644
--- a/lib/Target/PowerPC/PPCSchedule.td
+++ b/lib/Target/PowerPC/PPCSchedule.td
@@ -40,6 +40,7 @@ def IntMulHWU : InstrItinClass;
def IntMulLI : InstrItinClass;
def IntRFID : InstrItinClass;
def IntRotateD : InstrItinClass;
+def IntRotateDI : InstrItinClass;
def IntRotate : InstrItinClass;
def IntShift : InstrItinClass;
def IntTrapD : InstrItinClass;
@@ -52,15 +53,18 @@ def LdStDCBA : InstrItinClass;
def LdStDCBF : InstrItinClass;
def LdStDCBI : InstrItinClass;
def LdStLoad : InstrItinClass;
+def LdStLoadUpd : InstrItinClass;
def LdStStore : InstrItinClass;
+def LdStStoreUpd : InstrItinClass;
def LdStDSS : InstrItinClass;
def LdStICBI : InstrItinClass;
-def LdStUX : InstrItinClass;
def LdStLD : InstrItinClass;
+def LdStLDU : InstrItinClass;
def LdStLDARX : InstrItinClass;
def LdStLFD : InstrItinClass;
def LdStLFDU : InstrItinClass;
def LdStLHA : InstrItinClass;
+def LdStLHAU : InstrItinClass;
def LdStLMW : InstrItinClass;
def LdStLVecX : InstrItinClass;
def LdStLWA : InstrItinClass;
@@ -69,6 +73,9 @@ def LdStSLBIA : InstrItinClass;
def LdStSLBIE : InstrItinClass;
def LdStSTD : InstrItinClass;
def LdStSTDCX : InstrItinClass;
+def LdStSTDU : InstrItinClass;
+def LdStSTFD : InstrItinClass;
+def LdStSTFDU : InstrItinClass;
def LdStSTVEBX : InstrItinClass;
def LdStSTWCX : InstrItinClass;
def LdStSync : InstrItinClass;
@@ -86,6 +93,7 @@ def SprMTSRIN : InstrItinClass;
def SprRFI : InstrItinClass;
def SprSC : InstrItinClass;
def FPGeneral : InstrItinClass;
+def FPAddSub : InstrItinClass;
def FPCompare : InstrItinClass;
def FPDivD : InstrItinClass;
def FPDivS : InstrItinClass;
@@ -110,6 +118,8 @@ include "PPCScheduleG4.td"
include "PPCScheduleG4Plus.td"
include "PPCScheduleG5.td"
include "PPCScheduleA2.td"
+include "PPCScheduleE500mc.td"
+include "PPCScheduleE5500.td"
//===----------------------------------------------------------------------===//
// Instruction to itinerary class map - When add new opcodes to the supported
@@ -171,7 +181,7 @@ include "PPCScheduleA2.td"
// extsh IntSimple
// extsw IntSimple
// fabs FPGeneral
-// fadd FPGeneral
+// fadd FPAddSub
// fadds FPGeneral
// fcfid FPGeneral
// fcmpo FPCompare
@@ -201,35 +211,35 @@ include "PPCScheduleA2.td"
// fsel FPGeneral
// fsqrt FPSqrt
// fsqrts FPSqrt
-// fsub FPGeneral
+// fsub FPAddSub
// fsubs FPGeneral
// icbi LdStICBI
// isync SprISYNC
// lbz LdStLoad
-// lbzu LdStLoad
-// lbzux LdStUX
+// lbzu LdStLoadUpd
+// lbzux LdStLoadUpd
// lbzx LdStLoad
// ld LdStLD
// ldarx LdStLDARX
-// ldu LdStLD
-// ldux LdStLD
+// ldu LdStLDU
+// ldux LdStLDU
// ldx LdStLD
// lfd LdStLFD
// lfdu LdStLFDU
// lfdux LdStLFDU
-// lfdx LdStLFDU
-// lfs LdStLFDU
+// lfdx LdStLFD
+// lfs LdStLFD
// lfsu LdStLFDU
// lfsux LdStLFDU
-// lfsx LdStLFDU
+// lfsx LdStLFD
// lha LdStLHA
-// lhau LdStLHA
-// lhaux LdStLHA
+// lhau LdStLHAU
+// lhaux LdStLHAU
// lhax LdStLHA
// lhbrx LdStLoad
// lhz LdStLoad
-// lhzu LdStLoad
-// lhzux LdStUX
+// lhzu LdStLoadUpd
+// lhzux LdStLoadUpd
// lhzx LdStLoad
// lmw LdStLMW
// lswi LdStLMW
@@ -243,12 +253,12 @@ include "PPCScheduleA2.td"
// lvxl LdStLVecX
// lwa LdStLWA
// lwarx LdStLWARX
-// lwaux LdStLHA
+// lwaux LdStLHAU
// lwax LdStLHA
// lwbrx LdStLoad
// lwz LdStLoad
-// lwzu LdStLoad
-// lwzux LdStUX
+// lwzu LdStLoadUpd
+// lwzux LdStLoadUpd
// lwzx LdStLoad
// mcrf BrMCR
// mcrfs FPGeneral
@@ -292,10 +302,10 @@ include "PPCScheduleA2.td"
// rfid IntRFID
// rldcl IntRotateD
// rldcr IntRotateD
-// rldic IntRotateD
-// rldicl IntRotateD
-// rldicr IntRotateD
-// rldimi IntRotateD
+// rldic IntRotateDI
+// rldicl IntRotateDI
+// rldicr IntRotateDI
+// rldimi IntRotateDI
// rlwimi IntRotate
// rlwinm IntGeneral
// rlwnm IntGeneral
@@ -305,33 +315,33 @@ include "PPCScheduleA2.td"
// sld IntRotateD
// slw IntGeneral
// srad IntRotateD
-// sradi IntRotateD
+// sradi IntRotateDI
// sraw IntShift
// srawi IntShift
// srd IntRotateD
// srw IntGeneral
// stb LdStStore
-// stbu LdStStore
-// stbux LdStStore
+// stbu LdStStoreUpd
+// stbux LdStStoreUpd
// stbx LdStStore
// std LdStSTD
// stdcx. LdStSTDCX
-// stdu LdStSTD
-// stdux LdStSTD
+// stdu LdStSTDU
+// stdux LdStSTDU
// stdx LdStSTD
-// stfd LdStUX
-// stfdu LdStUX
-// stfdux LdStUX
-// stfdx LdStUX
-// stfiwx LdStUX
-// stfs LdStUX
-// stfsu LdStUX
-// stfsux LdStUX
-// stfsx LdStUX
+// stfd LdStSTFD
+// stfdu LdStSTFDU
+// stfdux LdStSTFDU
+// stfdx LdStSTFD
+// stfiwx LdStSTFD
+// stfs LdStSTFD
+// stfsu LdStSTFDU
+// stfsux LdStSTFDU
+// stfsx LdStSTFD
// sth LdStStore
// sthbrx LdStStore
-// sthu LdStStore
-// sthux LdStStore
+// sthu LdStStoreUpd
+// sthux LdStStoreUpd
// sthx LdStStore
// stmw LdStLMW
// stswi LdStLMW
@@ -344,8 +354,8 @@ include "PPCScheduleA2.td"
// stw LdStStore
// stwbrx LdStStore
// stwcx. LdStSTWCX
-// stwu LdStStore
-// stwux LdStStore
+// stwu LdStStoreUpd
+// stwux LdStStoreUpd
// stwx LdStStore
// subf IntGeneral
// subfc IntGeneral
diff --git a/lib/Target/PowerPC/PPCSchedule440.td b/lib/Target/PowerPC/PPCSchedule440.td
index cd0fb70..37b6eac 100644
--- a/lib/Target/PowerPC/PPCSchedule440.td
+++ b/lib/Target/PowerPC/PPCSchedule440.td
@@ -288,6 +288,15 @@ def PPC440Itineraries : ProcessorItineraries<
InstrStage<2, [LWB]>],
[9, 5],
[GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLoadUpd , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<2, [LWB]>],
+ [9, 5],
+ [GPR_Bypass, GPR_Bypass]>,
InstrItinData<LdStStore , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
@@ -297,6 +306,15 @@ def PPC440Itineraries : ProcessorItineraries<
InstrStage<2, [LWB]>],
[8, 5],
[NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStStoreUpd, [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<2, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
InstrItinData<LdStICBI , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
@@ -306,7 +324,7 @@ def PPC440Itineraries : ProcessorItineraries<
InstrStage<1, [LWB]>],
[8, 5],
[NoBypass, GPR_Bypass]>,
- InstrItinData<LdStUX , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrItinData<LdStSTFD , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [LRACC]>,
@@ -315,6 +333,15 @@ def PPC440Itineraries : ProcessorItineraries<
InstrStage<1, [LWB]>],
[8, 5, 5],
[NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStSTFDU , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5, 5],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
InstrItinData<LdStLFD , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
@@ -342,6 +369,15 @@ def PPC440Itineraries : ProcessorItineraries<
InstrStage<1, [LWB]>],
[8, 5],
[NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStLHAU , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
InstrItinData<LdStLMW , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
@@ -371,6 +407,15 @@ def PPC440Itineraries : ProcessorItineraries<
InstrStage<2, [LWB]>],
[8, 5],
[NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSTDU , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<2, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
InstrItinData<LdStSTDCX , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1]>,
@@ -537,6 +582,19 @@ def PPC440Itineraries : ProcessorItineraries<
InstrStage<1, [FWB]>],
[10, 4, 4],
[FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPAddSub , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC]>,
+ InstrStage<1, [FEXE1]>,
+ InstrStage<1, [FEXE2]>,
+ InstrStage<1, [FEXE3]>,
+ InstrStage<1, [FEXE4]>,
+ InstrStage<1, [FEXE5]>,
+ InstrStage<1, [FEXE6]>,
+ InstrStage<1, [FWB]>],
+ [10, 4, 4],
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
InstrItinData<FPCompare , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
diff --git a/lib/Target/PowerPC/PPCScheduleA2.td b/lib/Target/PowerPC/PPCScheduleA2.td
index 4d4a5d0..ba63b5c 100644
--- a/lib/Target/PowerPC/PPCScheduleA2.td
+++ b/lib/Target/PowerPC/PPCScheduleA2.td
@@ -181,6 +181,17 @@ def PPCA2Itineraries : ProcessorItineraries<
InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
[10, 7, 7],
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntRotateDI , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
InstrItinData<IntShift , [InstrStage<4,
[IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
@@ -302,7 +313,18 @@ def PPCA2Itineraries : ProcessorItineraries<
InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
[14, 7],
[GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLD , [InstrStage<4,
+ InstrItinData<LdStLoadUpd , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLDU , [InstrStage<4,
[IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
IU4_4, IU4_5, IU4_6, IU4_7]>,
@@ -324,6 +346,17 @@ def PPCA2Itineraries : ProcessorItineraries<
InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
[13, 7],
[GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStStoreUpd, [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [13, 7],
+ [GPR_Bypass, GPR_Bypass]>,
InstrItinData<LdStICBI , [InstrStage<4,
[IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
@@ -335,7 +368,7 @@ def PPCA2Itineraries : ProcessorItineraries<
InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
[14, 7],
[NoBypass, GPR_Bypass]>,
- InstrItinData<LdStUX , [InstrStage<4,
+ InstrItinData<LdStSTFD , [InstrStage<4,
[IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
IU4_4, IU4_5, IU4_6, IU4_7]>,
@@ -346,6 +379,17 @@ def PPCA2Itineraries : ProcessorItineraries<
InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
[14, 7, 7],
[NoBypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<LdStSTFDU , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7, 7],
+ [NoBypass, FPR_Bypass, FPR_Bypass]>,
InstrItinData<LdStLFD , [InstrStage<4,
[IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
@@ -379,6 +423,17 @@ def PPCA2Itineraries : ProcessorItineraries<
InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
[14, 7],
[NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStLHAU , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7],
+ [NoBypass, GPR_Bypass]>,
InstrItinData<LdStLMW , [InstrStage<4,
[IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
@@ -412,6 +467,17 @@ def PPCA2Itineraries : ProcessorItineraries<
InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
[13, 7],
[GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStSTDU , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [13, 7],
+ [GPR_Bypass, GPR_Bypass]>,
InstrItinData<LdStSTDCX , [InstrStage<4,
[IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
@@ -593,6 +659,17 @@ def PPCA2Itineraries : ProcessorItineraries<
InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>],
[15, 7, 7],
[FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPAddSub , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>,
+ InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>,
+ InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>,
+ InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>],
+ [15, 7, 7],
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
InstrItinData<FPCompare , [InstrStage<4,
[IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
diff --git a/lib/Target/PowerPC/PPCScheduleE500mc.td b/lib/Target/PowerPC/PPCScheduleE500mc.td
new file mode 100644
index 0000000..9bb779a
--- /dev/null
+++ b/lib/Target/PowerPC/PPCScheduleE500mc.td
@@ -0,0 +1,265 @@
+//===-- PPCScheduleE500mc.td - e500mc Scheduling Defs ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the Freescale e500mc 32-bit
+// Power processor.
+//
+// All information is derived from the "e500mc Core Reference Manual",
+// Freescale Document Number E500MCRM, Rev. 1, 03/2012.
+//
+//===----------------------------------------------------------------------===//
+// Relevant functional units in the Freescale e500mc core:
+//
+// * Decode & Dispatch
+// Can dispatch up to 2 instructions per clock cycle to the GPR issue
+// queues (GIQx), the FP issue queue (FIQ), or the branch issue queue (BIQ).
+def DIS0 : FuncUnit; // Dispatch stage - insn 1
+def DIS1 : FuncUnit; // Dispatch stage - insn 2
+
+// * Execute
+// 6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX.
+// Some instructions can execute only in SFX0, not in SFX1.
+// The CFX has a bypass path, allowing non-divide instructions to execute
+// while a divide instruction is being executed.
+def SFX0 : FuncUnit; // Simple unit 0
+def SFX1 : FuncUnit; // Simple unit 1
+def BU : FuncUnit; // Branch unit
+def CFX_DivBypass
+ : FuncUnit; // CFX divide bypass path
+def CFX_0 : FuncUnit; // CFX pipeline
+def LSU_0 : FuncUnit; // LSU pipeline
+def FPU_0 : FuncUnit; // FPU pipeline
+
+def PPCE500mcItineraries : ProcessorItineraries<
+ [DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, LSU_0, FPU_0],
+ [CR_Bypass, GPR_Bypass, FPR_Bypass], [
+ InstrItinData<IntSimple , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [4, 1, 1], // Latency = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntGeneral , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [4, 1, 1], // Latency = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntCompare , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [5, 1, 1], // Latency = 1 or 2
+ [CR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntDivW , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [CFX_0], 0>,
+ InstrStage<14, [CFX_DivBypass]>],
+ [17, 1, 1], // Latency = 4..35, Repeat rate = 4..35
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMFFS , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<8, [FPU_0]>],
+ [11], // Latency = 8
+ [FPR_Bypass]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<8, [FPU_0]>],
+ [11, 1, 1], // Latency = 8
+ [NoBypass, NoBypass, NoBypass]>,
+ InstrItinData<IntMulHW , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [CFX_0]>],
+ [7, 1, 1], // Latency = 4, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulHWU , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [CFX_0]>],
+ [7, 1, 1], // Latency = 4, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulLI , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [CFX_0]>],
+ [7, 1, 1], // Latency = 4, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntRotate , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [4, 1, 1], // Latency = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntShift , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [4, 1, 1], // Latency = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntTrapW , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<2, [SFX0]>],
+ [5, 1], // Latency = 2, Repeat rate = 2
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<BrB , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [BU]>],
+ [4, 1], // Latency = 1
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<BrCR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [BU]>],
+ [4, 1, 1], // Latency = 1
+ [CR_Bypass, CR_Bypass, CR_Bypass]>,
+ InstrItinData<BrMCR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [BU]>],
+ [4, 1], // Latency = 1
+ [CR_Bypass, CR_Bypass]>,
+ InstrItinData<BrMCRX , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [4, 1, 1], // Latency = 1
+ [CR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBA , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1], // Latency = 3, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBF , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1], // Latency = 3
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBI , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1], // Latency = 3
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLoad , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1], // Latency = 3
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLoadUpd , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1], // Latency = 3
+ [GPR_Bypass, GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<LdStStore , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1], // Latency = 3
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStStoreUpd, [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1], // Latency = 3
+ [NoBypass, GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<LdStICBI , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1], // Latency = 3
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSTFD , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1, 1], // Latency = 3
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStSTFDU , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1, 1], // Latency = 3
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<LdStLFD , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 1, 1], // Latency = 4
+ [FPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLFDU , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 1, 1], // Latency = 4
+ [FPR_Bypass, GPR_Bypass, GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<LdStLHA , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1], // Latency = 3
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLHAU , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1], // Latency = 3
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLMW , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 1], // Latency = r+3
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStLWARX , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<3, [LSU_0]>],
+ [6, 1, 1], // Latency = 3, Repeat rate = 3
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStSTWCX , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1], // Latency = 3
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSync , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>]>,
+ InstrItinData<SprMFSR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<4, [SFX0]>],
+ [7, 1],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<SprMTMSR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<2, [SFX0, SFX1]>],
+ [5, 1], // Latency = 2, Repeat rate = 4
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<SprMTSR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0]>],
+ [5, 1],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0], 0>]>,
+ InstrItinData<SprMFCR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<5, [SFX0]>],
+ [8, 1],
+ [GPR_Bypass, CR_Bypass]>,
+ InstrItinData<SprMFMSR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<4, [SFX0]>],
+ [7, 1], // Latency = 4, Repeat rate = 4
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<SprMFSPR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [4, 1], // Latency = 1, Repeat rate = 1
+ [GPR_Bypass, CR_Bypass]>,
+ InstrItinData<SprMFTB , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<4, [SFX0]>],
+ [7, 1], // Latency = 4, Repeat rate = 4
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprMTSPR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [4, 1], // Latency = 1, Repeat rate = 1
+ [CR_Bypass, GPR_Bypass]>,
+ InstrItinData<SprMTSRIN , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0]>],
+ [4, 1],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<FPGeneral , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<2, [FPU_0]>],
+ [11, 1, 1], // Latency = 8, Repeat rate = 2
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPAddSub , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<4, [FPU_0]>],
+ [13, 1, 1], // Latency = 10, Repeat rate = 4
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPCompare , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<2, [FPU_0]>],
+ [11, 1, 1], // Latency = 8, Repeat rate = 2
+ [CR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPDivD , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<68, [FPU_0]>],
+ [71, 1, 1], // Latency = 68, Repeat rate = 68
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPDivS , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<38, [FPU_0]>],
+ [41, 1, 1], // Latency = 38, Repeat rate = 38
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPFused , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<4, [FPU_0]>],
+ [13, 1, 1, 1], // Latency = 10, Repeat rate = 4
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPRes , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<38, [FPU_0]>],
+ [41, 1], // Latency = 38, Repeat rate = 38
+ [FPR_Bypass, FPR_Bypass]>
+]>;
+
+// ===---------------------------------------------------------------------===//
+// e500mc machine model for scheduling and other instruction cost heuristics.
+
+def PPCE500mcModel : SchedMachineModel {
+ let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
+ let MinLatency = -1; // OperandCycles are interpreted as MinLatency.
+ let LoadLatency = 5; // Optimistic load latency assuming bypass.
+ // This is overridden by OperandCycles if the
+ // Itineraries are queried instead.
+
+ let Itineraries = PPCE500mcItineraries;
+}
diff --git a/lib/Target/PowerPC/PPCScheduleE5500.td b/lib/Target/PowerPC/PPCScheduleE5500.td
new file mode 100644
index 0000000..d7e11ac
--- /dev/null
+++ b/lib/Target/PowerPC/PPCScheduleE5500.td
@@ -0,0 +1,309 @@
+//===-- PPCScheduleE5500.td - e5500 Scheduling Defs --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the Freescale e5500 64-bit
+// Power processor.
+//
+// All information is derived from the "e5500 Core Reference Manual",
+// Freescale Document Number e5500RM, Rev. 1, 03/2012.
+//
+//===----------------------------------------------------------------------===//
+// Relevant functional units in the Freescale e5500 core
+// (These are the same as for the e500mc)
+//
+// * Decode & Dispatch
+// Can dispatch up to 2 instructions per clock cycle to the GPR issue
+// queues (GIQx), the FP issue queue (FIQ), or the branch issue queue (BIQ).
+// def DIS0 : FuncUnit;
+// def DIS1 : FuncUnit;
+
+// * Execute
+// 6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX.
+// The CFX has a bypass path, allowing non-divide instructions to execute
+// while a divide instruction is being executed.
+// def SFX0 : FuncUnit; // Simple unit 0
+// def SFX1 : FuncUnit; // Simple unit 1
+// def BU : FuncUnit; // Branch unit
+// def CFX_DivBypass
+// : FuncUnit; // CFX divide bypass path
+// def CFX_0 : FuncUnit; // CFX pipeline stage 0
+
+def CFX_1 : FuncUnit; // CFX pipeline stage 1
+
+// def LSU_0 : FuncUnit; // LSU pipeline
+// def FPU_0 : FuncUnit; // FPU pipeline
+
+
+def PPCE5500Itineraries : ProcessorItineraries<
+ [DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, CFX_1,
+ LSU_0, FPU_0],
+ [CR_Bypass, GPR_Bypass, FPR_Bypass], [
+ InstrItinData<IntSimple , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [5, 2, 2], // Latency = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntGeneral , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [5, 2, 2], // Latency = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntCompare , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [6, 2, 2], // Latency = 1 or 2
+ [CR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntDivD , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [CFX_0], 0>,
+ InstrStage<26, [CFX_DivBypass]>],
+ [30, 2, 2], // Latency = 4..26, Repeat rate = 4..26
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntDivW , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [CFX_0], 0>,
+ InstrStage<16, [CFX_DivBypass]>],
+ [20, 2, 2], // Latency = 4..16, Repeat rate = 4..16
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMFFS , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [FPU_0]>],
+ [11], // Latency = 7, Repeat rate = 1
+ [FPR_Bypass]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<7, [FPU_0]>],
+ [11, 2, 2], // Latency = 7, Repeat rate = 7
+ [NoBypass, NoBypass, NoBypass]>,
+ InstrItinData<IntMulHD , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [CFX_0], 0>,
+ InstrStage<2, [CFX_1]>],
+ [9, 2, 2], // Latency = 4..7, Repeat rate = 2..4
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulHW , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [CFX_0], 0>,
+ InstrStage<1, [CFX_1]>],
+ [8, 2, 2], // Latency = 4, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulHWU , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [CFX_0], 0>,
+ InstrStage<1, [CFX_1]>],
+ [8, 2, 2], // Latency = 4, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulLI , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [CFX_0], 0>,
+ InstrStage<2, [CFX_1]>],
+ [8, 2, 2], // Latency = 4 or 5, Repeat rate = 2
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntRotate , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [5, 2, 2], // Latency = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntRotateD , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<2, [SFX0, SFX1]>],
+ [6, 2, 2], // Latency = 2, Repeat rate = 2
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntRotateDI , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [5, 2, 2], // Latency = 1, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntShift , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<2, [SFX0, SFX1]>],
+ [6, 2, 2], // Latency = 2, Repeat rate = 2
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntTrapW , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<2, [SFX0]>],
+ [6, 2], // Latency = 2, Repeat rate = 2
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<BrB , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [BU]>],
+ [5, 2], // Latency = 1
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<BrCR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [BU]>],
+ [5, 2, 2], // Latency = 1
+ [CR_Bypass, CR_Bypass, CR_Bypass]>,
+ InstrItinData<BrMCR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [BU]>],
+ [5, 2], // Latency = 1
+ [CR_Bypass, CR_Bypass]>,
+ InstrItinData<BrMCRX , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [CFX_0]>],
+ [5, 2, 2], // Latency = 1
+ [CR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBA , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBF , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBI , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLoad , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLoadUpd , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<LdStLD , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLDARX , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<3, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 3
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLDU , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<LdStStore , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStStoreUpd, [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<LdStICBI , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSTFD , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2, 2], // Latency = 3, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStSTFDU , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2, 2], // Latency = 3, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<LdStLFD , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [8, 2, 2], // Latency = 4, Repeat rate = 1
+ [FPR_Bypass, GPR_Bypass, GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<LdStLFDU , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [8, 2, 2], // Latency = 4, Repeat rate = 1
+ [FPR_Bypass, GPR_Bypass, GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<LdStLHA , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLHAU , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<LdStLMW , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<4, [LSU_0]>],
+ [8, 2], // Latency = r+3, Repeat rate = r+3
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStLWARX , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<3, [LSU_0]>],
+ [7, 2, 2], // Latency = 3, Repeat rate = 3
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStSTD , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSTDCX , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSTDU , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<LdStSTWCX , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSync , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>]>,
+ InstrItinData<SprMTMSR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<2, [CFX_0]>],
+ [6, 2], // Latency = 2, Repeat rate = 4
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0], 0>]>,
+ InstrItinData<SprMFCR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<5, [CFX_0]>],
+ [9, 2], // Latency = 5, Repeat rate = 5
+ [GPR_Bypass, CR_Bypass]>,
+ InstrItinData<SprMFMSR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<4, [SFX0]>],
+ [8, 2], // Latency = 4, Repeat rate = 4
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<SprMFSPR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [CFX_0]>],
+ [5], // Latency = 1, Repeat rate = 1
+ [GPR_Bypass]>,
+ InstrItinData<SprMFTB , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<4, [CFX_0]>],
+ [8, 2], // Latency = 4, Repeat rate = 4
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprMTSPR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [5], // Latency = 1, Repeat rate = 1
+ [GPR_Bypass]>,
+ InstrItinData<FPGeneral , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [FPU_0]>],
+ [11, 2, 2], // Latency = 7, Repeat rate = 1
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPAddSub , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [FPU_0]>],
+ [11, 2, 2], // Latency = 7, Repeat rate = 1
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPCompare , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [FPU_0]>],
+ [11, 2, 2], // Latency = 7, Repeat rate = 1
+ [CR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPDivD , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<31, [FPU_0]>],
+ [39, 2, 2], // Latency = 35, Repeat rate = 31
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPDivS , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<16, [FPU_0]>],
+ [24, 2, 2], // Latency = 20, Repeat rate = 16
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPFused , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [FPU_0]>],
+ [11, 2, 2, 2], // Latency = 7, Repeat rate = 1
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPRes , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<2, [FPU_0]>],
+ [12, 2], // Latency = 8, Repeat rate = 2
+ [FPR_Bypass, FPR_Bypass]>
+]>;
+
+//===----------------------------------------------------------------------===//
+// e5500 machine model for scheduling and other instruction cost heuristics.
+
+def PPCE5500Model : SchedMachineModel {
+ let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
+ let MinLatency = -1; // OperandCycles are interpreted as MinLatency.
+ let LoadLatency = 6; // Optimistic load latency assuming bypass.
+ // This is overridden by OperandCycles if the
+ // Itineraries are queried instead.
+
+ let Itineraries = PPCE5500Itineraries;
+}
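
As a side note on reading these tables: below is a minimal sketch, assuming the MCInstrItineraries API of this LLVM version, of how a scheduling client consumes the operand-cycle lists above. By this file's convention the first entry is the documented latency plus a fixed dispatch offset of 4 (e.g. 20 for IntDivW versus the commented 4..16).

#include "llvm/MC/MCInstrItineraries.h"
using namespace llvm;

// Def cycle of operand 0 (the destination) for an itinerary class, falling
// back to the summed stage latency when no operand cycle was specified.
static unsigned defCycleOf(const InstrItineraryData &Itins, unsigned Class) {
  int Cycle = Itins.getOperandCycle(Class, 0);
  return Cycle < 0 ? Itins.getStageLatency(Class) : unsigned(Cycle);
}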
diff --git a/lib/Target/PowerPC/PPCScheduleG3.td b/lib/Target/PowerPC/PPCScheduleG3.td
index 61e89ed..72a0a39 100644
--- a/lib/Target/PowerPC/PPCScheduleG3.td
+++ b/lib/Target/PowerPC/PPCScheduleG3.td
@@ -34,12 +34,16 @@ def G3Itineraries : ProcessorItineraries<
InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStLoad , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLoadUpd , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStStore , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStStoreUpd, [InstrStage<2, [SLU]>]>,
InstrItinData<LdStICBI , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStSTFD , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStSTFDU , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStLFDU , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStLHA , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLHAU , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStLMW , [InstrStage<34, [SLU]>]>,
InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStSTWCX , [InstrStage<8, [SLU]>]>,
@@ -58,6 +62,7 @@ def G3Itineraries : ProcessorItineraries<
InstrItinData<SprRFI , [InstrStage<2, [SRU]>]>,
InstrItinData<SprSC , [InstrStage<2, [SRU]>]>,
InstrItinData<FPGeneral , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPAddSub , [InstrStage<1, [FPU1]>]>,
InstrItinData<FPCompare , [InstrStage<1, [FPU1]>]>,
InstrItinData<FPDivD , [InstrStage<31, [FPU1]>]>,
InstrItinData<FPDivS , [InstrStage<17, [FPU1]>]>,
diff --git a/lib/Target/PowerPC/PPCScheduleG4.td b/lib/Target/PowerPC/PPCScheduleG4.td
index e19ddfa..fc9120d 100644
--- a/lib/Target/PowerPC/PPCScheduleG4.td
+++ b/lib/Target/PowerPC/PPCScheduleG4.td
@@ -33,13 +33,17 @@ def G4Itineraries : ProcessorItineraries<
InstrItinData<LdStDCBF , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStDCBI , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStLoad , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLoadUpd , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStStore , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStStoreUpd, [InstrStage<2, [SLU]>]>,
InstrItinData<LdStDSS , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStICBI , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStSTFD , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStSTFDU , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStLFDU , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStLHA , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLHAU , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStLMW , [InstrStage<34, [SLU]>]>,
InstrItinData<LdStLVecX , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>,
@@ -60,6 +64,7 @@ def G4Itineraries : ProcessorItineraries<
InstrItinData<SprRFI , [InstrStage<2, [SRU]>]>,
InstrItinData<SprSC , [InstrStage<2, [SRU]>]>,
InstrItinData<FPGeneral , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPAddSub , [InstrStage<1, [FPU1]>]>,
InstrItinData<FPCompare , [InstrStage<1, [FPU1]>]>,
InstrItinData<FPDivD , [InstrStage<31, [FPU1]>]>,
InstrItinData<FPDivS , [InstrStage<17, [FPU1]>]>,
diff --git a/lib/Target/PowerPC/PPCScheduleG4Plus.td b/lib/Target/PowerPC/PPCScheduleG4Plus.td
index e7446cb..a4e82ce 100644
--- a/lib/Target/PowerPC/PPCScheduleG4Plus.td
+++ b/lib/Target/PowerPC/PPCScheduleG4Plus.td
@@ -36,19 +36,24 @@ def G4PlusItineraries : ProcessorItineraries<
InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStLoad , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLoadUpd , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStStore , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStStoreUpd, [InstrStage<3, [SLU]>]>,
InstrItinData<LdStDSS , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStICBI , [InstrStage<3, [IU2]>]>,
- InstrItinData<LdStUX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTFD , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTFDU , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStLFD , [InstrStage<4, [SLU]>]>,
InstrItinData<LdStLFDU , [InstrStage<4, [SLU]>]>,
InstrItinData<LdStLHA , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLHAU , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStLMW , [InstrStage<37, [SLU]>]>,
InstrItinData<LdStLVecX , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStLWA , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStSTD , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStSTDCX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTDU , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStSTVEBX , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStSTWCX , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStSync , [InstrStage<35, [SLU]>]>,
@@ -66,6 +71,7 @@ def G4PlusItineraries : ProcessorItineraries<
InstrItinData<SprRFI , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
InstrItinData<SprSC , [InstrStage<0, [IU1, IU2, IU3, IU4]>]>,
InstrItinData<FPGeneral , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<FPAddSub , [InstrStage<5, [FPU1]>]>,
InstrItinData<FPCompare , [InstrStage<5, [FPU1]>]>,
InstrItinData<FPDivD , [InstrStage<35, [FPU1]>]>,
InstrItinData<FPDivS , [InstrStage<21, [FPU1]>]>,
diff --git a/lib/Target/PowerPC/PPCScheduleG5.td b/lib/Target/PowerPC/PPCScheduleG5.td
index 1371499..7c02ea0 100644
--- a/lib/Target/PowerPC/PPCScheduleG5.td
+++ b/lib/Target/PowerPC/PPCScheduleG5.td
@@ -27,6 +27,7 @@ def G5Itineraries : ProcessorItineraries<
InstrItinData<IntMulLI , [InstrStage<4, [IU1, IU2]>]>,
InstrItinData<IntRFID , [InstrStage<1, [IU2]>]>,
InstrItinData<IntRotateD , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<IntRotateDI , [InstrStage<2, [IU1, IU2]>]>,
InstrItinData<IntRotate , [InstrStage<4, [IU1, IU2]>]>,
InstrItinData<IntShift , [InstrStage<2, [IU1, IU2]>]>,
InstrItinData<IntTrapD , [InstrStage<1, [IU1, IU2]>]>,
@@ -37,15 +38,20 @@ def G5Itineraries : ProcessorItineraries<
InstrItinData<BrMCRX , [InstrStage<3, [BPU]>]>,
InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStLoad , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLoadUpd , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStStore , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStStoreUpd, [InstrStage<3, [SLU]>]>,
InstrItinData<LdStDSS , [InstrStage<10, [SLU]>]>,
InstrItinData<LdStICBI , [InstrStage<40, [SLU]>]>,
- InstrItinData<LdStUX , [InstrStage<4, [SLU]>]>,
+ InstrItinData<LdStSTFD , [InstrStage<4, [SLU]>]>,
+ InstrItinData<LdStSTFDU , [InstrStage<4, [SLU]>]>,
InstrItinData<LdStLD , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLDU , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStLDARX , [InstrStage<11, [SLU]>]>,
InstrItinData<LdStLFD , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStLFDU , [InstrStage<5, [SLU]>]>,
InstrItinData<LdStLHA , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStLHAU , [InstrStage<5, [SLU]>]>,
InstrItinData<LdStLMW , [InstrStage<64, [SLU]>]>,
InstrItinData<LdStLVecX , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStLWA , [InstrStage<5, [SLU]>]>,
@@ -53,6 +59,7 @@ def G5Itineraries : ProcessorItineraries<
InstrItinData<LdStSLBIA , [InstrStage<40, [SLU]>]>, // needs work
InstrItinData<LdStSLBIE , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStSTD , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTDU , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStSTDCX , [InstrStage<11, [SLU]>]>,
InstrItinData<LdStSTVEBX , [InstrStage<5, [SLU]>]>,
InstrItinData<LdStSTWCX , [InstrStage<11, [SLU]>]>,
@@ -69,6 +76,7 @@ def G5Itineraries : ProcessorItineraries<
InstrItinData<SprMTSPR , [InstrStage<8, [IU2]>]>,
InstrItinData<SprSC , [InstrStage<1, [IU2]>]>,
InstrItinData<FPGeneral , [InstrStage<6, [FPU1, FPU2]>]>,
+ InstrItinData<FPAddSub , [InstrStage<6, [FPU1, FPU2]>]>,
InstrItinData<FPCompare , [InstrStage<8, [FPU1, FPU2]>]>,
InstrItinData<FPDivD , [InstrStage<33, [FPU1, FPU2]>]>,
InstrItinData<FPDivS , [InstrStage<33, [FPU1, FPU2]>]>,
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 0207c83..b8b1614 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -41,6 +41,8 @@ namespace PPC {
DIR_750,
DIR_970,
DIR_A2,
+ DIR_E500mc,
+ DIR_E5500,
DIR_PWR6,
DIR_PWR7,
DIR_64
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
index 15541ef..e64c140 100644
--- a/lib/Target/Sparc/SparcInstrInfo.td
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -129,7 +129,7 @@ def retflag : SDNode<"SPISD::RET_FLAG", SDT_SPRet,
[SDNPHasChain, SDNPOptInGlue]>;
def flushw : SDNode<"SPISD::FLUSHW", SDTNone,
- [SDNPHasChain]>;
+ [SDNPHasChain, SDNPSideEffect, SDNPMayStore]>;
def getPCX : Operand<i32> {
let PrintMethod = "printGetPCX";
diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp
index 8e215a7..62f973e 100644
--- a/lib/Target/TargetLibraryInfo.cpp
+++ b/lib/Target/TargetLibraryInfo.cpp
@@ -24,6 +24,16 @@ void TargetLibraryInfo::anchor() { }
const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
{
+ "_ZdaPv",
+ "_ZdlPv",
+ "_Znaj",
+ "_ZnajRKSt9nothrow_t",
+ "_Znam",
+ "_ZnamRKSt9nothrow_t",
+ "_Znwj",
+ "_ZnwjRKSt9nothrow_t",
+ "_Znwm",
+ "_ZnwmRKSt9nothrow_t",
"__cxa_atexit",
"__cxa_guard_abort",
"__cxa_guard_acquire",
@@ -31,16 +41,29 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"__memcpy_chk",
"acos",
"acosf",
+ "acosh",
+ "acoshf",
+ "acoshl",
"acosl",
"asin",
"asinf",
+ "asinh",
+ "asinhf",
+ "asinhl",
"asinl",
"atan",
"atan2",
"atan2f",
"atan2l",
"atanf",
+ "atanh",
+ "atanhf",
+ "atanhl",
"atanl",
+ "calloc",
+ "cbrt",
+ "cbrtf",
+ "cbrtl",
"ceil",
"ceilf",
"ceill",
@@ -54,6 +77,9 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"coshl",
"cosl",
"exp",
+ "exp10",
+ "exp10f",
+ "exp10l",
"exp2",
"exp2f",
"exp2l",
@@ -74,6 +100,7 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"fmodl",
"fputc",
"fputs",
+ "free",
"fwrite",
"iprintf",
"log",
@@ -86,8 +113,12 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"log2",
"log2f",
"log2l",
+ "logb",
+ "logbf",
+ "logbl",
"logf",
"logl",
+ "malloc",
"memchr",
"memcmp",
"memcpy",
@@ -97,11 +128,14 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"nearbyint",
"nearbyintf",
"nearbyintl",
+ "posix_memalign",
"pow",
"powf",
"powl",
"putchar",
"puts",
+ "realloc",
+ "reallocf",
"rint",
"rintf",
"rintl",
@@ -121,10 +155,12 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"strcat",
"strchr",
"strcpy",
+ "strdup",
"strlen",
"strncat",
"strncmp",
"strncpy",
+ "strndup",
"strnlen",
"tan",
"tanf",
@@ -134,7 +170,8 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"tanl",
"trunc",
"truncf",
- "truncl"
+ "truncl",
+ "valloc"
};
/// initialize - Initialize the set of available library functions based on the
@@ -205,6 +242,21 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T,
TLI.setUnavailable(LibFunc::tanhl);
// Win32 only has C89 math
+ TLI.setUnavailable(LibFunc::acosh);
+ TLI.setUnavailable(LibFunc::acoshf);
+ TLI.setUnavailable(LibFunc::acoshl);
+ TLI.setUnavailable(LibFunc::asinh);
+ TLI.setUnavailable(LibFunc::asinhf);
+ TLI.setUnavailable(LibFunc::asinhl);
+ TLI.setUnavailable(LibFunc::atanh);
+ TLI.setUnavailable(LibFunc::atanhf);
+ TLI.setUnavailable(LibFunc::atanhl);
+ TLI.setUnavailable(LibFunc::cbrt);
+ TLI.setUnavailable(LibFunc::cbrtf);
+ TLI.setUnavailable(LibFunc::cbrtl);
+ TLI.setUnavailable(LibFunc::exp10);
+ TLI.setUnavailable(LibFunc::exp10f);
+ TLI.setUnavailable(LibFunc::exp10l);
TLI.setUnavailable(LibFunc::exp2);
TLI.setUnavailable(LibFunc::exp2f);
TLI.setUnavailable(LibFunc::exp2l);
@@ -217,6 +269,9 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T,
TLI.setUnavailable(LibFunc::log1p);
TLI.setUnavailable(LibFunc::log1pf);
TLI.setUnavailable(LibFunc::log1pl);
+ TLI.setUnavailable(LibFunc::logb);
+ TLI.setUnavailable(LibFunc::logbf);
+ TLI.setUnavailable(LibFunc::logbl);
TLI.setUnavailable(LibFunc::nearbyint);
TLI.setUnavailable(LibFunc::nearbyintf);
TLI.setUnavailable(LibFunc::nearbyintl);
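
A hedged sketch of how passes consume this table (API names per llvm/Target/TargetLibraryInfo.h of this period): the entries added above extend the alphabetically ordered StandardNames array, the per-triple initialize() logic decides availability, and clients only ever see the combined answer.

#include "llvm/Target/TargetLibraryInfo.h"
using namespace llvm;

// A simplification may only introduce a call to exp10 if the target
// provides it; on Win32 the code above has marked it unavailable.
static bool canUseExp10(const TargetLibraryInfo &TLI) {
  return TLI.has(LibFunc::exp10);
}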
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 73a0095..c89e738 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -67,12 +67,19 @@ private:
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCStreamer &Out);
- bool MatchInstruction(SMLoc IDLoc,
+ bool MatchInstruction(SMLoc IDLoc, unsigned &Kind,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
SmallVectorImpl<MCInst> &MCInsts,
unsigned &OrigErrorInfo,
bool matchingInlineAsm = false);
+ unsigned getMCInstOperandNum(unsigned Kind, MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ unsigned OperandNum, unsigned &NumMCOperands) {
+ return getMCInstOperandNumImpl(Kind, Inst, Operands, OperandNum,
+ NumMCOperands);
+ }
+
/// isSrcOp - Returns true if operand is either (%rsi) or %ds:(%rsi)
/// in 64-bit mode or (%esi) or %es:(%esi) in 32-bit mode.
bool isSrcOp(X86Operand &Op);
@@ -514,12 +521,13 @@ bool X86AsmParser::isDstOp(X86Operand &Op) {
bool X86AsmParser::ParseRegister(unsigned &RegNo,
SMLoc &StartLoc, SMLoc &EndLoc) {
RegNo = 0;
- if (!isParsingIntelSyntax()) {
- const AsmToken &TokPercent = Parser.getTok();
- assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!");
- StartLoc = TokPercent.getLoc();
+ const AsmToken &PercentTok = Parser.getTok();
+ StartLoc = PercentTok.getLoc();
+
+ // If we encounter a %, ignore it. This code handles registers with and
+ // without the prefix; unprefixed registers can occur in CFI directives.
+ if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
Parser.Lex(); // Eat percent token.
- }
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier)) {
@@ -1516,9 +1524,12 @@ bool X86AsmParser::
MatchAndEmitInstruction(SMLoc IDLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCStreamer &Out) {
- SmallVector<MCInst, 2> Insts;
+ unsigned Kind;
unsigned ErrorInfo;
- bool Error = MatchInstruction(IDLoc, Operands, Insts, ErrorInfo);
+ SmallVector<MCInst, 2> Insts;
+
+ bool Error = MatchInstruction(IDLoc, Kind, Operands, Insts,
+ ErrorInfo);
if (!Error)
for (unsigned i = 0, e = Insts.size(); i != e; ++i)
Out.EmitInstruction(Insts[i]);
@@ -1526,7 +1537,7 @@ MatchAndEmitInstruction(SMLoc IDLoc,
}
bool X86AsmParser::
-MatchInstruction(SMLoc IDLoc,
+MatchInstruction(SMLoc IDLoc, unsigned &Kind,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
SmallVectorImpl<MCInst> &MCInsts, unsigned &OrigErrorInfo,
bool matchingInlineAsm) {
@@ -1537,7 +1548,7 @@ MatchInstruction(SMLoc IDLoc,
// First, handle aliases that expand to multiple instructions.
// FIXME: This should be replaced with a real .td file alias mechanism.
- // Also, MatchInstructionImpl should do actually *do* the EmitInstruction
+ // Also, MatchInstructionImpl should actually *do* the EmitInstruction
// call.
if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" ||
Op->getToken() == "fstsww" || Op->getToken() == "fstcww" ||
@@ -1568,7 +1579,7 @@ MatchInstruction(SMLoc IDLoc,
MCInst Inst;
// First, try a direct match.
- switch (MatchInstructionImpl(Operands, Inst, OrigErrorInfo,
+ switch (MatchInstructionImpl(Operands, Kind, Inst, OrigErrorInfo,
isParsingIntelSyntax())) {
default: break;
case Match_Success:
@@ -1585,9 +1596,6 @@ MatchInstruction(SMLoc IDLoc,
Error(IDLoc, "instruction requires a CPU feature not currently enabled",
EmptyRanges, matchingInlineAsm);
return true;
- case Match_ConversionFail:
- return Error(IDLoc, "unable to convert operands to instruction",
- EmptyRanges, matchingInlineAsm);
case Match_InvalidOperand:
WasOriginallyInvalidOperand = true;
break;
@@ -1619,14 +1627,19 @@ MatchInstruction(SMLoc IDLoc,
Tmp[Base.size()] = Suffixes[0];
unsigned ErrorInfoIgnore;
unsigned Match1, Match2, Match3, Match4;
+ unsigned tKind;
- Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
+ Match1 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore);
+ if (Match1 == Match_Success) Kind = tKind;
Tmp[Base.size()] = Suffixes[1];
- Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
+ Match2 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore);
+ if (Match2 == Match_Success) Kind = tKind;
Tmp[Base.size()] = Suffixes[2];
- Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
+ Match3 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore);
+ if (Match3 == Match_Success) Kind = tKind;
Tmp[Base.size()] = Suffixes[3];
- Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
+ Match4 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore);
+ if (Match4 == Match_Success) Kind = tKind;
// Restore the old token.
Op->setTokenValue(Base);
@@ -1677,8 +1690,10 @@ MatchInstruction(SMLoc IDLoc,
if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
(Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
if (!WasOriginallyInvalidOperand) {
+ ArrayRef<SMRange> Ranges = matchingInlineAsm ? EmptyRanges :
+ Op->getLocRange();
return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
- Op->getLocRange(), matchingInlineAsm);
+ Ranges, matchingInlineAsm);
}
// Recover location info for the operand if we know which was the problem.
@@ -1730,7 +1745,10 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
return ParseDirectiveWord(2, DirectiveID.getLoc());
else if (IDVal.startswith(".code"))
return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
- else if (IDVal.startswith(".intel_syntax")) {
+ else if (IDVal.startswith(".att_syntax")) {
+ getParser().setAssemblerDialect(0);
+ return false;
+ } else if (IDVal.startswith(".intel_syntax")) {
getParser().setAssemblerDialect(1);
if (getLexer().isNot(AsmToken::EndOfStatement)) {
if(Parser.getTok().getString() == "noprefix") {
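
A hypothetical end-to-end example of what the new .att_syntax handling permits: inline assembly that switches the integrated assembler into Intel syntax and back. Under noprefix, the %0 operand expands to a bare register name.

int load_one(void) {
  int r;
  __asm__(".intel_syntax noprefix\n\t"
          "mov %0, 1\n\t"
          ".att_syntax prefix"
          : "=r"(r));
  return r;
}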
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 5039887..f136927 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -44,7 +44,7 @@ void x86DisassemblerDebug(const char *file,
dbgs() << file << ":" << line << ": " << s;
}
-const char *x86DisassemblerGetInstrName(unsigned Opcode, void *mii) {
+const char *x86DisassemblerGetInstrName(unsigned Opcode, const void *mii) {
const MCInstrInfo *MII = static_cast<const MCInstrInfo *>(mii);
return MII->getName(Opcode);
}
@@ -95,8 +95,8 @@ const EDInstInfo *X86GenericDisassembler::getEDInfo() const {
/// be a pointer to a MemoryObject.
/// @param byte - A pointer to the byte to be read.
/// @param address - The address to be read.
-static int regionReader(void* arg, uint8_t* byte, uint64_t address) {
- MemoryObject* region = static_cast<MemoryObject*>(arg);
+static int regionReader(const void* arg, uint8_t* byte, uint64_t address) {
+ const MemoryObject* region = static_cast<const MemoryObject*>(arg);
return region->readByte(address, byte);
}
@@ -135,10 +135,10 @@ X86GenericDisassembler::getInstruction(MCInst &instr,
int ret = decodeInstruction(&internalInstr,
regionReader,
- (void*)&region,
+ (const void*)&region,
loggerFn,
(void*)&vStream,
- (void*)MII,
+ (const void*)MII,
address,
fMode);
@@ -379,6 +379,8 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
}
switch (type) {
+ case TYPE_XMM32:
+ case TYPE_XMM64:
case TYPE_XMM128:
mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4)));
return;
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
index 0c92912..af444d1 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -200,7 +200,7 @@ static void unconsumeByte(struct InternalInstruction* insn) {
insn->readerCursor + offset); \
if (ret) \
return ret; \
- combined = combined | ((type)byte << ((type)offset * 8)); \
+ combined = combined | ((uint64_t)byte << (offset * 8)); \
} \
*ptr = combined; \
insn->readerCursor += sizeof(type); \
@@ -719,7 +719,7 @@ static BOOL is16BitEquvalent(const char* orig, const char* equiv) {
* @return - 0 if the ModR/M could be read when needed or was not needed;
* nonzero otherwise.
*/
-static int getID(struct InternalInstruction* insn, void *miiArg) {
+static int getID(struct InternalInstruction* insn, const void *miiArg) {
uint8_t attrMask;
uint16_t instructionID;
@@ -1621,10 +1621,10 @@ static int readOperands(struct InternalInstruction* insn) {
*/
int decodeInstruction(struct InternalInstruction* insn,
byteReader_t reader,
- void* readerArg,
+ const void* readerArg,
dlog_t logger,
void* loggerArg,
- void* miiArg,
+ const void* miiArg,
uint64_t startLoc,
DisassemblerMode mode) {
memset(insn, 0, sizeof(struct InternalInstruction));
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index 797703f..05cbb4c 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -403,7 +403,7 @@ typedef uint8_t BOOL;
* be read from.
* @return - -1 if the byte cannot be read for any reason; 0 otherwise.
*/
-typedef int (*byteReader_t)(void* arg, uint8_t* byte, uint64_t address);
+typedef int (*byteReader_t)(const void* arg, uint8_t* byte, uint64_t address);
/*
* dlog_t - Type for the logging function that the consumer can provide to
@@ -422,7 +422,7 @@ struct InternalInstruction {
/* Reader interface (C) */
byteReader_t reader;
/* Opaque value passed to the reader */
- void* readerArg;
+ const void* readerArg;
/* The address of the next byte to read via the reader */
uint64_t readerCursor;
@@ -561,10 +561,10 @@ struct InternalInstruction {
*/
int decodeInstruction(struct InternalInstruction* insn,
byteReader_t reader,
- void* readerArg,
+ const void* readerArg,
dlog_t logger,
void* loggerArg,
- void* miiArg,
+ const void* miiArg,
uint64_t startLoc,
DisassemblerMode mode);
@@ -579,7 +579,7 @@ void x86DisassemblerDebug(const char *file,
unsigned line,
const char *s);
-const char *x86DisassemblerGetInstrName(unsigned Opcode, void *mii);
+const char *x86DisassemblerGetInstrName(unsigned Opcode, const void *mii);
#ifdef __cplusplus
}
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index 624e56f..4011035 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -941,3 +941,15 @@ and inversion with an rsqrtss instruction, which computes 1/sqrt faster at the
cost of reduced accuracy.
//===---------------------------------------------------------------------===//
+
+This function should be matched to haddpd when the appropriate CPU feature is
+enabled:
+
+#include <x86intrin.h>
+double f (__m128d p) {
+ return p[0] + p[1];
+}
+
+Similarly, v[0]-v[1] should match to hsubpd, and {v[0]-v[1], w[0]-w[1]} should
+turn into hsubpd as well.
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 18e6b7c..d078a7b 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -120,6 +120,9 @@ def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true",
"Support BMI2 instructions">;
def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
"Use LEA for adjusting the stack pointer">;
+def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb",
+ "HasSlowDivide", "true",
+ "Use small divide for positive values less than 256">;
//===----------------------------------------------------------------------===//
// X86 processors supported.
@@ -160,7 +163,8 @@ def : Proc<"core2", [FeatureSSSE3, FeatureCMPXCHG16B,
def : Proc<"penryn", [FeatureSSE41, FeatureCMPXCHG16B,
FeatureSlowBTMem]>;
def : AtomProc<"atom", [ProcIntelAtom, FeatureSSE3, FeatureCMPXCHG16B,
- FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP]>;
+ FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP,
+ FeatureSlowDivide]>;
// "Arrandale" along with corei3 and corei5
def : Proc<"corei7", [FeatureSSE42, FeatureCMPXCHG16B,
FeatureSlowBTMem, FeatureFastUAMem,
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index db71e27..a4785c9 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -233,12 +233,14 @@ void X86AsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo,
void X86AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
- raw_ostream &O, const char *Modifier) {
+ raw_ostream &O, const char *Modifier,
+ unsigned AsmVariant) {
const MachineOperand &MO = MI->getOperand(OpNo);
switch (MO.getType()) {
default: llvm_unreachable("unknown operand type!");
case MachineOperand::MO_Register: {
- O << '%';
+ // FIXME: Enumerate AsmVariant so we can remove this magic number.
+ if (AsmVariant == 0) O << '%';
unsigned Reg = MO.getReg();
if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) {
EVT VT = (strcmp(Modifier+6,"64") == 0) ?
@@ -471,7 +473,7 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
}
}
- printOperand(MI, OpNo, O);
+ printOperand(MI, OpNo, O, /*Modifier*/ 0, AsmVariant);
return false;
}
diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h
index 35386cd..0062387 100644
--- a/lib/Target/X86/X86AsmPrinter.h
+++ b/lib/Target/X86/X86AsmPrinter.h
@@ -50,7 +50,7 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
// These methods are used by the tablegen'erated instruction printer.
void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O,
- const char *Modifier = 0);
+ const char *Modifier = 0, unsigned AsmVariant = 0);
void print_pcrel_imm(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
bool printAsmMRegister(const MachineOperand &MO, char Mode, raw_ostream &O);
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index d705049..e202321 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -26,7 +26,6 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Function.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCExpr.h"
@@ -134,8 +133,7 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
IsPIC = TM.getRelocationModel() == Reloc::PIC_;
do {
- DEBUG(dbgs() << "JITTing function '"
- << MF.getFunction()->getName() << "'\n");
+ DEBUG(dbgs() << "JITTing function '" << MF.getName() << "'\n");
MCE.startFunction(MF);
for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
MBB != E; ++MBB) {
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index e5952aa..54704d8 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -2014,13 +2014,17 @@ X86FastISel::TargetSelectInstruction(const Instruction *I) {
unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
MVT VT;
if (!isTypeLegal(C->getType(), VT))
- return false;
+ return 0;
+
+ // Can't handle alternate code models yet.
+ if (TM.getCodeModel() != CodeModel::Small)
+ return 0;
// Get opcode and regclass of the output for the given load instruction.
unsigned Opc = 0;
const TargetRegisterClass *RC = NULL;
switch (VT.SimpleTy) {
- default: return false;
+ default: return 0;
case MVT::i8:
Opc = X86::MOV8rm;
RC = &X86::GR8RegClass;
@@ -2058,7 +2062,7 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
break;
case MVT::f80:
// No f80 support yet.
- return false;
+ return 0;
}
// Materialize addresses with LEA instructions.
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 955c75a..9d5de81 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -171,6 +171,7 @@ namespace {
// Shuffle live registers to match the expectations of successor blocks.
void finishBlockStack();
+#ifndef NDEBUG
void dumpStack() const {
dbgs() << "Stack contents:";
for (unsigned i = 0; i != StackTop; ++i) {
@@ -181,6 +182,7 @@ namespace {
dbgs() << ", ST" << i << " in FP" << unsigned(PendingST[i]);
dbgs() << "\n";
}
+#endif
/// getSlot - Return the stack slot number a particular register number is
/// in.
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 27195b4..5fdc61e 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -100,6 +100,7 @@ namespace {
Base_Reg = Reg;
}
+#ifndef NDEBUG
void dump() {
dbgs() << "X86ISelAddressMode " << this << '\n';
dbgs() << "Base_Reg ";
@@ -133,6 +134,7 @@ namespace {
dbgs() << "nul";
dbgs() << " JT" << JT << " Align" << Align << '\n';
}
+#endif
};
}
@@ -1011,7 +1013,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
AM.IndexReg = ShVal.getNode()->getOperand(0);
ConstantSDNode *AddVal =
cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
- uint64_t Disp = AddVal->getSExtValue() << Val;
+ uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val;
if (!FoldOffsetIntoAddress(Disp, AM))
return false;
}
@@ -2116,7 +2118,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
// Make sure that we don't change the operation by removing bits.
// This only matters for OR and XOR, AND is unaffected.
- if (Opcode != ISD::AND && ((Val >> ShlVal) << ShlVal) != Val)
+ uint64_t RemovedBitsMask = (1ULL << ShlVal) - 1;
+ if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
break;
unsigned ShlOp, Op;
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 7954170..5c525ae 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -85,7 +85,7 @@ static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128)
* ElemsPerChunk);
- SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32);
+ SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec,
VecIdx);
@@ -118,7 +118,7 @@ static SDValue Insert128BitVector(SDValue Result, SDValue Vec,
unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/128)
* ElemsPerChunk);
- SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32);
+ SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec,
VecIdx);
}
@@ -182,6 +182,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setSchedulingPreference(Sched::RegPressure);
setStackPointerRegisterToSaveRestore(X86StackPtr);
+ // Bypass i32 with i8 on Atom when compiling with O2
+  if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default)
+    addBypassSlowDivType(Type::getInt32Ty(getGlobalContext()),
+                         Type::getInt8Ty(getGlobalContext()));
+
if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) {
// Setup Windows compiler runtime calls.
setLibcallName(RTLIB::SDIV_I64, "_alldiv");
@@ -735,6 +739,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FPOWI, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FSQRT, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FFLOOR, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::SDIVREM, (MVT::SimpleValueType)VT, Expand);
@@ -824,6 +829,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
+ setOperationAction(ISD::FABS, MVT::v4f32, Custom);
setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
@@ -857,6 +863,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
+ setOperationAction(ISD::FABS, MVT::v2f64, Custom);
setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
setOperationAction(ISD::SETCC, MVT::v16i8, Custom);
@@ -925,6 +932,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, Legal);
}
if (Subtarget->hasSSE41()) {
@@ -939,6 +948,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FRINT, MVT::f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
+
// FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
@@ -1016,19 +1028,25 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FMUL, MVT::v8f32, Legal);
setOperationAction(ISD::FDIV, MVT::v8f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v8f32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v8f32, Legal);
setOperationAction(ISD::FNEG, MVT::v8f32, Custom);
+ setOperationAction(ISD::FABS, MVT::v8f32, Custom);
setOperationAction(ISD::FADD, MVT::v4f64, Legal);
setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
+ setOperationAction(ISD::FABS, MVT::v4f64, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, Legal);
+
setOperationAction(ISD::SRL, MVT::v16i16, Custom);
setOperationAction(ISD::SRL, MVT::v32i8, Custom);
@@ -1052,7 +1070,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::VSELECT, MVT::v8i32, Legal);
setOperationAction(ISD::VSELECT, MVT::v8f32, Legal);
- if (Subtarget->hasFMA()) {
+ if (Subtarget->hasFMA() || Subtarget->hasFMA4()) {
setOperationAction(ISD::FMA, MVT::v8f32, Custom);
setOperationAction(ISD::FMA, MVT::v4f64, Custom);
setOperationAction(ISD::FMA, MVT::v4f32, Custom);
@@ -2832,7 +2850,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
MachineFrameInfo *MFI = MF.getFrameInfo();
const MachineRegisterInfo *MRI = &MF.getRegInfo();
const X86InstrInfo *TII =
- ((X86TargetMachine&)getTargetMachine()).getInstrInfo();
+ ((const X86TargetMachine&)getTargetMachine()).getInstrInfo();
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
SDValue Arg = OutVals[i];
@@ -3506,25 +3524,26 @@ SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp,
if (!isUndefOrEqual(Mask[i], MaskToOptimizeOdd[i]))
MatchOddMask = false;
}
- static const int CompactionMaskEven[] = {0, 2, -1, -1, 4, 6, -1, -1};
- static const int CompactionMaskOdd [] = {1, 3, -1, -1, 5, 7, -1, -1};
- const int *CompactionMask;
- if (MatchEvenMask)
- CompactionMask = CompactionMaskEven;
- else if (MatchOddMask)
- CompactionMask = CompactionMaskOdd;
- else
+ if (!MatchEvenMask && !MatchOddMask)
return SDValue();
-
+
SDValue UndefNode = DAG.getNode(ISD::UNDEF, dl, VT);
- SDValue Op0 = DAG.getVectorShuffle(VT, dl, SVOp->getOperand(0),
- UndefNode, CompactionMask);
- SDValue Op1 = DAG.getVectorShuffle(VT, dl, SVOp->getOperand(1),
- UndefNode, CompactionMask);
- static const int UnpackMask[] = {0, 8, 1, 9, 4, 12, 5, 13};
- return DAG.getVectorShuffle(VT, dl, Op0, Op1, UnpackMask);
+ SDValue Op0 = SVOp->getOperand(0);
+ SDValue Op1 = SVOp->getOperand(1);
+
+ if (MatchEvenMask) {
+ // Shift the second operand right to 32 bits.
+ static const int ShiftRightMask[] = {-1, 0, -1, 2, -1, 4, -1, 6 };
+ Op1 = DAG.getVectorShuffle(VT, dl, Op1, UndefNode, ShiftRightMask);
+ } else {
+ // Shift the first operand left to 32 bits.
+ static const int ShiftLeftMask[] = {1, -1, 3, -1, 5, -1, 7, -1 };
+ Op0 = DAG.getVectorShuffle(VT, dl, Op0, UndefNode, ShiftLeftMask);
+ }
+ static const int BlendMask[] = {0, 9, 2, 11, 4, 13, 6, 15};
+ return DAG.getVectorShuffle(VT, dl, Op0, Op1, BlendMask);
}
/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
@@ -4977,6 +4996,18 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
LDBase->getAlignment(),
false/*isVolatile*/, true/*ReadMem*/,
false/*WriteMem*/);
+
+ // Make sure the newly-created LOAD is in the same position as LDBase in
+ // terms of dependency. We create a TokenFactor for LDBase and ResNode, and
+ // update uses of LDBase's output chain to use the TokenFactor.
+ if (LDBase->hasAnyUseOfValue(1)) {
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ SDValue(LDBase, 1), SDValue(ResNode.getNode(), 1));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain);
+ DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1),
+ SDValue(ResNode.getNode(), 1));
+ }
+
return DAG.getNode(ISD::BITCAST, DL, VT, ResNode);
}
return SDValue();
@@ -5881,8 +5912,6 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
DebugLoc dl = SVOp->getDebugLoc();
ArrayRef<int> MaskVals = SVOp->getMask();
- bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
-
// If we have SSSE3, case 1 is generated when all result bytes come from
// one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is
// present, fall back to case 3.
@@ -5906,7 +5935,11 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
DAG.getNode(ISD::BUILD_VECTOR, dl,
MVT::v16i8, &pshufbMask[0], 16));
- if (V2IsUndef)
+
+ // As PSHUFB will zero elements with negative indices, it's safe to ignore
+ // the 2nd operand if it's undefined or zero.
+ if (V2.getOpcode() == ISD::UNDEF ||
+ ISD::isBuildVectorAllZeros(V2.getNode()))
return V1;
// Calculate the shuffle mask for the second input, shuffle it, and
@@ -5992,6 +6025,51 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, NewV);
}
+// v32i8 shuffles - Translate to VPSHUFB if possible.
+static
+SDValue LowerVECTOR_SHUFFLEv32i8(ShuffleVectorSDNode *SVOp,
+ SelectionDAG &DAG,
+ const X86TargetLowering &TLI) {
+ EVT VT = SVOp->getValueType(0);
+ SDValue V1 = SVOp->getOperand(0);
+ SDValue V2 = SVOp->getOperand(1);
+ DebugLoc dl = SVOp->getDebugLoc();
+ SmallVector<int, 32> MaskVals(SVOp->getMask().begin(), SVOp->getMask().end());
+
+ bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
+ bool V1IsAllZero = ISD::isBuildVectorAllZeros(V1.getNode());
+ bool V2IsAllZero = ISD::isBuildVectorAllZeros(V2.getNode());
+
+  // VPSHUFB may be generated if:
+  // (1) one of the input vectors is undefined or a zeroinitializer (the
+  //     mask value 0x80 puts 0 in the corresponding slot of the result), and
+  // (2) the mask indices do not cross a 128-bit lane.
+ if (VT != MVT::v32i8 || !TLI.getSubtarget()->hasAVX2() ||
+ (!V2IsUndef && !V2IsAllZero && !V1IsAllZero))
+ return SDValue();
+
+ if (V1IsAllZero && !V2IsAllZero) {
+ CommuteVectorShuffleMask(MaskVals, 32);
+ V1 = V2;
+ }
+ SmallVector<SDValue, 32> pshufbMask;
+ for (unsigned i = 0; i != 32; i++) {
+ int EltIdx = MaskVals[i];
+ if (EltIdx < 0 || EltIdx >= 32)
+ EltIdx = 0x80;
+ else {
+ if ((EltIdx >= 16 && i < 16) || (EltIdx < 16 && i >= 16))
+        // Crossing a 128-bit lane is not allowed.
+ return SDValue();
+ EltIdx &= 0xf;
+ }
+ pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8));
+ }
+ return DAG.getNode(X86ISD::PSHUFB, dl, MVT::v32i8, V1,
+ DAG.getNode(ISD::BUILD_VECTOR, dl,
+ MVT::v32i8, &pshufbMask[0], 32));
+}
+
/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
/// ones, or rewriting v4i32 / v4f32 as 2 wide ones if possible. This can be
/// done when every pair / quad of shuffle mask elements point to elements in
@@ -6818,6 +6896,12 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return NewOp;
}
+ if (VT == MVT::v32i8) {
+ SDValue NewOp = LowerVECTOR_SHUFFLEv32i8(SVOp, DAG, *this);
+ if (NewOp.getNode())
+ return NewOp;
+ }
+
// Handle all 128-bit wide vectors with 4 elements, and match them with
// several different shuffle types.
if (NumElems == 4 && VT.is128BitVector())
@@ -8115,26 +8199,35 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,
return FIST;
}
-SDValue X86TargetLowering::LowerFABS(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const {
LLVMContext *Context = DAG.getContext();
DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
EVT EltVT = VT;
- if (VT.isVector())
+ unsigned NumElts = VT == MVT::f64 ? 2 : 4;
+ if (VT.isVector()) {
EltVT = VT.getVectorElementType();
- Constant *C;
- if (EltVT == MVT::f64) {
- C = ConstantVector::getSplat(2,
- ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63)))));
- } else {
- C = ConstantVector::getSplat(4,
- ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31)))));
+ NumElts = VT.getVectorNumElements();
}
- SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ Constant *C;
+ if (EltVT == MVT::f64)
+ C = ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63))));
+ else
+ C = ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31))));
+ C = ConstantVector::getSplat(NumElts, C);
+ SDValue CPIdx = DAG.getConstantPool(C, getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
- false, false, false, 16);
+ false, false, false, Alignment);
+ if (VT.isVector()) {
+ MVT ANDVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getNode(ISD::AND, dl, ANDVT,
+ DAG.getNode(ISD::BITCAST, dl, ANDVT,
+ Op.getOperand(0)),
+ DAG.getNode(ISD::BITCAST, dl, ANDVT, Mask)));
+ }
return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask);
}
@@ -8154,10 +8247,11 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const {
else
C = ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31)));
C = ConstantVector::getSplat(NumElts, C);
- SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue CPIdx = DAG.getConstantPool(C, getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
- false, false, false, 16);
+ false, false, false, Alignment);
if (VT.isVector()) {
MVT XORVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
return DAG.getNode(ISD::BITCAST, dl, VT,
@@ -9943,62 +10037,6 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
Op.getOperand(1), Op.getOperand(2), DAG);
}
- // Fix vector shift instructions where the last operand is a non-immediate
- // i32 value.
- case Intrinsic::x86_mmx_pslli_w:
- case Intrinsic::x86_mmx_pslli_d:
- case Intrinsic::x86_mmx_pslli_q:
- case Intrinsic::x86_mmx_psrli_w:
- case Intrinsic::x86_mmx_psrli_d:
- case Intrinsic::x86_mmx_psrli_q:
- case Intrinsic::x86_mmx_psrai_w:
- case Intrinsic::x86_mmx_psrai_d: {
- SDValue ShAmt = Op.getOperand(2);
- if (isa<ConstantSDNode>(ShAmt))
- return SDValue();
-
- unsigned NewIntNo;
- switch (IntNo) {
- default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
- case Intrinsic::x86_mmx_pslli_w:
- NewIntNo = Intrinsic::x86_mmx_psll_w;
- break;
- case Intrinsic::x86_mmx_pslli_d:
- NewIntNo = Intrinsic::x86_mmx_psll_d;
- break;
- case Intrinsic::x86_mmx_pslli_q:
- NewIntNo = Intrinsic::x86_mmx_psll_q;
- break;
- case Intrinsic::x86_mmx_psrli_w:
- NewIntNo = Intrinsic::x86_mmx_psrl_w;
- break;
- case Intrinsic::x86_mmx_psrli_d:
- NewIntNo = Intrinsic::x86_mmx_psrl_d;
- break;
- case Intrinsic::x86_mmx_psrli_q:
- NewIntNo = Intrinsic::x86_mmx_psrl_q;
- break;
- case Intrinsic::x86_mmx_psrai_w:
- NewIntNo = Intrinsic::x86_mmx_psra_w;
- break;
- case Intrinsic::x86_mmx_psrai_d:
- NewIntNo = Intrinsic::x86_mmx_psra_d;
- break;
- }
-
- // The vector shift intrinsics with scalars uses 32b shift amounts but
- // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits
- // to be zero.
- ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, ShAmt,
- DAG.getConstant(0, MVT::i32));
-// FIXME this must be lowered to get rid of the invalid type.
-
- EVT VT = Op.getValueType();
- ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt);
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(NewIntNo, MVT::i32),
- Op.getOperand(1), ShAmt);
- }
case Intrinsic::x86_sse42_pcmpistria128:
case Intrinsic::x86_sse42_pcmpestria128:
case Intrinsic::x86_sse42_pcmpistric128:
@@ -10077,6 +10115,74 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
return DAG.getNode(Opcode, dl, VTs, NewOps.data(), NewOps.size());
}
+ case Intrinsic::x86_fma_vfmadd_ps:
+ case Intrinsic::x86_fma_vfmadd_pd:
+ case Intrinsic::x86_fma_vfmsub_ps:
+ case Intrinsic::x86_fma_vfmsub_pd:
+ case Intrinsic::x86_fma_vfnmadd_ps:
+ case Intrinsic::x86_fma_vfnmadd_pd:
+ case Intrinsic::x86_fma_vfnmsub_ps:
+ case Intrinsic::x86_fma_vfnmsub_pd:
+ case Intrinsic::x86_fma_vfmaddsub_ps:
+ case Intrinsic::x86_fma_vfmaddsub_pd:
+ case Intrinsic::x86_fma_vfmsubadd_ps:
+ case Intrinsic::x86_fma_vfmsubadd_pd:
+ case Intrinsic::x86_fma_vfmadd_ps_256:
+ case Intrinsic::x86_fma_vfmadd_pd_256:
+ case Intrinsic::x86_fma_vfmsub_ps_256:
+ case Intrinsic::x86_fma_vfmsub_pd_256:
+ case Intrinsic::x86_fma_vfnmadd_ps_256:
+ case Intrinsic::x86_fma_vfnmadd_pd_256:
+ case Intrinsic::x86_fma_vfnmsub_ps_256:
+ case Intrinsic::x86_fma_vfnmsub_pd_256:
+ case Intrinsic::x86_fma_vfmaddsub_ps_256:
+ case Intrinsic::x86_fma_vfmaddsub_pd_256:
+ case Intrinsic::x86_fma_vfmsubadd_ps_256:
+ case Intrinsic::x86_fma_vfmsubadd_pd_256: {
+ unsigned Opc;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_fma_vfmadd_ps:
+ case Intrinsic::x86_fma_vfmadd_pd:
+ case Intrinsic::x86_fma_vfmadd_ps_256:
+ case Intrinsic::x86_fma_vfmadd_pd_256:
+ Opc = X86ISD::FMADD;
+ break;
+ case Intrinsic::x86_fma_vfmsub_ps:
+ case Intrinsic::x86_fma_vfmsub_pd:
+ case Intrinsic::x86_fma_vfmsub_ps_256:
+ case Intrinsic::x86_fma_vfmsub_pd_256:
+ Opc = X86ISD::FMSUB;
+ break;
+ case Intrinsic::x86_fma_vfnmadd_ps:
+ case Intrinsic::x86_fma_vfnmadd_pd:
+ case Intrinsic::x86_fma_vfnmadd_ps_256:
+ case Intrinsic::x86_fma_vfnmadd_pd_256:
+ Opc = X86ISD::FNMADD;
+ break;
+ case Intrinsic::x86_fma_vfnmsub_ps:
+ case Intrinsic::x86_fma_vfnmsub_pd:
+ case Intrinsic::x86_fma_vfnmsub_ps_256:
+ case Intrinsic::x86_fma_vfnmsub_pd_256:
+ Opc = X86ISD::FNMSUB;
+ break;
+ case Intrinsic::x86_fma_vfmaddsub_ps:
+ case Intrinsic::x86_fma_vfmaddsub_pd:
+ case Intrinsic::x86_fma_vfmaddsub_ps_256:
+ case Intrinsic::x86_fma_vfmaddsub_pd_256:
+ Opc = X86ISD::FMADDSUB;
+ break;
+ case Intrinsic::x86_fma_vfmsubadd_ps:
+ case Intrinsic::x86_fma_vfmsubadd_pd:
+ case Intrinsic::x86_fma_vfmsubadd_ps_256:
+ case Intrinsic::x86_fma_vfmsubadd_pd_256:
+ Opc = X86ISD::FMSUBADD;
+ break;
+ }
+
+ return DAG.getNode(Opc, dl, Op.getValueType(), Op.getOperand(1),
+ Op.getOperand(2), Op.getOperand(3));
+ }
}
}
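
For reference, a sketch of source code that reaches this lowering (intrinsic names per <immintrin.h>; requires an FMA-capable subtarget). Turning the intrinsics into X86ISD nodes lets instruction selection choose FMA3 or FMA4 encodings uniformly.

#include <immintrin.h>

__m256 fused(__m256 a, __m256 b, __m256 c) {
  return _mm256_fmadd_ps(a, b, c);    // becomes X86ISD::FMADD
}

__m256d alternating(__m256d a, __m256d b, __m256d c) {
  return _mm256_fmaddsub_pd(a, b, c); // becomes X86ISD::FMADDSUB
}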
@@ -10918,7 +11024,7 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
LHS1 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, Extra);
LHS2 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, Extra);
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, LHS1, LHS2);;
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, LHS1, LHS2);
}
// fall through
case MVT::v4i32:
@@ -14020,7 +14126,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
//
// where Op could be BRCOND or CMOV.
//
-static SDValue BoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
+static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
// Quit if not CMP and SUB with its value result used.
if (Cmp.getOpcode() != X86ISD::CMP &&
(Cmp.getOpcode() != X86ISD::SUB || Cmp.getNode()->hasAnyUseOfValue(0)))
@@ -14056,40 +14162,133 @@ static SDValue BoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
if (SetCC.getOpcode() == ISD::ZERO_EXTEND)
SetCC = SetCC.getOperand(0);
- // Quit if not SETCC.
- // FIXME: So far we only handle the boolean value generated from SETCC. If
- // there is other ways to generate boolean values, we need handle them here
- // as well.
- if (SetCC.getOpcode() != X86ISD::SETCC)
+ switch (SetCC.getOpcode()) {
+ case X86ISD::SETCC:
+ // Set the condition code or opposite one if necessary.
+ CC = X86::CondCode(SetCC.getConstantOperandVal(0));
+ if (needOppositeCond)
+ CC = X86::GetOppositeBranchCondition(CC);
+ return SetCC.getOperand(1);
+ case X86ISD::CMOV: {
+ // Check whether false/true value has canonical one, i.e. 0 or 1.
+ ConstantSDNode *FVal = dyn_cast<ConstantSDNode>(SetCC.getOperand(0));
+ ConstantSDNode *TVal = dyn_cast<ConstantSDNode>(SetCC.getOperand(1));
+ // Quit if true value is not a constant.
+ if (!TVal)
+ return SDValue();
+ // Quit if false value is not a constant.
+ if (!FVal) {
+ // A special case for rdrand, where 0 is set if false cond is found.
+ SDValue Op = SetCC.getOperand(0);
+ if (Op.getOpcode() != X86ISD::RDRAND)
+ return SDValue();
+ }
+ // Quit if false value is not the constant 0 or 1.
+ bool FValIsFalse = true;
+ if (FVal && FVal->getZExtValue() != 0) {
+ if (FVal->getZExtValue() != 1)
+ return SDValue();
+ // If FVal is 1, opposite cond is needed.
+ needOppositeCond = !needOppositeCond;
+ FValIsFalse = false;
+ }
+ // Quit if TVal is not the constant opposite of FVal.
+ if (FValIsFalse && TVal->getZExtValue() != 1)
+ return SDValue();
+ if (!FValIsFalse && TVal->getZExtValue() != 0)
+ return SDValue();
+ CC = X86::CondCode(SetCC.getConstantOperandVal(2));
+ if (needOppositeCond)
+ CC = X86::GetOppositeBranchCondition(CC);
+ return SetCC.getOperand(3);
+ }
+ }
+
+ return SDValue();
+}
+
+/// checkFlaggedOrCombine - DAG combination on X86ISD::OR, i.e. with EFLAGS
+/// updated. If only the flag result is used and that result is computed from
+/// a series of element extractions, try to combine it into a PTEST.
+static SDValue checkFlaggedOrCombine(SDValue Or, X86::CondCode &CC,
+ SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ SDNode *N = Or.getNode();
+ DebugLoc DL = N->getDebugLoc();
+
+ // Only SSE4.1 and beyond support PTEST and the like.
+ if (!Subtarget->hasSSE41())
return SDValue();
- // Set the condition code or opposite one if necessary.
- CC = X86::CondCode(SetCC.getConstantOperandVal(0));
- if (needOppositeCond)
- CC = X86::GetOppositeBranchCondition(CC);
+ if (N->getOpcode() != X86ISD::OR)
+ return SDValue();
- return SetCC.getOperand(1);
-}
+ // Quit if the value result of OR is used.
+ if (N->hasAnyUseOfValue(0))
+ return SDValue();
-static bool IsValidFCMOVCondition(X86::CondCode CC) {
- switch (CC) {
- default:
- return false;
- case X86::COND_B:
- case X86::COND_BE:
- case X86::COND_E:
- case X86::COND_P:
- case X86::COND_AE:
- case X86::COND_A:
- case X86::COND_NE:
- case X86::COND_NP:
- return true;
+ // Quit if not used as a boolean value.
+ if (CC != X86::COND_E && CC != X86::COND_NE)
+ return SDValue();
+
+ SmallVector<SDValue, 8> Opnds;
+ SDValue VecIn;
+ EVT VT = MVT::Other;
+ unsigned Mask = 0;
+
+  // Recognize a special case where a vector is cast into a wide integer to
+  // test for all 0s.
+ Opnds.push_back(N->getOperand(0));
+ Opnds.push_back(N->getOperand(1));
+
+ for (unsigned Slot = 0, e = Opnds.size(); Slot < e; ++Slot) {
+ SmallVector<SDValue, 8>::const_iterator I = Opnds.begin() + Slot;
+ // BFS traverse all OR'd operands.
+ if (I->getOpcode() == ISD::OR) {
+ Opnds.push_back(I->getOperand(0));
+ Opnds.push_back(I->getOperand(1));
+ // Re-evaluate the number of nodes to be traversed.
+ e += 2; // 2 more nodes (LHS and RHS) are pushed.
+ continue;
+ }
+
+    // Quit if this is not an EXTRACT_VECTOR_ELT.
+ if (I->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
+
+    // Quit if the index is not a constant.
+ SDValue Idx = I->getOperand(1);
+ if (!isa<ConstantSDNode>(Idx))
+ return SDValue();
+
+ // Check if all elements are extracted from the same vector.
+ SDValue ExtractedFromVec = I->getOperand(0);
+ if (VecIn.getNode() == 0) {
+ VT = ExtractedFromVec.getValueType();
+ // FIXME: only 128-bit vector is supported so far.
+ if (!VT.is128BitVector())
+ return SDValue();
+ VecIn = ExtractedFromVec;
+ } else if (VecIn != ExtractedFromVec)
+ return SDValue();
+
+ // Record the constant index.
+ Mask |= 1U << cast<ConstantSDNode>(Idx)->getZExtValue();
}
+
+  assert(VT.is128BitVector() &&
+         "Only 128-bit vector PTEST is supported so far.");
+
+ // Quit if not all elements are used.
+ if (Mask != (1U << VT.getVectorNumElements()) - 1U)
+ return SDValue();
+
+ return DAG.getNode(X86ISD::PTEST, DL, MVT::i32, VecIn, VecIn);
}
/// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
DebugLoc DL = N->getDebugLoc();
// If the flag operand isn't dead, don't touch this CMOV.
@@ -14114,10 +14313,18 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
SDValue Flags;
- Flags = BoolTestSetCCCombine(Cond, CC);
+ Flags = checkBoolTestSetCCCombine(Cond, CC);
if (Flags.getNode() &&
// Extra check as FCMOV only supports a subset of X86 cond.
- (FalseOp.getValueType() != MVT::f80 || IsValidFCMOVCondition(CC))) {
+ (FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC))) {
+ SDValue Ops[] = { FalseOp, TrueOp,
+ DAG.getConstant(CC, MVT::i8), Flags };
+ return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(),
+ Ops, array_lengthof(Ops));
+ }
+
+ Flags = checkFlaggedOrCombine(Cond, CC, DAG, Subtarget);
+ if (Flags.getNode()) {
SDValue Ops[] = { FalseOp, TrueOp,
DAG.getConstant(CC, MVT::i8), Flags };
return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(),
@@ -15384,7 +15591,7 @@ static SDValue PerformFMinFMaxCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
// If we run in unsafe-math mode, then convert the FMAX and FMIN nodes
- // into FMINC and MMAXC, which are Commutative operations.
+ // into FMINC and FMAXC, which are Commutative operations.
unsigned NewOp = 0;
switch (N->getOpcode()) {
default: llvm_unreachable("unknown opcode");
@@ -15502,8 +15709,13 @@ static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG,
DebugLoc dl = N->getDebugLoc();
EVT VT = N->getValueType(0);
+ // Let legalize expand this if it isn't a legal type yet.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
EVT ScalarVT = VT.getScalarType();
- if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget->hasFMA())
+ if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) ||
+ (!Subtarget->hasFMA() && !Subtarget->hasFMA4()))
return SDValue();
SDValue A = N->getOperand(0);
@@ -15525,9 +15737,10 @@ static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG,
unsigned Opcode;
if (!NegMul)
- Opcode = (!NegC)? X86ISD::FMADD : X86ISD::FMSUB;
+ Opcode = (!NegC) ? X86ISD::FMADD : X86ISD::FMSUB;
else
- Opcode = (!NegC)? X86ISD::FNMADD : X86ISD::FNMSUB;
+ Opcode = (!NegC) ? X86ISD::FNMADD : X86ISD::FNMSUB;
+
return DAG.getNode(Opcode, dl, VT, A, B, C);
}
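The opcode selection above is a two-bit truth table; a standalone sketch (the enum is an illustrative placeholder for the X86ISD opcodes):

enum FmaKind { FMADD, FMSUB, FNMADD, FNMSUB };

// NegMul picks the negated-multiply family and NegC the subtract flavor:
// FMADD = a*b+c, FMSUB = a*b-c, FNMADD = -(a*b)+c, FNMSUB = -(a*b)-c.
FmaKind selectFmaKind(bool NegMul, bool NegC) {
  if (!NegMul)
    return NegC ? FMSUB : FMADD;
  return NegC ? FNMSUB : FNMADD;
}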
@@ -15625,7 +15838,9 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG) {
}
// Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT
-static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
DebugLoc DL = N->getDebugLoc();
X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(0));
SDValue EFLAGS = N->getOperand(1);
@@ -15641,7 +15856,13 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) {
SDValue Flags;
- Flags = BoolTestSetCCCombine(EFLAGS, CC);
+ Flags = checkBoolTestSetCCCombine(EFLAGS, CC);
+ if (Flags.getNode()) {
+ SDValue Cond = DAG.getConstant(CC, MVT::i8);
+ return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags);
+ }
+
+ Flags = checkFlaggedOrCombine(EFLAGS, CC, DAG, Subtarget);
if (Flags.getNode()) {
SDValue Cond = DAG.getConstant(CC, MVT::i8);
return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags);
@@ -15663,7 +15884,14 @@ static SDValue PerformBrCondCombine(SDNode *N, SelectionDAG &DAG,
SDValue Flags;
- Flags = BoolTestSetCCCombine(EFLAGS, CC);
+ Flags = checkBoolTestSetCCCombine(EFLAGS, CC);
+ if (Flags.getNode()) {
+ SDValue Cond = DAG.getConstant(CC, MVT::i8);
+ return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), Chain, Dest, Cond,
+ Flags);
+ }
+
+ Flags = checkFlaggedOrCombine(EFLAGS, CC, DAG, Subtarget);
if (Flags.getNode()) {
SDValue Cond = DAG.getConstant(CC, MVT::i8);
return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), Chain, Dest, Cond,
@@ -15858,7 +16086,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, DCI);
case ISD::VSELECT:
case ISD::SELECT: return PerformSELECTCombine(N, DAG, DCI, Subtarget);
- case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI);
+ case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI, Subtarget);
case ISD::ADD: return PerformAddCombine(N, DAG, Subtarget);
case ISD::SUB: return PerformSubCombine(N, DAG, Subtarget);
case X86ISD::ADC: return PerformADCCombine(N, DAG, DCI);
@@ -15888,7 +16116,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget);
case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG, DCI);
case ISD::SETCC: return PerformISDSETCCCombine(N, DAG);
- case X86ISD::SETCC: return PerformSETCCCombine(N, DAG);
+ case X86ISD::SETCC: return PerformSETCCCombine(N, DAG, DCI, Subtarget);
case X86ISD::BRCOND: return PerformBrCondCombine(N, DAG, DCI, Subtarget);
case X86ISD::SHUFP: // Handle all target specific shuffles
case X86ISD::PALIGN:
diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td
index b0c27c8..bfe9541 100644
--- a/lib/Target/X86/X86InstrControl.td
+++ b/lib/Target/X86/X86InstrControl.td
@@ -16,15 +16,18 @@
//
// Return instructions.
+//
+// The X86retflag return instructions are variadic because we may add ST0 and
+// ST1 arguments when returning values on the x87 stack.
let isTerminator = 1, isReturn = 1, isBarrier = 1,
hasCtrlDep = 1, FPForm = SpecialFP in {
- def RET : I <0xC3, RawFrm, (outs), (ins),
+ def RET : I <0xC3, RawFrm, (outs), (ins variable_ops),
"ret",
[(X86retflag 0)], IIC_RET>;
def RETW : I <0xC3, RawFrm, (outs), (ins),
"ret{w}",
[], IIC_RET>, OpSize;
- def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt),
+ def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
"ret\t$amt",
[(X86retflag timm:$amt)], IIC_RET_IMM>;
def RETIW : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt),
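The x87 case motivating the variadic forms, in source terms: a long double result travels back on the x87 register stack, so the return instruction picks up an implicit ST0 operand (and ST1 for complex values); a minimal example:

// On x86, the result is left in ST0, which the variadic RET can now list
// as an operand.
long double twice(long double X) {
  return X + X;
}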
diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td
index 95ee7e5..5663800 100644
--- a/lib/Target/X86/X86InstrFMA.td
+++ b/lib/Target/X86/X86InstrFMA.td
@@ -19,7 +19,8 @@ let Constraints = "$src1 = $dst" in {
multiclass fma3p_rm<bits<8> opc, string OpcodeStr,
PatFrag MemFrag128, PatFrag MemFrag256,
ValueType OpVT128, ValueType OpVT256,
- SDPatternOperator Op = null_frag, bit MayLoad = 1> {
+ SDPatternOperator Op = null_frag> {
+ let isCommutable = 1 in
def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
@@ -27,7 +28,7 @@ multiclass fma3p_rm<bits<8> opc, string OpcodeStr,
[(set VR128:$dst, (OpVT128 (Op VR128:$src2,
VR128:$src1, VR128:$src3)))]>;
- let mayLoad = MayLoad in
+ let mayLoad = 1 in
def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, f128mem:$src3),
!strconcat(OpcodeStr,
@@ -35,6 +36,7 @@ multiclass fma3p_rm<bits<8> opc, string OpcodeStr,
[(set VR128:$dst, (OpVT128 (Op VR128:$src2, VR128:$src1,
(MemFrag128 addr:$src3))))]>;
+ let isCommutable = 1 in
def rY : FMA3<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3),
!strconcat(OpcodeStr,
@@ -42,7 +44,7 @@ multiclass fma3p_rm<bits<8> opc, string OpcodeStr,
[(set VR256:$dst, (OpVT256 (Op VR256:$src2, VR256:$src1,
VR256:$src3)))]>;
- let mayLoad = MayLoad in
+ let mayLoad = 1 in
def mY : FMA3<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, f256mem:$src3),
!strconcat(OpcodeStr,
@@ -59,7 +61,7 @@ multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
SDNode Op, ValueType OpTy128, ValueType OpTy256> {
defm r213 : fma3p_rm<opc213,
!strconcat(OpcodeStr, !strconcat("213", PackTy)),
- MemFrag128, MemFrag256, OpTy128, OpTy256, Op, 0>;
+ MemFrag128, MemFrag256, OpTy128, OpTy256, Op>;
let neverHasSideEffects = 1 in {
defm r132 : fma3p_rm<opc132,
!strconcat(OpcodeStr, !strconcat("132", PackTy)),
@@ -112,148 +114,18 @@ let ExeDomain = SSEPackedDouble in {
v4f64>, VEX_W;
}
-let Predicates = [HasFMA] in {
- def : Pat<(int_x86_fma_vfmadd_ps VR128:$src2, VR128:$src1, VR128:$src3),
- (VFMADDPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
- def : Pat<(int_x86_fma_vfmadd_ps VR128:$src2, VR128:$src1,
- (memopv4f32 addr:$src3)),
- (VFMADDPSr213m VR128:$src1, VR128:$src2, addr:$src3)>;
- def : Pat<(int_x86_fma_vfmsub_ps VR128:$src2, VR128:$src1, VR128:$src3),
- (VFMSUBPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
- def : Pat<(int_x86_fma_vfmsub_ps VR128:$src2, VR128:$src1,
- (memopv4f32 addr:$src3)),
- (VFMSUBPSr213m VR128:$src1, VR128:$src2, addr:$src3)>;
- def : Pat<(int_x86_fma_vfmaddsub_ps VR128:$src2, VR128:$src1, VR128:$src3),
- (VFMADDSUBPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
- def : Pat<(int_x86_fma_vfmaddsub_ps VR128:$src2, VR128:$src1,
- (memopv4f32 addr:$src3)),
- (VFMADDSUBPSr213m VR128:$src1, VR128:$src2, addr:$src3)>;
- def : Pat<(int_x86_fma_vfmsubadd_ps VR128:$src2, VR128:$src1, VR128:$src3),
- (VFMSUBADDPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
- def : Pat<(int_x86_fma_vfmsubadd_ps VR128:$src2, VR128:$src1,
- (memopv4f32 addr:$src3)),
- (VFMSUBADDPSr213m VR128:$src1, VR128:$src2, addr:$src3)>;
-
- def : Pat<(int_x86_fma_vfmadd_ps_256 VR256:$src2, VR256:$src1, VR256:$src3),
- (VFMADDPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
- def : Pat<(int_x86_fma_vfmadd_ps_256 VR256:$src2, VR256:$src1,
- (memopv8f32 addr:$src3)),
- (VFMADDPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
- def : Pat<(int_x86_fma_vfmsub_ps_256 VR256:$src2, VR256:$src1, VR256:$src3),
- (VFMSUBPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
- def : Pat<(int_x86_fma_vfmsub_ps_256 VR256:$src2, VR256:$src1,
- (memopv8f32 addr:$src3)),
- (VFMSUBPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
- def : Pat<(int_x86_fma_vfmaddsub_ps_256 VR256:$src2, VR256:$src1, VR256:$src3),
- (VFMADDSUBPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
- def : Pat<(int_x86_fma_vfmaddsub_ps_256 VR256:$src2, VR256:$src1,
- (memopv8f32 addr:$src3)),
- (VFMADDSUBPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
- def : Pat<(int_x86_fma_vfmsubadd_ps_256 VR256:$src2, VR256:$src1, VR256:$src3),
- (VFMSUBADDPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
- def : Pat<(int_x86_fma_vfmsubadd_ps_256 VR256:$src2, VR256:$src1,
- (memopv8f32 addr:$src3)),
- (VFMSUBADDPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
-
- def : Pat<(int_x86_fma_vfmadd_pd VR128:$src2, VR128:$src1, VR128:$src3),
- (VFMADDPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
- def : Pat<(int_x86_fma_vfmadd_pd VR128:$src2, VR128:$src1,
- (memopv2f64 addr:$src3)),
- (VFMADDPDr213m VR128:$src1, VR128:$src2, addr:$src3)>;
- def : Pat<(int_x86_fma_vfmsub_pd VR128:$src2, VR128:$src1, VR128:$src3),
- (VFMSUBPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
- def : Pat<(int_x86_fma_vfmsub_pd VR128:$src2, VR128:$src1,
- (memopv2f64 addr:$src3)),
- (VFMSUBPDr213m VR128:$src1, VR128:$src2, addr:$src3)>;
- def : Pat<(int_x86_fma_vfmaddsub_pd VR128:$src2, VR128:$src1, VR128:$src3),
- (VFMADDSUBPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
- def : Pat<(int_x86_fma_vfmaddsub_pd VR128:$src2, VR128:$src1,
- (memopv2f64 addr:$src3)),
- (VFMADDSUBPDr213m VR128:$src1, VR128:$src2, addr:$src3)>;
- def : Pat<(int_x86_fma_vfmsubadd_pd VR128:$src2, VR128:$src1, VR128:$src3),
- (VFMSUBADDPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
- def : Pat<(int_x86_fma_vfmsubadd_pd VR128:$src2, VR128:$src1,
- (memopv2f64 addr:$src3)),
- (VFMSUBADDPDr213m VR128:$src1, VR128:$src2, addr:$src3)>;
-
- def : Pat<(int_x86_fma_vfmadd_pd_256 VR256:$src2, VR256:$src1, VR256:$src3),
- (VFMADDPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
- def : Pat<(int_x86_fma_vfmadd_pd_256 VR256:$src2, VR256:$src1,
- (memopv4f64 addr:$src3)),
- (VFMADDPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
- def : Pat<(int_x86_fma_vfmsub_pd_256 VR256:$src2, VR256:$src1, VR256:$src3),
- (VFMSUBPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
- def : Pat<(int_x86_fma_vfmsub_pd_256 VR256:$src2, VR256:$src1,
- (memopv4f64 addr:$src3)),
- (VFMSUBPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
- def : Pat<(int_x86_fma_vfmaddsub_pd_256 VR256:$src2, VR256:$src1, VR256:$src3),
- (VFMADDSUBPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
- def : Pat<(int_x86_fma_vfmaddsub_pd_256 VR256:$src2, VR256:$src1,
- (memopv4f64 addr:$src3)),
- (VFMADDSUBPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
- def : Pat<(int_x86_fma_vfmsubadd_pd_256 VR256:$src2, VR256:$src1, VR256:$src3),
- (VFMSUBADDPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
- def : Pat<(int_x86_fma_vfmsubadd_pd_256 VR256:$src2, VR256:$src1,
- (memopv4f64 addr:$src3)),
- (VFMSUBADDPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
-
- def : Pat<(int_x86_fma_vfnmadd_ps VR128:$src2, VR128:$src1, VR128:$src3),
- (VFNMADDPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
- def : Pat<(int_x86_fma_vfnmadd_ps VR128:$src2, VR128:$src1,
- (memopv4f32 addr:$src3)),
- (VFNMADDPSr213m VR128:$src1, VR128:$src2, addr:$src3)>;
- def : Pat<(int_x86_fma_vfnmsub_ps VR128:$src2, VR128:$src1, VR128:$src3),
- (VFNMSUBPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
- def : Pat<(int_x86_fma_vfnmsub_ps VR128:$src2, VR128:$src1,
- (memopv4f32 addr:$src3)),
- (VFNMSUBPSr213m VR128:$src1, VR128:$src2, addr:$src3)>;
-
- def : Pat<(int_x86_fma_vfnmadd_ps_256 VR256:$src2, VR256:$src1, VR256:$src3),
- (VFNMADDPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
- def : Pat<(int_x86_fma_vfnmadd_ps_256 VR256:$src2, VR256:$src1,
- (memopv8f32 addr:$src3)),
- (VFNMADDPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
- def : Pat<(int_x86_fma_vfnmsub_ps_256 VR256:$src2, VR256:$src1, VR256:$src3),
- (VFNMSUBPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
- def : Pat<(int_x86_fma_vfnmsub_ps_256 VR256:$src2, VR256:$src1,
- (memopv8f32 addr:$src3)),
- (VFNMSUBPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
-
- def : Pat<(int_x86_fma_vfnmadd_pd VR128:$src2, VR128:$src1, VR128:$src3),
- (VFNMADDPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
- def : Pat<(int_x86_fma_vfnmadd_pd VR128:$src2, VR128:$src1,
- (memopv2f64 addr:$src3)),
- (VFNMADDPDr213m VR128:$src1, VR128:$src2, addr:$src3)>;
- def : Pat<(int_x86_fma_vfnmsub_pd VR128:$src2, VR128:$src1, VR128:$src3),
- (VFNMSUBPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
- def : Pat<(int_x86_fma_vfnmsub_pd VR128:$src2, VR128:$src1,
- (memopv2f64 addr:$src3)),
- (VFNMSUBPDr213m VR128:$src1, VR128:$src2, addr:$src3)>;
-
- def : Pat<(int_x86_fma_vfnmadd_pd_256 VR256:$src2, VR256:$src1, VR256:$src3),
- (VFNMADDPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
- def : Pat<(int_x86_fma_vfnmadd_pd_256 VR256:$src2, VR256:$src1,
- (memopv4f64 addr:$src3)),
- (VFNMADDPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
- def : Pat<(int_x86_fma_vfnmsub_pd_256 VR256:$src2, VR256:$src1, VR256:$src3),
- (VFNMSUBPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
- def : Pat<(int_x86_fma_vfnmsub_pd_256 VR256:$src2, VR256:$src1,
- (memopv4f64 addr:$src3)),
- (VFNMSUBPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
-
-} // Predicates = [HasFMA]
-
let Constraints = "$src1 = $dst" in {
multiclass fma3s_rm<bits<8> opc, string OpcodeStr, X86MemOperand x86memop,
RegisterClass RC, ValueType OpVT, PatFrag mem_frag,
- SDPatternOperator OpNode = null_frag, bit MayLoad = 1> {
+ SDPatternOperator OpNode = null_frag> {
+ let isCommutable = 1 in
def r : FMA3<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
(OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>;
- let mayLoad = MayLoad in
+ let mayLoad = 1 in
def m : FMA3<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
@@ -266,6 +138,7 @@ multiclass fma3s_rm<bits<8> opc, string OpcodeStr, X86MemOperand x86memop,
multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr, Operand memop,
ComplexPattern mem_cpat, Intrinsic IntId,
RegisterClass RC> {
+ let isCommutable = 1 in
def r_Int : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
@@ -294,7 +167,7 @@ let neverHasSideEffects = 1 in {
}
defm r213 : fma3s_rm<opc213, !strconcat(OpStr, !strconcat("213", PackTy)),
- x86memop, RC, OpVT, mem_frag, OpNode, 0>,
+ x86memop, RC, OpVT, mem_frag, OpNode>,
fma3s_rm_int<opc213, !strconcat(OpStr, !strconcat("213", PackTy)),
memop, mem_cpat, Int, RC>;
}
@@ -324,73 +197,102 @@ defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", int_x86_fma_vfnmsub_ss,
//===----------------------------------------------------------------------===//
-multiclass fma4s<bits<8> opc, string OpcodeStr, Operand memop,
- ComplexPattern mem_cpat, Intrinsic Int> {
- def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, VR128:$src3),
+multiclass fma4s<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ X86MemOperand x86memop, ValueType OpVT, SDNode OpNode,
+ PatFrag mem_frag> {
+ let isCommutable = 1 in
+ def rr : FMA4<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR128:$dst,
- (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, MemOp4;
- def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, memop:$src3),
+ [(set RC:$dst,
+ (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>, VEX_W, MemOp4;
+ def rm : FMA4<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR128:$dst,
- (Int VR128:$src1, VR128:$src2, mem_cpat:$src3))]>, VEX_W, MemOp4;
- def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, memop:$src2, VR128:$src3),
+ [(set RC:$dst, (OpNode RC:$src1, RC:$src2,
+ (mem_frag addr:$src3)))]>, VEX_W, MemOp4;
+ def mr : FMA4<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR128:$dst,
- (Int VR128:$src1, mem_cpat:$src2, VR128:$src3))]>;
+ [(set RC:$dst,
+ (OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3))]>;
// For disassembler
let isCodeGenOnly = 1 in
- def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ def rr_REV : FMA4<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>;
}
-multiclass fma4p<bits<8> opc, string OpcodeStr,
- Intrinsic Int128, Intrinsic Int256,
+multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop,
+ ComplexPattern mem_cpat, Intrinsic Int> {
+ let isCommutable = 1 in
+ def rr_Int : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, MemOp4;
+ def rm_Int : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, memop:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst, (Int VR128:$src1, VR128:$src2,
+ mem_cpat:$src3))]>, VEX_W, MemOp4;
+ def mr_Int : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, memop:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (Int VR128:$src1, mem_cpat:$src2, VR128:$src3))]>;
+}
+
+multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT128, ValueType OpVT256,
PatFrag ld_frag128, PatFrag ld_frag256> {
+ let isCommutable = 1 in
def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
- (Int128 VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, MemOp4;
+ (OpVT128 (OpNode VR128:$src1, VR128:$src2, VR128:$src3)))]>,
+ VEX_W, MemOp4;
def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, f128mem:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR128:$dst, (Int128 VR128:$src1, VR128:$src2,
+ [(set VR128:$dst, (OpNode VR128:$src1, VR128:$src2,
(ld_frag128 addr:$src3)))]>, VEX_W, MemOp4;
def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
- (Int128 VR128:$src1, (ld_frag128 addr:$src2), VR128:$src3))]>;
+ (OpNode VR128:$src1, (ld_frag128 addr:$src2), VR128:$src3))]>;
+ let isCommutable = 1 in
def rrY : FMA4<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR256:$dst,
- (Int256 VR256:$src1, VR256:$src2, VR256:$src3))]>, VEX_W, MemOp4;
+ (OpVT256 (OpNode VR256:$src1, VR256:$src2, VR256:$src3)))]>,
+ VEX_W, MemOp4;
def rmY : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, f256mem:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR256:$dst, (Int256 VR256:$src1, VR256:$src2,
+ [(set VR256:$dst, (OpNode VR256:$src1, VR256:$src2,
(ld_frag256 addr:$src3)))]>, VEX_W, MemOp4;
def mrY : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, VR256:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR256:$dst,
- (Int256 VR256:$src1, (ld_frag256 addr:$src2), VR256:$src3))]>;
+ (OpNode VR256:$src1, (ld_frag256 addr:$src2), VR256:$src3))]>;
// For disassembler
let isCodeGenOnly = 1 in {
def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
@@ -406,45 +308,58 @@ let isCodeGenOnly = 1 in {
let Predicates = [HasFMA4] in {
-defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", ssmem, sse_load_f32,
- int_x86_fma_vfmadd_ss>;
-defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", sdmem, sse_load_f64,
- int_x86_fma_vfmadd_sd>;
-defm VFMADDPS4 : fma4p<0x68, "vfmaddps", int_x86_fma_vfmadd_ps,
- int_x86_fma_vfmadd_ps_256, memopv4f32, memopv8f32>;
-defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", int_x86_fma_vfmadd_pd,
- int_x86_fma_vfmadd_pd_256, memopv2f64, memopv4f64>;
-defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", ssmem, sse_load_f32,
- int_x86_fma_vfmsub_ss>;
-defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", sdmem, sse_load_f64,
- int_x86_fma_vfmsub_sd>;
-defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", int_x86_fma_vfmsub_ps,
- int_x86_fma_vfmsub_ps_256, memopv4f32, memopv8f32>;
-defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", int_x86_fma_vfmsub_pd,
- int_x86_fma_vfmsub_pd_256, memopv2f64, memopv4f64>;
-defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", ssmem, sse_load_f32,
- int_x86_fma_vfnmadd_ss>;
-defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", sdmem, sse_load_f64,
- int_x86_fma_vfnmadd_sd>;
-defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps", int_x86_fma_vfnmadd_ps,
- int_x86_fma_vfnmadd_ps_256, memopv4f32, memopv8f32>;
-defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd", int_x86_fma_vfnmadd_pd,
- int_x86_fma_vfnmadd_pd_256, memopv2f64, memopv4f64>;
-defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", ssmem, sse_load_f32,
- int_x86_fma_vfnmsub_ss>;
-defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", sdmem, sse_load_f64,
- int_x86_fma_vfnmsub_sd>;
-defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps", int_x86_fma_vfnmsub_ps,
- int_x86_fma_vfnmsub_ps_256, memopv4f32, memopv8f32>;
-defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd", int_x86_fma_vfnmsub_pd,
- int_x86_fma_vfnmsub_pd_256, memopv2f64, memopv4f64>;
-defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", int_x86_fma_vfmaddsub_ps,
- int_x86_fma_vfmaddsub_ps_256, memopv4f32, memopv8f32>;
-defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", int_x86_fma_vfmaddsub_pd,
- int_x86_fma_vfmaddsub_pd_256, memopv2f64, memopv4f64>;
-defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps", int_x86_fma_vfmsubadd_ps,
- int_x86_fma_vfmsubadd_ps_256, memopv4f32, memopv8f32>;
-defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd", int_x86_fma_vfmsubadd_pd,
- int_x86_fma_vfmsubadd_pd_256, memopv2f64, memopv4f64>;
+defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32>,
+ fma4s_int<0x6A, "vfmaddss", ssmem, sse_load_f32,
+ int_x86_fma_vfmadd_ss>;
+defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64>,
+ fma4s_int<0x6B, "vfmaddsd", sdmem, sse_load_f64,
+ int_x86_fma_vfmadd_sd>;
+defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32>,
+ fma4s_int<0x6E, "vfmsubss", ssmem, sse_load_f32,
+ int_x86_fma_vfmsub_ss>;
+defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64>,
+ fma4s_int<0x6F, "vfmsubsd", sdmem, sse_load_f64,
+ int_x86_fma_vfmsub_sd>;
+defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32,
+ X86Fnmadd, loadf32>,
+ fma4s_int<0x7A, "vfnmaddss", ssmem, sse_load_f32,
+ int_x86_fma_vfnmadd_ss>;
+defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64,
+ X86Fnmadd, loadf64>,
+ fma4s_int<0x7B, "vfnmaddsd", sdmem, sse_load_f64,
+ int_x86_fma_vfnmadd_sd>;
+defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32,
+ X86Fnmsub, loadf32>,
+ fma4s_int<0x7E, "vfnmsubss", ssmem, sse_load_f32,
+ int_x86_fma_vfnmsub_ss>;
+defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64,
+ X86Fnmsub, loadf64>,
+ fma4s_int<0x7F, "vfnmsubsd", sdmem, sse_load_f64,
+ int_x86_fma_vfnmsub_sd>;
+
+defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32,
+ memopv4f32, memopv8f32>;
+defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64,
+ memopv2f64, memopv4f64>;
+defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", X86Fmsub, v4f32, v8f32,
+ memopv4f32, memopv8f32>;
+defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", X86Fmsub, v2f64, v4f64,
+ memopv2f64, memopv4f64>;
+defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps", X86Fnmadd, v4f32, v8f32,
+ memopv4f32, memopv8f32>;
+defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd", X86Fnmadd, v2f64, v4f64,
+ memopv2f64, memopv4f64>;
+defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps", X86Fnmsub, v4f32, v8f32,
+ memopv4f32, memopv8f32>;
+defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd", X86Fnmsub, v2f64, v4f64,
+ memopv2f64, memopv4f64>;
+defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", X86Fmaddsub, v4f32, v8f32,
+ memopv4f32, memopv8f32>;
+defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", X86Fmaddsub, v2f64, v4f64,
+ memopv2f64, memopv4f64>;
+defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps", X86Fmsubadd, v4f32, v8f32,
+ memopv4f32, memopv8f32>;
+defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd", X86Fmsubadd, v2f64, v4f64,
+ memopv2f64, memopv4f64>;
} // HasFMA4
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 81b4f81..55ad2ec 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -287,12 +287,14 @@ class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
let CodeSize = 3;
}
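+// Dummy instance whose Prefix field lets the scalar instruction classes
+// below compare against the XS prefix encoding by name instead of the
+// magic constant 12.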
+def __xs : XS;
+
// SI - SSE 1 & 2 scalar instructions
class SI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: I<o, F, outs, ins, asm, pattern, itin> {
let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
- !if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2]));
+ !if(!eq(Prefix, __xs.Prefix), [UseSSE1], [UseSSE2]));
// AVX instructions have a 'v' prefix in the mnemonic
let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
@@ -303,7 +305,7 @@ class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: Ii8<o, F, outs, ins, asm, pattern, itin> {
let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
- !if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2]));
+ !if(!eq(Prefix, __xs.Prefix), [UseSSE1], [UseSSE2]));
// AVX instructions have a 'v' prefix in the mnemonic
let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
@@ -314,18 +316,25 @@ class PI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern,
InstrItinClass itin, Domain d>
: I<o, F, outs, ins, asm, pattern, itin, d> {
let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
- !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1]));
+ !if(hasOpSizePrefix /* OpSize */, [UseSSE2], [UseSSE1]));
// AVX instructions have a 'v' prefix in the mnemonic
let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
}
+// MMXPI - SSE 1 & 2 packed instructions with MMX operands
+class MMXPI<bits<8> o, Format F, dag outs, dag ins, string asm,
+            list<dag> pattern, InstrItinClass itin, Domain d>
+ : I<o, F, outs, ins, asm, pattern, itin, d> {
+ let Predicates = !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1]);
+}
+
// PIi8 - SSE 1 & 2 packed instructions with immediate
class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin, Domain d>
: Ii8<o, F, outs, ins, asm, pattern, itin, d> {
let Predicates = !if(hasVEX_4VPrefix /* VEX */, [HasAVX],
- !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1]));
+ !if(hasOpSizePrefix /* OpSize */, [UseSSE2], [UseSSE1]));
// AVX instructions have a 'v' prefix in the mnemonic
let AsmString = !if(hasVEX_4VPrefix, !strconcat("v", asm), asm);
@@ -341,18 +350,18 @@ class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
class SSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
- : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasSSE1]>;
+ : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE1]>;
class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
- : Ii8<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasSSE1]>;
+ : Ii8<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE1]>;
class PSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, TB,
- Requires<[HasSSE1]>;
+ Requires<[UseSSE1]>;
class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, TB,
- Requires<[HasSSE1]>;
+ Requires<[UseSSE1]>;
class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XS,
@@ -372,27 +381,31 @@ class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
// PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes.
// VSDI - SSE2 instructions with XD prefix in AVX form.
// VPDI - SSE2 instructions with TB and OpSize prefixes in AVX form.
+// MMXSDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix as well as
+// MMX operands.
+// MMXS2SIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix as well as
+// MMX operands.
class SDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
- : I<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>;
+ : I<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[UseSSE2]>;
class SDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
- : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>;
+ : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[UseSSE2]>;
class S2SI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
- : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasSSE2]>;
+ : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE2]>;
class S2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
- : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>;
+ : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE2]>;
class PDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize,
- Requires<[HasSSE2]>;
+ Requires<[UseSSE2]>;
class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize,
- Requires<[HasSSE2]>;
+ Requires<[UseSSE2]>;
class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XD,
@@ -405,6 +418,12 @@ class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, itin, SSEPackedDouble>, TB,
OpSize, Requires<[HasAVX]>;
+class MMXSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>;
+class MMXS2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>;
// SSE3 Instruction Templates:
//
@@ -415,21 +434,23 @@ class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, XS,
- Requires<[HasSSE3]>;
+ Requires<[UseSSE3]>;
class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, XD,
- Requires<[HasSSE3]>;
+ Requires<[UseSSE3]>;
class S3I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize,
- Requires<[HasSSE3]>;
+ Requires<[UseSSE3]>;
// SSSE3 Instruction Templates:
//
// SS38I - SSSE3 instructions with T8 prefix.
// SS3AI - SSSE3 instructions with TA prefix.
+// MMXSS38I - SSSE3 instructions with T8 prefix and MMX operands.
+// MMXSS3AI - SSSE3 instructions with TA prefix and MMX operands.
//
// Note: SSSE3 instructions have 64-bit and 128-bit versions. The 64-bit version
// uses the MMX registers. The 64-bit versions are grouped with the MMX
@@ -438,10 +459,18 @@ class S3I<bits<8> o, Format F, dag outs, dag ins, string asm,
class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
- Requires<[HasSSSE3]>;
+ Requires<[UseSSSE3]>;
class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
+ Requires<[UseSSSE3]>;
+class MMXSS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
+ Requires<[HasSSSE3]>;
+class MMXSS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
Requires<[HasSSSE3]>;
// SSE4.1 Instruction Templates:
@@ -452,11 +481,11 @@ class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
- Requires<[HasSSE41]>;
+ Requires<[UseSSE41]>;
class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
- Requires<[HasSSE41]>;
+ Requires<[UseSSE41]>;
// SSE4.2 Instruction Templates:
//
@@ -464,9 +493,10 @@ class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
- Requires<[HasSSE42]>;
+ Requires<[UseSSE42]>;
// SS42FI - SSE 4.2 instructions with T8XD prefix.
+// NOTE: 'HasSSE42' is used because SS42FI is only used for CRC32 insns.
class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: I<o, F, outs, ins, asm, pattern, itin>, T8XD, Requires<[HasSSE42]>;
@@ -475,7 +505,7 @@ class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
- Requires<[HasSSE42]>;
+ Requires<[UseSSE42]>;
// AVX Instruction Templates:
// Instructions introduced in AVX (no SSE equivalent forms)
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index ee2d3c4..9035435 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -183,8 +183,8 @@ def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>;
def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>;
def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFma>;
def X86Fnmsub : SDNode<"X86ISD::FNMSUB", SDTFma>;
-def X86Fmaddsub : SDNode<"X86ISD::FMSUBADD", SDTFma>;
-def X86Fmsubadd : SDNode<"X86ISD::FMADDSUB", SDTFma>;
+def X86Fmaddsub : SDNode<"X86ISD::FMADDSUB", SDTFma>;
+def X86Fmsubadd : SDNode<"X86ISD::FMSUBADD", SDTFma>;
def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>,
@@ -240,6 +240,10 @@ def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>;
def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>;
def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>;
+// 128-/256-bit extload pattern fragments
+def extloadv2f32 : PatFrag<(ops node:$ptr), (v2f64 (extloadvf32 node:$ptr))>;
+def extloadv4f32 : PatFrag<(ops node:$ptr), (v4f64 (extloadvf32 node:$ptr))>;
+
// Like 'store', but always requires 128-bit vector alignment.
def alignedstore : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 459f01a..4f3d824 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -1110,6 +1110,36 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VPUNPCKLWDYrr, X86::VPUNPCKLWDYrm, TB_ALIGN_32 },
{ X86::VPXORYrr, X86::VPXORYrm, TB_ALIGN_32 },
// FIXME: add AVX 256-bit foldable instructions
+
+ // FMA4 foldable patterns
+ { X86::VFMADDSS4rr, X86::VFMADDSS4mr, TB_ALIGN_16 },
+ { X86::VFMADDSD4rr, X86::VFMADDSD4mr, TB_ALIGN_16 },
+ { X86::VFMADDPS4rr, X86::VFMADDPS4mr, TB_ALIGN_16 },
+ { X86::VFMADDPD4rr, X86::VFMADDPD4mr, TB_ALIGN_16 },
+ { X86::VFMADDPS4rrY, X86::VFMADDPS4mrY, TB_ALIGN_32 },
+ { X86::VFMADDPD4rrY, X86::VFMADDPD4mrY, TB_ALIGN_32 },
+ { X86::VFNMADDPS4rr, X86::VFNMADDPS4mr, TB_ALIGN_16 },
+ { X86::VFNMADDPD4rr, X86::VFNMADDPD4mr, TB_ALIGN_16 },
+ { X86::VFNMADDPS4rrY, X86::VFNMADDPS4mrY, TB_ALIGN_32 },
+ { X86::VFNMADDPD4rrY, X86::VFNMADDPD4mrY, TB_ALIGN_32 },
+ { X86::VFMSUBSS4rr, X86::VFMSUBSS4mr, TB_ALIGN_16 },
+ { X86::VFMSUBSD4rr, X86::VFMSUBSD4mr, TB_ALIGN_16 },
+ { X86::VFMSUBPS4rr, X86::VFMSUBPS4mr, TB_ALIGN_16 },
+ { X86::VFMSUBPD4rr, X86::VFMSUBPD4mr, TB_ALIGN_16 },
+ { X86::VFMSUBPS4rrY, X86::VFMSUBPS4mrY, TB_ALIGN_32 },
+ { X86::VFMSUBPD4rrY, X86::VFMSUBPD4mrY, TB_ALIGN_32 },
+ { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4mr, TB_ALIGN_16 },
+ { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4mr, TB_ALIGN_16 },
+ { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4mrY, TB_ALIGN_32 },
+ { X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4mrY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4mr, TB_ALIGN_16 },
+ { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4mr, TB_ALIGN_16 },
+ { X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4mrY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4mrY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4mr, TB_ALIGN_16 },
+ { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4mr, TB_ALIGN_16 },
+ { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4mrY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4mrY, TB_ALIGN_32 },
};
for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
@@ -1237,6 +1267,36 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VFMSUBADDPDr132rY, X86::VFMSUBADDPDr132mY, TB_ALIGN_32 },
{ X86::VFMSUBADDPSr213rY, X86::VFMSUBADDPSr213mY, TB_ALIGN_32 },
{ X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr213mY, TB_ALIGN_32 },
+
+ // FMA4 foldable patterns
+ { X86::VFMADDSS4rr, X86::VFMADDSS4rm, TB_ALIGN_16 },
+ { X86::VFMADDSD4rr, X86::VFMADDSD4rm, TB_ALIGN_16 },
+ { X86::VFMADDPS4rr, X86::VFMADDPS4rm, TB_ALIGN_16 },
+ { X86::VFMADDPD4rr, X86::VFMADDPD4rm, TB_ALIGN_16 },
+ { X86::VFMADDPS4rrY, X86::VFMADDPS4rmY, TB_ALIGN_32 },
+ { X86::VFMADDPD4rrY, X86::VFMADDPD4rmY, TB_ALIGN_32 },
+ { X86::VFNMADDPS4rr, X86::VFNMADDPS4rm, TB_ALIGN_16 },
+ { X86::VFNMADDPD4rr, X86::VFNMADDPD4rm, TB_ALIGN_16 },
+ { X86::VFNMADDPS4rrY, X86::VFNMADDPS4rmY, TB_ALIGN_32 },
+ { X86::VFNMADDPD4rrY, X86::VFNMADDPD4rmY, TB_ALIGN_32 },
+ { X86::VFMSUBSS4rr, X86::VFMSUBSS4rm, TB_ALIGN_16 },
+ { X86::VFMSUBSD4rr, X86::VFMSUBSD4rm, TB_ALIGN_16 },
+ { X86::VFMSUBPS4rr, X86::VFMSUBPS4rm, TB_ALIGN_16 },
+ { X86::VFMSUBPD4rr, X86::VFMSUBPD4rm, TB_ALIGN_16 },
+ { X86::VFMSUBPS4rrY, X86::VFMSUBPS4rmY, TB_ALIGN_32 },
+ { X86::VFMSUBPD4rrY, X86::VFMSUBPD4rmY, TB_ALIGN_32 },
+ { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4rm, TB_ALIGN_16 },
+ { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4rm, TB_ALIGN_16 },
+ { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4rmY, TB_ALIGN_32 },
+ { X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4rmY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4rm, TB_ALIGN_16 },
+ { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4rm, TB_ALIGN_16 },
+ { X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4rmY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4rmY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4rm, TB_ALIGN_16 },
+ { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4rm, TB_ALIGN_16 },
+ { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4rmY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4rmY, TB_ALIGN_32 },
};
for (unsigned i = 0, e = array_lengthof(OpTbl3); i != e; ++i) {
@@ -1786,10 +1846,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
MachineInstr *MI = MBBI;
MachineFunction &MF = *MI->getParent()->getParent();
// All instructions input are two-addr instructions. Get the known operands.
- unsigned Dest = MI->getOperand(0).getReg();
- unsigned Src = MI->getOperand(1).getReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isKill = MI->getOperand(1).isKill();
+ const MachineOperand &Dest = MI->getOperand(0);
+ const MachineOperand &Src = MI->getOperand(1);
MachineInstr *NewMI = NULL;
// FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's. When
@@ -1807,11 +1865,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned B = MI->getOperand(1).getReg();
unsigned C = MI->getOperand(2).getReg();
if (B != C) return 0;
- unsigned A = MI->getOperand(0).getReg();
unsigned M = MI->getOperand(3).getImm();
NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri))
- .addReg(A, RegState::Define | getDeadRegState(isDead))
- .addReg(B, getKillRegState(isKill)).addImm(M);
+ .addOperand(Dest).addOperand(Src).addImm(M);
break;
}
case X86::SHUFPDrri: {
@@ -1821,15 +1877,13 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned B = MI->getOperand(1).getReg();
unsigned C = MI->getOperand(2).getReg();
if (B != C) return 0;
- unsigned A = MI->getOperand(0).getReg();
unsigned M = MI->getOperand(3).getImm();
// Convert to PSHUFD mask.
M = ((M & 1) << 1) | ((M & 1) << 3) | ((M & 2) << 4) | ((M & 2) << 6)| 0x44;
NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri))
- .addReg(A, RegState::Define | getDeadRegState(isDead))
- .addReg(B, getKillRegState(isKill)).addImm(M);
+ .addOperand(Dest).addOperand(Src).addImm(M);
break;
}
case X86::SHL64ri: {
@@ -1840,15 +1894,14 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
if (ShAmt == 0 || ShAmt >= 4) return 0;
// LEA can't handle RSP.
- if (TargetRegisterInfo::isVirtualRegister(Src) &&
- !MF.getRegInfo().constrainRegClass(Src, &X86::GR64_NOSPRegClass))
+ if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) &&
+ !MF.getRegInfo().constrainRegClass(Src.getReg(),
+ &X86::GR64_NOSPRegClass))
return 0;
NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
- .addReg(Dest, RegState::Define | getDeadRegState(isDead))
- .addReg(0).addImm(1 << ShAmt)
- .addReg(Src, getKillRegState(isKill))
- .addImm(0).addReg(0);
+ .addOperand(Dest)
+ .addReg(0).addImm(1 << ShAmt).addOperand(Src).addImm(0).addReg(0);
break;
}
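In arithmetic terms, the SHL-to-LEA rewrite above maps a shift by 1-3 onto an address computation with scale 2, 4, or 8 and zero base and displacement, which is what the trailing addReg(0).addImm(1 << ShAmt)...addImm(0).addReg(0) operands encode. A sketch:

#include <cstdint>

// Models lea (, Src, 1 << ShAmt): no base register, displacement 0.
// Only ShAmt 1..3 applies, mirroring the `ShAmt == 0 || ShAmt >= 4` bail-out.
uint64_t shlViaLea(uint64_t Src, unsigned ShAmt) {
  return 0 /*base*/ + (uint64_t(1) << ShAmt) * Src + 0 /*disp*/;
}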
case X86::SHL32ri: {
@@ -1859,15 +1912,15 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
if (ShAmt == 0 || ShAmt >= 4) return 0;
// LEA can't handle ESP.
- if (TargetRegisterInfo::isVirtualRegister(Src) &&
- !MF.getRegInfo().constrainRegClass(Src, &X86::GR32_NOSPRegClass))
+ if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) &&
+ !MF.getRegInfo().constrainRegClass(Src.getReg(),
+ &X86::GR32_NOSPRegClass))
return 0;
unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
- .addReg(Dest, RegState::Define | getDeadRegState(isDead))
- .addReg(0).addImm(1 << ShAmt)
- .addReg(Src, getKillRegState(isKill)).addImm(0).addReg(0);
+ .addOperand(Dest)
+ .addReg(0).addImm(1 << ShAmt).addOperand(Src).addImm(0).addReg(0);
break;
}
case X86::SHL16ri: {
@@ -1880,10 +1933,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
if (DisableLEA16)
return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
- .addReg(Dest, RegState::Define | getDeadRegState(isDead))
- .addReg(0).addImm(1 << ShAmt)
- .addReg(Src, getKillRegState(isKill))
- .addImm(0).addReg(0);
+ .addOperand(Dest)
+ .addReg(0).addImm(1 << ShAmt).addOperand(Src).addImm(0).addReg(0);
break;
}
default: {
@@ -1906,14 +1957,12 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
(const TargetRegisterClass*)&X86::GR32_NOSPRegClass;
// LEA can't handle RSP.
- if (TargetRegisterInfo::isVirtualRegister(Src) &&
- !MF.getRegInfo().constrainRegClass(Src, RC))
+ if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) &&
+ !MF.getRegInfo().constrainRegClass(Src.getReg(), RC))
return 0;
- NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
- .addReg(Dest, RegState::Define |
- getDeadRegState(isDead)),
- Src, isKill, 1);
+ NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addOperand(Dest).addOperand(Src), 1);
break;
}
case X86::INC16r:
@@ -1921,10 +1970,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
if (DisableLEA16)
return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
- NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
- .addReg(Dest, RegState::Define |
- getDeadRegState(isDead)),
- Src, isKill, 1);
+ NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addOperand(Dest).addOperand(Src), 1);
break;
case X86::DEC64r:
case X86::DEC32r:
@@ -1936,14 +1983,12 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
(const TargetRegisterClass*)&X86::GR64_NOSPRegClass :
(const TargetRegisterClass*)&X86::GR32_NOSPRegClass;
// LEA can't handle RSP.
- if (TargetRegisterInfo::isVirtualRegister(Src) &&
- !MF.getRegInfo().constrainRegClass(Src, RC))
+ if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) &&
+ !MF.getRegInfo().constrainRegClass(Src.getReg(), RC))
return 0;
- NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
- .addReg(Dest, RegState::Define |
- getDeadRegState(isDead)),
- Src, isKill, -1);
+ NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addOperand(Dest).addOperand(Src), -1);
break;
}
case X86::DEC16r:
@@ -1951,10 +1996,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
if (DisableLEA16)
return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
- NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
- .addReg(Dest, RegState::Define |
- getDeadRegState(isDead)),
- Src, isKill, -1);
+ NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addOperand(Dest).addOperand(Src), -1);
break;
case X86::ADD64rr:
case X86::ADD64rr_DB:
@@ -1981,9 +2024,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
return 0;
NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(Opc))
- .addReg(Dest, RegState::Define |
- getDeadRegState(isDead)),
- Src, isKill, Src2, isKill2);
+ .addOperand(Dest),
+ Src.getReg(), Src.isKill(), Src2, isKill2);
// Preserve undefness of the operands.
bool isUndef = MI->getOperand(1).isUndef();
@@ -2003,9 +2045,15 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned Src2 = MI->getOperand(2).getReg();
bool isKill2 = MI->getOperand(2).isKill();
NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
- .addReg(Dest, RegState::Define |
- getDeadRegState(isDead)),
- Src, isKill, Src2, isKill2);
+ .addOperand(Dest),
+ Src.getReg(), Src.isKill(), Src2, isKill2);
+
+ // Preserve undefness of the operands.
+ bool isUndef = MI->getOperand(1).isUndef();
+ bool isUndef2 = MI->getOperand(2).isUndef();
+ NewMI->getOperand(1).setIsUndef(isUndef);
+ NewMI->getOperand(3).setIsUndef(isUndef2);
+
if (LV && isKill2)
LV->replaceKillInstruction(Src2, MI, NewMI);
break;
@@ -2015,10 +2063,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
case X86::ADD64ri32_DB:
case X86::ADD64ri8_DB:
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
- NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
- .addReg(Dest, RegState::Define |
- getDeadRegState(isDead)),
- Src, isKill, MI->getOperand(2).getImm());
+ NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
+ .addOperand(Dest).addOperand(Src),
+ MI->getOperand(2).getImm());
break;
case X86::ADD32ri:
case X86::ADD32ri8:
@@ -2026,10 +2073,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
case X86::ADD32ri8_DB: {
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
- NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
- .addReg(Dest, RegState::Define |
- getDeadRegState(isDead)),
- Src, isKill, MI->getOperand(2).getImm());
+ NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addOperand(Dest).addOperand(Src),
+ MI->getOperand(2).getImm());
break;
}
case X86::ADD16ri:
@@ -2039,10 +2085,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
if (DisableLEA16)
return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
- NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
- .addReg(Dest, RegState::Define |
- getDeadRegState(isDead)),
- Src, isKill, MI->getOperand(2).getImm());
+ NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addOperand(Dest).addOperand(Src),
+ MI->getOperand(2).getImm());
break;
}
}
@@ -2051,10 +2096,10 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
if (!NewMI) return 0;
if (LV) { // Update live variables
- if (isKill)
- LV->replaceKillInstruction(Src, MI, NewMI);
- if (isDead)
- LV->replaceKillInstruction(Dest, MI, NewMI);
+ if (Src.isKill())
+ LV->replaceKillInstruction(Src.getReg(), MI, NewMI);
+ if (Dest.isDead())
+ LV->replaceKillInstruction(Dest.getReg(), MI, NewMI);
}
MFI->insert(MBBI, NewMI); // Insert the new inst
@@ -3444,6 +3489,13 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
case X86::FsFLD0SS:
case X86::FsFLD0SD:
return Expand2AddrUndef(MI, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
+ case X86::AVX_SET0:
+ assert(HasAVX && "AVX not supported");
+ return Expand2AddrUndef(MI, get(X86::VXORPSYrr));
+ case X86::V_SETALLONES:
+ return Expand2AddrUndef(MI, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr));
+ case X86::AVX2_SETALLONES:
+ return Expand2AddrUndef(MI, get(X86::VPCMPEQDYrr));
case X86::TEST8ri_NOREX:
MI->setDesc(get(X86::TEST8ri));
return true;
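Why an undef source is safe for the V_SETALLONES expansion above: PCMPEQD of a register with itself yields all-ones in every lane regardless of the register's prior contents. Per-lane, that is:

#include <cstdint>

// Per-lane semantics of pcmpeqd %xmm0, %xmm0: X == X always holds, so each
// 32-bit lane becomes 0xFFFFFFFF whatever X was.
uint32_t pcmpeqdLane(uint32_t X) {
  return X == X ? 0xFFFFFFFFu : 0u;
}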
@@ -3557,14 +3609,16 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
OpcodeTablePtr = &RegOp2MemOpTable2Addr;
isTwoAddrFold = true;
} else if (i == 0) { // If operand 0
- if (MI->getOpcode() == X86::MOV64r0)
- NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI);
- else if (MI->getOpcode() == X86::MOV32r0)
- NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI);
- else if (MI->getOpcode() == X86::MOV16r0)
- NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI);
- else if (MI->getOpcode() == X86::MOV8r0)
- NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI);
+ unsigned Opc = 0;
+ switch (MI->getOpcode()) {
+ default: break;
+ case X86::MOV64r0: Opc = X86::MOV64mi32; break;
+ case X86::MOV32r0: Opc = X86::MOV32mi; break;
+ case X86::MOV16r0: Opc = X86::MOV16mi; break;
+ case X86::MOV8r0: Opc = X86::MOV8mi; break;
+ }
+ if (Opc)
+ NewMI = MakeM0Inst(*this, Opc, MOs, MI);
if (NewMI)
return NewMI;
@@ -3793,15 +3847,12 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
Alignment = (*LoadMI->memoperands_begin())->getAlignment();
else
switch (LoadMI->getOpcode()) {
- case X86::AVX_SET0PSY:
- case X86::AVX_SET0PDY:
case X86::AVX2_SETALLONES:
- case X86::AVX2_SET0:
+ case X86::AVX_SET0:
Alignment = 32;
break;
case X86::V_SET0:
case X86::V_SETALLONES:
- case X86::AVX_SETALLONES:
Alignment = 16;
break;
case X86::FsFLD0SD:
@@ -3837,11 +3888,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
switch (LoadMI->getOpcode()) {
case X86::V_SET0:
case X86::V_SETALLONES:
- case X86::AVX_SET0PSY:
- case X86::AVX_SET0PDY:
- case X86::AVX_SETALLONES:
case X86::AVX2_SETALLONES:
- case X86::AVX2_SET0:
+ case X86::AVX_SET0:
case X86::FsFLD0SD:
case X86::FsFLD0SS: {
// Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
@@ -3873,15 +3921,12 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
Ty = Type::getFloatTy(MF.getFunction()->getContext());
else if (Opc == X86::FsFLD0SD)
Ty = Type::getDoubleTy(MF.getFunction()->getContext());
- else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY)
- Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8);
- else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX2_SET0)
+ else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX_SET0)
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 8);
else
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
- bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX_SETALLONES ||
- Opc == X86::AVX2_SETALLONES);
+ bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX2_SETALLONES);
const Constant *C = IsAllOnes ? Constant::getAllOnesValue(Ty) :
Constant::getNullValue(Ty);
unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);
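
After getConstantPoolIndex returns, the fold still needs the five x86 address operands that point at the new pool entry. A sketch of that step, assuming the conventional base/scale/index/disp/segment layout and eliding the PIC-base computation:

// Hedged sketch: turn the constant-pool index into x86 address operands so
// the pseudo can be re-folded as a plain load from the pool.
SmallVector<MachineOperand, 5> MOs;
MOs.push_back(MachineOperand::CreateReg(PICBase, /*isDef=*/false)); // base
MOs.push_back(MachineOperand::CreateImm(1));                        // scale
MOs.push_back(MachineOperand::CreateReg(0, false));                 // index
MOs.push_back(MachineOperand::CreateCPI(CPI, 0));                   // disp
MOs.push_back(MachineOperand::CreateReg(0, false));                 // segment
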
@@ -3956,6 +4001,8 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
OpcodeTablePtr = &RegOp2MemOpTable1;
} else if (OpNum == 2) {
OpcodeTablePtr = &RegOp2MemOpTable2;
+ } else if (OpNum == 3) {
+ OpcodeTablePtr = &RegOp2MemOpTable3;
}
if (OpcodeTablePtr && OpcodeTablePtr->count(Opc))
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index d293156..304676d 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -114,7 +114,7 @@ def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
def SDT_X86MEMBARRIER : SDTypeProfile<0, 0, []>;
def X86MemBarrier : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIER,
- [SDNPHasChain]>;
+ [SDNPHasChain,SDNPSideEffect]>;
def X86MFence : SDNode<"X86ISD::MFENCE", SDT_X86MEMBARRIER,
[SDNPHasChain]>;
def X86SFence : SDNode<"X86ISD::SFENCE", SDT_X86MEMBARRIER,
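
Adding SDNPSideEffect tells the DAG that MEMBARRIER has effects beyond its chain, so CSE and dead-node elimination must leave it alone. A hedged sketch of how such a chained node is typically produced during custom lowering (the surrounding lowering code is assumed):

SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) {
  DebugLoc dl = Op.getDebugLoc();
  // MVT::Other is the chain type; operand 0 is the incoming chain. The
  // SDNPSideEffect flag on the node keeps CSE/DCE from touching it.
  return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
}
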
@@ -552,14 +552,21 @@ def HasMMX : Predicate<"Subtarget->hasMMX()">;
def Has3DNow : Predicate<"Subtarget->has3DNow()">;
def Has3DNowA : Predicate<"Subtarget->has3DNowA()">;
def HasSSE1 : Predicate<"Subtarget->hasSSE1()">;
+def UseSSE1 : Predicate<"Subtarget->hasSSE1() && Subtarget->hasNoAVX()">;
def HasSSE2 : Predicate<"Subtarget->hasSSE2()">;
+def UseSSE2 : Predicate<"Subtarget->hasSSE2() && Subtarget->hasNoAVX()">;
def HasSSE3 : Predicate<"Subtarget->hasSSE3()">;
+def UseSSE3 : Predicate<"Subtarget->hasSSE3() && Subtarget->hasNoAVX()">;
def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">;
+def UseSSSE3 : Predicate<"Subtarget->hasSSSE3() && Subtarget->hasNoAVX()">;
def HasSSE41 : Predicate<"Subtarget->hasSSE41()">;
+def UseSSE41 : Predicate<"Subtarget->hasSSE41() && Subtarget->hasNoAVX()">;
def HasSSE42 : Predicate<"Subtarget->hasSSE42()">;
+def UseSSE42 : Predicate<"Subtarget->hasSSE42() && Subtarget->hasNoAVX()">;
def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">;
def HasAVX : Predicate<"Subtarget->hasAVX()">;
def HasAVX2 : Predicate<"Subtarget->hasAVX2()">;
+def HasAVX1Only : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX2()">;
def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
def HasAES : Predicate<"Subtarget->hasAES()">;
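
The new UseSSEn predicates make the legacy (non-VEX) SSE patterns and the AVX patterns mutually exclusive by construction, replacing the AddedComplexity priority hacks that several later hunks delete. A sketch of the subtarget queries these predicate strings assume, modeled on the ordered SSE-level enum:

// Hedged sketch, not the real X86Subtarget: SSE levels form an ordered
// enum, so each query is a simple comparison.
struct X86SubtargetSketch {
  enum SSELevel { NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2 };
  SSELevel X86SSELevel;
  bool hasSSE2()  const { return X86SSELevel >= SSE2; }
  bool hasAVX()   const { return X86SSELevel >= AVX; }
  bool hasNoAVX() const { return X86SSELevel < AVX; }
  // UseSSE2 expands to hasSSE2() && hasNoAVX(): a legacy pattern can never
  // compete with its VEX twin on an AVX subtarget.
};
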
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index c8f40bb..bd54858 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -118,11 +118,11 @@ let Constraints = "$src1 = $dst" in {
/// Unary MMX instructions requiring SSSE3.
multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr,
Intrinsic IntId64, OpndItins itins> {
- def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
+ def rr64 : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR64:$dst, (IntId64 VR64:$src))], itins.rr>;
- def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
+ def rm64 : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR64:$dst,
(IntId64 (bitconvert (memopmmx addr:$src))))],
@@ -134,11 +134,11 @@ let ImmT = NoImm, Constraints = "$src1 = $dst" in {
multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
Intrinsic IntId64, OpndItins itins> {
let isCommutable = 0 in
- def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
+ def rr64 : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))], itins.rr>;
- def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
+ def rm64 : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst,
@@ -149,11 +149,11 @@ multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
/// PALIGN MMX instructions (require SSSE3).
multiclass ssse3_palign_mm<string asm, Intrinsic IntId> {
- def R64irr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
+ def R64irr : MMXSS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2, i8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>;
- def R64irm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
+ def R64irm : MMXSS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2, i8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR64:$dst, (IntId VR64:$src1,
@@ -163,12 +163,10 @@ multiclass ssse3_palign_mm<string asm, Intrinsic IntId> {
multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
string asm, OpndItins itins, Domain d> {
- def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
- [(set DstRC:$dst, (Int SrcRC:$src))],
- itins.rr, d>;
- def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
- [(set DstRC:$dst, (Int (ld_frag addr:$src)))],
- itins.rm, d>;
+ def irr : MMXPI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+ [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr, d>;
+ def irm : MMXPI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+ [(set DstRC:$dst, (Int (ld_frag addr:$src)))], itins.rm, d>;
}
multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
@@ -243,29 +241,30 @@ def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
[(store (x86mmx VR64:$src), addr:$dst)],
IIC_MMX_MOVQ_RM>;
-def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
- (ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst,
- (x86mmx (bitconvert
- (i64 (vector_extract (v2i64 VR128:$src),
- (iPTR 0))))))],
- IIC_MMX_MOVQ_RR>;
-
-def MMX_MOVQ2DQrr : S2SIi8<0xD6, MRMSrcReg, (outs VR128:$dst),
- (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v2i64 (scalar_to_vector
- (i64 (bitconvert (x86mmx VR64:$src))))))],
- IIC_MMX_MOVQ_RR>;
+def MMX_MOVDQ2Qrr : MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
+ (ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (x86mmx (bitconvert
+ (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))))))],
+ IIC_MMX_MOVQ_RR>;
+
+def MMX_MOVQ2DQrr : MMXS2SIi8<0xD6, MRMSrcReg, (outs VR128:$dst),
+ (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64
+ (scalar_to_vector
+ (i64 (bitconvert (x86mmx VR64:$src))))))],
+ IIC_MMX_MOVQ_RR>;
let neverHasSideEffects = 1 in
-def MMX_MOVQ2FR64rr: S2SIi8<0xD6, MRMSrcReg, (outs FR64:$dst),
- (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", [],
- IIC_MMX_MOVQ_RR>;
+def MMX_MOVQ2FR64rr: MMXS2SIi8<0xD6, MRMSrcReg, (outs FR64:$dst),
+ (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}",
+ [], IIC_MMX_MOVQ_RR>;
-def MMX_MOVFR642Qrr: SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
- (ins FR64:$src), "movdq2q\t{$src, $dst|$dst, $src}", [],
- IIC_MMX_MOVQ_RR>;
+def MMX_MOVFR642Qrr: MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
+ (ins FR64:$src), "movdq2q\t{$src, $dst|$dst, $src}",
+ [], IIC_MMX_MOVQ_RR>;
def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movntq\t{$src, $dst|$dst, $src}",
@@ -577,6 +576,7 @@ def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
IIC_MMX_MASKMOV>;
// 64-bit bit convert.
+let Predicates = [HasSSE2] in {
def : Pat<(x86mmx (bitconvert (i64 GR64:$src))),
(MMX_MOVD64to64rr GR64:$src)>;
def : Pat<(i64 (bitconvert (x86mmx VR64:$src))),
@@ -585,5 +585,6 @@ def : Pat<(f64 (bitconvert (x86mmx VR64:$src))),
(MMX_MOVQ2FR64rr VR64:$src)>;
def : Pat<(x86mmx (bitconvert (f64 FR64:$src))),
(MMX_MOVFR642Qrr FR64:$src)>;
+}
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 220c06d..17e91a6 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -251,35 +251,37 @@ def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
// A 128-bit subvector extract from the first 256-bit vector position
// is a subregister copy that needs no instruction.
-def : Pat<(v4i32 (extract_subvector (v8i32 VR256:$src), (i32 0))),
+def : Pat<(v4i32 (extract_subvector (v8i32 VR256:$src), (iPTR 0))),
(v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm))>;
-def : Pat<(v4f32 (extract_subvector (v8f32 VR256:$src), (i32 0))),
+def : Pat<(v4f32 (extract_subvector (v8f32 VR256:$src), (iPTR 0))),
(v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm))>;
-def : Pat<(v2i64 (extract_subvector (v4i64 VR256:$src), (i32 0))),
+def : Pat<(v2i64 (extract_subvector (v4i64 VR256:$src), (iPTR 0))),
(v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm))>;
-def : Pat<(v2f64 (extract_subvector (v4f64 VR256:$src), (i32 0))),
+def : Pat<(v2f64 (extract_subvector (v4f64 VR256:$src), (iPTR 0))),
(v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm))>;
-def : Pat<(v8i16 (extract_subvector (v16i16 VR256:$src), (i32 0))),
+def : Pat<(v8i16 (extract_subvector (v16i16 VR256:$src), (iPTR 0))),
(v8i16 (EXTRACT_SUBREG (v16i16 VR256:$src), sub_xmm))>;
-def : Pat<(v16i8 (extract_subvector (v32i8 VR256:$src), (i32 0))),
+def : Pat<(v16i8 (extract_subvector (v32i8 VR256:$src), (iPTR 0))),
(v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src), sub_xmm))>;
// A 128-bit subvector insert to the first 256-bit vector position
// is a subregister copy that needs no instruction.
-def : Pat<(insert_subvector undef, (v2i64 VR128:$src), (i32 0)),
+let AddedComplexity = 25 in { // to give priority over vinsertf128rm
+def : Pat<(insert_subvector undef, (v2i64 VR128:$src), (iPTR 0)),
(INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
-def : Pat<(insert_subvector undef, (v2f64 VR128:$src), (i32 0)),
+def : Pat<(insert_subvector undef, (v2f64 VR128:$src), (iPTR 0)),
(INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
-def : Pat<(insert_subvector undef, (v4i32 VR128:$src), (i32 0)),
+def : Pat<(insert_subvector undef, (v4i32 VR128:$src), (iPTR 0)),
(INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
-def : Pat<(insert_subvector undef, (v4f32 VR128:$src), (i32 0)),
+def : Pat<(insert_subvector undef, (v4f32 VR128:$src), (iPTR 0)),
(INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
-def : Pat<(insert_subvector undef, (v8i16 VR128:$src), (i32 0)),
+def : Pat<(insert_subvector undef, (v8i16 VR128:$src), (iPTR 0)),
(INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
-def : Pat<(insert_subvector undef, (v16i8 VR128:$src), (i32 0)),
+def : Pat<(insert_subvector undef, (v16i8 VR128:$src), (iPTR 0)),
(INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
+}
// Implicitly promote a 32-bit scalar to a vector.
def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
@@ -362,7 +364,7 @@ let Predicates = [HasAVX] in {
def : Pat<(v16i16 (bitconvert (v32i8 VR256:$src))), (v16i16 VR256:$src)>;
}
-// Alias instructions that map fld0 to pxor for sse.
+// Alias instructions that map fld0 to xorps for SSE or vxorps for AVX.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isPseudo = 1 in {
@@ -382,11 +384,11 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1, neverHasSideEffects = 1 in {
-def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "", []>;
+ isPseudo = 1 in {
+def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4f32 immAllZerosV))]>;
}
-def : Pat<(v4f32 immAllZerosV), (V_SET0)>;
def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
@@ -394,35 +396,29 @@ def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
-// The same as done above but for AVX. The 256-bit ISA does not support PI,
+// The same as done above but for AVX. The 256-bit AVX1 ISA doesn't support PI,
// and doesn't need it because on Sandy Bridge the register is set to zero
// at the rename stage without using any execution unit, so SET0PSY
// and SET0PDY can be used for vector int instructions without penalty.
-// FIXME: Change encoding to pseudo! This is blocked right now by the x86
- JIT implementation, it does not expand the instructions below like
-// X86MCInstLower does.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isCodeGenOnly = 1 in {
-let Predicates = [HasAVX] in {
-def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
- [(set VR256:$dst, (v8f32 immAllZerosV))]>, VEX_4V;
-def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
- [(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V;
-}
-let Predicates = [HasAVX2], neverHasSideEffects = 1 in
-def AVX2_SET0 : PDI<0xef, MRMInitReg, (outs VR256:$dst), (ins), "",
- []>, VEX_4V;
+ isPseudo = 1, Predicates = [HasAVX] in {
+def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "",
+ [(set VR256:$dst, (v8f32 immAllZerosV))]>;
}
-let Predicates = [HasAVX2], AddedComplexity = 5 in {
- def : Pat<(v4i64 immAllZerosV), (AVX2_SET0)>;
- def : Pat<(v8i32 immAllZerosV), (AVX2_SET0)>;
- def : Pat<(v16i16 immAllZerosV), (AVX2_SET0)>;
- def : Pat<(v32i8 immAllZerosV), (AVX2_SET0)>;
+let Predicates = [HasAVX] in
+ def : Pat<(v4f64 immAllZerosV), (AVX_SET0)>;
+
+let Predicates = [HasAVX2] in {
+ def : Pat<(v4i64 immAllZerosV), (AVX_SET0)>;
+ def : Pat<(v8i32 immAllZerosV), (AVX_SET0)>;
+ def : Pat<(v16i16 immAllZerosV), (AVX_SET0)>;
+ def : Pat<(v32i8 immAllZerosV), (AVX_SET0)>;
}
-// AVX has no support for 256-bit integer instructions, but since the 128-bit
+// AVX1 has no support for 256-bit integer instructions, but since the 128-bit
// VPXOR instruction writes zero to its upper part, it's safe to build zeros.
+let Predicates = [HasAVX1Only] in {
def : Pat<(v32i8 immAllZerosV), (SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>;
def : Pat<(bc_v32i8 (v8f32 immAllZerosV)),
(SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>;
@@ -438,22 +434,17 @@ def : Pat<(bc_v8i32 (v8f32 immAllZerosV)),
def : Pat<(v4i64 immAllZerosV), (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>;
def : Pat<(bc_v4i64 (v8f32 immAllZerosV)),
(SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>;
+}
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-ones value if folding it would be beneficial.
-// FIXME: Change encoding to pseudo! This is blocked right now by the x86
-// JIT implementation, it does not expand the instructions below like
-// X86MCInstLower does.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isCodeGenOnly = 1, ExeDomain = SSEPackedInt in {
- let Predicates = [HasAVX] in
- def AVX_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
- [(set VR128:$dst, (v4i32 immAllOnesV))]>, VEX_4V;
- def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
- [(set VR128:$dst, (v4i32 immAllOnesV))]>;
+ isPseudo = 1 in {
+ def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4i32 immAllOnesV))]>;
let Predicates = [HasAVX2] in
- def AVX2_SETALLONES : PDI<0x76, MRMInitReg, (outs VR256:$dst), (ins), "",
- [(set VR256:$dst, (v8i32 immAllOnesV))]>, VEX_4V;
+ def AVX2_SETALLONES : I<0, Pseudo, (outs VR256:$dst), (ins), "",
+ [(set VR256:$dst, (v8i32 immAllOnesV))]>;
}
@@ -605,27 +596,27 @@ let Predicates = [HasAVX] in {
// Represent the same patterns above but in the form they appear for
// 256-bit types
def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
- (v4i32 (scalar_to_vector (loadi32 addr:$src))), (i32 0)))),
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
- (v4f32 (scalar_to_vector (loadf32 addr:$src))), (i32 0)))),
+ (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
- (v2f64 (scalar_to_vector (loadf64 addr:$src))), (i32 0)))),
+ (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
}
def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
- (v4f32 (scalar_to_vector FR32:$src)), (i32 0)))),
+ (v4f32 (scalar_to_vector FR32:$src)), (iPTR 0)))),
(SUBREG_TO_REG (i32 0),
(v4f32 (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)),
sub_xmm)>;
def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
- (v2f64 (scalar_to_vector FR64:$src)), (i32 0)))),
+ (v2f64 (scalar_to_vector FR64:$src)), (iPTR 0)))),
(SUBREG_TO_REG (i64 0),
(v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)),
sub_xmm)>;
def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
- (v2i64 (scalar_to_vector (loadi64 addr:$src))), (i32 0)))),
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_xmm)>;
// Move low f64 and clear high bits.
@@ -704,7 +695,7 @@ let Predicates = [HasAVX] in {
(VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
}
-let Predicates = [HasSSE1] in {
+let Predicates = [UseSSE1] in {
let AddedComplexity = 15 in {
// Move scalar to XMM zero-extended, zeroing a VR128 then do a
// MOVSS to the lower bits.
@@ -738,7 +729,7 @@ let Predicates = [HasSSE1] in {
(MOVSSrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR32))>;
}
-let Predicates = [HasSSE2] in {
+let Predicates = [UseSSE2] in {
let AddedComplexity = 15 in {
// Move scalar to XMM zero-extended, zeroing a VR128 then do a
// MOVSD to the lower bits.
@@ -916,16 +907,16 @@ let isCodeGenOnly = 1 in {
let Predicates = [HasAVX] in {
def : Pat<(v8i32 (X86vzmovl
- (insert_subvector undef, (v4i32 VR128:$src), (i32 0)))),
+ (insert_subvector undef, (v4i32 VR128:$src), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
def : Pat<(v4i64 (X86vzmovl
- (insert_subvector undef, (v2i64 VR128:$src), (i32 0)))),
+ (insert_subvector undef, (v2i64 VR128:$src), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
def : Pat<(v8f32 (X86vzmovl
- (insert_subvector undef, (v4f32 VR128:$src), (i32 0)))),
+ (insert_subvector undef, (v4f32 VR128:$src), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
def : Pat<(v4f64 (X86vzmovl
- (insert_subvector undef, (v2f64 VR128:$src), (i32 0)))),
+ (insert_subvector undef, (v2f64 VR128:$src), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
}
@@ -975,10 +966,10 @@ let Predicates = [HasAVX] in {
(VMOVUPDmr addr:$dst, VR128:$src)>;
}
-let Predicates = [HasSSE1] in
+let Predicates = [UseSSE1] in
def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src),
(MOVUPSmr addr:$dst, VR128:$src)>;
-let Predicates = [HasSSE2] in
+let Predicates = [UseSSE2] in
def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src),
(MOVUPDmr addr:$dst, VR128:$src)>;
@@ -1028,12 +1019,52 @@ let Predicates = [HasAVX] in {
(VMOVUPSYmr addr:$dst, VR256:$src)>;
def : Pat<(store (v32i8 VR256:$src), addr:$dst),
(VMOVUPSYmr addr:$dst, VR256:$src)>;
+
+ // Special patterns for storing subvector extracts of the lower 128 bits.
+ // It's cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr.
+ def : Pat<(alignedstore (v2f64 (extract_subvector
+ (v4f64 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVAPDmr addr:$dst, (v2f64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(alignedstore (v4f32 (extract_subvector
+ (v8f32 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVAPSmr addr:$dst, (v4f32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(alignedstore (v2i64 (extract_subvector
+ (v4i64 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVAPDmr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(alignedstore (v4i32 (extract_subvector
+ (v8i32 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVAPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(alignedstore (v8i16 (extract_subvector
+ (v16i16 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVAPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(alignedstore (v16i8 (extract_subvector
+ (v32i8 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVAPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+
+ def : Pat<(store (v2f64 (extract_subvector
+ (v4f64 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVUPDmr addr:$dst, (v2f64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(store (v4f32 (extract_subvector
+ (v8f32 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVUPSmr addr:$dst, (v4f32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(store (v2i64 (extract_subvector
+ (v4i64 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVUPDmr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(store (v4i32 (extract_subvector
+ (v8i32 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVUPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(store (v8i16 (extract_subvector
+ (v16i16 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVUPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(store (v16i8 (extract_subvector
+ (v32i8 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVUPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
}
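
These patterns exist because reading the low 128-bit half of a YMM register is a plain subregister copy, so a 128-bit move beats VEXTRACTF128mr. A hedged IRBuilder sketch of the IR shape they target; the helper name and setup are illustrative only:

// Hedged sketch: store the low 128-bit half of an <8 x float> vector. The
// shuffle of lanes 0..3 becomes extract_subvector index 0 in the DAG.
StoreInst *EmitStoreLowHalf(IRBuilder<> &B, Value *V256, Value *Ptr) {
  SmallVector<Constant*, 4> Idxs;
  for (unsigned i = 0; i != 4; ++i)
    Idxs.push_back(B.getInt32(i));            // lanes 0..3 = low xmm half
  Value *Lo = B.CreateShuffleVector(V256, UndefValue::get(V256->getType()),
                                    ConstantVector::get(Idxs), "lo128");
  return B.CreateStore(Lo, Ptr);              // selected as VMOVAPS/VMOVUPS
}
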
// Use movaps / movups for SSE integer load / store (one byte shorter).
// The instructions selected below are then converted to MOVDQA/MOVDQU
// during the SSE domain pass.
-let Predicates = [HasSSE1] in {
+let Predicates = [UseSSE1] in {
def : Pat<(alignedloadv2i64 addr:$src),
(MOVAPSrm addr:$src)>;
def : Pat<(loadv2i64 addr:$src),
@@ -1180,7 +1211,7 @@ let Predicates = [HasAVX] in {
(VMOVLPDmr addr:$src1, VR128:$src2)>;
}
-let Predicates = [HasSSE1] in {
+let Predicates = [UseSSE1] in {
// (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
def : Pat<(store (i64 (vector_extract (bc_v2i64 (v4f32 VR128:$src2)),
(iPTR 0))), addr:$src1),
@@ -1205,7 +1236,7 @@ let Predicates = [HasSSE1] in {
(MOVLPSmr addr:$src1, VR128:$src2)>;
}
-let Predicates = [HasSSE2] in {
+let Predicates = [UseSSE2] in {
// Shuffle with MOVLPD
def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
(MOVLPDrm VR128:$src1, addr:$src2)>;
@@ -1279,7 +1310,7 @@ let Predicates = [HasAVX] in {
(VMOVHPDrm VR128:$src1, addr:$src2)>;
}
-let Predicates = [HasSSE1] in {
+let Predicates = [UseSSE1] in {
// MOVHPS patterns
def : Pat<(X86Movlhps VR128:$src1,
(bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
@@ -1289,7 +1320,7 @@ let Predicates = [HasSSE1] in {
(MOVHPSrm VR128:$src1, addr:$src2)>;
}
-let Predicates = [HasSSE2] in {
+let Predicates = [UseSSE2] in {
// FIXME: Instead of X86Unpckl, there should be an X86Movlhpd here; the problem
// is during lowering, where it's not possible to recognize the load fold
// because it has two uses through a bitcast. One use disappears at isel time
@@ -1346,7 +1377,7 @@ let Predicates = [HasAVX] in {
(VMOVHLPSrr VR128:$src1, VR128:$src2)>;
}
-let Predicates = [HasSSE1] in {
+let Predicates = [UseSSE1] in {
// MOVLHPS patterns
def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
(MOVLHPSrr VR128:$src1, VR128:$src2)>;
@@ -1456,7 +1487,7 @@ def : InstAlias<"vcvtsi2sd{l}\t{$src, $src1, $dst|$dst, $src1, $src}",
def : InstAlias<"vcvtsi2sd{l}\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src)>;
-let Predicates = [HasAVX], AddedComplexity = 1 in {
+let Predicates = [HasAVX] in {
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
(VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
@@ -1633,7 +1664,7 @@ defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, i256mem,
defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
"cvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle, SSE_CVT_PS>,
- TB, Requires<[HasSSE2]>;
+ TB, Requires<[UseSSE2]>;
/// SSE 2 Only
@@ -1663,7 +1694,7 @@ def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
[(set FR32:$dst, (fround (loadf64 addr:$src)))],
IIC_SSE_CVT_Scalar_RM>,
XD,
- Requires<[HasSSE2, OptForSize]>;
+ Requires<[UseSSE2, OptForSize]>;
def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
@@ -1684,13 +1715,13 @@ def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg,
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XD, Requires<[HasSSE2]>;
+ IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>;
def Int_CVTSD2SSrm: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
VR128:$src1, sse_load_f64:$src2))],
- IIC_SSE_CVT_Scalar_RM>, XD, Requires<[HasSSE2]>;
+ IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>;
}
// Convert scalar single to scalar double
@@ -1709,30 +1740,28 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>;
}
-let AddedComplexity = 1 in { // give AVX priority
- def : Pat<(f64 (fextend FR32:$src)),
- (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[HasAVX]>;
- def : Pat<(fextend (loadf32 addr:$src)),
- (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(f64 (fextend FR32:$src)),
+ (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[HasAVX]>;
+def : Pat<(fextend (loadf32 addr:$src)),
+ (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX]>;
- def : Pat<(extloadf32 addr:$src),
- (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>,
- Requires<[HasAVX, OptForSize]>;
- def : Pat<(extloadf32 addr:$src),
- (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>,
- Requires<[HasAVX, OptForSpeed]>;
-} // AddedComplexity = 1
+def : Pat<(extloadf32 addr:$src),
+ (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasAVX, OptForSize]>;
+def : Pat<(extloadf32 addr:$src),
+ (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>,
+ Requires<[HasAVX, OptForSpeed]>;
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (fextend FR32:$src))],
IIC_SSE_CVT_Scalar_RR>, XS,
- Requires<[HasSSE2]>;
+ Requires<[UseSSE2]>;
def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (extloadf32 addr:$src))],
IIC_SSE_CVT_Scalar_RM>, XS,
- Requires<[HasSSE2, OptForSize]>;
+ Requires<[UseSSE2, OptForSize]>;
// extload f32 -> f64. This matches load+fextend because we have a hack in
// the isel (PreprocessForFPConvert) that can introduce loads after dag
@@ -1740,9 +1769,9 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
// Since these loads aren't folded into the fextend, we have to match it
// explicitly here.
def : Pat<(fextend (loadf32 addr:$src)),
- (CVTSS2SDrm addr:$src)>, Requires<[HasSSE2]>;
+ (CVTSS2SDrm addr:$src)>, Requires<[UseSSE2]>;
def : Pat<(extloadf32 addr:$src),
- (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[HasSSE2, OptForSpeed]>;
+ (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[UseSSE2, OptForSpeed]>;
def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
@@ -1762,13 +1791,13 @@ def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
"cvtss2sd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XS, Requires<[HasSSE2]>;
+ IIC_SSE_CVT_Scalar_RR>, XS, Requires<[UseSSE2]>;
def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"cvtss2sd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
- IIC_SSE_CVT_Scalar_RM>, XS, Requires<[HasSSE2]>;
+ IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>;
}
// Convert packed single/double fp to doubleword
@@ -1904,7 +1933,7 @@ let Predicates = [HasAVX] in {
(VCVTTPS2DQYrm addr:$src)>;
}
-let Predicates = [HasSSE2] in {
+let Predicates = [UseSSE2] in {
def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
(CVTDQ2PSrr VR128:$src)>;
def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
@@ -1978,10 +2007,10 @@ def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
IIC_SSE_CVT_PD_RR>, TB, VEX;
-let neverHasSideEffects = 1, mayLoad = 1 in
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_CVT_PD_RM>, TB, VEX;
+ "vcvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
+ IIC_SSE_CVT_PD_RM>, TB, VEX;
def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
@@ -1994,15 +2023,15 @@ def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
IIC_SSE_CVT_PD_RM>, TB, VEX;
}
-let Predicates = [HasSSE2] in {
+let Predicates = [UseSSE2] in {
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
IIC_SSE_CVT_PD_RR>, TB;
-let neverHasSideEffects = 1, mayLoad = 1 in
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
- "cvtps2pd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_CVT_PD_RM>, TB;
+ "cvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
+ IIC_SSE_CVT_PD_RM>, TB;
}
// Convert Packed DW Integers to Packed Double FP
@@ -2105,11 +2134,11 @@ let Predicates = [HasAVX] in {
(VCVTPS2PDrr VR128:$src)>;
def : Pat<(v4f64 (fextend (v4f32 VR128:$src))),
(VCVTPS2PDYrr VR128:$src)>;
- def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))),
+ def : Pat<(v4f64 (extloadv4f32 addr:$src)),
(VCVTPS2PDYrm addr:$src)>;
}
-let Predicates = [HasSSE2] in {
+let Predicates = [UseSSE2] in {
// Match fextend for 128 conversions
def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
(CVTPS2PDrr VR128:$src)>;
@@ -2336,14 +2365,14 @@ def : Pat<(v4i64 (X86cmpp (v4f64 VR256:$src1), (memop addr:$src2), imm:$cc)),
(VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>;
}
-let Predicates = [HasSSE1] in {
+let Predicates = [UseSSE1] in {
def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
(CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
(CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
}
-let Predicates = [HasSSE2] in {
+let Predicates = [UseSSE2] in {
def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
(CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
@@ -2420,7 +2449,7 @@ let Predicates = [HasAVX] in {
(VSHUFPDYrmi VR256:$src1, addr:$src2, imm:$imm)>;
}
-let Predicates = [HasSSE1] in {
+let Predicates = [UseSSE1] in {
def : Pat<(v4i32 (X86Shufp VR128:$src1,
(bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
(SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
@@ -2428,7 +2457,7 @@ let Predicates = [HasSSE1] in {
(SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
}
-let Predicates = [HasSSE2] in {
+let Predicates = [UseSSE2] in {
// Generic SHUFPD patterns
def : Pat<(v2i64 (X86Shufp VR128:$src1,
(memopv2i64 addr:$src2), (i8 imm:$imm))),
@@ -2500,7 +2529,27 @@ let Constraints = "$src1 = $dst" in {
SSEPackedDouble>, TB, OpSize;
} // Constraints = "$src1 = $dst"
-let Predicates = [HasAVX], AddedComplexity = 1 in {
+let Predicates = [HasAVX1Only] in {
+ def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
+ (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)),
+ (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
+ (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)),
+ (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
+
+ def : Pat<(v4i64 (X86Unpckl VR256:$src1, (memopv4i64 addr:$src2))),
+ (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)),
+ (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v4i64 (X86Unpckh VR256:$src1, (memopv4i64 addr:$src2))),
+ (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)),
+ (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
+}
+
+let Predicates = [HasAVX] in {
// FIXME: Instead of X86Movddup, there should be an X86Unpckl here; the
// problem is during lowering, where it's not possible to recognize the load
// fold because it has two uses through a bitcast. One use disappears at isel
@@ -2509,7 +2558,7 @@ let Predicates = [HasAVX], AddedComplexity = 1 in {
(VUNPCKLPDrr VR128:$src, VR128:$src)>;
}
-let Predicates = [HasSSE2] in {
+let Predicates = [UseSSE2] in {
// FIXME: Instead of X86Movddup, there should be an X86Unpckl here; the
// problem is during lowering, where it's not possible to recognize the load
// fold because it has two uses through a bitcast. One use disappears at isel
@@ -2578,16 +2627,16 @@ defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd",
def : Pat<(i32 (X86fgetsign FR32:$src)),
(MOVMSKPSrr32 (COPY_TO_REGCLASS FR32:$src, VR128))>,
- Requires<[HasSSE1]>;
+ Requires<[UseSSE1]>;
def : Pat<(i64 (X86fgetsign FR32:$src)),
(MOVMSKPSrr64 (COPY_TO_REGCLASS FR32:$src, VR128))>,
- Requires<[HasSSE1]>;
+ Requires<[UseSSE1]>;
def : Pat<(i32 (X86fgetsign FR64:$src)),
(MOVMSKPDrr32 (COPY_TO_REGCLASS FR64:$src, VR128))>,
- Requires<[HasSSE2]>;
+ Requires<[UseSSE2]>;
def : Pat<(i64 (X86fgetsign FR64:$src)),
(MOVMSKPDrr64 (COPY_TO_REGCLASS FR64:$src, VR128))>,
- Requires<[HasSSE2]>;
+ Requires<[UseSSE2]>;
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Logical Instructions
@@ -2683,14 +2732,12 @@ multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr,
}
// Alias bitwise logical operations using SSE logical ops on packed FP values.
-let mayLoad = 0 in {
- defm FsAND : sse12_fp_alias_pack_logical<0x54, "and", X86fand,
- SSE_BIT_ITINS_P>;
- defm FsOR : sse12_fp_alias_pack_logical<0x56, "or", X86for,
- SSE_BIT_ITINS_P>;
- defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor,
- SSE_BIT_ITINS_P>;
-}
+defm FsAND : sse12_fp_alias_pack_logical<0x54, "and", X86fand,
+ SSE_BIT_ITINS_P>;
+defm FsOR : sse12_fp_alias_pack_logical<0x56, "or", X86for,
+ SSE_BIT_ITINS_P>;
+defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor,
+ SSE_BIT_ITINS_P>;
let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in
defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", undef,
@@ -2794,27 +2841,23 @@ multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
SizeItins itins,
bit Is2Addr = 1> {
- let mayLoad = 0 in {
defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
v4f32, f128mem, memopv4f32, SSEPackedSingle, itins.s, Is2Addr>,
TB;
defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
v2f64, f128mem, memopv2f64, SSEPackedDouble, itins.d, Is2Addr>,
TB, OpSize;
- }
}
multiclass basic_sse12_fp_binop_p_y<bits<8> opc, string OpcodeStr,
SDNode OpNode,
SizeItins itins> {
- let mayLoad = 0 in {
- defm PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR256,
+ defm PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR256,
v8f32, f256mem, memopv8f32, SSEPackedSingle, itins.s, 0>,
TB;
- defm PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR256,
+ defm PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR256,
v4f64, f256mem, memopv4f64, SSEPackedDouble, itins.d, 0>,
TB, OpSize;
- }
}
multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
@@ -2924,7 +2967,7 @@ let Constraints = "$src1 = $dst" in {
}
}
-let isCommutable = 1, isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1 in {
defm VMAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S, 0>,
VEX_4V, VEX_LIG;
defm VMAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P, 0>,
@@ -2978,7 +3021,7 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
[(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
- Requires<[HasSSE1, OptForSize]>;
+ Requires<[UseSSE1, OptForSize]>;
def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (F32Int VR128:$src))], itins.rr>;
@@ -2992,7 +3035,7 @@ multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
!strconcat(OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
- let mayLoad = 1 in
+ let mayLoad = 1 in {
def SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1,f32mem:$src2),
!strconcat(OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
@@ -3000,6 +3043,7 @@ multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
(ins VR128:$src1, ssmem:$src2),
!strconcat(OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ }
}
/// sse1_fp_unop_p - SSE1 unops in packed form.
@@ -3062,7 +3106,7 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
[(set FR64:$dst, (OpNode (load addr:$src)))], itins.rm>, XD,
- Requires<[HasSSE2, OptForSize]>;
+ Requires<[UseSSE2, OptForSize]>;
def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (F64Int VR128:$src))], itins.rr>;
@@ -3072,20 +3116,20 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
}
/// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form.
+let hasSideEffects = 0 in
multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
- let neverHasSideEffects = 1 in {
def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
!strconcat(OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
- let mayLoad = 1 in
+ let mayLoad = 1 in {
def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1,f64mem:$src2),
!strconcat(OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
- }
def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, sdmem:$src2),
!strconcat(OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ }
}
/// sse2_fp_unop_p - SSE2 unops in vector forms.
@@ -3176,7 +3220,6 @@ let Predicates = [HasAVX] in {
SSE_RCPP>, VEX;
}
-let AddedComplexity = 1 in {
def : Pat<(f32 (fsqrt FR32:$src)),
(VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
def : Pat<(f32 (fsqrt (load addr:$src))),
@@ -3199,9 +3242,8 @@ def : Pat<(f32 (X86frcp FR32:$src)),
def : Pat<(f32 (X86frcp (load addr:$src))),
(VRCPSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
Requires<[HasAVX, OptForSize]>;
-}
-let Predicates = [HasAVX], AddedComplexity = 1 in {
+let Predicates = [HasAVX] in {
def : Pat<(int_x86_sse_sqrt_ss VR128:$src),
(COPY_TO_REGCLASS (VSQRTSSr (f32 (IMPLICIT_DEF)),
(COPY_TO_REGCLASS VR128:$src, FR32)),
@@ -3322,7 +3364,7 @@ def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
IIC_SSE_MOVNT>;
def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
- (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+ (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[UseSSE2]>;
// There is no AVX form for instructions below this point
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
@@ -3482,7 +3524,7 @@ def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}",
- [], IIC_SSE_MOVU_P_RR>, XS, Requires<[HasSSE2]>;
+ [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>;
// For Disassembler
let isCodeGenOnly = 1 in {
@@ -3492,7 +3534,7 @@ def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}",
- [], IIC_SSE_MOVU_P_RR>, XS, Requires<[HasSSE2]>;
+ [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>;
}
let canFoldAsLoad = 1, mayLoad = 1 in {
@@ -3504,7 +3546,7 @@ def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movdqu\t{$src, $dst|$dst, $src}",
[/*(set VR128:$dst, (loadv2i64 addr:$src))*/],
IIC_SSE_MOVU_P_RM>,
- XS, Requires<[HasSSE2]>;
+ XS, Requires<[UseSSE2]>;
}
let mayStore = 1 in {
@@ -3516,7 +3558,7 @@ def MOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}",
[/*(store (v2i64 VR128:$src), addr:$dst)*/],
IIC_SSE_MOVU_P_MR>,
- XS, Requires<[HasSSE2]>;
+ XS, Requires<[UseSSE2]>;
}
// Intrinsic forms of MOVDQU load and store
@@ -3530,7 +3572,7 @@ def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}",
[(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)],
IIC_SSE_MOVU_P_MR>,
- XS, Requires<[HasSSE2]>;
+ XS, Requires<[UseSSE2]>;
} // ExeDomain = SSEPackedInt
@@ -4028,7 +4070,7 @@ let Predicates = [HasAVX2] in {
(VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
}
-let Predicates = [HasSSE2] in {
+let Predicates = [UseSSE2] in {
def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
(PSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
@@ -4210,7 +4252,7 @@ let Predicates = [HasAVX2] in {
defm VPSHUFLW : sse2_pshuffle_y<"vpshuflw", v16i16, X86PShuflw>, XD, VEX;
}
-let Predicates = [HasSSE2] in {
+let Predicates = [UseSSE2] in {
let AddedComplexity = 5 in
defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, X86PShufd>, TB, OpSize;
@@ -4325,28 +4367,6 @@ let Constraints = "$src1 = $dst" in {
}
} // ExeDomain = SSEPackedInt
-// Patterns for using AVX1 instructions with integer vectors
-// Here to give AVX2 priority
-let Predicates = [HasAVX] in {
- def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
- (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)),
- (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
- (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)),
- (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
-
- def : Pat<(v4i64 (X86Unpckl VR256:$src1, (memopv4i64 addr:$src2))),
- (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)),
- (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4i64 (X86Unpckh VR256:$src1, (memopv4i64 addr:$src2))),
- (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)),
- (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
-}
-
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Extract and Insert
//===---------------------------------------------------------------------===//
@@ -4395,7 +4415,7 @@ let Predicates = [HasAVX] in {
}
let Constraints = "$src1 = $dst" in
- defm PINSRW : sse2_pinsrw, TB, OpSize, Requires<[HasSSE2]>;
+ defm PINSRW : sse2_pinsrw, TB, OpSize, Requires<[UseSSE2]>;
} // ExeDomain = SSEPackedInt
@@ -4556,7 +4576,7 @@ def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
// Move Packed Doubleword Int first element to Doubleword Int
//
def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
- "mov{d|q}\t{$src, $dst|$dst, $src}",
+ "vmov{d|q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
(iPTR 0)))],
IIC_SSE_MOVD_ToGP>,
@@ -4672,14 +4692,14 @@ let Predicates = [HasAVX] in {
}
// Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
- (v4i32 (scalar_to_vector GR32:$src)),(i32 0)))),
+ (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVZDI2PDIrr GR32:$src), sub_xmm)>;
def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
- (v2i64 (scalar_to_vector GR64:$src)),(i32 0)))),
+ (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
(SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrr GR64:$src), sub_xmm)>;
}
-let Predicates = [HasSSE2], AddedComplexity = 20 in {
+let Predicates = [UseSSE2], AddedComplexity = 20 in {
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
(MOVZDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
@@ -4719,7 +4739,7 @@ def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
[(set VR128:$dst,
(v2i64 (scalar_to_vector (loadi64 addr:$src))))],
IIC_SSE_MOVDQ>, XS,
- Requires<[HasSSE2]>; // SSE2 instruction with XS Prefix
+ Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix
//===---------------------------------------------------------------------===//
// Move Packed Quadword Int to Quadword Int
@@ -4762,7 +4782,7 @@ def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
(v2i64 (X86vzmovl (v2i64 (scalar_to_vector
(loadi64 addr:$src))))))],
IIC_SSE_MOVDQ>,
- XS, Requires<[HasSSE2]>;
+ XS, Requires<[UseSSE2]>;
let Predicates = [HasAVX], AddedComplexity = 20 in {
def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
@@ -4773,7 +4793,7 @@ let Predicates = [HasAVX], AddedComplexity = 20 in {
(VMOVZQI2PQIrm addr:$src)>;
}
-let Predicates = [HasSSE2], AddedComplexity = 20 in {
+let Predicates = [UseSSE2], AddedComplexity = 20 in {
def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
(MOVZQI2PQIrm addr:$src)>;
def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
@@ -4803,7 +4823,7 @@ def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))],
IIC_SSE_MOVQ_RR>,
- XS, Requires<[HasSSE2]>;
+ XS, Requires<[UseSSE2]>;
let AddedComplexity = 20 in
def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
@@ -4818,7 +4838,7 @@ def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
[(set VR128:$dst, (v2i64 (X86vzmovl
(loadv2i64 addr:$src))))],
IIC_SSE_MOVDQ>,
- XS, Requires<[HasSSE2]>;
+ XS, Requires<[UseSSE2]>;
}
let AddedComplexity = 20 in {
@@ -4828,7 +4848,7 @@ let AddedComplexity = 20 in {
def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
(VMOVZPQILo2PQIrr VR128:$src)>;
}
- let Predicates = [HasSSE2] in {
+ let Predicates = [UseSSE2] in {
def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
(MOVZPQILo2PQIrm addr:$src)>;
def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
@@ -4908,7 +4928,7 @@ let Predicates = [HasAVX] in {
(VMOVSLDUPYrm addr:$src)>;
}
-let Predicates = [HasSSE3] in {
+let Predicates = [UseSSE3] in {
def : Pat<(v4i32 (X86Movshdup VR128:$src)),
(MOVSHDUPrr VR128:$src)>;
def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))),
@@ -4977,7 +4997,7 @@ let Predicates = [HasAVX] in {
(VMOVDDUPYrr VR256:$src)>;
}
-let Predicates = [HasSSE3] in {
+let Predicates = [UseSSE3] in {
def : Pat<(X86Movddup (memopv2f64 addr:$src)),
(MOVDDUPrm addr:$src)>;
def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
@@ -5041,7 +5061,7 @@ let Predicates = [HasAVX] in {
f256mem, SSE_ALU_F64P, 0>, TB, OpSize, VEX_4V;
}
}
-let Constraints = "$src1 = $dst", Predicates = [HasSSE3] in {
+let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in {
let ExeDomain = SSEPackedSingle in
defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps", VR128,
f128mem, SSE_ALU_F32P>, TB, XD;
@@ -5424,7 +5444,7 @@ let Predicates = [HasAVX] in
defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V;
let Predicates = [HasAVX2] in
defm VPALIGN : ssse3_palign_y<"vpalignr", 0>, VEX_4V;
-let Constraints = "$src1 = $dst", Predicates = [HasSSSE3] in
+let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
defm PALIGN : ssse3_palign<"palignr">;
let Predicates = [HasAVX2] in {
@@ -5449,7 +5469,7 @@ def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
}
-let Predicates = [HasSSSE3] in {
+let Predicates = [UseSSSE3] in {
def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
@@ -5583,7 +5603,7 @@ let Predicates = [HasAVX] in {
(VPMOVZXDQrm addr:$src)>;
}
-let Predicates = [HasSSE41] in {
+let Predicates = [UseSSE41] in {
// Common patterns involving scalar load.
def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)),
(PMOVSXBWrm addr:$src)>;
@@ -5633,7 +5653,7 @@ let Predicates = [HasAVX] in {
def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>;
}
-let Predicates = [HasSSE41] in {
+let Predicates = [UseSSE41] in {
def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>;
def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>;
}
@@ -5704,7 +5724,7 @@ let Predicates = [HasAVX] in {
(VPMOVZXWQrm addr:$src)>;
}
-let Predicates = [HasSSE41] in {
+let Predicates = [UseSSE41] in {
// Common patterns involving scalar load
def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)),
(PMOVSXBDrm addr:$src)>;
@@ -5772,7 +5792,7 @@ let Predicates = [HasAVX] in {
(VPMOVZXBQrm addr:$src)>;
}
-let Predicates = [HasSSE41] in {
+let Predicates = [UseSSE41] in {
// Common patterns involving scalar load
def : Pat<(int_x86_sse41_pmovsxbq
(bitconvert (v4i32 (X86vzmovl
@@ -5918,7 +5938,7 @@ def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
imm:$src2))),
addr:$dst),
(EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
- Requires<[HasSSE41]>;
+ Requires<[UseSSE41]>;
//===----------------------------------------------------------------------===//
// SSE4.1 - Insert Instructions
@@ -6190,6 +6210,15 @@ let Predicates = [HasAVX] in {
(VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
def : Pat<(f64 (ftrunc FR64:$src)),
(VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;
+
+ def : Pat<(v4f32 (ffloor VR128:$src)),
+ (VROUNDPSr VR128:$src, (i32 0x1))>;
+ def : Pat<(v2f64 (ffloor VR128:$src)),
+ (VROUNDPDr VR128:$src, (i32 0x1))>;
+ def : Pat<(v8f32 (ffloor VR256:$src)),
+ (VROUNDYPSr VR256:$src, (i32 0x1))>;
+ def : Pat<(v4f64 (ffloor VR256:$src)),
+ (VROUNDYPDr VR256:$src, (i32 0x1))>;
}
defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128,
@@ -6199,26 +6228,33 @@ let Constraints = "$src1 = $dst" in
defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
int_x86_sse41_round_ss, int_x86_sse41_round_sd>;
-def : Pat<(ffloor FR32:$src),
- (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>;
-def : Pat<(f64 (ffloor FR64:$src)),
- (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>;
-def : Pat<(f32 (fnearbyint FR32:$src)),
- (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
-def : Pat<(f64 (fnearbyint FR64:$src)),
- (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
-def : Pat<(f32 (fceil FR32:$src)),
- (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>;
-def : Pat<(f64 (fceil FR64:$src)),
- (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>;
-def : Pat<(f32 (frint FR32:$src)),
- (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
-def : Pat<(f64 (frint FR64:$src)),
- (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
-def : Pat<(f32 (ftrunc FR32:$src)),
- (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
-def : Pat<(f64 (ftrunc FR64:$src)),
- (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;
+let Predicates = [UseSSE41] in {
+ def : Pat<(ffloor FR32:$src),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>;
+ def : Pat<(f64 (ffloor FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>;
+ def : Pat<(f32 (fnearbyint FR32:$src)),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
+ def : Pat<(f64 (fnearbyint FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
+ def : Pat<(f32 (fceil FR32:$src)),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>;
+ def : Pat<(f64 (fceil FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>;
+ def : Pat<(f32 (frint FR32:$src)),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
+ def : Pat<(f64 (frint FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
+ def : Pat<(f32 (ftrunc FR32:$src)),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
+ def : Pat<(f64 (ftrunc FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;
+
+ def : Pat<(v4f32 (ffloor VR128:$src)),
+ (ROUNDPSr VR128:$src, (i32 0x1))>;
+ def : Pat<(v2f64 (ffloor VR128:$src)),
+ (ROUNDPDr VR128:$src, (i32 0x1))>;
+}
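
Immediate 0x1 selects round-toward-negative-infinity, so these patterns let whole-vector floor calls select straight to (v)roundps / (v)roundpd instead of being scalarized. A hedged sketch of emitting the matching intrinsic; the module and builder plumbing are assumed:

// Hedged sketch: emit the vector floor intrinsic that the new ffloor
// patterns select to roundps $0x1 (or vroundps $0x1 under AVX).
Value *EmitVectorFloor(Module *M, IRBuilder<> &B, Value *V4F32) {
  Type *Tys[] = { V4F32->getType() };           // <4 x float>
  Function *Floor = Intrinsic::getDeclaration(M, Intrinsic::floor, Tys);
  return B.CreateCall(Floor, V4F32, "vfloor");
}
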
//===----------------------------------------------------------------------===//
// SSE4.1 - Packed Bit Test
@@ -6356,7 +6392,7 @@ multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
(bitconvert (memopv2i64 addr:$src2))))]>, OpSize;
}
-/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
+/// SS41I_binop_rm_int_y - Simple SSE 4.1 binary operator
multiclass SS41I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
Intrinsic IntId256> {
let isCommutable = 1 in
@@ -6705,7 +6741,7 @@ def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}",
def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}",
(PBLENDVBrm0 VR128:$dst, i128mem:$src2)>;
-let Predicates = [HasSSE41] in {
+let Predicates = [UseSSE41] in {
def : Pat<(v16i8 (vselect (v16i8 XMM0), (v16i8 VR128:$src1),
(v16i8 VR128:$src2))),
(PBLENDVBrr0 VR128:$src2, VR128:$src1)>;
@@ -6802,9 +6838,8 @@ multiclass pseudo_pcmpistrm<string asm> {
}
let Defs = [EFLAGS], usesCustomInserter = 1 in {
- let AddedComplexity = 1 in
- defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>;
- defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[HasSSE42]>;
+ defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>;
+ defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[UseSSE42]>;
}
let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1, Predicates = [HasAVX] in {
@@ -6840,9 +6875,8 @@ multiclass pseudo_pcmpestrm<string asm> {
}
let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
- let AddedComplexity = 1 in
- defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>;
- defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[HasSSE42]>;
+ defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>;
+ defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[UseSSE42]>;
}
let Predicates = [HasAVX],
@@ -7237,40 +7271,59 @@ def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
let Predicates = [HasAVX] in {
def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
- (i32 imm)),
+ (iPTR imm)),
(VINSERTF128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
- (i32 imm)),
+ (iPTR imm)),
(VINSERTF128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
+
+def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (memopv4f32 addr:$src2),
+ (iPTR imm)),
+ (VINSERTF128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (memopv2f64 addr:$src2),
+ (iPTR imm)),
+ (VINSERTF128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+}
+
+let Predicates = [HasAVX1Only] in {
def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
- (i32 imm)),
+ (iPTR imm)),
(VINSERTF128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
- (i32 imm)),
+ (iPTR imm)),
(VINSERTF128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
- (i32 imm)),
+ (iPTR imm)),
(VINSERTF128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
- (i32 imm)),
+ (iPTR imm)),
(VINSERTF128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2),
- (i32 imm)),
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (memopv2i64 addr:$src2),
+ (iPTR imm)),
(VINSERTF128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2),
- (i32 imm)),
+def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1),
+ (bc_v4i32 (memopv2i64 addr:$src2)),
+ (iPTR imm)),
(VINSERTF128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
- (i32 imm)),
+def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1),
+ (bc_v16i8 (memopv2i64 addr:$src2)),
+ (iPTR imm)),
+ (VINSERTF128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1),
+ (bc_v8i16 (memopv2i64 addr:$src2)),
+ (iPTR imm)),
(VINSERTF128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
}
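
These patterns map an IR 128-bit subvector insert onto vinsertf128, with the new memory forms folding an aligned load of the inserted half. A hedged C++ sketch of code that can select the reg/mem form, assuming an AVX build (-mavx); whether the load actually folds depends on its alignment:

    #include <immintrin.h>

    // Replace the upper 128-bit lane of an 8 x float accumulator with
    // four floats loaded from 16-byte-aligned memory.
    __m256 replace_high(__m256 acc, const float *p16 /* 16-byte aligned */) {
      return _mm256_insertf128_ps(acc, _mm_load_ps(p16), 1);
    }
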
@@ -7290,56 +7343,61 @@ def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs),
[]>, VEX;
}
-// Extract and store.
-let Predicates = [HasAVX] in {
- def : Pat<(alignedstore (int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2), addr:$dst),
- (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
- def : Pat<(alignedstore (int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2), addr:$dst),
- (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
- def : Pat<(alignedstore (int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2), addr:$dst),
- (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
-
- def : Pat<(int_x86_sse_storeu_ps addr:$dst, (int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2)),
- (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
- def : Pat<(int_x86_sse2_storeu_pd addr:$dst, (int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2)),
- (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
- def : Pat<(int_x86_sse2_storeu_dq addr:$dst, (bc_v16i8 (int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2))),
- (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
-}
-
// AVX1 patterns
let Predicates = [HasAVX] in {
-def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2),
- (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
-def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2),
- (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
-def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2),
- (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
-
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
(v4f32 (VEXTRACTF128rr
(v8f32 VR256:$src1),
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
(v2f64 (VEXTRACTF128rr
(v4f64 VR256:$src1),
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+
+def : Pat<(alignedstore (v4f32 (vextractf128_extract:$ext (v8f32 VR256:$src1),
+ (iPTR imm))), addr:$dst),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1,
+ (EXTRACT_get_vextractf128_imm VR128:$ext))>;
+def : Pat<(alignedstore (v2f64 (vextractf128_extract:$ext (v4f64 VR256:$src1),
+ (iPTR imm))), addr:$dst),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1,
+ (EXTRACT_get_vextractf128_imm VR128:$ext))>;
+}
+
+let Predicates = [HasAVX1Only] in {
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
(v2i64 (VEXTRACTF128rr
- (v4i64 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v4i64 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
(v4i32 (VEXTRACTF128rr
- (v8i32 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v8i32 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
(v8i16 (VEXTRACTF128rr
- (v16i16 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v16i16 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
(v16i8 (VEXTRACTF128rr
- (v32i8 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+ (v32i8 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+
+def : Pat<(alignedstore (v2i64 (vextractf128_extract:$ext (v4i64 VR256:$src1),
+ (iPTR imm))), addr:$dst),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1,
+ (EXTRACT_get_vextractf128_imm VR128:$ext))>;
+def : Pat<(alignedstore (v4i32 (vextractf128_extract:$ext (v8i32 VR256:$src1),
+ (iPTR imm))), addr:$dst),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1,
+ (EXTRACT_get_vextractf128_imm VR128:$ext))>;
+def : Pat<(alignedstore (v8i16 (vextractf128_extract:$ext (v16i16 VR256:$src1),
+ (iPTR imm))), addr:$dst),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1,
+ (EXTRACT_get_vextractf128_imm VR128:$ext))>;
+def : Pat<(alignedstore (v16i8 (vextractf128_extract:$ext (v32i8 VR256:$src1),
+ (iPTR imm))), addr:$dst),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1,
+ (EXTRACT_get_vextractf128_imm VR128:$ext))>;
}
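
The new alignedstore patterns let an extract-then-store pair select the memory form of vextractf128 directly instead of bouncing through a register. A small sketch, again assuming -mavx and a 16-byte-aligned destination:

    #include <immintrin.h>

    // Store the upper 128-bit lane of a 4 x double vector; with the
    // patterns above this can select vextractf128 $1, %ymm, (mem).
    void store_high(double *dst16 /* 16-byte aligned */, __m256d v) {
      _mm_store_pd(dst16, _mm256_extractf128_pd(v, 1));
    }
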
//===----------------------------------------------------------------------===//
@@ -7456,29 +7514,29 @@ def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
}
let Predicates = [HasAVX] in {
+def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1,
+ (memopv4f64 addr:$src2), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+}
+
+let Predicates = [HasAVX1Only] in {
def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
-def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
-def : Pat<(v8f32 (X86VPerm2x128 VR256:$src1,
- (memopv8f32 addr:$src2), (i8 imm:$imm))),
- (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1,
(bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1,
(memopv4i64 addr:$src2), (i8 imm:$imm))),
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
-def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1,
- (memopv4f64 addr:$src2), (i8 imm:$imm))),
- (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1,
(bc_v32i8 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
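
For reference, the vperm2f128 immediate selects whole 128-bit lanes: bits [1:0] pick the low result lane and bits [5:4] the high one (0 = src1.lo, 1 = src1.hi, 2 = src2.lo, 3 = src2.hi), and bits 3 and 7 zero the corresponding lane. A one-function sketch assuming -mavx:

    #include <immintrin.h>

    // Swap the two 128-bit lanes of a 4 x double vector: low result lane
    // comes from selector 1 (src1.hi), high from selector 0 (src1.lo).
    __m256d swap_lanes(__m256d a) {
      return _mm256_permute2f128_pd(a, a, 0x01);
    }
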
@@ -7665,19 +7723,22 @@ let Predicates = [HasAVX2] in {
}
// AVX1 broadcast patterns
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX1Only] in {
def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
(VBROADCASTSSYrm addr:$src)>;
def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
(VBROADCASTSDYrm addr:$src)>;
+def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
+ (VBROADCASTSSrm addr:$src)>;
+}
+
+let Predicates = [HasAVX] in {
def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))),
(VBROADCASTSSYrm addr:$src)>;
def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))),
(VBROADCASTSDYrm addr:$src)>;
def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))),
(VBROADCASTSSrm addr:$src)>;
-def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
- (VBROADCASTSSrm addr:$src)>;
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
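
AVX1 has no integer broadcast instructions, which is why the integer forms above move under HasAVX1Only and reuse vbroadcastss/vbroadcastsd, while the floating-point forms remain valid for any AVX level. A usage sketch, assuming -mavx:

    #include <immintrin.h>

    // Splat one float from memory across all eight lanes (vbroadcastss).
    __m256 splat8(const float *p) {
      return _mm256_broadcast_ss(p);
    }
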
@@ -7757,7 +7818,6 @@ defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, v4f64>, VEX_W;
//===----------------------------------------------------------------------===//
// VPERM2I128 - Permute Floating-Point Values in 128-bit chunks
//
-let AddedComplexity = 1 in {
def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, i8imm:$src3),
"vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
@@ -7768,9 +7828,8 @@ def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
"vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (memopv4i64 addr:$src2),
(i8 imm:$src3)))]>, VEX_4V;
-}
-let Predicates = [HasAVX2], AddedComplexity = 1 in {
+let Predicates = [HasAVX2] in {
def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
@@ -7805,23 +7864,43 @@ def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
[]>, VEX_4V;
}
-let Predicates = [HasAVX2], AddedComplexity = 1 in {
+let Predicates = [HasAVX2] in {
def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
- (i32 imm)),
+ (iPTR imm)),
(VINSERTI128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
- (i32 imm)),
+ (iPTR imm)),
(VINSERTI128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
- (i32 imm)),
+ (iPTR imm)),
(VINSERTI128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
- (i32 imm)),
+ (iPTR imm)),
(VINSERTI128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
+
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (memopv2i64 addr:$src2),
+ (iPTR imm)),
+ (VINSERTI128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1),
+ (bc_v4i32 (memopv2i64 addr:$src2)),
+ (iPTR imm)),
+ (VINSERTI128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1),
+ (bc_v16i8 (memopv2i64 addr:$src2)),
+ (iPTR imm)),
+ (VINSERTI128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1),
+ (bc_v8i16 (memopv2i64 addr:$src2)),
+ (iPTR imm)),
+ (VINSERTI128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
}
//===----------------------------------------------------------------------===//
@@ -7838,23 +7917,40 @@ def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
(ins i128mem:$dst, VR256:$src1, i8imm:$src2),
"vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX;
-let Predicates = [HasAVX2], AddedComplexity = 1 in {
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+let Predicates = [HasAVX2] in {
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
(v2i64 (VEXTRACTI128rr
(v4i64 VR256:$src1),
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
(v4i32 (VEXTRACTI128rr
(v8i32 VR256:$src1),
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
(v8i16 (VEXTRACTI128rr
(v16i16 VR256:$src1),
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
(v16i8 (VEXTRACTI128rr
(v32i8 VR256:$src1),
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+
+def : Pat<(alignedstore (v2i64 (vextractf128_extract:$ext (v4i64 VR256:$src1),
+ (iPTR imm))), addr:$dst),
+ (VEXTRACTI128mr addr:$dst, VR256:$src1,
+ (EXTRACT_get_vextractf128_imm VR128:$ext))>;
+def : Pat<(alignedstore (v4i32 (vextractf128_extract:$ext (v8i32 VR256:$src1),
+ (iPTR imm))), addr:$dst),
+ (VEXTRACTI128mr addr:$dst, VR256:$src1,
+ (EXTRACT_get_vextractf128_imm VR128:$ext))>;
+def : Pat<(alignedstore (v8i16 (vextractf128_extract:$ext (v16i16 VR256:$src1),
+ (iPTR imm))), addr:$dst),
+ (VEXTRACTI128mr addr:$dst, VR256:$src1,
+ (EXTRACT_get_vextractf128_imm VR128:$ext))>;
+def : Pat<(alignedstore (v16i8 (vextractf128_extract:$ext (v32i8 VR256:$src1),
+ (iPTR imm))), addr:$dst),
+ (VEXTRACTI128mr addr:$dst, VR256:$src1,
+ (EXTRACT_get_vextractf128_imm VR128:$ext))>;
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
index 7ac4cec..764aa5d 100644
--- a/lib/Target/X86/X86JITInfo.cpp
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -532,7 +532,7 @@ uintptr_t X86JITInfo::getPICJumpTableEntry(uintptr_t BB, uintptr_t Entry) {
#endif
}
-template<typename T> void addUnaligned(void *Pos, T Delta) {
+template<typename T> static void addUnaligned(void *Pos, T Delta) {
T Value;
std::memcpy(reinterpret_cast<char*>(&Value), reinterpret_cast<char*>(Pos),
sizeof(T));
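
Marking the helper static gives it internal linkage, so it no longer leaks a symbol out of this translation unit. For orientation, here is the complete read-modify-write shape of the helper as a sketch; the first two lines are visible in the hunk, and the write-back half, which falls outside it, is an assumption about the rest of the function:

    #include <cstring>

    // memcpy through a local avoids the undefined behavior of
    // dereferencing a potentially misaligned T*.
    template <typename T> static void addUnaligned(void *Pos, T Delta) {
      T Value;
      std::memcpy(&Value, Pos, sizeof(T));   // unaligned read
      Value += Delta;                        // apply the relocation delta
      std::memcpy(Pos, &Value, sizeof(T));   // unaligned write-back
    }
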
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 9c0ce4e..1c2ef25 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -377,12 +377,6 @@ ReSimplify:
case X86::SETB_C64r: LowerUnaryToTwoAddr(OutMI, X86::SBB64rr); break;
case X86::MOV8r0: LowerUnaryToTwoAddr(OutMI, X86::XOR8rr); break;
case X86::MOV32r0: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break;
- case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break;
- case X86::AVX_SET0PSY: LowerUnaryToTwoAddr(OutMI, X86::VXORPSYrr); break;
- case X86::AVX_SET0PDY: LowerUnaryToTwoAddr(OutMI, X86::VXORPDYrr); break;
- case X86::AVX_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDrr); break;
- case X86::AVX2_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDYrr);break;
- case X86::AVX2_SET0: LowerUnaryToTwoAddr(OutMI, X86::VPXORYrr); break;
case X86::MOV16r0:
LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV16r0 -> MOV32r0
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 877b8f6..3b4cfc4 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -522,7 +522,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
void
X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const{
+ int SPAdj, RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
unsigned i = 0;
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 9087852..0d7b664 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -346,6 +346,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
, HasVectorUAMem(false)
, HasCmpxchg16b(false)
, UseLeaForSP(false)
+ , HasSlowDivide(false)
, PostRAScheduler(false)
, stackAlignment(4)
// FIXME: this is a known good value for Yonah. How about others?
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 6841c5b..dde7e24 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -136,6 +136,10 @@ protected:
/// the stack pointer. This is an optimization for Intel Atom processors.
bool UseLeaForSP;
+ /// HasSlowDivide - True if smaller divides are significantly faster than
+ /// full divides and should be used when possible.
+ bool HasSlowDivide;
+
/// PostRAScheduler - True if using post-register-allocation scheduler.
bool PostRAScheduler;
@@ -198,6 +202,7 @@ public:
bool hasSSE42() const { return X86SSELevel >= SSE42; }
bool hasAVX() const { return X86SSELevel >= AVX; }
bool hasAVX2() const { return X86SSELevel >= AVX2; }
+ bool hasNoAVX() const { return X86SSELevel < AVX; }
bool hasSSE4A() const { return HasSSE4A; }
bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
@@ -205,7 +210,8 @@ public:
bool hasAES() const { return HasAES; }
bool hasPCLMUL() const { return HasPCLMUL; }
bool hasFMA() const { return HasFMA; }
- bool hasFMA4() const { return HasFMA4; }
+ // FIXME: Favor FMA when both are enabled. Is this the right thing to do?
+ bool hasFMA4() const { return HasFMA4 && !HasFMA; }
bool hasXOP() const { return HasXOP; }
bool hasMOVBE() const { return HasMOVBE; }
bool hasRDRAND() const { return HasRDRAND; }
@@ -219,6 +225,7 @@ public:
bool hasVectorUAMem() const { return HasVectorUAMem; }
bool hasCmpxchg16b() const { return HasCmpxchg16b; }
bool useLeaForSP() const { return UseLeaForSP; }
+ bool hasSlowDivide() const { return HasSlowDivide; }
bool isAtom() const { return X86ProcFamily == IntelAtom; }
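
A subtarget flag like HasSlowDivide is typically consulted during lowering to trade a full-width divide for a narrow one when the operands are known to be small. A self-contained model of that decision; the function and its guards are illustrative, not taken from this patch:

    #include <cstdint>

    // Illustrative: when full divides are slow, fall back to the cheap
    // 8-bit form whenever both operands are known to fit.
    uint32_t div32(uint32_t a, uint32_t b, bool hasSlowDivide) {
      if (hasSlowDivide && a <= 0xFFu && b <= 0xFFu && b != 0)
        return static_cast<uint8_t>(a) / static_cast<uint8_t>(b);
      return a / b;  // full-width divide
    }
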
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
index 80b75dc..449eed3 100644
--- a/lib/Target/X86/X86VZeroUpper.cpp
+++ b/lib/Target/X86/X86VZeroUpper.cpp
@@ -42,7 +42,6 @@ namespace {
private:
const TargetInstrInfo *TII; // Machine instruction info.
- MachineBasicBlock *MBB; // Current basic block
// Any YMM register live-in to this function?
bool FnHasLiveInYmm;
@@ -84,7 +83,7 @@ namespace {
// 2) All states must be clean for the result to be clean
// 3) If none above and one unknown, the result state is also unknown
//
- unsigned computeState(unsigned PrevState, unsigned CurState) {
+ static unsigned computeState(unsigned PrevState, unsigned CurState) {
if (PrevState == ST_INIT)
return CurState;
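
The merge rules spelled out in the comment above reduce to a few comparisons. A sketch of the whole function under those rules, with illustrative state values; rule 1 is elided by the hunk but presumably makes any dirty input yield a dirty result:

    enum { ST_CLEAN = 0, ST_DIRTY = 1, ST_INIT = 2, ST_UNKNOWN = 3 }; // values illustrative

    static unsigned computeState(unsigned PrevState, unsigned CurState) {
      if (PrevState == ST_INIT)
        return CurState;                                 // first edge seen
      if (PrevState == ST_DIRTY || CurState == ST_DIRTY)
        return ST_DIRTY;                                 // any dirty taints the merge
      if (PrevState == ST_CLEAN && CurState == ST_CLEAN)
        return ST_CLEAN;                                 // clean only if all clean
      return ST_UNKNOWN;                                 // otherwise unknown
    }
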
@@ -122,7 +121,7 @@ static bool checkFnHasLiveInYmm(MachineRegisterInfo &MRI) {
}
static bool hasYmmReg(MachineInstr *MI) {
- for (int i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg())
continue;
@@ -189,7 +188,6 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF,
MachineBasicBlock &BB) {
bool Changed = false;
unsigned BBNum = BB.getNumber();
- MBB = &BB;
// Don't process already solved BBs
if (BBSolved[BBNum])
@@ -207,7 +205,7 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF,
// The entry MBB for the function may set the initial state to dirty if
// the function receives any YMM incoming arguments
- if (MBB == MF.begin()) {
+ if (&BB == MF.begin()) {
EntryState = ST_CLEAN;
if (FnHasLiveInYmm)
EntryState = ST_DIRTY;
@@ -253,7 +251,7 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF,
// When unknown, only compute the information within the block to have
// it available in the exit if possible, but don't change the block.
if (EntryState != ST_UNKNOWN) {
- BuildMI(*MBB, I, dl, TII->get(X86::VZEROUPPER));
+ BuildMI(BB, I, dl, TII->get(X86::VZEROUPPER));
++NumVZU;
}
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index ae646a2..3e7666b 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -33,7 +33,7 @@ def XCoreBranchLink : SDNode<"XCoreISD::BL",SDT_XCoreBranchLink,
SDNPVariadic]>;
def XCoreRetsp : SDNode<"XCoreISD::RETSP", SDTBrind,
- [SDNPHasChain, SDNPOptInGlue]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPMayLoad]>;
def SDT_XCoreBR_JT : SDTypeProfile<0, 2,
[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
@@ -58,7 +58,7 @@ def cprelwrapper : SDNode<"XCoreISD::CPRelativeWrapper", SDT_XCoreAddress,
def SDT_XCoreStwsp : SDTypeProfile<0, 2, [SDTCisInt<1>]>;
def XCoreStwsp : SDNode<"XCoreISD::STWSP", SDT_XCoreStwsp,
- [SDNPHasChain]>;
+ [SDNPHasChain, SDNPMayStore]>;
// These are target-independent nodes, but have target-specific formats.
def SDT_XCoreCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index cdd0a08..be5855a 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -176,7 +176,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
#ifndef NDEBUG
DEBUG(errs() << "\nFunction : "
- << MF.getFunction()->getName() << "\n");
+ << MF.getName() << "\n");
DEBUG(errs() << "<--------->\n");
DEBUG(MI.print(errs()));
DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n");