Diffstat (limited to 'lib/Target/ARM')
-rw-r--r--  lib/Target/ARM/ARMAsmPrinter.cpp                     |   30
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp                  |  244
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.h                    |   11
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.cpp               |    4
-rw-r--r--  lib/Target/ARM/ARMCodeEmitter.cpp                    |    2
-rw-r--r--  lib/Target/ARM/ARMConstantIslandPass.cpp             | 1001
-rw-r--r--  lib/Target/ARM/ARMExpandPseudoInsts.cpp              |  148
-rw-r--r--  lib/Target/ARM/ARMFastISel.cpp                       |   94
-rw-r--r--  lib/Target/ARM/ARMFrameLowering.cpp                  |    6
-rw-r--r--  lib/Target/ARM/ARMHazardRecognizer.cpp               |   28
-rw-r--r--  lib/Target/ARM/ARMHazardRecognizer.h                 |    8
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp                   |   54
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp                   |   64
-rw-r--r--  lib/Target/ARM/ARMInstrFormats.td                    |  192
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td                       |  147
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td                       | 1379
-rw-r--r--  lib/Target/ARM/ARMInstrThumb.td                      |    8
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td                     |   73
-rw-r--r--  lib/Target/ARM/ARMInstrVFP.td                        |   62
-rw-r--r--  lib/Target/ARM/ARMLoadStoreOptimizer.cpp             |   13
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp                  |   24
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.h                    |    3
-rw-r--r--  lib/Target/ARM/ARMTargetObjectFile.cpp               |    1
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmParser.cpp            | 1143
-rw-r--r--  lib/Target/ARM/AsmParser/CMakeLists.txt              |    8
-rw-r--r--  lib/Target/ARM/AsmParser/LLVMBuild.txt               |    1
-rw-r--r--  lib/Target/ARM/CMakeLists.txt                        |   14
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassembler.cpp      |  171
-rw-r--r--  lib/Target/ARM/Disassembler/CMakeLists.txt           |    8
-rw-r--r--  lib/Target/ARM/Disassembler/LLVMBuild.txt            |    1
-rw-r--r--  lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp        |   26
-rw-r--r--  lib/Target/ARM/InstPrinter/ARMInstPrinter.h          |    6
-rw-r--r--  lib/Target/ARM/InstPrinter/CMakeLists.txt            |    5
-rw-r--r--  lib/Target/ARM/InstPrinter/LLVMBuild.txt             |    1
-rw-r--r--  lib/Target/ARM/LLVMBuild.txt                         |    4
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp        |   46
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp     |    2
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp  |    9
-rw-r--r--  lib/Target/ARM/MCTargetDesc/CMakeLists.txt           |    7
-rw-r--r--  lib/Target/ARM/MCTargetDesc/LLVMBuild.txt            |    1
-rw-r--r--  lib/Target/ARM/MLxExpansionPass.cpp                  |    8
-rw-r--r--  lib/Target/ARM/TargetInfo/CMakeLists.txt             |    6
-rw-r--r--  lib/Target/ARM/TargetInfo/LLVMBuild.txt              |    1
-rw-r--r--  lib/Target/ARM/Thumb1RegisterInfo.cpp                |    7
-rw-r--r--  lib/Target/ARM/Thumb2ITBlockPass.cpp                 |    8
-rw-r--r--  lib/Target/ARM/Thumb2SizeReduction.cpp               |   46
46 files changed, 3610 insertions(+), 1515 deletions(-)
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index bbca228..6ae287a 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -493,11 +493,21 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
return false;
}
- // These modifiers are not yet supported.
- case 'p': // The high single-precision register of a VFP double-precision
- // register.
case 'e': // The low doubleword register of a NEON quad register.
- case 'f': // The high doubleword register of a NEON quad register.
+ case 'f': { // The high doubleword register of a NEON quad register.
+ if (!MI->getOperand(OpNum).isReg())
+ return true;
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ if (!ARM::QPRRegClass.contains(Reg))
+ return true;
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ unsigned SubReg = TRI->getSubReg(Reg, ExtraCode[0] == 'e' ?
+ ARM::dsub_0 : ARM::dsub_1);
+ O << ARMInstPrinter::getRegisterName(SubReg);
+ return false;
+ }
+
+ // These modifiers are not yet supported.
case 'h': // A range of VFP/NEON registers suitable for VLD1/VST1.
case 'H': // The highest-numbered register of a pair.
return true;
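The 'e' and 'f' modifiers mirror GCC's inline-asm operand modifiers for NEON quad registers. A minimal user-level sketch of the behavior, assuming a NEON-capable target (the register assignments are illustrative, not guaranteed):

    #include <arm_neon.h>
    float32x4_t halves(float32x4_t a) {
      float32x4_t r;
      // If 'a' is allocated to q1, "%e1" prints d2 (dsub_0) and "%f1"
      // prints d3 (dsub_1). Operands that are not QPR registers make
      // PrintAsmOperand return true, i.e. report an error.
      asm("vadd.f32 %e0, %e1, %f1" : "=w"(r) : "w"(a));
      return r;
    }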
@@ -739,14 +749,14 @@ void ARMAsmPrinter::emitAttributes() {
}
// Signal various FP modes.
- if (!UnsafeFPMath) {
+ if (!TM.Options.UnsafeFPMath) {
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_denormal,
ARMBuildAttrs::Allowed);
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_exceptions,
ARMBuildAttrs::Allowed);
}
- if (NoInfsFPMath && NoNaNsFPMath)
+ if (TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath)
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model,
ARMBuildAttrs::Allowed);
else
@@ -759,7 +769,7 @@ void ARMAsmPrinter::emitAttributes() {
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_preserved, 1);
// Hard float. Use both S and D registers and conform to AAPCS-VFP.
- if (Subtarget->isAAPCS_ABI() && FloatABIType == FloatABI::Hard) {
+ if (Subtarget->isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard) {
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_HardFP_use, 3);
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_VFP_args, 1);
}
@@ -1069,7 +1079,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
}
// Try to figure out the unwinding opcode out of src / dst regs.
- if (MI->getDesc().mayStore()) {
+ if (MI->mayStore()) {
// Register saves.
assert(DstReg == ARM::SP &&
"Only stack pointer as a destination reg is supported");
@@ -1481,11 +1491,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
/// in the function. The first operand is the ID# for this instruction, the
/// second is the index into the MachineConstantPool that this is, the third
/// is the size in bytes of this constant pool entry.
+ /// The required alignment is specified on the basic block holding this MI.
unsigned LabelId = (unsigned)MI->getOperand(0).getImm();
unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex();
- EmitAlignment(2);
-
// Mark the constant pool entry as data if we're not already in a data
// region.
OutStreamer.EmitDataRegion();
@@ -1934,4 +1943,3 @@ extern "C" void LLVMInitializeARMAsmPrinter() {
RegisterAsmPrinter<ARMAsmPrinter> X(TheARMTarget);
RegisterAsmPrinter<ARMAsmPrinter> Y(TheThumbTarget);
}
-
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 9315348..8bf5475 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -146,7 +146,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
const MCInstrDesc &MCID = MI->getDesc();
unsigned NumOps = MCID.getNumOperands();
- bool isLoad = !MCID.mayStore();
+ bool isLoad = !MI->mayStore();
const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
const MachineOperand &Base = MI->getOperand(2);
const MachineOperand &Offset = MI->getOperand(NumOps-3);
@@ -439,6 +439,22 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
return false;
}
+bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const {
+ if (MI->isBundle()) {
+ MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+ while (++I != E && I->isInsideBundle()) {
+ int PIdx = I->findFirstPredOperandIdx();
+ if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
+ return true;
+ }
+ return false;
+ }
+
+ int PIdx = MI->findFirstPredOperandIdx();
+ return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
+}
+
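Thumb2 IT blocks are finalized into instruction bundles, so the predicate no longer appears on the BUNDLE header itself; a bundle counts as predicated when any bundled instruction carries a non-AL predicate. Roughly, in illustrative MI notation:

    BUNDLE
      t2IT 0, 8                        ; it eq
      %r0 = t2MOVi 1, pred:EQ, %cpsr   ; moveq r0, #1

isPredicated() on the BUNDLE header returns true here because the bundled t2MOVi carries an EQ predicate (findFirstPredOperandIdx() returns -1 for the t2IT marker, so it contributes nothing).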
bool ARMBaseInstrInfo::
PredicateInstruction(MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Pred) const {
@@ -491,7 +507,7 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const {
// FIXME: This confuses implicit_def with optional CPSR def.
const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.getImplicitDefs() && !MCID.hasOptionalDef())
+ if (!MCID.getImplicitDefs() && !MI->hasOptionalDef())
return false;
bool Found = false;
@@ -510,11 +526,10 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
/// By default, this returns true for every instruction with a
/// PredicateOperand.
bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isPredicable())
+ if (!MI->isPredicable())
return false;
- if ((MCID.TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
+ if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
ARMFunctionInfo *AFI =
MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
return AFI->isThumb2Function();
@@ -548,7 +563,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
if (MI->isLabel())
return 0;
- unsigned Opc = MI->getOpcode();
+ unsigned Opc = MI->getOpcode();
switch (Opc) {
case TargetOpcode::IMPLICIT_DEF:
case TargetOpcode::KILL:
@@ -556,6 +571,8 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
case TargetOpcode::EH_LABEL:
case TargetOpcode::DBG_VALUE:
return 0;
+ case TargetOpcode::BUNDLE:
+ return getInstBundleLength(MI);
case ARM::MOVi16_ga_pcrel:
case ARM::MOVTi16_ga_pcrel:
case ARM::t2MOVi16_ga_pcrel:
@@ -593,7 +610,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
unsigned NumOps = MCID.getNumOperands();
MachineOperand JTOP =
- MI->getOperand(NumOps - (MCID.isPredicable() ? 3 : 2));
+ MI->getOperand(NumOps - (MI->isPredicable() ? 3 : 2));
unsigned JTI = JTOP.getIndex();
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
assert(MJTI != 0);
@@ -622,6 +639,17 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
return 0; // Not reached
}
+unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const {
+ unsigned Size = 0;
+ MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+ while (++I != E && I->isInsideBundle()) {
+ assert(!I->isBundle() && "No nested bundle!");
+ Size += GetInstSizeInBytes(&*I);
+ }
+ return Size;
+}
+
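A bundle's size is just the sum of its members; the BUNDLE header itself occupies no bytes. For example, an IT block bundling three 16-bit Thumb instructions measures:

    BUNDLE               ; getInstBundleLength = 2 + 2 + 2 = 6 bytes
      t2IT 0, 8          ; 2 bytes
      tMOVi8 ...         ; 2 bytes
      tADDi8 ...         ; 2 bytes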
void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
@@ -845,7 +873,7 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
int &FrameIndex) const {
const MachineMemOperand *Dummy;
- return MI->getDesc().mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
+ return MI->mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
}
void ARMBaseInstrInfo::
@@ -991,7 +1019,7 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
int &FrameIndex) const {
const MachineMemOperand *Dummy;
- return MI->getDesc().mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
+ return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
}
bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{
@@ -1357,7 +1385,7 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
return false;
// Terminators and labels can't be scheduled around.
- if (MI->getDesc().isTerminator() || MI->isLabel())
+ if (MI->isTerminator() || MI->isLabel())
return true;
// Treat the start of the IT block as a scheduling boundary, but schedule
@@ -1762,8 +1790,7 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
// Check that CPSR isn't set between the comparison instruction and the one we
// want to change.
- MachineBasicBlock::const_iterator I = CmpInstr, E = MI,
- B = MI->getParent()->begin();
+ MachineBasicBlock::iterator I = CmpInstr,E = MI, B = MI->getParent()->begin();
// Early exit if CmpInstr is at the beginning of the BB.
if (I == B) return false;
@@ -1957,7 +1984,7 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
bool isKill = UseMI->getOperand(OpIdx).isKill();
unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(),
- *UseMI, UseMI->getDebugLoc(),
+ UseMI, UseMI->getDebugLoc(),
get(NewUseOpc), NewReg)
.addReg(Reg1, getKillRegState(isKill))
.addImm(SOImmValV1)));
@@ -2332,6 +2359,59 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return UseCycle;
}
+static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
+ const MachineInstr *MI, unsigned Reg,
+ unsigned &DefIdx, unsigned &Dist) {
+ Dist = 0;
+
+ MachineBasicBlock::const_iterator I = MI; ++I;
+ MachineBasicBlock::const_instr_iterator II =
+ llvm::prior(I.getInstrIterator());
+ assert(II->isInsideBundle() && "Empty bundle?");
+
+ int Idx = -1;
+ while (II->isInsideBundle()) {
+ Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI);
+ if (Idx != -1)
+ break;
+ --II;
+ ++Dist;
+ }
+
+ assert(Idx != -1 && "Cannot find bundled definition!");
+ DefIdx = Idx;
+ return II;
+}
+
+static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
+ const MachineInstr *MI, unsigned Reg,
+ unsigned &UseIdx, unsigned &Dist) {
+ Dist = 0;
+
+ MachineBasicBlock::const_instr_iterator II = MI; ++II;
+ assert(II->isInsideBundle() && "Empty bundle?");
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+
+ // FIXME: This doesn't properly handle multiple uses.
+ int Idx = -1;
+ while (II != E && II->isInsideBundle()) {
+ Idx = II->findRegisterUseOperandIdx(Reg, false, TRI);
+ if (Idx != -1)
+ break;
+ if (II->getOpcode() != ARM::t2IT)
+ ++Dist;
+ ++II;
+ }
+
+ if (Idx == -1) {
+ Dist = 0;
+ return 0;
+ }
+
+ UseIdx = Idx;
+ return II;
+}
+
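These helpers map a register def or use on a BUNDLE header to the concrete bundled instruction, returning in Dist how many bundled instructions were stepped over so the caller can discount the extra issue slots. A rough trace, with a hypothetical bundle whose middle member defines the queried register:

    BUNDLE %r0<def>, ...     ; query: which member defines r0?
      t2IT 0, 8
      %r0 = t2ADDrr ...      ; getBundledDefMI stops here, Dist = 1
      %r1 = t2MOVi ...       ; scanned first (backward from the end)

getBundledDefMI scans backward from the last instruction in the bundle, while getBundledUseMI scans forward and excludes t2IT from the distance count.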
int
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
const MachineInstr *DefMI, unsigned DefIdx,
@@ -2340,35 +2420,77 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
DefMI->isRegSequence() || DefMI->isImplicitDef())
return 1;
- const MCInstrDesc &DefMCID = DefMI->getDesc();
if (!ItinData || ItinData->isEmpty())
- return DefMCID.mayLoad() ? 3 : 1;
+ return DefMI->mayLoad() ? 3 : 1;
- const MCInstrDesc &UseMCID = UseMI->getDesc();
+ const MCInstrDesc *DefMCID = &DefMI->getDesc();
+ const MCInstrDesc *UseMCID = &UseMI->getDesc();
const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
- if (DefMO.getReg() == ARM::CPSR) {
+ unsigned Reg = DefMO.getReg();
+ if (Reg == ARM::CPSR) {
if (DefMI->getOpcode() == ARM::FMSTAT) {
// fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
return Subtarget.isCortexA9() ? 1 : 20;
}
// CPSR set and branch can be paired in the same cycle.
- if (UseMCID.isBranch())
+ if (UseMI->isBranch())
return 0;
+
+ // Otherwise it takes the instruction latency (generally one).
+ int Latency = getInstrLatency(ItinData, DefMI);
+
+ // For Thumb2 and -Os, prefer scheduling a CPSR-setting instruction close
+ // to its uses. Instructions scheduled between them may incur a code size
+ // penalty (they prevent use of the CPSR-setting 16-bit encodings).
+ if (Latency > 0 && Subtarget.isThumb2()) {
+ const MachineFunction *MF = DefMI->getParent()->getParent();
+ if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ --Latency;
+ }
+ return Latency;
}
unsigned DefAlign = DefMI->hasOneMemOperand()
? (*DefMI->memoperands_begin())->getAlignment() : 0;
unsigned UseAlign = UseMI->hasOneMemOperand()
? (*UseMI->memoperands_begin())->getAlignment() : 0;
- int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
- UseMCID, UseIdx, UseAlign);
+
+ unsigned DefAdj = 0;
+ if (DefMI->isBundle()) {
+ DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, Reg, DefIdx, DefAdj);
+ if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
+ DefMI->isRegSequence() || DefMI->isImplicitDef())
+ return 1;
+ DefMCID = &DefMI->getDesc();
+ }
+ unsigned UseAdj = 0;
+ if (UseMI->isBundle()) {
+ unsigned NewUseIdx;
+ const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI,
+ Reg, NewUseIdx, UseAdj);
+ if (NewUseMI) {
+ UseMI = NewUseMI;
+ UseIdx = NewUseIdx;
+ UseMCID = &UseMI->getDesc();
+ }
+ }
+
+ int Latency = getOperandLatency(ItinData, *DefMCID, DefIdx, DefAlign,
+ *UseMCID, UseIdx, UseAlign);
+ int Adj = DefAdj + UseAdj;
+ if (Adj) {
+ Latency -= (int)(DefAdj + UseAdj);
+ if (Latency < 1)
+ return 1;
+ }
if (Latency > 1 &&
(Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
// FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
// variants are one cycle cheaper.
- switch (DefMCID.getOpcode()) {
+ switch (DefMCID->getOpcode()) {
default: break;
case ARM::LDRrs:
case ARM::LDRBrs: {
@@ -2393,7 +2515,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
}
if (DefAlign < 8 && Subtarget.isCortexA9())
- switch (DefMCID.getOpcode()) {
+ switch (DefMCID->getOpcode()) {
default: break;
case ARM::VLD1q8:
case ARM::VLD1q16:
@@ -2413,12 +2535,18 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD2q8:
case ARM::VLD2q16:
case ARM::VLD2q32:
- case ARM::VLD2d8_UPD:
- case ARM::VLD2d16_UPD:
- case ARM::VLD2d32_UPD:
- case ARM::VLD2q8_UPD:
- case ARM::VLD2q16_UPD:
- case ARM::VLD2q32_UPD:
+ case ARM::VLD2d8wb_fixed:
+ case ARM::VLD2d16wb_fixed:
+ case ARM::VLD2d32wb_fixed:
+ case ARM::VLD2q8wb_fixed:
+ case ARM::VLD2q16wb_fixed:
+ case ARM::VLD2q32wb_fixed:
+ case ARM::VLD2d8wb_register:
+ case ARM::VLD2d16wb_register:
+ case ARM::VLD2d32wb_register:
+ case ARM::VLD2q8wb_register:
+ case ARM::VLD2q16wb_register:
+ case ARM::VLD2q32wb_register:
case ARM::VLD3d8:
case ARM::VLD3d16:
case ARM::VLD3d32:
@@ -2446,9 +2574,12 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD1DUPq8:
case ARM::VLD1DUPq16:
case ARM::VLD1DUPq32:
- case ARM::VLD1DUPq8_UPD:
- case ARM::VLD1DUPq16_UPD:
- case ARM::VLD1DUPq32_UPD:
+ case ARM::VLD1DUPq8wb_fixed:
+ case ARM::VLD1DUPq16wb_fixed:
+ case ARM::VLD1DUPq32wb_fixed:
+ case ARM::VLD1DUPq8wb_register:
+ case ARM::VLD1DUPq16wb_register:
+ case ARM::VLD1DUPq32wb_register:
case ARM::VLD2DUPd8:
case ARM::VLD2DUPd16:
case ARM::VLD2DUPd32:
@@ -2580,12 +2711,18 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD2q8Pseudo:
case ARM::VLD2q16Pseudo:
case ARM::VLD2q32Pseudo:
- case ARM::VLD2d8Pseudo_UPD:
- case ARM::VLD2d16Pseudo_UPD:
- case ARM::VLD2d32Pseudo_UPD:
- case ARM::VLD2q8Pseudo_UPD:
- case ARM::VLD2q16Pseudo_UPD:
- case ARM::VLD2q32Pseudo_UPD:
+ case ARM::VLD2d8PseudoWB_fixed:
+ case ARM::VLD2d16PseudoWB_fixed:
+ case ARM::VLD2d32PseudoWB_fixed:
+ case ARM::VLD2q8PseudoWB_fixed:
+ case ARM::VLD2q16PseudoWB_fixed:
+ case ARM::VLD2q32PseudoWB_fixed:
+ case ARM::VLD2d8PseudoWB_register:
+ case ARM::VLD2d16PseudoWB_register:
+ case ARM::VLD2d32PseudoWB_register:
+ case ARM::VLD2q8PseudoWB_register:
+ case ARM::VLD2q16PseudoWB_register:
+ case ARM::VLD2q32PseudoWB_register:
case ARM::VLD3d8Pseudo:
case ARM::VLD3d16Pseudo:
case ARM::VLD3d32Pseudo:
@@ -2621,9 +2758,12 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD1DUPq8Pseudo:
case ARM::VLD1DUPq16Pseudo:
case ARM::VLD1DUPq32Pseudo:
- case ARM::VLD1DUPq8Pseudo_UPD:
- case ARM::VLD1DUPq16Pseudo_UPD:
- case ARM::VLD1DUPq32Pseudo_UPD:
+ case ARM::VLD1DUPq8PseudoWB_fixed:
+ case ARM::VLD1DUPq16PseudoWB_fixed:
+ case ARM::VLD1DUPq32PseudoWB_fixed:
+ case ARM::VLD1DUPq8PseudoWB_register:
+ case ARM::VLD1DUPq16PseudoWB_register:
+ case ARM::VLD1DUPq32PseudoWB_register:
case ARM::VLD2DUPd8Pseudo:
case ARM::VLD2DUPd16Pseudo:
case ARM::VLD2DUPd32Pseudo:
@@ -2671,6 +2811,19 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return Latency;
}
+unsigned
+ARMBaseInstrInfo::getOutputLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *DepMI) const {
+ unsigned Reg = DefMI->getOperand(DefIdx).getReg();
+ if (DepMI->readsRegister(Reg, &getRegisterInfo()) || !isPredicated(DepMI))
+ return 1;
+
+ // If the second MI is predicated, then there is an implicit use dependency.
+ return getOperandLatency(ItinData, DefMI, DefIdx, DepMI,
+ DepMI->getNumOperands());
+}
+
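The reasoning: a predicated write is effectively a read-modify-write, since the old register value must survive when the predicate is false. So an output dependence on a predicated instruction behaves like a true dependence, e.g.:

    %r0 = MOVi 5, pred:AL           ; DefMI
    %r0 = MOVi 1, pred:EQ, %cpsr    ; DepMI keeps the old r0 when EQ is false

In that case the full operand latency of DefMI is charged rather than the usual 1-cycle cost of a plain output dependence.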
int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
unsigned *PredCost) const {
@@ -2681,6 +2834,17 @@ int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
if (!ItinData || ItinData->isEmpty())
return 1;
+ if (MI->isBundle()) {
+ int Latency = 0;
+ MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+ while (++I != E && I->isInsideBundle()) {
+ if (I->getOpcode() != ARM::t2IT)
+ Latency += getInstrLatency(ItinData, I, PredCost);
+ }
+ return Latency;
+ }
+
const MCInstrDesc &MCID = MI->getDesc();
unsigned Class = MCID.getSchedClass();
unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
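With bundles in the picture, the latency of a BUNDLE is approximated as the sum of its members' latencies, excluding the zero-issue t2IT marker. Sketch:

    BUNDLE                ; getInstrLatency = lat(tMOVi8) + lat(tADDi8)
      t2IT 0, 8           ; excluded
      tMOVi8 ...
      tADDi8 ...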
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 0f9f321..68e8208 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -69,10 +69,7 @@ public:
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
// Predication support.
- bool isPredicated(const MachineInstr *MI) const {
- int PIdx = MI->findFirstPredOperandIdx();
- return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
- }
+ bool isPredicated(const MachineInstr *MI) const;
ARMCC::CondCodes getPredicate(const MachineInstr *MI) const {
int PIdx = MI->findFirstPredOperandIdx();
@@ -213,12 +210,18 @@ public:
SDNode *DefNode, unsigned DefIdx,
SDNode *UseNode, unsigned UseIdx) const;
+ virtual unsigned getOutputLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *DepMI) const;
+
/// VFP/NEON execution domains.
std::pair<uint16_t, uint16_t>
getExecutionDomain(const MachineInstr *MI) const;
void setExecutionDomain(MachineInstr *MI, unsigned Domain) const;
private:
+ unsigned getInstBundleLength(const MachineInstr *MI) const;
+
int getVLDMDefCycle(const InstrItineraryData *ItinData,
const MCInstrDesc &DefMCID,
unsigned DefClass,
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 7c42342..8ee6ce2 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -631,7 +631,7 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
// 1. Dynamic stack realignment is explicitly disabled,
// 2. This is a Thumb1 function (it's not useful, so we don't bother), or
// 3. There are VLAs in the function and the base pointer is disabled.
- return (RealignStack && !AFI->isThumb1OnlyFunction() &&
+ return (MF.getTarget().Options.RealignStack && !AFI->isThumb1OnlyFunction() &&
(!MFI->hasVarSizedObjects() || EnableBasePointer));
}
@@ -649,7 +649,7 @@ needsStackRealignment(const MachineFunction &MF) const {
bool ARMBaseRegisterInfo::
cannotEliminateFrame(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- if (DisableFramePointerElim(MF) && MFI->adjustsStack())
+ if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->adjustsStack())
return true;
return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()
|| needsStackRealignment(MF);
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index d74ccfa..365f0bb 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -401,7 +401,7 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
MBB != E; ++MBB) {
MCE.StartMachineBasicBlock(MBB);
- for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
I != E; ++I)
emitInstruction(*I);
}
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 3e3a413..2039d41 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -26,6 +26,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -51,6 +52,43 @@ static cl::opt<bool>
AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true),
cl::desc("Adjust basic block layout to better use TB[BH]"));
+static cl::opt<bool>
+AlignConstantIslands("arm-align-constant-islands", cl::Hidden, cl::init(true),
+ cl::desc("Align constant islands in code"));
+
+/// UnknownPadding - Return the worst case padding that could result from
+/// unknown offset bits. This does not include alignment padding caused by
+/// known offset bits.
+///
+/// @param LogAlign log2(alignment)
+/// @param KnownBits Number of known low offset bits.
+static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
+ if (KnownBits < LogAlign)
+ return (1u << LogAlign) - (1u << KnownBits);
+ return 0;
+}
+
+/// WorstCaseAlign - Assuming only the low KnownBits bits in Offset are exact,
+/// add padding such that:
+///
+/// 1. The result is aligned to 1 << LogAlign.
+///
+/// 2. No other value of the unknown bits would require more padding.
+///
+/// This may add more padding than is required to satisfy just one of the
+/// constraints. It is necessary to compute alignment this way to guarantee
+/// that we don't underestimate the padding before an aligned block. If the
+/// real padding before a block is larger than we think, constant pool entries
+/// may go out of range.
+static inline unsigned WorstCaseAlign(unsigned Offset, unsigned LogAlign,
+ unsigned KnownBits) {
+ // Add the worst possible padding that the unknown bits could cause.
+ Offset += UnknownPadding(LogAlign, KnownBits);
+
+ // Then align the result.
+ return RoundUpToAlignment(Offset, 1u << LogAlign);
+}
+
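A small worked case of the two helpers: with LogAlign = 2 (4-byte alignment) and KnownBits = 1 (the offset is exact only modulo 2), the unknown bit can cost up to UnknownPadding(2, 1) = (1 << 2) - (1 << 1) = 2 bytes, so:

    WorstCaseAlign(10, 2, 1) = RoundUpToAlignment(10 + 2, 4) = 12

When KnownBits >= LogAlign there is no unknown padding and the computation reduces to a plain RoundUpToAlignment.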
namespace {
/// ARMConstantIslands - Due to limited PC-relative displacements, ARM
/// requires constant pool entries to be scattered among the instructions
@@ -64,16 +102,70 @@ namespace {
/// CPE - A constant pool entry that has been placed somewhere, which
/// tracks a list of users.
class ARMConstantIslands : public MachineFunctionPass {
- /// BBSizes - The size of each MachineBasicBlock in bytes of code, indexed
- /// by MBB Number. The two-byte pads required for Thumb alignment are
- /// counted as part of the following block (i.e., the offset and size for
- /// a padded block will both be ==2 mod 4).
- std::vector<unsigned> BBSizes;
+ /// BasicBlockInfo - Information about the offset and size of a single
+ /// basic block.
+ struct BasicBlockInfo {
+ /// Offset - Distance from the beginning of the function to the beginning
+ /// of this basic block.
+ ///
+ /// The offset is always aligned as required by the basic block.
+ unsigned Offset;
+
+ /// Size - Size of the basic block in bytes. If the block contains
+ /// inline assembly, this is a worst case estimate.
+ ///
+ /// The size does not include any alignment padding, whether from the
+ /// beginning of the block or from an aligned jump table at the end.
+ unsigned Size;
+
+ /// KnownBits - The number of low bits in Offset that are known to be
+ /// exact. The remaining bits of Offset are an upper bound.
+ uint8_t KnownBits;
+
+ /// Unalign - When non-zero, the block contains instructions (inline asm)
+ /// of unknown size. The real size may be smaller than Size bytes by a
+ /// multiple of 1 << Unalign.
+ uint8_t Unalign;
+
+ /// PostAlign - When non-zero, the block terminator contains a .align
+ /// directive, so the end of the block is aligned to 1 << PostAlign
+ /// bytes.
+ uint8_t PostAlign;
+
+ BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0),
+ PostAlign(0) {}
+
+ /// Compute the number of known offset bits internally to this block.
+ /// This number should be used to predict worst case padding when
+ /// splitting the block.
+ unsigned internalKnownBits() const {
+ return Unalign ? Unalign : KnownBits;
+ }
+
+ /// Compute the offset immediately following this block. If LogAlign is
+ /// specified, return the offset the successor block will get if it has
+ /// this alignment.
+ unsigned postOffset(unsigned LogAlign = 0) const {
+ unsigned PO = Offset + Size;
+ unsigned LA = std::max(unsigned(PostAlign), LogAlign);
+ if (!LA)
+ return PO;
+ // Add alignment padding from the terminator.
+ return WorstCaseAlign(PO, LA, internalKnownBits());
+ }
+
+ /// Compute the number of known low bits of postOffset. If this block
+ /// contains inline asm, the number of known bits drops to the
+ /// instruction alignment. An aligned terminator may increase the number
+ /// of known bits.
+ /// If LogAlign is given, also consider the alignment of the next block.
+ unsigned postKnownBits(unsigned LogAlign = 0) const {
+ return std::max(std::max(unsigned(PostAlign), LogAlign),
+ internalKnownBits());
+ }
+ };
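As a concrete reading of these fields, take a block with Offset = 0x100, Size = 0x36, KnownBits = 4, Unalign = 0, PostAlign = 0, queried for a successor that needs 4-byte alignment:

    postOffset(2)    = WorstCaseAlign(0x136, 2, 4) = 0x138  // KnownBits >= 2: just round up
    postKnownBits(2) = max(max(0, 2), 4)           = 4

Had the block contained Thumb inline asm (Unalign = 1), internalKnownBits() would drop to 1 and postOffset() would add 2 bytes of worst-case padding before rounding.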
- /// BBOffsets - the offset of each MBB in bytes, starting from 0.
- /// The two-byte pads required for Thumb alignment are counted as part of
- /// the following block.
- std::vector<unsigned> BBOffsets;
+ std::vector<BasicBlockInfo> BBInfo;
/// WaterList - A sorted list of basic blocks where islands could be placed
/// (i.e. blocks that don't fall through to the following block, due
@@ -162,9 +254,8 @@ namespace {
/// the branch fix up pass.
bool HasFarJump;
- /// HasInlineAsm - True if the function contains inline assembly.
- bool HasInlineAsm;
-
+ MachineFunction *MF;
+ MachineConstantPool *MCP;
const ARMInstrInfo *TII;
const ARMSubtarget *STI;
ARMFunctionInfo *AFI;
@@ -182,67 +273,65 @@ namespace {
}
private:
- void DoInitialPlacement(MachineFunction &MF,
- std::vector<MachineInstr*> &CPEMIs);
+ void DoInitialPlacement(std::vector<MachineInstr*> &CPEMIs);
CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
- void JumpTableFunctionScan(MachineFunction &MF);
- void InitialFunctionScan(MachineFunction &MF,
- const std::vector<MachineInstr*> &CPEMIs);
+ unsigned getCPELogAlign(const MachineInstr *CPEMI);
+ void JumpTableFunctionScan();
+ void InitialFunctionScan(const std::vector<MachineInstr*> &CPEMIs);
MachineBasicBlock *SplitBlockBeforeInstr(MachineInstr *MI);
void UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB);
- void AdjustBBOffsetsAfter(MachineBasicBlock *BB, int delta);
+ void AdjustBBOffsetsAfter(MachineBasicBlock *BB);
bool DecrementOldEntry(unsigned CPI, MachineInstr* CPEMI);
int LookForExistingCPEntry(CPUser& U, unsigned UserOffset);
bool LookForWater(CPUser&U, unsigned UserOffset, water_iterator &WaterIter);
void CreateNewWater(unsigned CPUserIndex, unsigned UserOffset,
MachineBasicBlock *&NewMBB);
- bool HandleConstantPoolUser(MachineFunction &MF, unsigned CPUserIndex);
+ bool HandleConstantPoolUser(unsigned CPUserIndex);
void RemoveDeadCPEMI(MachineInstr *CPEMI);
bool RemoveUnusedCPEntries();
bool CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
MachineInstr *CPEMI, unsigned Disp, bool NegOk,
bool DoDump = false);
bool WaterIsInRange(unsigned UserOffset, MachineBasicBlock *Water,
- CPUser &U);
- bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset,
- unsigned Disp, bool NegativeOK, bool IsSoImm = false);
+ CPUser &U, unsigned &Growth);
bool BBIsInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
- bool FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br);
- bool FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br);
- bool FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br);
+ bool FixUpImmediateBr(ImmBranch &Br);
+ bool FixUpConditionalBr(ImmBranch &Br);
+ bool FixUpUnconditionalBr(ImmBranch &Br);
bool UndoLRSpillRestore();
- bool OptimizeThumb2Instructions(MachineFunction &MF);
- bool OptimizeThumb2Branches(MachineFunction &MF);
- bool ReorderThumb2JumpTables(MachineFunction &MF);
- bool OptimizeThumb2JumpTables(MachineFunction &MF);
+ bool OptimizeThumb2Instructions();
+ bool OptimizeThumb2Branches();
+ bool ReorderThumb2JumpTables();
+ bool OptimizeThumb2JumpTables();
MachineBasicBlock *AdjustJTTargetBlockForward(MachineBasicBlock *BB,
MachineBasicBlock *JTBB);
+ void ComputeBlockSize(MachineBasicBlock *MBB);
unsigned GetOffsetOf(MachineInstr *MI) const;
void dumpBBs();
- void verify(MachineFunction &MF);
+ void verify();
+
+ bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset,
+ unsigned Disp, bool NegativeOK, bool IsSoImm = false);
+ bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset,
+ const CPUser &U) {
+ return OffsetIsInRange(UserOffset, TrialOffset,
+ U.MaxDisp, U.NegOk, U.IsSoImm);
+ }
};
char ARMConstantIslands::ID = 0;
}
/// verify - check BBOffsets, BBSizes, alignment of islands
-void ARMConstantIslands::verify(MachineFunction &MF) {
- assert(BBOffsets.size() == BBSizes.size());
- for (unsigned i = 1, e = BBOffsets.size(); i != e; ++i)
- assert(BBOffsets[i-1]+BBSizes[i-1] == BBOffsets[i]);
- if (!isThumb)
- return;
+void ARMConstantIslands::verify() {
#ifndef NDEBUG
- for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
MBBI != E; ++MBBI) {
MachineBasicBlock *MBB = MBBI;
- if (!MBB->empty() &&
- MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
- unsigned MBBId = MBB->getNumber();
- assert(HasInlineAsm ||
- (BBOffsets[MBBId]%4 == 0 && BBSizes[MBBId]%4 == 0) ||
- (BBOffsets[MBBId]%4 != 0 && BBSizes[MBBId]%4 != 0));
- }
+ unsigned Align = MBB->getAlignment();
+ unsigned MBBId = MBB->getNumber();
+ assert(BBInfo[MBBId].Offset % (1u << Align) == 0);
+ assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset);
}
for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
CPUser &U = CPUsers[i];
@@ -257,10 +346,16 @@ void ARMConstantIslands::verify(MachineFunction &MF) {
/// print block size and offset information - debugging
void ARMConstantIslands::dumpBBs() {
- for (unsigned J = 0, E = BBOffsets.size(); J !=E; ++J) {
- DEBUG(errs() << "block " << J << " offset " << BBOffsets[J]
- << " size " << BBSizes[J] << "\n");
- }
+ DEBUG({
+ for (unsigned J = 0, E = BBInfo.size(); J != E; ++J) {
+ const BasicBlockInfo &BBI = BBInfo[J];
+ dbgs() << format("%08x BB#%u\t", BBI.Offset, J)
+ << " kb=" << unsigned(BBI.KnownBits)
+ << " ua=" << unsigned(BBI.Unalign)
+ << " pa=" << unsigned(BBI.PostAlign)
+ << format(" size=%#x\n", BBInfo[J].Size);
+ }
+ });
}
/// createARMConstantIslandPass - returns an instance of the constpool
@@ -269,34 +364,38 @@ FunctionPass *llvm::createARMConstantIslandPass() {
return new ARMConstantIslands();
}
-bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
- MachineConstantPool &MCP = *MF.getConstantPool();
+bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ MCP = mf.getConstantPool();
- TII = (const ARMInstrInfo*)MF.getTarget().getInstrInfo();
- AFI = MF.getInfo<ARMFunctionInfo>();
- STI = &MF.getTarget().getSubtarget<ARMSubtarget>();
+ DEBUG(dbgs() << "***** ARMConstantIslands: "
+ << MCP->getConstants().size() << " CP entries, aligned to "
+ << MCP->getConstantPoolAlignment() << " bytes *****\n");
+
+ TII = (const ARMInstrInfo*)MF->getTarget().getInstrInfo();
+ AFI = MF->getInfo<ARMFunctionInfo>();
+ STI = &MF->getTarget().getSubtarget<ARMSubtarget>();
isThumb = AFI->isThumbFunction();
isThumb1 = AFI->isThumb1OnlyFunction();
isThumb2 = AFI->isThumb2Function();
HasFarJump = false;
- HasInlineAsm = false;
// Renumber all of the machine basic blocks in the function, guaranteeing that
// the numbers agree with the position of the block in the function.
- MF.RenumberBlocks();
+ MF->RenumberBlocks();
// Try to reorder and otherwise adjust the block layout to make good use
// of the TB[BH] instructions.
bool MadeChange = false;
if (isThumb2 && AdjustJumpTableBlocks) {
- JumpTableFunctionScan(MF);
- MadeChange |= ReorderThumb2JumpTables(MF);
+ JumpTableFunctionScan();
+ MadeChange |= ReorderThumb2JumpTables();
// Data is out of date, so clear it. It'll be re-computed later.
T2JumpTables.clear();
// Blocks may have shifted around. Keep the numbering up to date.
- MF.RenumberBlocks();
+ MF->RenumberBlocks();
}
// Thumb1 functions containing constant pools get 4-byte alignment.
@@ -304,16 +403,13 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// ARM and Thumb2 functions need to be 4-byte aligned.
if (!isThumb1)
- MF.EnsureAlignment(2); // 2 = log2(4)
+ MF->EnsureAlignment(2); // 2 = log2(4)
// Perform the initial placement of the constant pool entries. To start with,
// we put them all at the end of the function.
std::vector<MachineInstr*> CPEMIs;
- if (!MCP.isEmpty()) {
- DoInitialPlacement(MF, CPEMIs);
- if (isThumb1)
- MF.EnsureAlignment(2); // 2 = log2(4)
- }
+ if (!MCP->isEmpty())
+ DoInitialPlacement(CPEMIs);
/// The next UID to take is the first unused one.
AFI->initPICLabelUId(CPEMIs.size());
@@ -321,7 +417,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// Do the initial scan of the function, building up information about the
// sizes of each block, the location of all the water, and finding all of the
// constant pool users.
- InitialFunctionScan(MF, CPEMIs);
+ InitialFunctionScan(CPEMIs);
CPEMIs.clear();
DEBUG(dumpBBs());
@@ -333,9 +429,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// is no change.
unsigned NoCPIters = 0, NoBRIters = 0;
while (true) {
+ DEBUG(dbgs() << "Beginning CP iteration #" << NoCPIters << '\n');
bool CPChange = false;
for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
- CPChange |= HandleConstantPoolUser(MF, i);
+ CPChange |= HandleConstantPoolUser(i);
if (CPChange && ++NoCPIters > 30)
llvm_unreachable("Constant Island pass failed to converge!");
DEBUG(dumpBBs());
@@ -344,9 +441,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// appear as "new water" for the next iteration of constant pool placement.
NewWaterList.clear();
+ DEBUG(dbgs() << "Beginning BR iteration #" << NoBRIters << '\n');
bool BRChange = false;
for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
- BRChange |= FixUpImmediateBr(MF, ImmBranches[i]);
+ BRChange |= FixUpImmediateBr(ImmBranches[i]);
if (BRChange && ++NoBRIters > 30)
llvm_unreachable("Branch Fix Up pass failed to converge!");
DEBUG(dumpBBs());
@@ -358,10 +456,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// Shrink 32-bit Thumb2 branch, load, and store instructions.
if (isThumb2 && !STI->prefers32BitThumb())
- MadeChange |= OptimizeThumb2Instructions(MF);
+ MadeChange |= OptimizeThumb2Instructions();
// After a while, this might be made debug-only, but it is not expensive.
- verify(MF);
+ verify();
// If LR has been forced spilled and no far jump (i.e. BL) has been issued,
// undo the spill / restore of LR if possible.
@@ -376,10 +474,9 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
}
}
- DEBUG(errs() << '\n'; dumpBBs());
+ DEBUG(dbgs() << '\n'; dumpBBs());
- BBSizes.clear();
- BBOffsets.clear();
+ BBInfo.clear();
WaterList.clear();
CPUsers.clear();
CPEntries.clear();
@@ -392,37 +489,65 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
/// DoInitialPlacement - Perform the initial placement of the constant pool
/// entries. To start with, we put them all at the end of the function.
-void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF,
- std::vector<MachineInstr*> &CPEMIs) {
+void
+ARMConstantIslands::DoInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
// Create the basic block to hold the CPE's.
- MachineBasicBlock *BB = MF.CreateMachineBasicBlock();
- MF.push_back(BB);
+ MachineBasicBlock *BB = MF->CreateMachineBasicBlock();
+ MF->push_back(BB);
+
+ // MachineConstantPool measures alignment in bytes. We measure in log2(bytes).
+ unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment());
+
+ // Mark the basic block as required by the const-pool.
+ // If AlignConstantIslands isn't set, use 4-byte alignment for everything.
+ BB->setAlignment(AlignConstantIslands ? MaxAlign : 2);
+
+ // The function needs to be as aligned as the basic blocks. The linker may
+ // move functions around based on their alignment.
+ MF->EnsureAlignment(BB->getAlignment());
+
+ // Order the entries in BB by descending alignment. That ensures correct
+ // alignment of all entries as long as BB is sufficiently aligned. Keep
+ // track of the insertion point for each alignment. We are going to bucket
+ // sort the entries as they are created.
+ SmallVector<MachineBasicBlock::iterator, 8> InsPoint(MaxAlign + 1, BB->end());
// Add all of the constants from the constant pool to the end block, use an
// identity mapping of CPI's to CPE's.
- const std::vector<MachineConstantPoolEntry> &CPs =
- MF.getConstantPool()->getConstants();
+ const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants();
- const TargetData &TD = *MF.getTarget().getTargetData();
+ const TargetData &TD = *MF->getTarget().getTargetData();
for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
- // Verify that all constant pool entries are a multiple of 4 bytes. If not,
- // we would have to pad them out or something so that instructions stay
- // aligned.
- assert((Size & 3) == 0 && "CP Entry not multiple of 4 bytes!");
+ assert(Size >= 4 && "Too small constant pool entry");
+ unsigned Align = CPs[i].getAlignment();
+ assert(isPowerOf2_32(Align) && "Invalid alignment");
+ // Verify that all constant pool entries are a multiple of their alignment.
+ // If not, we would have to pad them out so that instructions stay aligned.
+ assert((Size % Align) == 0 && "CP Entry not multiple of alignment!");
+
+ // Insert CONSTPOOL_ENTRY before entries with a smaller alignment.
+ unsigned LogAlign = Log2_32(Align);
+ MachineBasicBlock::iterator InsAt = InsPoint[LogAlign];
MachineInstr *CPEMI =
- BuildMI(BB, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
+ BuildMI(*BB, InsAt, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
.addImm(i).addConstantPoolIndex(i).addImm(Size);
CPEMIs.push_back(CPEMI);
+ // Ensure that future entries with higher alignment get inserted before
+ // CPEMI. This is bucket sort with iterators.
+ for (unsigned a = LogAlign + 1; a <= MaxAlign; ++a)
+ if (InsPoint[a] == InsAt)
+ InsPoint[a] = CPEMI;
+
// Add a new CPEntry, but no corresponding CPUser yet.
std::vector<CPEntry> CPEs;
CPEs.push_back(CPEntry(CPEMI, i));
CPEntries.push_back(CPEs);
++NumCPEs;
- DEBUG(errs() << "Moved CPI#" << i << " to end of function as #" << i
- << "\n");
+ DEBUG(dbgs() << "Moved CPI#" << i << " to end of function\n");
}
+ DEBUG(BB->dump());
}
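The effect of the bucket sort, with hypothetical entries created in the order CPI#0 (align 4), CPI#1 (align 16), CPI#2 (align 4), CPI#3 (align 8):

    creation order : CPI#0(4)   CPI#1(16)  CPI#2(4)  CPI#3(8)
    island layout  : CPI#1(16)  CPI#3(8)   CPI#0(4)  CPI#2(4)

Because alignments descend through the island, every entry is naturally aligned as soon as the island block itself is aligned to MaxAlign.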
/// BBHasFallthrough - Return true if the specified basic block can fallthrough
@@ -458,17 +583,33 @@ ARMConstantIslands::CPEntry
return NULL;
}
+/// getCPELogAlign - Returns the required alignment of the constant pool entry
+/// represented by CPEMI. Alignment is measured in log2(bytes) units.
+unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) {
+ assert(CPEMI && CPEMI->getOpcode() == ARM::CONSTPOOL_ENTRY);
+
+ // Everything is 4-byte aligned unless AlignConstantIslands is set.
+ if (!AlignConstantIslands)
+ return 2;
+
+ unsigned CPI = CPEMI->getOperand(1).getIndex();
+ assert(CPI < MCP->getConstants().size() && "Invalid constant pool index.");
+ unsigned Align = MCP->getConstants()[CPI].getAlignment();
+ assert(isPowerOf2_32(Align) && "Invalid CPE alignment");
+ return Log2_32(Align);
+}
+
/// JumpTableFunctionScan - Do a scan of the function, building up
/// information about the sizes of each block and the locations of all
/// the jump tables.
-void ARMConstantIslands::JumpTableFunctionScan(MachineFunction &MF) {
- for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+void ARMConstantIslands::JumpTableFunctionScan() {
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
MBBI != E; ++MBBI) {
MachineBasicBlock &MBB = *MBBI;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I)
- if (I->getDesc().isBranch() && I->getOpcode() == ARM::t2BR_JT)
+ if (I->isBranch() && I->getOpcode() == ARM::t2BR_JT)
T2JumpTables.push_back(I);
}
}
@@ -476,23 +617,27 @@ void ARMConstantIslands::JumpTableFunctionScan(MachineFunction &MF) {
/// InitialFunctionScan - Do the initial scan of the function, building up
/// information about the sizes of each block, the location of all the water,
/// and finding all of the constant pool users.
-void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
- const std::vector<MachineInstr*> &CPEMIs) {
- // First thing, see if the function has any inline assembly in it. If so,
- // we have to be conservative about alignment assumptions, as we don't
- // know for sure the size of any instructions in the inline assembly.
- for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
- MBBI != E; ++MBBI) {
- MachineBasicBlock &MBB = *MBBI;
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I)
- if (I->getOpcode() == ARM::INLINEASM)
- HasInlineAsm = true;
- }
+void ARMConstantIslands::
+InitialFunctionScan(const std::vector<MachineInstr*> &CPEMIs) {
+ BBInfo.clear();
+ BBInfo.resize(MF->getNumBlockIDs());
+
+ // First thing, compute the size of all basic blocks, and see if the function
+ // has any inline assembly in it. If so, we have to be conservative about
+ // alignment assumptions, as we don't know for sure the size of any
+ // instructions in the inline assembly.
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
+ ComputeBlockSize(I);
+
+ // The known bits of the entry block offset are determined by the function
+ // alignment.
+ BBInfo.front().KnownBits = MF->getAlignment();
+
+ // Compute block offsets and known bits.
+ AdjustBBOffsetsAfter(MF->begin());
// Now go back through the instructions and build up our data structures.
- unsigned Offset = 0;
- for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
MBBI != E; ++MBBI) {
MachineBasicBlock &MBB = *MBBI;
@@ -501,16 +646,13 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
if (!BBHasFallthrough(&MBB))
WaterList.push_back(&MBB);
- unsigned MBBSize = 0;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
if (I->isDebugValue())
continue;
- // Add instruction size to MBBSize.
- MBBSize += TII->GetInstSizeInBytes(I);
int Opc = I->getOpcode();
- if (I->getDesc().isBranch()) {
+ if (I->isBranch()) {
bool isCond = false;
unsigned Bits = 0;
unsigned Scale = 1;
@@ -518,18 +660,6 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
switch (Opc) {
default:
continue; // Ignore other JT branches
- case ARM::tBR_JTr:
- // A Thumb1 table jump may involve padding; for the offsets to
- // be right, functions containing these must be 4-byte aligned.
- // tBR_JTr expands to a mov pc followed by .align 2 and then the jump
- // table entries. So this code checks whether offset of tBR_JTr + 2
- // is aligned. That is held in Offset+MBBSize, which already has
- // 2 added in for the size of the mov pc instruction.
- MF.EnsureAlignment(2U);
- if ((Offset+MBBSize)%4 != 0 || HasInlineAsm)
- // FIXME: Add a pseudo ALIGN instruction instead.
- MBBSize += 2; // padding
- continue; // Does not get an entry in ImmBranches
case ARM::t2BR_JT:
T2JumpTables.push_back(I);
continue; // Does not get an entry in ImmBranches
@@ -647,18 +777,30 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
break;
}
}
+ }
+}
- // In thumb mode, if this block is a constpool island, we may need padding
- // so it's aligned on 4 byte boundary.
- if (isThumb &&
- !MBB.empty() &&
- MBB.begin()->getOpcode() == ARM::CONSTPOOL_ENTRY &&
- ((Offset%4) != 0 || HasInlineAsm))
- MBBSize += 2;
-
- BBSizes.push_back(MBBSize);
- BBOffsets.push_back(Offset);
- Offset += MBBSize;
+/// ComputeBlockSize - Compute the size and some alignment information for MBB.
+/// This function updates BBInfo directly.
+void ARMConstantIslands::ComputeBlockSize(MachineBasicBlock *MBB) {
+ BasicBlockInfo &BBI = BBInfo[MBB->getNumber()];
+ BBI.Size = 0;
+ BBI.Unalign = 0;
+ BBI.PostAlign = 0;
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+ ++I) {
+ BBI.Size += TII->GetInstSizeInBytes(I);
+ // For inline asm, GetInstSizeInBytes returns a conservative estimate.
+ // The actual size may be smaller, but still a multiple of the instr size.
+ if (I->isInlineAsm())
+ BBI.Unalign = isThumb ? 1 : 2;
+ }
+
+ // tBR_JTr contains a .align 2 directive.
+ if (!MBB->empty() && MBB->back().getOpcode() == ARM::tBR_JTr) {
+ BBI.PostAlign = 2;
+ MBB->getParent()->EnsureAlignment(2);
}
}
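To illustrate the Unalign bookkeeping: suppose a Thumb block holds 4 bytes of ordinary code plus an inline-asm blob whose conservative estimate is 8 bytes. Then Size = 12 while the true size may be 12, 10, 8, ... (smaller by any multiple of 1 << Unalign = 2), and internalKnownBits() claims at most 1 known low offset bit for anything laid out after the asm, which keeps the worst-case padding math sound.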
@@ -671,14 +813,7 @@ unsigned ARMConstantIslands::GetOffsetOf(MachineInstr *MI) const {
// The offset is composed of two things: the sum of the sizes of all MBB's
// before this instruction's block, and the offset from the start of the block
// it is in.
- unsigned Offset = BBOffsets[MBB->getNumber()];
-
- // If we're looking for a CONSTPOOL_ENTRY in Thumb, see if this block has
- // alignment padding, and compensate if so.
- if (isThumb &&
- MI->getOpcode() == ARM::CONSTPOOL_ENTRY &&
- (Offset%4 != 0 || HasInlineAsm))
- Offset += 2;
+ unsigned Offset = BBInfo[MBB->getNumber()].Offset;
// Sum instructions before MI in MBB.
for (MachineBasicBlock::iterator I = MBB->begin(); ; ++I) {
@@ -702,12 +837,9 @@ void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
// Renumber the MBB's to keep them consecutive.
NewBB->getParent()->RenumberBlocks(NewBB);
- // Insert a size into BBSizes to align it properly with the (newly
+ // Insert an entry into BBInfo to align it properly with the (newly
// renumbered) block numbers.
- BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
-
- // Likewise for BBOffsets.
- BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0);
+ BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
// Next, update WaterList. Specifically, we need to add NewMBB as having
// available water after it.
@@ -723,13 +855,12 @@ void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
/// account for this change and returns the newly created block.
MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
MachineBasicBlock *OrigBB = MI->getParent();
- MachineFunction &MF = *OrigBB->getParent();
// Create a new MBB for the code after the OrigBB.
MachineBasicBlock *NewBB =
- MF.CreateMachineBasicBlock(OrigBB->getBasicBlock());
+ MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
MachineFunction::iterator MBBI = OrigBB; ++MBBI;
- MF.insert(MBBI, NewBB);
+ MF->insert(MBBI, NewBB);
// Splice the instructions starting with MI over to NewBB.
NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
@@ -747,16 +878,7 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
++NumSplit;
// Update the CFG. All succs of OrigBB are now succs of NewBB.
- while (!OrigBB->succ_empty()) {
- MachineBasicBlock *Succ = *OrigBB->succ_begin();
- OrigBB->removeSuccessor(Succ);
- NewBB->addSuccessor(Succ);
-
- // This pass should be run after register allocation, so there should be no
- // PHI nodes to update.
- assert((Succ->empty() || !Succ->begin()->isPHI())
- && "PHI nodes should be eliminated by now!");
- }
+ NewBB->transferSuccessors(OrigBB);
// OrigBB branches to NewBB.
OrigBB->addSuccessor(NewBB);
@@ -764,14 +886,11 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
// Update internal data structures to account for the newly inserted MBB.
// This is almost the same as UpdateForInsertedWaterBlock, except that
// the Water goes after OrigBB, not NewBB.
- MF.RenumberBlocks(NewBB);
+ MF->RenumberBlocks(NewBB);
- // Insert a size into BBSizes to align it properly with the (newly
+ // Insert an entry into BBInfo to align it properly with the (newly
// renumbered) block numbers.
- BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
-
- // Likewise for BBOffsets.
- BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0);
+ BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
// Next, update WaterList. Specifically, we need to add OrigMBB as having
// available water after it (but not if it's already there, which happens
@@ -787,54 +906,19 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
WaterList.insert(IP, OrigBB);
NewWaterList.insert(OrigBB);
- unsigned OrigBBI = OrigBB->getNumber();
- unsigned NewBBI = NewBB->getNumber();
-
- int delta = isThumb1 ? 2 : 4;
-
// Figure out how large the OrigBB is. As the first half of the original
// block, it cannot contain a tablejump. The size includes
// the new jump we added. (It should be possible to do this without
// recounting everything, but it's very confusing, and this is rarely
// executed.)
- unsigned OrigBBSize = 0;
- for (MachineBasicBlock::iterator I = OrigBB->begin(), E = OrigBB->end();
- I != E; ++I)
- OrigBBSize += TII->GetInstSizeInBytes(I);
- BBSizes[OrigBBI] = OrigBBSize;
-
- // ...and adjust BBOffsets for NewBB accordingly.
- BBOffsets[NewBBI] = BBOffsets[OrigBBI] + BBSizes[OrigBBI];
+ ComputeBlockSize(OrigBB);
// Figure out how large the NewMBB is. As the second half of the original
// block, it may contain a tablejump.
- unsigned NewBBSize = 0;
- for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end();
- I != E; ++I)
- NewBBSize += TII->GetInstSizeInBytes(I);
- // Set the size of NewBB in BBSizes. It does not include any padding now.
- BBSizes[NewBBI] = NewBBSize;
-
- MachineInstr* ThumbJTMI = prior(NewBB->end());
- if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
- // We've added another 2-byte instruction before this tablejump, which
- // means we will always need padding if we didn't before, and vice versa.
-
- // The original offset of the jump instruction was:
- unsigned OrigOffset = BBOffsets[OrigBBI] + BBSizes[OrigBBI] - delta;
- if (OrigOffset%4 == 0) {
- // We had padding before and now we don't. No net change in code size.
- delta = 0;
- } else {
- // We didn't have padding before and now we do.
- BBSizes[NewBBI] += 2;
- delta = 4;
- }
- }
+ ComputeBlockSize(NewBB);
// All BBOffsets following these blocks must be modified.
- if (delta)
- AdjustBBOffsetsAfter(NewBB, delta);
+ AdjustBBOffsetsAfter(OrigBB);
return NewBB;
}
@@ -882,19 +966,44 @@ bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset,
/// WaterIsInRange - Returns true if a CPE placed after the specified
/// Water (a basic block) will be in range for the specific MI.
-
+///
+/// Compute how much the function will grow by inserting a CPE after Water.
bool ARMConstantIslands::WaterIsInRange(unsigned UserOffset,
- MachineBasicBlock* Water, CPUser &U) {
- unsigned MaxDisp = U.MaxDisp;
- unsigned CPEOffset = BBOffsets[Water->getNumber()] +
- BBSizes[Water->getNumber()];
-
- // If the CPE is to be inserted before the instruction, that will raise
- // the offset of the instruction.
- if (CPEOffset < UserOffset)
- UserOffset += U.CPEMI->getOperand(2).getImm();
-
- return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, U.NegOk, U.IsSoImm);
+ MachineBasicBlock* Water, CPUser &U,
+ unsigned &Growth) {
+ unsigned CPELogAlign = getCPELogAlign(U.CPEMI);
+ unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign);
+ unsigned NextBlockOffset, NextBlockAlignment;
+ MachineFunction::const_iterator NextBlock = Water;
+ if (++NextBlock == MF->end()) {
+ NextBlockOffset = BBInfo[Water->getNumber()].postOffset();
+ NextBlockAlignment = 0;
+ } else {
+ NextBlockOffset = BBInfo[NextBlock->getNumber()].Offset;
+ NextBlockAlignment = NextBlock->getAlignment();
+ }
+ unsigned Size = U.CPEMI->getOperand(2).getImm();
+ unsigned CPEEnd = CPEOffset + Size;
+
+ // The CPE may be able to hide in the alignment padding before the next
+ // block. It may also cause more padding to be required if it is more aligned
+ // than the next block.
+ if (CPEEnd > NextBlockOffset) {
+ Growth = CPEEnd - NextBlockOffset;
+ // Compute the padding that would go at the end of the CPE to align the next
+ // block.
+ Growth += OffsetToAlignment(CPEEnd, 1u << NextBlockAlignment);
+
+ // If the CPE is to be inserted before the instruction, that will raise
+ // the offset of the instruction. Also account for unknown alignment padding
+ // in blocks between CPE and the user.
+ if (CPEOffset < UserOffset)
+ UserOffset += Growth + UnknownPadding(MF->getAlignment(), CPELogAlign);
+ } else
+ // CPE fits in existing padding.
+ Growth = 0;
+
+ return OffsetIsInRange(UserOffset, CPEOffset, U);
}
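Worked numbers for the growth computation (all values hypothetical): say the water block's postOffset(CPELogAlign) is 0x104, the next block starts at 0x108 with 4-byte alignment, and the CPE is 8 bytes:

    CPEEnd = 0x104 + 8 = 0x10C > 0x108
    Growth = (0x10C - 0x108) + OffsetToAlignment(0x10C, 4) = 4 + 0 = 4

A 4-byte CPE in the same spot would end exactly at 0x108, hide entirely inside the existing padding, and report Growth = 0.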
/// CPEIsInRange - Returns true if the distance between specific MI and
@@ -903,14 +1012,20 @@ bool ARMConstantIslands::CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
MachineInstr *CPEMI, unsigned MaxDisp,
bool NegOk, bool DoDump) {
unsigned CPEOffset = GetOffsetOf(CPEMI);
- assert((CPEOffset%4 == 0 || HasInlineAsm) && "Misaligned CPE");
+ assert(CPEOffset % 4 == 0 && "Misaligned CPE");
if (DoDump) {
- DEBUG(errs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
- << " max delta=" << MaxDisp
- << " insn address=" << UserOffset
- << " CPE address=" << CPEOffset
- << " offset=" << int(CPEOffset-UserOffset) << "\t" << *MI);
+ DEBUG({
+ unsigned Block = MI->getParent()->getNumber();
+ const BasicBlockInfo &BBI = BBInfo[Block];
+ dbgs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
+ << " max delta=" << MaxDisp
+ << format(" insn address=%#x", UserOffset)
+ << " in BB#" << Block << ": "
+ << format("%#x-%x\t", BBI.Offset, BBI.postOffset()) << *MI
+ << format("CPE address=%#x offset=%+d: ", CPEOffset,
+ int(CPEOffset-UserOffset));
+ });
}
return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, NegOk);
@@ -933,55 +1048,17 @@ static bool BBIsJumpedOver(MachineBasicBlock *MBB) {
}
#endif // NDEBUG
-void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB,
- int delta) {
- MachineFunction::iterator MBBI = BB; MBBI = llvm::next(MBBI);
- for(unsigned i = BB->getNumber()+1, e = BB->getParent()->getNumBlockIDs();
- i < e; ++i) {
- BBOffsets[i] += delta;
- // If some existing blocks have padding, adjust the padding as needed, a
- // bit tricky. delta can be negative so don't use % on that.
- if (!isThumb)
- continue;
- MachineBasicBlock *MBB = MBBI;
- if (!MBB->empty() && !HasInlineAsm) {
- // Constant pool entries require padding.
- if (MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
- unsigned OldOffset = BBOffsets[i] - delta;
- if ((OldOffset%4) == 0 && (BBOffsets[i]%4) != 0) {
- // add new padding
- BBSizes[i] += 2;
- delta += 2;
- } else if ((OldOffset%4) != 0 && (BBOffsets[i]%4) == 0) {
- // remove existing padding
- BBSizes[i] -= 2;
- delta -= 2;
- }
- }
- // Thumb1 jump tables require padding. They should be at the end;
- // following unconditional branches are removed by AnalyzeBranch.
- // tBR_JTr expands to a mov pc followed by .align 2 and then the jump
- // table entries. So this code checks whether offset of tBR_JTr
- // is aligned; if it is, the offset of the jump table following the
- // instruction will not be aligned, and we need padding.
- MachineInstr *ThumbJTMI = prior(MBB->end());
- if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
- unsigned NewMIOffset = GetOffsetOf(ThumbJTMI);
- unsigned OldMIOffset = NewMIOffset - delta;
- if ((OldMIOffset%4) == 0 && (NewMIOffset%4) != 0) {
- // remove existing padding
- BBSizes[i] -= 2;
- delta -= 2;
- } else if ((OldMIOffset%4) != 0 && (NewMIOffset%4) == 0) {
- // add new padding
- BBSizes[i] += 2;
- delta += 2;
- }
- }
- if (delta==0)
- return;
- }
- MBBI = llvm::next(MBBI);
+void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB) {
+ for(unsigned i = BB->getNumber() + 1, e = MF->getNumBlockIDs(); i < e; ++i) {
+ // Get the offset and known bits at the end of the layout predecessor.
+ // Include the alignment of the current block.
+ unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment();
+ unsigned Offset = BBInfo[i - 1].postOffset(LogAlign);
+ unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign);
+
+ // This is where block i begins.
+ BBInfo[i].Offset = Offset;
+ BBInfo[i].KnownBits = KnownBits;
}
}
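
AdjustBBOffsetsAfter now delegates the padding arithmetic to the per-block records it walks. A rough sketch of that record; the field names are illustrative and the real struct (defined earlier in this patch) carries more state:

  struct BasicBlockInfo {          // illustrative, not the full definition
    unsigned Offset;               // address of the block's first instruction
    unsigned Size;                 // bytes occupied by the block
    unsigned KnownBits;            // log2 of provably-zero low bits of Offset
    // Safe upper bound on where the next block can start once padding for
    // a 1 << LogAlign boundary is accounted for.
    unsigned postOffset(unsigned LogAlign = 0) const {
      unsigned PO = Offset + Size;
      if (KnownBits >= LogAlign)   // alignment provable: no padding appears
        return PO;
      return PO + (1u << LogAlign) - (1u << KnownBits); // worst-case pad
    }
  };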
@@ -1016,7 +1093,7 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset)
// Check to see if the CPE is already in-range.
if (CPEIsInRange(UserMI, UserOffset, CPEMI, U.MaxDisp, U.NegOk, true)) {
- DEBUG(errs() << "In range\n");
+ DEBUG(dbgs() << "In range\n");
return 1;
}
@@ -1031,7 +1108,7 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset)
if (CPEs[i].CPEMI == NULL)
continue;
if (CPEIsInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.MaxDisp, U.NegOk)) {
- DEBUG(errs() << "Replacing CPE#" << CPI << " with CPE#"
+ DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#"
<< CPEs[i].CPI << "\n");
// Point the CPUser node to the replacement
U.CPEMI = CPEs[i].CPEMI;
@@ -1079,10 +1156,9 @@ bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset,
if (WaterList.empty())
return false;
- bool FoundWaterThatWouldPad = false;
- water_iterator IPThatWouldPad;
- for (water_iterator IP = prior(WaterList.end()),
- B = WaterList.begin();; --IP) {
+ unsigned BestGrowth = ~0u;
+ for (water_iterator IP = prior(WaterList.end()), B = WaterList.begin();;
+ --IP) {
MachineBasicBlock* WaterBB = *IP;
// Check if water is in range and is either at a lower address than the
// current "high water mark" or a new water block that was created since
@@ -1092,31 +1168,24 @@ bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset,
// should be relatively uncommon and when it does happen, we want to be
// sure to take advantage of it for all the CPEs near that block, so that
// we don't insert more branches than necessary.
- if (WaterIsInRange(UserOffset, WaterBB, U) &&
+ unsigned Growth;
+ if (WaterIsInRange(UserOffset, WaterBB, U, Growth) &&
(WaterBB->getNumber() < U.HighWaterMark->getNumber() ||
- NewWaterList.count(WaterBB))) {
- unsigned WBBId = WaterBB->getNumber();
- if (isThumb &&
- (BBOffsets[WBBId] + BBSizes[WBBId])%4 != 0) {
- // This is valid Water, but would introduce padding. Remember
- // it in case we don't find any Water that doesn't do this.
- if (!FoundWaterThatWouldPad) {
- FoundWaterThatWouldPad = true;
- IPThatWouldPad = IP;
- }
- } else {
- WaterIter = IP;
+ NewWaterList.count(WaterBB)) && Growth < BestGrowth) {
+ // This is the least amount of required padding seen so far.
+ BestGrowth = Growth;
+ WaterIter = IP;
+ DEBUG(dbgs() << "Found water after BB#" << WaterBB->getNumber()
+ << " Growth=" << Growth << '\n');
+
+ // Keep looking unless it is perfect.
+ if (BestGrowth == 0)
return true;
- }
}
if (IP == B)
break;
}
- if (FoundWaterThatWouldPad) {
- WaterIter = IPThatWouldPad;
- return true;
- }
- return false;
+ return BestGrowth != ~0u;
}
/// CreateNewWater - No existing WaterList entry will work for
@@ -1132,114 +1201,143 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
CPUser &U = CPUsers[CPUserIndex];
MachineInstr *UserMI = U.MI;
MachineInstr *CPEMI = U.CPEMI;
+ unsigned CPELogAlign = getCPELogAlign(CPEMI);
MachineBasicBlock *UserMBB = UserMI->getParent();
- unsigned OffsetOfNextBlock = BBOffsets[UserMBB->getNumber()] +
- BBSizes[UserMBB->getNumber()];
- assert(OffsetOfNextBlock== BBOffsets[UserMBB->getNumber()+1]);
+ const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()];
// If the block does not end in an unconditional branch already, and if the
// end of the block is within range, make new water there. (The addition
// below is for the unconditional branch we will be adding: 4 bytes on ARM +
// Thumb2, 2 on Thumb1. Possible Thumb1 alignment padding is accounted for
// inside OffsetIsInRange.)
- if (BBHasFallthrough(UserMBB) &&
- OffsetIsInRange(UserOffset, OffsetOfNextBlock + (isThumb1 ? 2: 4),
- U.MaxDisp, U.NegOk, U.IsSoImm)) {
- DEBUG(errs() << "Split at end of block\n");
- if (&UserMBB->back() == UserMI)
- assert(BBHasFallthrough(UserMBB) && "Expected a fallthrough BB!");
- NewMBB = llvm::next(MachineFunction::iterator(UserMBB));
- // Add an unconditional branch from UserMBB to fallthrough block.
- // Record it for branch lengthening; this new branch will not get out of
- // range, but if the preceding conditional branch is out of range, the
- // targets will be exchanged, and the altered branch may be out of
- // range, so the machinery has to know about it.
- int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B;
- if (!isThumb)
- BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB);
- else
- BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB)
- .addImm(ARMCC::AL).addReg(0);
- unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
- ImmBranches.push_back(ImmBranch(&UserMBB->back(),
- MaxDisp, false, UncondBr));
- int delta = isThumb1 ? 2 : 4;
- BBSizes[UserMBB->getNumber()] += delta;
- AdjustBBOffsetsAfter(UserMBB, delta);
- } else {
- // What a big block. Find a place within the block to split it.
- // This is a little tricky on Thumb1 since instructions are 2 bytes
- // and constant pool entries are 4 bytes: if instruction I references
- // island CPE, and instruction I+1 references CPE', it will
- // not work well to put CPE as far forward as possible, since then
- // CPE' cannot immediately follow it (that location is 2 bytes
- // farther away from I+1 than CPE was from I) and we'd need to create
- // a new island. So, we make a first guess, then walk through the
- // instructions between the one currently being looked at and the
- // possible insertion point, and make sure any other instructions
- // that reference CPEs will be able to use the same island area;
- // if not, we back up the insertion point.
-
- // The 4 in the following is for the unconditional branch we'll be
- // inserting (allows for long branch on Thumb1). Alignment of the
- // island is handled inside OffsetIsInRange.
- unsigned BaseInsertOffset = UserOffset + U.MaxDisp -4;
- // This could point off the end of the block if we've already got
- // constant pool entries following this block; only the last one is
- // in the water list. Back past any possible branches (allow for a
- // conditional and a maximally long unconditional).
- if (BaseInsertOffset >= BBOffsets[UserMBB->getNumber()+1])
- BaseInsertOffset = BBOffsets[UserMBB->getNumber()+1] -
- (isThumb1 ? 6 : 8);
- unsigned EndInsertOffset = BaseInsertOffset +
- CPEMI->getOperand(2).getImm();
- MachineBasicBlock::iterator MI = UserMI;
- ++MI;
- unsigned CPUIndex = CPUserIndex+1;
- unsigned NumCPUsers = CPUsers.size();
- MachineInstr *LastIT = 0;
- for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
- Offset < BaseInsertOffset;
- Offset += TII->GetInstSizeInBytes(MI),
- MI = llvm::next(MI)) {
- if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
- CPUser &U = CPUsers[CPUIndex];
- if (!OffsetIsInRange(Offset, EndInsertOffset,
- U.MaxDisp, U.NegOk, U.IsSoImm)) {
- BaseInsertOffset -= (isThumb1 ? 2 : 4);
- EndInsertOffset -= (isThumb1 ? 2 : 4);
- }
- // This is overly conservative, as we don't account for CPEMIs
- // being reused within the block, but it doesn't matter much.
- EndInsertOffset += CPUsers[CPUIndex].CPEMI->getOperand(2).getImm();
- CPUIndex++;
- }
+ if (BBHasFallthrough(UserMBB)) {
+ // Size of branch to insert.
+ unsigned Delta = isThumb1 ? 2 : 4;
+ // End of UserBlock after adding a branch.
+ unsigned UserBlockEnd = UserBBI.postOffset() + Delta;
+ // Compute the offset where the CPE will begin.
+ unsigned CPEOffset = WorstCaseAlign(UserBlockEnd, CPELogAlign,
+ UserBBI.postKnownBits());
+
+ if (OffsetIsInRange(UserOffset, CPEOffset, U)) {
+ DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber()
+ << format(", expected CPE offset %#x\n", CPEOffset));
+ NewMBB = llvm::next(MachineFunction::iterator(UserMBB));
+ // Add an unconditional branch from UserMBB to fallthrough block. Record
+ // it for branch lengthening; this new branch will not get out of range,
+ // but if the preceding conditional branch is out of range, the targets
+ // will be exchanged, and the altered branch may be out of range, so the
+ // machinery has to know about it.
+ int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B;
+ if (!isThumb)
+ BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB);
+ else
+ BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB)
+ .addImm(ARMCC::AL).addReg(0);
+ unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
+ ImmBranches.push_back(ImmBranch(&UserMBB->back(),
+ MaxDisp, false, UncondBr));
+ BBInfo[UserMBB->getNumber()].Size += Delta;
+ AdjustBBOffsetsAfter(UserMBB);
+ return;
+ }
+ }
- // Remember the last IT instruction.
- if (MI->getOpcode() == ARM::t2IT)
- LastIT = MI;
+ // What a big block. Find a place within the block to split it. This is a
+ // little tricky on Thumb1 since instructions are 2 bytes and constant pool
+ // entries are 4 bytes: if instruction I references island CPE, and
+ // instruction I+1 references CPE', it will not work well to put CPE as far
+ // forward as possible, since then CPE' cannot immediately follow it (that
+ // location is 2 bytes farther away from I+1 than CPE was from I) and we'd
+ // need to create a new island. So, we make a first guess, then walk through
+ // the instructions between the one currently being looked at and the
+ // possible insertion point, and make sure any other instructions that
+ // reference CPEs will be able to use the same island area; if not, we back
+ // up the insertion point.
+
+ // Try to split the block so it's fully aligned. Compute the latest split
+ // point where we can add a 4-byte branch instruction, and then
+ // WorstCaseAlign to LogAlign.
+ unsigned LogAlign = MF->getAlignment();
+ assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry");
+ unsigned KnownBits = UserBBI.internalKnownBits();
+ unsigned UPad = UnknownPadding(LogAlign, KnownBits);
+ unsigned BaseInsertOffset = UserOffset + U.MaxDisp;
+ DEBUG(dbgs() << format("Split in middle of big block before %#x",
+ BaseInsertOffset));
+
+ // Account for alignment and unknown padding.
+ BaseInsertOffset &= ~((1u << LogAlign) - 1);
+ BaseInsertOffset -= UPad;
+
+ // The 4 in the following is for the unconditional branch we'll be inserting
+ // (allows for long branch on Thumb1). Alignment of the island is handled
+ // inside OffsetIsInRange.
+ BaseInsertOffset -= 4;
+
+ DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset)
+ << " la=" << LogAlign
+ << " kb=" << KnownBits
+ << " up=" << UPad << '\n');
+
+ // This could point off the end of the block if we've already got constant
+ // pool entries following this block; only the last one is in the water list.
+ // Back past any possible branches (allow for a conditional and a maximally
+ // long unconditional).
+ if (BaseInsertOffset >= BBInfo[UserMBB->getNumber()+1].Offset)
+ BaseInsertOffset = BBInfo[UserMBB->getNumber()+1].Offset -
+ (isThumb1 ? 6 : 8);
+ unsigned EndInsertOffset =
+ WorstCaseAlign(BaseInsertOffset + 4, LogAlign, KnownBits) +
+ CPEMI->getOperand(2).getImm();
+ MachineBasicBlock::iterator MI = UserMI;
+ ++MI;
+ unsigned CPUIndex = CPUserIndex+1;
+ unsigned NumCPUsers = CPUsers.size();
+ MachineInstr *LastIT = 0;
+ for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
+ Offset < BaseInsertOffset;
+ Offset += TII->GetInstSizeInBytes(MI),
+ MI = llvm::next(MI)) {
+ if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
+ CPUser &U = CPUsers[CPUIndex];
+ if (!OffsetIsInRange(Offset, EndInsertOffset, U)) {
+        // Shift insertion point by one unit of alignment so it is within reach.
+ BaseInsertOffset -= 1u << LogAlign;
+ EndInsertOffset -= 1u << LogAlign;
+ }
+ // This is overly conservative, as we don't account for CPEMIs being
+ // reused within the block, but it doesn't matter much. Also assume CPEs
+ // are added in order with alignment padding. We may eventually be able
+ // to pack the aligned CPEs better.
+ EndInsertOffset = RoundUpToAlignment(EndInsertOffset,
+ 1u << getCPELogAlign(U.CPEMI)) +
+ U.CPEMI->getOperand(2).getImm();
+ CPUIndex++;
}
- DEBUG(errs() << "Split in middle of big block\n");
- --MI;
+ // Remember the last IT instruction.
+ if (MI->getOpcode() == ARM::t2IT)
+ LastIT = MI;
+ }
- // Avoid splitting an IT block.
- if (LastIT) {
- unsigned PredReg = 0;
- ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg);
- if (CC != ARMCC::AL)
- MI = LastIT;
- }
- NewMBB = SplitBlockBeforeInstr(MI);
+ --MI;
+
+ // Avoid splitting an IT block.
+ if (LastIT) {
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg);
+ if (CC != ARMCC::AL)
+ MI = LastIT;
}
+ NewMBB = SplitBlockBeforeInstr(MI);
}
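
CreateNewWater leans on two helpers introduced by this patch plus a classic bit trick. A hedged sketch of what UnknownPadding and WorstCaseAlign plausibly compute, consistent with every call site above, together with the round-down idiom used on BaseInsertOffset:

  // Maximum padding that can appear when only KnownBits low bits of the
  // offset are known to be zero and 1 << LogAlign alignment is required.
  static unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
    if (KnownBits < LogAlign)
      return (1u << LogAlign) - (1u << KnownBits);
    return 0; // alignment already provable; no padding possible
  }

  // Pessimistic bound: assume worst-case padding, then round up.
  static unsigned WorstCaseAlign(unsigned Offset, unsigned LogAlign,
                                 unsigned KnownBits) {
    Offset += UnknownPadding(LogAlign, KnownBits);
    return (Offset + (1u << LogAlign) - 1) & ~((1u << LogAlign) - 1);
  }

  // The complementary round-down applied above to BaseInsertOffset:
  //   Offset & ~((1u << LogAlign) - 1) clears the low LogAlign bits,
  //   e.g. 0x2F7 & ~0xFu == 0x2F0 for LogAlign == 4.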
/// HandleConstantPoolUser - Analyze the specified user, checking to see if it
/// is out-of-range. If so, pick up the constant pool value and move it some
/// place in-range. Return true if we changed any addresses (thus must run
/// another pass of branch lengthening), false otherwise.
-bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
- unsigned CPUserIndex) {
+bool ARMConstantIslands::HandleConstantPoolUser(unsigned CPUserIndex) {
CPUser &U = CPUsers[CPUserIndex];
MachineInstr *UserMI = U.MI;
MachineInstr *CPEMI = U.CPEMI;
@@ -1260,11 +1358,11 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
unsigned ID = AFI->createPICLabelUId();
// Look for water where we can place this CPE.
- MachineBasicBlock *NewIsland = MF.CreateMachineBasicBlock();
+ MachineBasicBlock *NewIsland = MF->CreateMachineBasicBlock();
MachineBasicBlock *NewMBB;
water_iterator IP;
if (LookForWater(U, UserOffset, IP)) {
- DEBUG(errs() << "found water in range\n");
+ DEBUG(dbgs() << "Found water in range\n");
MachineBasicBlock *WaterBB = *IP;
// If the original WaterList entry was "new water" on this iteration,
@@ -1279,7 +1377,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
} else {
// No water found.
- DEBUG(errs() << "No water found\n");
+ DEBUG(dbgs() << "No water found\n");
CreateNewWater(CPUserIndex, UserOffset, NewMBB);
// SplitBlockBeforeInstr adds to WaterList, which is important when it is
@@ -1304,7 +1402,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
WaterList.erase(IP);
// Okay, we know we can put an island before NewMBB now, do it!
- MF.insert(NewMBB, NewIsland);
+ MF->insert(NewMBB, NewIsland);
// Update internal data structures to account for the newly inserted MBB.
UpdateForInsertedWaterBlock(NewIsland);
@@ -1320,13 +1418,12 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
++NumCPEs;
- BBOffsets[NewIsland->getNumber()] = BBOffsets[NewMBB->getNumber()];
- // Compensate for .align 2 in thumb mode.
- if (isThumb && (BBOffsets[NewIsland->getNumber()]%4 != 0 || HasInlineAsm))
- Size += 2;
+ // Mark the basic block as aligned as required by the const-pool entry.
+ NewIsland->setAlignment(getCPELogAlign(U.CPEMI));
+
// Increase the size of the island block to account for the new entry.
- BBSizes[NewIsland->getNumber()] += Size;
- AdjustBBOffsetsAfter(NewIsland, Size);
+ BBInfo[NewIsland->getNumber()].Size += Size;
+ AdjustBBOffsetsAfter(llvm::prior(MachineFunction::iterator(NewIsland)));
// Finally, change the CPI in the instruction operand to be ID.
for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
@@ -1335,8 +1432,8 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
break;
}
- DEBUG(errs() << " Moved CPE to #" << ID << " CPI=" << CPI
- << '\t' << *UserMI);
+ DEBUG(dbgs() << " Moved CPE to #" << ID << " CPI=" << CPI
+ << format(" offset=%#x\n", BBInfo[NewIsland->getNumber()].Offset));
return true;
}
@@ -1347,19 +1444,18 @@ void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) {
MachineBasicBlock *CPEBB = CPEMI->getParent();
unsigned Size = CPEMI->getOperand(2).getImm();
CPEMI->eraseFromParent();
- BBSizes[CPEBB->getNumber()] -= Size;
+ BBInfo[CPEBB->getNumber()].Size -= Size;
// All succeeding offsets have the current size value added in, fix this.
if (CPEBB->empty()) {
- // In thumb1 mode, the size of island may be padded by two to compensate for
- // the alignment requirement. Then it will now be 2 when the block is
- // empty, so fix this.
- // All succeeding offsets have the current size value added in, fix this.
- if (BBSizes[CPEBB->getNumber()] != 0) {
- Size += BBSizes[CPEBB->getNumber()];
- BBSizes[CPEBB->getNumber()] = 0;
- }
- }
- AdjustBBOffsetsAfter(CPEBB, -Size);
+ BBInfo[CPEBB->getNumber()].Size = 0;
+
+ // This block no longer needs to be aligned. <rdar://problem/10534709>.
+ CPEBB->setAlignment(0);
+ } else
+ // Entries are sorted by descending alignment, so realign from the front.
+ CPEBB->setAlignment(getCPELogAlign(CPEBB->begin()));
+
+ AdjustBBOffsetsAfter(CPEBB);
// An island has only one predecessor BB and one successor BB. Check if
// this BB's predecessor jumps directly to this BB's successor. This
// shouldn't happen currently.
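
A note on units for the setAlignment calls above: basic-block alignment is stored as a log2 value, so the argument names a power-of-two byte boundary:

  CPEBB->setAlignment(0); // no extra alignment (1-byte boundary)
  CPEBB->setAlignment(2); // 1u << 2 == 4 bytes, typical for 32-bit CPEs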
@@ -1390,9 +1486,9 @@ bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB,
unsigned MaxDisp) {
unsigned PCAdj = isThumb ? 4 : 8;
unsigned BrOffset = GetOffsetOf(MI) + PCAdj;
- unsigned DestOffset = BBOffsets[DestBB->getNumber()];
+ unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
- DEBUG(errs() << "Branch of destination BB#" << DestBB->getNumber()
+ DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber()
<< " from BB#" << MI->getParent()->getNumber()
<< " max delta=" << MaxDisp
<< " from " << GetOffsetOf(MI) << " to " << DestOffset
@@ -1411,7 +1507,7 @@ bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB,
/// FixUpImmediateBr - Fix up an immediate branch whose destination is too far
/// away to fit in its displacement field.
-bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br) {
+bool ARMConstantIslands::FixUpImmediateBr(ImmBranch &Br) {
MachineInstr *MI = Br.MI;
MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
@@ -1420,8 +1516,8 @@ bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br) {
return false;
if (!Br.isCond)
- return FixUpUnconditionalBr(MF, Br);
- return FixUpConditionalBr(MF, Br);
+ return FixUpUnconditionalBr(Br);
+ return FixUpConditionalBr(Br);
}
/// FixUpUnconditionalBr - Fix up an unconditional branch whose destination is
@@ -1429,7 +1525,7 @@ bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br) {
/// spilled in the epilogue, then we can use BL to implement a far jump.
/// Otherwise, add an intermediate branch instruction to a branch.
bool
-ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) {
+ARMConstantIslands::FixUpUnconditionalBr(ImmBranch &Br) {
MachineInstr *MI = Br.MI;
MachineBasicBlock *MBB = MI->getParent();
if (!isThumb1)
@@ -1438,12 +1534,12 @@ ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) {
// Use BL to implement far jump.
Br.MaxDisp = (1 << 21) * 2;
MI->setDesc(TII->get(ARM::tBfar));
- BBSizes[MBB->getNumber()] += 2;
- AdjustBBOffsetsAfter(MBB, 2);
+ BBInfo[MBB->getNumber()].Size += 2;
+ AdjustBBOffsetsAfter(MBB);
HasFarJump = true;
++NumUBrFixed;
- DEBUG(errs() << " Changed B to long jump " << *MI);
+ DEBUG(dbgs() << " Changed B to long jump " << *MI);
return true;
}
@@ -1452,7 +1548,7 @@ ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) {
/// far away to fit in its displacement field. It is converted to an inverse
/// conditional branch + an unconditional branch to the destination.
bool
-ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
+ARMConstantIslands::FixUpConditionalBr(ImmBranch &Br) {
MachineInstr *MI = Br.MI;
MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
@@ -1487,7 +1583,7 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
// b L1
MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
if (BBIsInRange(MI, NewDest, Br.MaxDisp)) {
- DEBUG(errs() << " Invert Bcc condition and swap its destination with "
+ DEBUG(dbgs() << " Invert Bcc condition and swap its destination with "
<< *BMI);
BMI->getOperand(0).setMBB(DestBB);
MI->getOperand(0).setMBB(NewDest);
@@ -1502,15 +1598,13 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
// No need for the branch to the next block. We're adding an unconditional
// branch to the destination.
int delta = TII->GetInstSizeInBytes(&MBB->back());
- BBSizes[MBB->getNumber()] -= delta;
- MachineBasicBlock* SplitBB = llvm::next(MachineFunction::iterator(MBB));
- AdjustBBOffsetsAfter(SplitBB, -delta);
+ BBInfo[MBB->getNumber()].Size -= delta;
MBB->back().eraseFromParent();
- // BBOffsets[SplitBB] is wrong temporarily, fixed below
+ // BBInfo[SplitBB].Offset is wrong temporarily, fixed below
}
MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
- DEBUG(errs() << " Insert B to BB#" << DestBB->getNumber()
+ DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber()
<< " also invert condition and change dest. to BB#"
<< NextBB->getNumber() << "\n");
@@ -1519,23 +1613,20 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
BuildMI(MBB, DebugLoc(), TII->get(MI->getOpcode()))
.addMBB(NextBB).addImm(CC).addReg(CCReg);
Br.MI = &MBB->back();
- BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());
+ BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
if (isThumb)
BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB)
.addImm(ARMCC::AL).addReg(0);
else
BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
- BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());
+ BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr);
ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr));
// Remove the old conditional branch. It may or may not still be in MBB.
- BBSizes[MI->getParent()->getNumber()] -= TII->GetInstSizeInBytes(MI);
+ BBInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(MI);
MI->eraseFromParent();
-
- // The net size change is an addition of one unconditional branch.
- int delta = TII->GetInstSizeInBytes(&MBB->back());
- AdjustBBOffsetsAfter(MBB, delta);
+ AdjustBBOffsetsAfter(MBB);
return true;
}
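
The rewrite in FixUpConditionalBr is easiest to see on a concrete sequence; the labels and condition here are hypothetical:

  // Before: the conditional displacement no longer fits.
  //     beq   LFarAway          ; out of range
  // After: invert the condition so the short hop stays local and let an
  // unconditional branch (with its larger range) cover the distance.
  //     bne   LNextBlock        ; inverted condition
  //     b     LFarAway          ; unconditional, bigger displacement field
  //   LNextBlock: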
@@ -1561,7 +1652,7 @@ bool ARMConstantIslands::UndoLRSpillRestore() {
return MadeChange;
}
-bool ARMConstantIslands::OptimizeThumb2Instructions(MachineFunction &MF) {
+bool ARMConstantIslands::OptimizeThumb2Instructions() {
bool MadeChange = false;
// Shrink ADR and LDR from constantpool.
@@ -1598,19 +1689,19 @@ bool ARMConstantIslands::OptimizeThumb2Instructions(MachineFunction &MF) {
if (CPEIsInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) {
U.MI->setDesc(TII->get(NewOpc));
MachineBasicBlock *MBB = U.MI->getParent();
- BBSizes[MBB->getNumber()] -= 2;
- AdjustBBOffsetsAfter(MBB, -2);
+ BBInfo[MBB->getNumber()].Size -= 2;
+ AdjustBBOffsetsAfter(MBB);
++NumT2CPShrunk;
MadeChange = true;
}
}
- MadeChange |= OptimizeThumb2Branches(MF);
- MadeChange |= OptimizeThumb2JumpTables(MF);
+ MadeChange |= OptimizeThumb2Branches();
+ MadeChange |= OptimizeThumb2JumpTables();
return MadeChange;
}
-bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
+bool ARMConstantIslands::OptimizeThumb2Branches() {
bool MadeChange = false;
for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) {
@@ -1639,8 +1730,8 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
if (BBIsInRange(Br.MI, DestBB, MaxOffs)) {
Br.MI->setDesc(TII->get(NewOpc));
MachineBasicBlock *MBB = Br.MI->getParent();
- BBSizes[MBB->getNumber()] -= 2;
- AdjustBBOffsetsAfter(MBB, -2);
+ BBInfo[MBB->getNumber()].Size -= 2;
+ AdjustBBOffsetsAfter(MBB);
++NumT2BrShrunk;
MadeChange = true;
}
@@ -1663,7 +1754,7 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
// Check if the distance is within 126. Subtract starting offset by 2
// because the cmp will be eliminated.
unsigned BrOffset = GetOffsetOf(Br.MI) + 4 - 2;
- unsigned DestOffset = BBOffsets[DestBB->getNumber()];
+ unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
if (BrOffset < DestOffset && (DestOffset - BrOffset) <= 126) {
MachineBasicBlock::iterator CmpMI = Br.MI;
if (CmpMI != Br.MI->getParent()->begin()) {
@@ -1681,8 +1772,8 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
CmpMI->eraseFromParent();
Br.MI->eraseFromParent();
Br.MI = NewBR;
- BBSizes[MBB->getNumber()] -= 2;
- AdjustBBOffsetsAfter(MBB, -2);
+ BBInfo[MBB->getNumber()].Size -= 2;
+ AdjustBBOffsetsAfter(MBB);
++NumCBZ;
MadeChange = true;
}
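
The NumCBZ counter tracks the Thumb2 compare-and-branch fusion; the "+ 4 - 2" in the range check above reflects that the cmp itself disappears. An illustrative sequence (register hypothetical):

  //     cmp   r2, #0            ; 2 bytes, eliminated by the transform
  //     beq   LTarget
  // becomes
  //     cbz   r2, LTarget       ; one 16-bit instruction; forward-only
  //                             ; displacement of 0..126 bytes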
@@ -1696,12 +1787,12 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
/// OptimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
/// jumptables when it's possible.
-bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
+bool ARMConstantIslands::OptimizeThumb2JumpTables() {
bool MadeChange = false;
// FIXME: After the tables are shrunk, can we get rid of some of the
// constantpool tables?
- MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
if (MJTI == 0) return false;
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
@@ -1709,7 +1800,7 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
MachineInstr *MI = T2JumpTables[i];
const MCInstrDesc &MCID = MI->getDesc();
unsigned NumOps = MCID.getNumOperands();
- unsigned JTOpIdx = NumOps - (MCID.isPredicable() ? 3 : 2);
+ unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 3 : 2);
MachineOperand JTOP = MI->getOperand(JTOpIdx);
unsigned JTI = JTOP.getIndex();
assert(JTI < JT.size());
@@ -1720,7 +1811,7 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) {
MachineBasicBlock *MBB = JTBBs[j];
- unsigned DstOffset = BBOffsets[MBB->getNumber()];
+ unsigned DstOffset = BBInfo[MBB->getNumber()].Offset;
// Negative offset is not ok. FIXME: We should change BB layout to make
// sure all the branches are forward.
if (ByteOk && (DstOffset - JTOffset) > ((1<<8)-1)*2)
@@ -1808,8 +1899,8 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
MI->eraseFromParent();
int delta = OrigSize - NewSize;
- BBSizes[MBB->getNumber()] -= delta;
- AdjustBBOffsetsAfter(MBB, -delta);
+ BBInfo[MBB->getNumber()].Size -= delta;
+ AdjustBBOffsetsAfter(MBB);
++NumTBs;
MadeChange = true;
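
For readers checking the magic numbers: tbb/tbh table entries are implicitly doubled when forming the branch target, which is where the ((1<<8)-1)*2 bound above comes from.

  // tbb [pc, Rindex] as emitted here: the byte table follows the instruction.
  //   target = pc + 2 * table[Rindex];   // entries count halfwords
  // An 8-bit entry therefore reaches at most 255 * 2 == 510 bytes forward,
  // and the offsets are unsigned, hence the forward-only layout requirement.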
@@ -1821,10 +1912,10 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
/// ReorderThumb2JumpTables - Adjust the function's block layout to ensure that
/// jump tables always branch forwards, since that's what tbb and tbh need.
-bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) {
+bool ARMConstantIslands::ReorderThumb2JumpTables() {
bool MadeChange = false;
- MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
if (MJTI == 0) return false;
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
@@ -1832,7 +1923,7 @@ bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) {
MachineInstr *MI = T2JumpTables[i];
const MCInstrDesc &MCID = MI->getDesc();
unsigned NumOps = MCID.getNumOperands();
- unsigned JTOpIdx = NumOps - (MCID.isPredicable() ? 3 : 2);
+ unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 3 : 2);
MachineOperand JTOP = MI->getOperand(JTOpIdx);
unsigned JTI = JTOP.getIndex();
assert(JTI < JT.size());
@@ -1864,8 +1955,6 @@ bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) {
MachineBasicBlock *ARMConstantIslands::
AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB)
{
- MachineFunction &MF = *BB->getParent();
-
// If the destination block is terminated by an unconditional branch,
// try to move it; otherwise, create a new block following the jump
// table that branches back to the actual target. This is a very simple
@@ -1882,22 +1971,22 @@ AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB)
// If the block ends in an unconditional branch, move it. The prior block
// has to have an analyzable terminator for us to move this one. Be paranoid
// and make sure we're not trying to move the entry block of the function.
- if (!B && Cond.empty() && BB != MF.begin() &&
+ if (!B && Cond.empty() && BB != MF->begin() &&
!TII->AnalyzeBranch(*OldPrior, TBB, FBB, CondPrior)) {
BB->moveAfter(JTBB);
OldPrior->updateTerminator();
BB->updateTerminator();
// Update numbering to account for the block being moved.
- MF.RenumberBlocks();
+ MF->RenumberBlocks();
++NumJTMoved;
return NULL;
}
// Create a new MBB for the code after the jump BB.
MachineBasicBlock *NewBB =
- MF.CreateMachineBasicBlock(JTBB->getBasicBlock());
+ MF->CreateMachineBasicBlock(JTBB->getBasicBlock());
MachineFunction::iterator MBBI = JTBB; ++MBBI;
- MF.insert(MBBI, NewBB);
+ MF->insert(MBBI, NewBB);
// Add an unconditional branch from NewBB to BB.
// There doesn't seem to be meaningful DebugInfo available; this doesn't
@@ -1907,7 +1996,7 @@ AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB)
.addImm(ARMCC::AL).addReg(0);
// Update internal data structures to account for the newly inserted MBB.
- MF.RenumberBlocks(NewBB);
+ MF->RenumberBlocks(NewBB);
// Update the CFG.
NewBB->addSuccessor(BB);
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index fc464ea..01d772d 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -61,7 +61,7 @@ namespace {
void ExpandVST(MachineBasicBlock::iterator &MBBI);
void ExpandLaneOp(MachineBasicBlock::iterator &MBBI);
void ExpandVTBL(MachineBasicBlock::iterator &MBBI,
- unsigned Opc, bool IsExt, unsigned NumRegs);
+ unsigned Opc, bool IsExt);
void ExpandMOV32BitImm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI);
};
@@ -129,12 +129,15 @@ namespace {
}
static const NEONLdStTableEntry NEONLdStTable[] = {
-{ ARM::VLD1DUPq16Pseudo, ARM::VLD1DUPq16, true, false, false, SingleSpc, 2, 4,true},
-{ ARM::VLD1DUPq16Pseudo_UPD, ARM::VLD1DUPq16_UPD, true, true, true, SingleSpc, 2, 4,true},
-{ ARM::VLD1DUPq32Pseudo, ARM::VLD1DUPq32, true, false, false, SingleSpc, 2, 2,true},
-{ ARM::VLD1DUPq32Pseudo_UPD, ARM::VLD1DUPq32_UPD, true, true, true, SingleSpc, 2, 2,true},
-{ ARM::VLD1DUPq8Pseudo, ARM::VLD1DUPq8, true, false, false, SingleSpc, 2, 8,true},
-{ ARM::VLD1DUPq8Pseudo_UPD, ARM::VLD1DUPq8_UPD, true, true, true, SingleSpc, 2, 8,true},
+{ ARM::VLD1DUPq16Pseudo, ARM::VLD1DUPq16, true, false, false, SingleSpc, 2, 4,false},
+{ ARM::VLD1DUPq16PseudoWB_fixed, ARM::VLD1DUPq16wb_fixed, true, true, false, SingleSpc, 2, 4,false},
+{ ARM::VLD1DUPq16PseudoWB_register, ARM::VLD1DUPq16wb_register, true, true, true, SingleSpc, 2, 4,false},
+{ ARM::VLD1DUPq32Pseudo, ARM::VLD1DUPq32, true, false, false, SingleSpc, 2, 2,false},
+{ ARM::VLD1DUPq32PseudoWB_fixed, ARM::VLD1DUPq32wb_fixed, true, true, false, SingleSpc, 2, 2,false},
+{ ARM::VLD1DUPq32PseudoWB_register, ARM::VLD1DUPq32wb_register, true, true, true, SingleSpc, 2, 2,false},
+{ ARM::VLD1DUPq8Pseudo, ARM::VLD1DUPq8, true, false, false, SingleSpc, 2, 8,false},
+{ ARM::VLD1DUPq8PseudoWB_fixed, ARM::VLD1DUPq8wb_fixed, true, true, false, SingleSpc, 2, 8,false},
+{ ARM::VLD1DUPq8PseudoWB_register, ARM::VLD1DUPq8wb_register, true, true, true, SingleSpc, 2, 8,false},
{ ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, false, EvenDblSpc, 1, 4 ,true},
{ ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, true, EvenDblSpc, 1, 4 ,true},
@@ -177,18 +180,24 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
{ ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, true, EvenDblSpc, 2, 2 ,true},
{ ARM::VLD2d16Pseudo, ARM::VLD2d16, true, false, false, SingleSpc, 2, 4 ,false},
-{ ARM::VLD2d16Pseudo_UPD, ARM::VLD2d16_UPD, true, true, true, SingleSpc, 2, 4 ,false},
+{ ARM::VLD2d16PseudoWB_fixed, ARM::VLD2d16wb_fixed, true, true, false, SingleSpc, 2, 4 ,false},
+{ ARM::VLD2d16PseudoWB_register, ARM::VLD2d16wb_register, true, true, true, SingleSpc, 2, 4 ,false},
{ ARM::VLD2d32Pseudo, ARM::VLD2d32, true, false, false, SingleSpc, 2, 2 ,false},
-{ ARM::VLD2d32Pseudo_UPD, ARM::VLD2d32_UPD, true, true, true, SingleSpc, 2, 2 ,false},
+{ ARM::VLD2d32PseudoWB_fixed, ARM::VLD2d32wb_fixed, true, true, false, SingleSpc, 2, 2 ,false},
+{ ARM::VLD2d32PseudoWB_register, ARM::VLD2d32wb_register, true, true, true, SingleSpc, 2, 2 ,false},
{ ARM::VLD2d8Pseudo, ARM::VLD2d8, true, false, false, SingleSpc, 2, 8 ,false},
-{ ARM::VLD2d8Pseudo_UPD, ARM::VLD2d8_UPD, true, true, true, SingleSpc, 2, 8 ,false},
+{ ARM::VLD2d8PseudoWB_fixed, ARM::VLD2d8wb_fixed, true, true, false, SingleSpc, 2, 8 ,false},
+{ ARM::VLD2d8PseudoWB_register, ARM::VLD2d8wb_register, true, true, true, SingleSpc, 2, 8 ,false},
{ ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, false, SingleSpc, 4, 4 ,false},
-{ ARM::VLD2q16Pseudo_UPD, ARM::VLD2q16_UPD, true, true, true, SingleSpc, 4, 4 ,false},
+{ ARM::VLD2q16PseudoWB_fixed, ARM::VLD2q16wb_fixed, true, true, false, SingleSpc, 4, 4 ,false},
+{ ARM::VLD2q16PseudoWB_register, ARM::VLD2q16wb_register, true, true, true, SingleSpc, 4, 4 ,false},
{ ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, false, SingleSpc, 4, 2 ,false},
-{ ARM::VLD2q32Pseudo_UPD, ARM::VLD2q32_UPD, true, true, true, SingleSpc, 4, 2 ,false},
+{ ARM::VLD2q32PseudoWB_fixed, ARM::VLD2q32wb_fixed, true, true, false, SingleSpc, 4, 2 ,false},
+{ ARM::VLD2q32PseudoWB_register, ARM::VLD2q32wb_register, true, true, true, SingleSpc, 4, 2 ,false},
{ ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, false, SingleSpc, 4, 8 ,false},
-{ ARM::VLD2q8Pseudo_UPD, ARM::VLD2q8_UPD, true, true, true, SingleSpc, 4, 8 ,false},
+{ ARM::VLD2q8PseudoWB_fixed, ARM::VLD2q8wb_fixed, true, true, false, SingleSpc, 4, 8 ,false},
+{ ARM::VLD2q8PseudoWB_register, ARM::VLD2q8wb_register, true, true, true, SingleSpc, 4, 8 ,false},
{ ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, false, SingleSpc, 3, 4,true},
{ ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, true, SingleSpc, 3, 4,true},
@@ -267,10 +276,12 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
{ ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, false, EvenDblSpc, 1, 8 ,true},
{ ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, true, EvenDblSpc, 1, 8 ,true},
-{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, false, SingleSpc, 4, 1 ,true},
-{ ARM::VST1d64QPseudo_UPD, ARM::VST1d64Q_UPD, false, true, true, SingleSpc, 4, 1 ,true},
-{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, false, SingleSpc, 3, 1 ,true},
-{ ARM::VST1d64TPseudo_UPD, ARM::VST1d64T_UPD, false, true, true, SingleSpc, 3, 1 ,true},
+{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, false, SingleSpc, 4, 1 ,false},
+{ ARM::VST1d64QPseudoWB_fixed, ARM::VST1d64Qwb_fixed, false, true, false, SingleSpc, 4, 1 ,false},
+{ ARM::VST1d64QPseudoWB_register, ARM::VST1d64Qwb_register, false, true, true, SingleSpc, 4, 1 ,false},
+{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, false, SingleSpc, 3, 1 ,false},
+{ ARM::VST1d64TPseudoWB_fixed, ARM::VST1d64Twb_fixed, false, true, false, SingleSpc, 3, 1 ,false},
+{ ARM::VST1d64TPseudoWB_register, ARM::VST1d64Twb_register, false, true, true, SingleSpc, 3, 1 ,false},
{ ARM::VST1q16Pseudo, ARM::VST1q16, false, false, false, SingleSpc, 2, 4 ,false},
{ ARM::VST1q16PseudoWB_fixed, ARM::VST1q16wb_fixed, false, true, false, SingleSpc, 2, 4 ,false},
@@ -296,19 +307,25 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
{ ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, false, EvenDblSpc, 2, 2,true},
{ ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, true, EvenDblSpc, 2, 2,true},
-{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, false, SingleSpc, 2, 4 ,true},
-{ ARM::VST2d16Pseudo_UPD, ARM::VST2d16_UPD, false, true, true, SingleSpc, 2, 4 ,true},
-{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, false, SingleSpc, 2, 2 ,true},
-{ ARM::VST2d32Pseudo_UPD, ARM::VST2d32_UPD, false, true, true, SingleSpc, 2, 2 ,true},
-{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, false, SingleSpc, 2, 8 ,true},
-{ ARM::VST2d8Pseudo_UPD, ARM::VST2d8_UPD, false, true, true, SingleSpc, 2, 8 ,true},
-
-{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4 ,true},
-{ ARM::VST2q16Pseudo_UPD, ARM::VST2q16_UPD, false, true, true, SingleSpc, 4, 4 ,true},
-{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, false, SingleSpc, 4, 2 ,true},
-{ ARM::VST2q32Pseudo_UPD, ARM::VST2q32_UPD, false, true, true, SingleSpc, 4, 2 ,true},
-{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, false, SingleSpc, 4, 8 ,true},
-{ ARM::VST2q8Pseudo_UPD, ARM::VST2q8_UPD, false, true, true, SingleSpc, 4, 8 ,true},
+{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, false, SingleSpc, 2, 4 ,false},
+{ ARM::VST2d16PseudoWB_fixed, ARM::VST2d16wb_fixed, false, true, false, SingleSpc, 2, 4 ,false},
+{ ARM::VST2d16PseudoWB_register, ARM::VST2d16wb_register, false, true, true, SingleSpc, 2, 4 ,false},
+{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, false, SingleSpc, 2, 2 ,false},
+{ ARM::VST2d32PseudoWB_fixed, ARM::VST2d32wb_fixed, false, true, false, SingleSpc, 2, 2 ,false},
+{ ARM::VST2d32PseudoWB_register, ARM::VST2d32wb_register, false, true, true, SingleSpc, 2, 2 ,false},
+{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, false, SingleSpc, 2, 8 ,false},
+{ ARM::VST2d8PseudoWB_fixed, ARM::VST2d8wb_fixed, false, true, false, SingleSpc, 2, 8 ,false},
+{ ARM::VST2d8PseudoWB_register, ARM::VST2d8wb_register, false, true, true, SingleSpc, 2, 8 ,false},
+
+{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4 ,false},
+{ ARM::VST2q16PseudoWB_fixed, ARM::VST2q16wb_fixed, false, true, false, SingleSpc, 4, 4 ,false},
+{ ARM::VST2q16PseudoWB_register, ARM::VST2q16wb_register, false, true, true, SingleSpc, 4, 4 ,false},
+{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, false, SingleSpc, 4, 2 ,false},
+{ ARM::VST2q32PseudoWB_fixed, ARM::VST2q32wb_fixed, false, true, false, SingleSpc, 4, 2 ,false},
+{ ARM::VST2q32PseudoWB_register, ARM::VST2q32wb_register, false, true, true, SingleSpc, 4, 2 ,false},
+{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, false, SingleSpc, 4, 8 ,false},
+{ ARM::VST2q8PseudoWB_fixed, ARM::VST2q8wb_fixed, false, true, false, SingleSpc, 4, 8 ,false},
+{ ARM::VST2q8PseudoWB_register, ARM::VST2q8wb_register, false, true, true, SingleSpc, 4, 8 ,false},
{ ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, false, SingleSpc, 3, 4 ,true},
{ ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, true, SingleSpc, 3, 4 ,true},
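
To help decode the rows being edited here: each entry maps a pseudo opcode to its real opcode followed by flag and layout columns. A reconstruction of the row layout; the field names are an assumption based on how the rows read, so consult the NEONLdStTableEntry definition earlier in this file:

  struct NEONLdStTableEntry {   // field names assumed for illustration
    unsigned PseudoOpc;         // e.g. ARM::VLD2d16PseudoWB_fixed
    unsigned RealOpc;           // e.g. ARM::VLD2d16wb_fixed
    bool IsLoad;                // VLDx vs. VSTx
    bool isUpdating;            // writes the base address register back
    bool hasWritebackOperand;   // _register forms take an explicit Rm
    NEONRegSpacing RegSpc;      // SingleSpc / EvenDblSpc / ...
    unsigned char NumRegs;      // D registers in the list
    unsigned char RegElts;      // elements per D register
    bool copyAllListRegs;       // final column this patch flips to false
  };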
@@ -620,7 +637,7 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
/// ExpandVTBL - Translate VTBL and VTBX pseudo instructions with Q or QQ
/// register operands to real instructions with D register operands.
void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
- unsigned Opc, bool IsExt, unsigned NumRegs) {
+ unsigned Opc, bool IsExt) {
MachineInstr &MI = *MBBI;
MachineBasicBlock &MBB = *MI.getParent();
@@ -636,11 +653,7 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
unsigned D0, D1, D2, D3;
GetDSubRegs(SrcReg, SingleSpc, TRI, D0, D1, D2, D3);
- MIB.addReg(D0).addReg(D1);
- if (NumRegs > 2)
- MIB.addReg(D2);
- if (NumRegs > 3)
- MIB.addReg(D3);
+ MIB.addReg(D0);
// Copy the other source register operand.
MIB.addOperand(MI.getOperand(OpIdx++));
@@ -1090,12 +1103,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VLD2q8Pseudo:
case ARM::VLD2q16Pseudo:
case ARM::VLD2q32Pseudo:
- case ARM::VLD2d8Pseudo_UPD:
- case ARM::VLD2d16Pseudo_UPD:
- case ARM::VLD2d32Pseudo_UPD:
- case ARM::VLD2q8Pseudo_UPD:
- case ARM::VLD2q16Pseudo_UPD:
- case ARM::VLD2q32Pseudo_UPD:
+ case ARM::VLD2d8PseudoWB_fixed:
+ case ARM::VLD2d16PseudoWB_fixed:
+ case ARM::VLD2d32PseudoWB_fixed:
+ case ARM::VLD2q8PseudoWB_fixed:
+ case ARM::VLD2q16PseudoWB_fixed:
+ case ARM::VLD2q32PseudoWB_fixed:
+ case ARM::VLD2d8PseudoWB_register:
+ case ARM::VLD2d16PseudoWB_register:
+ case ARM::VLD2d32PseudoWB_register:
+ case ARM::VLD2q8PseudoWB_register:
+ case ARM::VLD2q16PseudoWB_register:
+ case ARM::VLD2q32PseudoWB_register:
case ARM::VLD3d8Pseudo:
case ARM::VLD3d16Pseudo:
case ARM::VLD3d32Pseudo:
@@ -1131,9 +1150,12 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VLD1DUPq8Pseudo:
case ARM::VLD1DUPq16Pseudo:
case ARM::VLD1DUPq32Pseudo:
- case ARM::VLD1DUPq8Pseudo_UPD:
- case ARM::VLD1DUPq16Pseudo_UPD:
- case ARM::VLD1DUPq32Pseudo_UPD:
+ case ARM::VLD1DUPq8PseudoWB_fixed:
+ case ARM::VLD1DUPq16PseudoWB_fixed:
+ case ARM::VLD1DUPq32PseudoWB_fixed:
+ case ARM::VLD1DUPq8PseudoWB_register:
+ case ARM::VLD1DUPq16PseudoWB_register:
+ case ARM::VLD1DUPq32PseudoWB_register:
case ARM::VLD2DUPd8Pseudo:
case ARM::VLD2DUPd16Pseudo:
case ARM::VLD2DUPd32Pseudo:
@@ -1173,12 +1195,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VST2q8Pseudo:
case ARM::VST2q16Pseudo:
case ARM::VST2q32Pseudo:
- case ARM::VST2d8Pseudo_UPD:
- case ARM::VST2d16Pseudo_UPD:
- case ARM::VST2d32Pseudo_UPD:
- case ARM::VST2q8Pseudo_UPD:
- case ARM::VST2q16Pseudo_UPD:
- case ARM::VST2q32Pseudo_UPD:
+ case ARM::VST2d8PseudoWB_fixed:
+ case ARM::VST2d16PseudoWB_fixed:
+ case ARM::VST2d32PseudoWB_fixed:
+ case ARM::VST2q8PseudoWB_fixed:
+ case ARM::VST2q16PseudoWB_fixed:
+ case ARM::VST2q32PseudoWB_fixed:
+ case ARM::VST2d8PseudoWB_register:
+ case ARM::VST2d16PseudoWB_register:
+ case ARM::VST2d32PseudoWB_register:
+ case ARM::VST2q8PseudoWB_register:
+ case ARM::VST2q16PseudoWB_register:
+ case ARM::VST2q32PseudoWB_register:
case ARM::VST3d8Pseudo:
case ARM::VST3d16Pseudo:
case ARM::VST3d32Pseudo:
@@ -1186,7 +1214,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VST3d8Pseudo_UPD:
case ARM::VST3d16Pseudo_UPD:
case ARM::VST3d32Pseudo_UPD:
- case ARM::VST1d64TPseudo_UPD:
+ case ARM::VST1d64TPseudoWB_fixed:
+ case ARM::VST1d64TPseudoWB_register:
case ARM::VST3q8Pseudo_UPD:
case ARM::VST3q16Pseudo_UPD:
case ARM::VST3q32Pseudo_UPD:
@@ -1203,7 +1232,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VST4d8Pseudo_UPD:
case ARM::VST4d16Pseudo_UPD:
case ARM::VST4d32Pseudo_UPD:
- case ARM::VST1d64QPseudo_UPD:
+ case ARM::VST1d64QPseudoWB_fixed:
+ case ARM::VST1d64QPseudoWB_register:
case ARM::VST4q8Pseudo_UPD:
case ARM::VST4q16Pseudo_UPD:
case ARM::VST4q32Pseudo_UPD:
@@ -1291,12 +1321,12 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
ExpandLaneOp(MBBI);
return true;
- case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false, 2); return true;
- case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false, 3); return true;
- case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false, 4); return true;
- case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true, 2); return true;
- case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true, 3); return true;
- case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true, 4); return true;
+ case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false); return true;
+ case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true;
+ case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true;
+ case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true); return true;
+ case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true;
+ case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true;
}
return false;
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 9bae422..a98dfc3 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -178,10 +178,12 @@ class ARMFastISel : public FastISel {
bool isLoadTypeLegal(Type *Ty, MVT &VT);
bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
bool isZExt);
- bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, bool isZExt,
- bool allocReg);
+ bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
+ unsigned Alignment = 0, bool isZExt = true,
+ bool allocReg = true);
- bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr);
+ bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
+ unsigned Alignment = 0);
bool ARMComputeAddress(const Value *Obj, Address &Addr);
void ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3);
bool ARMIsMemCpySmall(uint64_t Len);
@@ -227,8 +229,7 @@ class ARMFastISel : public FastISel {
// we don't care about implicit defs here, just places we'll need to add a
// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.hasOptionalDef())
+ if (!MI->hasOptionalDef())
return false;
// Look to see if our OptionalDef is defining CPSR or CCR.
@@ -702,7 +703,7 @@ unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
TargetRegisterClass* RC = TLI.getRegClassFor(VT);
unsigned ResultReg = createResultReg(RC);
unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg)
.addFrameIndex(SI->second)
.addImm(0));
@@ -898,7 +899,7 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) {
ARM::GPRRegisterClass;
unsigned ResultReg = createResultReg(RC);
unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg)
.addFrameIndex(Addr.Base.FI)
.addImm(0));
@@ -937,7 +938,8 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
// Now add the rest of the operands.
MIB.addFrameIndex(FI);
- // ARM halfword load/stores and signed byte loads need an additional operand.
+ // ARM halfword load/stores and signed byte loads need an additional
+ // operand.
if (useAM3) {
signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
MIB.addReg(0);
@@ -950,7 +952,8 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
// Now add the rest of the operands.
MIB.addReg(Addr.Base.Reg);
- // ARM halfword load/stores and signed byte loads need an additional operand.
+ // ARM halfword load/stores and signed byte loads need an additional
+ // operand.
if (useAM3) {
signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
MIB.addReg(0);
@@ -963,10 +966,11 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
}
bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
- bool isZExt = true, bool allocReg = true) {
+ unsigned Alignment, bool isZExt, bool allocReg) {
assert(VT.isSimple() && "Non-simple types are invalid here!");
unsigned Opc;
bool useAM3 = false;
+ bool needVMOV = false;
TargetRegisterClass *RC;
switch (VT.getSimpleVT().SimpleTy) {
// This is mostly going to be Neon/vector support.
@@ -1012,10 +1016,25 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
RC = ARM::GPRRegisterClass;
break;
case MVT::f32:
- Opc = ARM::VLDRS;
- RC = TLI.getRegClassFor(VT);
+ if (!Subtarget->hasVFP2()) return false;
+ // Unaligned loads need special handling. Floats require word-alignment.
+ if (Alignment && Alignment < 4) {
+ needVMOV = true;
+ VT = MVT::i32;
+ Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
+ RC = ARM::GPRRegisterClass;
+ } else {
+ Opc = ARM::VLDRS;
+ RC = TLI.getRegClassFor(VT);
+ }
break;
case MVT::f64:
+ if (!Subtarget->hasVFP2()) return false;
+ // FIXME: Unaligned loads need special handling. Doublewords require
+ // word-alignment.
+ if (Alignment && Alignment < 4)
+ return false;
+
Opc = ARM::VLDRD;
RC = TLI.getRegClassFor(VT);
break;
@@ -1030,6 +1049,16 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg);
AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3);
+
+  // If we had an unaligned load of a float, we've converted it to a regular
+  // load. Now we must move from the GPR to the FP register.
+ if (needVMOV) {
+ unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::f32));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVSR), MoveReg)
+ .addReg(ResultReg));
+ ResultReg = MoveReg;
+ }
return true;
}
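
So for a float load with sub-word alignment the emitted sequence is an ordinary integer load followed by a core-to-VFP move; the store path (below) mirrors it with VMOVRS before an integer store. Roughly, with hypothetical registers:

  //   ldr   r0, [r1]          ; t2LDRi12/LDRi12: i32 load tolerates the
  //                           ; sub-word alignment
  //   vmov  s0, r0            ; VMOVSR: move the loaded bits into VFP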
@@ -1048,12 +1077,14 @@ bool ARMFastISel::SelectLoad(const Instruction *I) {
if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;
unsigned ResultReg;
- if (!ARMEmitLoad(VT, ResultReg, Addr)) return false;
+ if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment()))
+ return false;
UpdateValueMap(I, ResultReg);
return true;
}
-bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) {
+bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
+ unsigned Alignment) {
unsigned StrOpc;
bool useAM3 = false;
switch (VT.getSimpleVT().SimpleTy) {
@@ -1101,10 +1132,26 @@ bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) {
break;
case MVT::f32:
if (!Subtarget->hasVFP2()) return false;
- StrOpc = ARM::VSTRS;
+ // Unaligned stores need special handling. Floats require word-alignment.
+ if (Alignment && Alignment < 4) {
+ unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVRS), MoveReg)
+ .addReg(SrcReg));
+ SrcReg = MoveReg;
+ VT = MVT::i32;
+ StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12;
+ } else {
+ StrOpc = ARM::VSTRS;
+ }
break;
case MVT::f64:
if (!Subtarget->hasVFP2()) return false;
+ // FIXME: Unaligned stores need special handling. Doublewords require
+ // word-alignment.
+ if (Alignment && Alignment < 4)
+ return false;
+
StrOpc = ARM::VSTRD;
break;
}
@@ -1141,7 +1188,8 @@ bool ARMFastISel::SelectStore(const Instruction *I) {
if (!ARMComputeAddress(I->getOperand(1), Addr))
return false;
- if (!ARMEmitStore(VT, SrcReg, Addr)) return false;
+ if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment()))
+ return false;
return true;
}
@@ -1360,7 +1408,7 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
unsigned SrcReg1 = getRegForValue(Src1Value);
if (SrcReg1 == 0) return false;
- unsigned SrcReg2;
+ unsigned SrcReg2 = 0;
if (!UseImm) {
SrcReg2 = getRegForValue(Src2Value);
if (SrcReg2 == 0) return false;
@@ -1577,7 +1625,7 @@ bool ARMFastISel::SelectSelect(const Instruction *I) {
(ARM_AM::getSOImmVal(Imm) != -1);
}
- unsigned Op2Reg;
+ unsigned Op2Reg = 0;
if (!UseImm) {
Op2Reg = getRegForValue(I->getOperand(2));
if (Op2Reg == 0) return false;
@@ -1716,7 +1764,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) {
// Use target triple & subtarget features to do actual dispatch.
if (Subtarget->isAAPCS_ABI()) {
if (Subtarget->hasVFP2() &&
- FloatABIType == FloatABI::Hard)
+ TM.Options.FloatABIType == FloatABI::Hard)
return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
else
return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
@@ -1765,21 +1813,23 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
switch (VA.getLocInfo()) {
case CCValAssign::Full: break;
case CCValAssign::SExt: {
- EVT DestVT = VA.getLocVT();
+ MVT DestVT = VA.getLocVT();
unsigned ResultReg = ARMEmitIntExt(ArgVT, Arg, DestVT,
/*isZExt*/false);
assert (ResultReg != 0 && "Failed to emit a sext");
Arg = ResultReg;
+ ArgVT = DestVT;
break;
}
case CCValAssign::AExt:
// Intentional fall-through. Handle AExt and ZExt.
case CCValAssign::ZExt: {
- EVT DestVT = VA.getLocVT();
+ MVT DestVT = VA.getLocVT();
unsigned ResultReg = ARMEmitIntExt(ArgVT, Arg, DestVT,
/*isZExt*/true);
        assert (ResultReg != 0 && "Failed to emit a zext");
Arg = ResultReg;
+ ArgVT = DestVT;
break;
}
case CCValAssign::BCvt: {
@@ -2456,7 +2506,7 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;
unsigned ResultReg = MI->getOperand(0).getReg();
- if (!ARMEmitLoad(VT, ResultReg, Addr, isZExt, false))
+ if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false))
return false;
MI->eraseFromParent();
return true;
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 2d1de6f..06944b1 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -37,7 +37,8 @@ bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
// Always eliminate non-leaf frame pointers.
- return ((DisableFramePointerElim(MF) && MFI->hasCalls()) ||
+ return ((MF.getTarget().Options.DisableFramePointerElim(MF) &&
+ MFI->hasCalls()) ||
RegInfo->needsStackRealignment(MF) ||
MFI->hasVarSizedObjects() ||
MFI->isFrameAddressTaken());
@@ -309,8 +310,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- assert(MBBI->getDesc().isReturn() &&
- "Can only insert epilog into returning blocks");
+ assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
unsigned RetOpcode = MBBI->getOpcode();
DebugLoc dl = MBBI->getDebugLoc();
MachineFrameInfo *MFI = MF.getFrameInfo();
diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp
index 787f6a2..a5fd15b 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -21,7 +21,7 @@ static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI,
// FIXME: Detect integer instructions properly.
const MCInstrDesc &MCID = MI->getDesc();
unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
- if (MCID.mayStore())
+ if (MI->mayStore())
return false;
unsigned Opcode = MCID.getOpcode();
if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
@@ -38,9 +38,6 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
MachineInstr *MI = SU->getInstr();
if (!MI->isDebugValue()) {
- if (ITBlockSize && MI != ITBlockMIs[ITBlockSize-1])
- return Hazard;
-
// Look for special VMLA / VMLS hazards. A VMUL / VADD / VSUB following
// a VMLA / VMLS will cause a 4-cycle stall.
const MCInstrDesc &MCID = MI->getDesc();
@@ -48,9 +45,9 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
MachineInstr *DefMI = LastMI;
const MCInstrDesc &LastMCID = LastMI->getDesc();
// Skip over one non-VFP / NEON instruction.
- if (!LastMCID.isBarrier() &&
+ if (!LastMI->isBarrier() &&
// On A9, AGU and NEON/FPU are muxed.
- !(STI.isCortexA9() && (LastMCID.mayLoad() || LastMCID.mayStore())) &&
+ !(STI.isCortexA9() && (LastMI->mayLoad() || LastMI->mayStore())) &&
(LastMCID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) {
MachineBasicBlock::iterator I = LastMI;
if (I != LastMI->getParent()->begin()) {
@@ -76,30 +73,11 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
void ARMHazardRecognizer::Reset() {
LastMI = 0;
FpMLxStalls = 0;
- ITBlockSize = 0;
ScoreboardHazardRecognizer::Reset();
}
void ARMHazardRecognizer::EmitInstruction(SUnit *SU) {
MachineInstr *MI = SU->getInstr();
- unsigned Opcode = MI->getOpcode();
- if (ITBlockSize) {
- --ITBlockSize;
- } else if (Opcode == ARM::t2IT) {
- unsigned Mask = MI->getOperand(1).getImm();
- unsigned NumTZ = CountTrailingZeros_32(Mask);
- assert(NumTZ <= 3 && "Invalid IT mask!");
- ITBlockSize = 4 - NumTZ;
- MachineBasicBlock::iterator I = MI;
- for (unsigned i = 0; i < ITBlockSize; ++i) {
- // Advance to the next instruction, skipping any dbg_value instructions.
- do {
- ++I;
- } while (I->isDebugValue());
- ITBlockMIs[ITBlockSize-1-i] = &*I;
- }
- }
-
if (!MI->isDebugValue()) {
LastMI = MI;
FpMLxStalls = 0;
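The deleted IT-block bookkeeping hinged on one bit trick: the 4-bit t2IT mask carries a terminator in its lowest set bit. A standalone sketch of the decode (equivalent to the CountTrailingZeros_32 use above, for 4-bit masks):

#include <cassert>

// Block length = 4 - CountTrailingZeros(mask): 1 for plain "IT",
// up to 4 for "ITxyz".
static unsigned itBlockSize(unsigned Mask) {
  unsigned NumTZ = 0;
  while (NumTZ < 4 && !(Mask & (1u << NumTZ)))
    ++NumTZ;
  assert(NumTZ <= 3 && "Invalid IT mask!");
  return 4 - NumTZ;
}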
diff --git a/lib/Target/ARM/ARMHazardRecognizer.h b/lib/Target/ARM/ARMHazardRecognizer.h
index 2bc218d..98bfc4c 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.h
+++ b/lib/Target/ARM/ARMHazardRecognizer.h
@@ -23,6 +23,10 @@ class ARMBaseRegisterInfo;
class ARMSubtarget;
class MachineInstr;
+/// ARMHazardRecognizer handles special constraints that are not expressed in
+/// the scheduling itinerary. This is only used during postRA scheduling. The
+/// ARM preRA scheduler uses an unspecialized instance of the
+/// ScoreboardHazardRecognizer.
class ARMHazardRecognizer : public ScoreboardHazardRecognizer {
const ARMBaseInstrInfo &TII;
const ARMBaseRegisterInfo &TRI;
@@ -30,8 +34,6 @@ class ARMHazardRecognizer : public ScoreboardHazardRecognizer {
MachineInstr *LastMI;
unsigned FpMLxStalls;
- unsigned ITBlockSize; // No. of MIs in current IT block yet to be scheduled.
- MachineInstr *ITBlockMIs[4];
public:
ARMHazardRecognizer(const InstrItineraryData *ItinData,
@@ -40,7 +42,7 @@ public:
const ARMSubtarget &sti,
const ScheduleDAG *DAG) :
ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"), TII(tii),
- TRI(tri), STI(sti), LastMI(0), ITBlockSize(0) {}
+ TRI(tri), STI(sti), LastMI(0) {}
virtual HazardType getHazardType(SUnit *SU, int Stalls);
virtual void Reset();
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index bc8588f..7473141 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -1579,6 +1579,22 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
case ARM::VST1q16PseudoWB_fixed: return ARM::VST1q16PseudoWB_register;
case ARM::VST1q32PseudoWB_fixed: return ARM::VST1q32PseudoWB_register;
case ARM::VST1q64PseudoWB_fixed: return ARM::VST1q64PseudoWB_register;
+ case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
+ case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
+
+ case ARM::VLD2d8PseudoWB_fixed: return ARM::VLD2d8PseudoWB_register;
+ case ARM::VLD2d16PseudoWB_fixed: return ARM::VLD2d16PseudoWB_register;
+ case ARM::VLD2d32PseudoWB_fixed: return ARM::VLD2d32PseudoWB_register;
+ case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
+ case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
+ case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
+
+ case ARM::VST2d8PseudoWB_fixed: return ARM::VST2d8PseudoWB_register;
+ case ARM::VST2d16PseudoWB_fixed: return ARM::VST2d16PseudoWB_register;
+ case ARM::VST2d32PseudoWB_fixed: return ARM::VST2d32PseudoWB_register;
+ case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
+ case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
+ case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
}
return Opc; // If not one we handle, return it unchanged.
}
@@ -1646,13 +1662,13 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
Ops.push_back(Align);
if (isUpdating) {
SDValue Inc = N->getOperand(AddrOpIdx + 1);
- // FIXME: VLD1 fixed increment doesn't need Reg0. Remove the reg0
+ // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
// case entirely when the rest are updated to that form, too.
- if (NumVecs == 1 && !isa<ConstantSDNode>(Inc.getNode()))
+ if ((NumVecs == 1 || NumVecs == 2) && !isa<ConstantSDNode>(Inc.getNode()))
Opc = getVLDSTRegisterUpdateOpcode(Opc);
- // We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
+ // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
// check for that explicitly too. Horribly hacky, but temporary.
- if ((NumVecs != 1 && Opc != ARM::VLD1q64PseudoWB_fixed) ||
+ if ((NumVecs != 1 && NumVecs != 2 && Opc != ARM::VLD1q64PseudoWB_fixed) ||
!isa<ConstantSDNode>(Inc.getNode()))
Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
}
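The _fixed/_register split that getVLDSTRegisterUpdateOpcode services follows a single rule. A simplified sketch with illustrative names (the real code swaps MachineInstr opcodes, not strings):

#include <string>

// A constant post-increment (always the transfer size for these
// pseudos, later encoded as Rm = 0b1101) keeps the "_fixed" opcode
// and takes no extra operand; any other increment switches to the
// "_register" twin and passes the increment register as Rm.
struct UpdatingForm {
  std::string Opcode;
  bool TakesRmOperand;
};

UpdatingForm selectUpdatingForm(std::string FixedOpc, bool IncIsConstant) {
  if (IncIsConstant)
    return {FixedOpc, false};        // e.g. VLD2d8PseudoWB_fixed
  // Assumes FixedOpc ends in "_fixed", as all the cases above do.
  FixedOpc.replace(FixedOpc.find("_fixed"), 6, "_register");
  return {FixedOpc, true};           // e.g. VLD2d8PseudoWB_register
}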
@@ -1796,9 +1812,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
Ops.push_back(Align);
if (isUpdating) {
SDValue Inc = N->getOperand(AddrOpIdx + 1);
- // FIXME: VST1 fixed increment doesn't need Reg0. Remove the reg0
+ // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
// case entirely when the rest are updated to that form, too.
- if (NumVecs == 1 && !isa<ConstantSDNode>(Inc.getNode()))
+ if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
Opc = getVLDSTRegisterUpdateOpcode(Opc);
// We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
// check for that explicitly too. Horribly hacky, but temporary.
@@ -2810,10 +2826,13 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VLD2_UPD: {
- unsigned DOpcodes[] = { ARM::VLD2d8Pseudo_UPD, ARM::VLD2d16Pseudo_UPD,
- ARM::VLD2d32Pseudo_UPD, ARM::VLD1q64PseudoWB_fixed};
- unsigned QOpcodes[] = { ARM::VLD2q8Pseudo_UPD, ARM::VLD2q16Pseudo_UPD,
- ARM::VLD2q32Pseudo_UPD };
+ unsigned DOpcodes[] = { ARM::VLD2d8PseudoWB_fixed,
+ ARM::VLD2d16PseudoWB_fixed,
+ ARM::VLD2d32PseudoWB_fixed,
+ ARM::VLD1q64PseudoWB_fixed};
+ unsigned QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
+ ARM::VLD2q16PseudoWB_fixed,
+ ARM::VLD2q32PseudoWB_fixed };
return SelectVLD(N, true, 2, DOpcodes, QOpcodes, 0);
}
@@ -2876,16 +2895,19 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VST2_UPD: {
- unsigned DOpcodes[] = { ARM::VST2d8Pseudo_UPD, ARM::VST2d16Pseudo_UPD,
- ARM::VST2d32Pseudo_UPD, ARM::VST1q64PseudoWB_fixed};
- unsigned QOpcodes[] = { ARM::VST2q8Pseudo_UPD, ARM::VST2q16Pseudo_UPD,
- ARM::VST2q32Pseudo_UPD };
+ unsigned DOpcodes[] = { ARM::VST2d8PseudoWB_fixed,
+ ARM::VST2d16PseudoWB_fixed,
+ ARM::VST2d32PseudoWB_fixed,
+ ARM::VST1q64PseudoWB_fixed};
+ unsigned QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
+ ARM::VST2q16PseudoWB_fixed,
+ ARM::VST2q32PseudoWB_fixed };
return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0);
}
case ARMISD::VST3_UPD: {
unsigned DOpcodes[] = { ARM::VST3d8Pseudo_UPD, ARM::VST3d16Pseudo_UPD,
- ARM::VST3d32Pseudo_UPD, ARM::VST1d64TPseudo_UPD };
+ ARM::VST3d32Pseudo_UPD,ARM::VST1d64TPseudoWB_fixed};
unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
ARM::VST3q16Pseudo_UPD,
ARM::VST3q32Pseudo_UPD };
@@ -2897,7 +2919,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case ARMISD::VST4_UPD: {
unsigned DOpcodes[] = { ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD,
- ARM::VST4d32Pseudo_UPD, ARM::VST1d64QPseudo_UPD };
+ ARM::VST4d32Pseudo_UPD,ARM::VST1d64QPseudoWB_fixed};
unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
ARM::VST4q16Pseudo_UPD,
ARM::VST4q32Pseudo_UPD };
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 8c4c06f..c6c1f5b 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -72,7 +72,7 @@ ARMInterworking("arm-interworking", cl::Hidden,
cl::desc("Enable / disable ARM interworking (for debugging only)"),
cl::init(true));
-namespace llvm {
+namespace {
class ARMCCState : public CCState {
public:
ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
@@ -432,7 +432,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
else
addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
- if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
+ !Subtarget->isThumb1Only()) {
addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
if (!Subtarget->isFPOnlySP())
addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
@@ -467,13 +468,23 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// v2f64 is legal so that QR subregs can be extracted as f64 elements, but
// neither Neon nor VFP support any arithmetic operations on it.
+ // The same applies to v4f32, though vadd, vsub, and vmul are natively
+ // supported for v4f32.
setOperationAction(ISD::FADD, MVT::v2f64, Expand);
setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
+ // FIXME: Code duplication: FDIV and FREM are expanded always, see
+ // ARMTargetLowering::addTypeForNEON method for details.
setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
setOperationAction(ISD::FREM, MVT::v2f64, Expand);
+ // FIXME: Create unittest.
+ // In other words, find a case where "copysign" appears in the DAG with
+ // vector operands.
setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
+ // FIXME: Code duplication: SETCC has custom operation action, see
+ // ARMTargetLowering::addTypeForNEON method for details.
setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
+ // FIXME: Create unittest for FNEG and for FABS.
setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
setOperationAction(ISD::FABS, MVT::v2f64, Expand);
setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
@@ -486,11 +497,23 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
+ // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
+
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
+ setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
+ setOperationAction(ISD::FPOWI, MVT::v4f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
+ setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
+ setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
// Neon does not support some operations on v1i64 and v2i64 types.
setOperationAction(ISD::MUL, MVT::v1i64, Expand);
@@ -586,6 +609,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+ // These just redirect to CTTZ and CTLZ on ARM.
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand);
+
// Only ARMv6 has BSWAP.
if (!Subtarget->hasV6Ops())
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
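The "just redirect" comment is literal: the _ZERO_UNDEF nodes merely relax the zero-input contract, so expanding them falls back to the plain CTTZ/CTLZ lowering. A sketch of the equivalence, with the GCC/Clang builtin standing in for ARM's RBIT/CLZ sequence:

#include <cstdint>

// CTTZ_ZERO_UNDEF may assume its input is nonzero; a target whose
// plain CTTZ is fully defined satisfies that contract for free.
uint32_t cttzZeroUndef(uint32_t X) {
  return static_cast<uint32_t>(__builtin_ctz(X)); // caller ensures X != 0
}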
@@ -674,7 +701,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
}
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
+ !Subtarget->isThumb1Only()) {
// Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
// iff target supports vfp2.
setOperationAction(ISD::BITCAST, MVT::i64, Custom);
@@ -712,7 +740,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FCOS, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f32, Expand);
- if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
+ !Subtarget->isThumb1Only()) {
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
}
@@ -723,7 +752,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FMA, MVT::f32, Expand);
// Various VFP goodness
- if (!UseSoftFloat && !Subtarget->isThumb1Only()) {
+ if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) {
// int <-> fp are custom expanded into bit_convert + ARMISD ops.
if (Subtarget->hasVFP2()) {
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
@@ -751,7 +780,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setStackPointerRegisterToSaveRestore(ARM::SP);
- if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
+ if (TM.Options.UseSoftFloat || Subtarget->isThumb1Only() ||
+ !Subtarget->hasVFP2())
setSchedulingPreference(Sched::RegPressure);
else
setSchedulingPreference(Sched::Hybrid);
@@ -1092,7 +1122,8 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
if (!Subtarget->isAAPCS_ABI())
return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
else if (Subtarget->hasVFP2() &&
- FloatABIType == FloatABI::Hard && !isVarArg)
+ getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
+ !isVarArg)
return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
}
@@ -2951,7 +2982,7 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
- if (UnsafeFPMath &&
+ if (getTargetMachine().Options.UnsafeFPMath &&
(CC == ISD::SETEQ || CC == ISD::SETOEQ ||
CC == ISD::SETNE || CC == ISD::SETUNE)) {
SDValue Result = OptimizeVFPBrcond(Op, DAG);
@@ -3978,9 +4009,8 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
}
// Use vmov.f32 to materialize other v2f32 and v4f32 splats.
- if (VT == MVT::v2f32 || VT == MVT::v4f32) {
- ConstantFPSDNode *C = cast<ConstantFPSDNode>(Op.getOperand(0));
- int ImmVal = ARM_AM::getFP32Imm(C->getValueAPF());
+ if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
+ int ImmVal = ARM_AM::getFP32Imm(SplatBits);
if (ImmVal != -1) {
SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32);
return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
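Feeding SplatBits straight into ARM_AM::getFP32Imm works because the encodability test needs only the raw IEEE-754 bits: just ±(16..31)/16 · 2^E with E in [-3,4] fits the 8-bit VMOV immediate. A sketch mirroring the ARM_AM helper:

#include <cstdint>

// Returns the 8-bit vmov.f32 immediate encoding for an IEEE-754
// single-precision bit pattern, or -1 if the value is not encodable.
int getFP32ImmSketch(uint32_t Bits) {
  uint32_t Sign = (Bits >> 31) & 1;
  int32_t Exp = static_cast<int32_t>((Bits >> 23) & 0xff) - 127;
  uint32_t Mantissa = Bits & 0x7fffff;
  if (Mantissa & 0x7ffff)      // only 4 mantissa bits are available
    return -1;
  Mantissa >>= 19;
  if (Exp < -3 || Exp > 4)     // only 3 exponent bits are available
    return -1;
  Exp = ((Exp + 3) & 0x7) ^ 4; // fold into the a:bcd:efgh imm8 layout
  return static_cast<int>((Sign << 7) | (uint32_t(Exp) << 4) | Mantissa);
}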
@@ -6010,7 +6040,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
// executed.
for (MachineBasicBlock::reverse_iterator
II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
- if (!II->getDesc().isCall()) continue;
+ if (!II->isCall()) continue;
DenseMap<unsigned, bool> DefRegs;
for (MachineInstr::mop_iterator
@@ -6421,13 +6451,13 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
SDNode *Node) const {
- const MCInstrDesc *MCID = &MI->getDesc();
- if (!MCID->hasPostISelHook()) {
+ if (!MI->hasPostISelHook()) {
assert(!convertAddSubFlagsOpcode(MI->getOpcode()) &&
"Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'");
return;
}
+ const MCInstrDesc *MCID = &MI->getDesc();
// Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
// RSC. Coming out of isel, they have an implicit CPSR def, but the optional
// operand is still set to noreg. If needed, set the optional operand's
@@ -6454,7 +6484,7 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
// Any ARM instruction that sets the 's' bit should specify an optional
// "cc_out" operand in the last operand position.
- if (!MCID->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) {
+ if (!MI->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) {
assert(!NewOpc && "Optional cc_out operand required");
return;
}
@@ -7948,7 +7978,7 @@ static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
// will return -0, so vmin can only be used for unsafe math or if one of
// the operands is known to be nonzero.
if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) &&
- !UnsafeFPMath &&
+ !DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
@@ -7970,7 +8000,7 @@ static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
// will return +0, so vmax can only be used for unsafe math or if one of
// the operands is known to be nonzero.
if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) &&
- !UnsafeFPMath &&
+ !DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 6940156..80f3773 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -201,21 +201,29 @@ def msr_mask : Operand<i32> {
// 16 imm6<5:4> = '01', 16 - <imm> is encoded in imm6<3:0>
// 32 imm6<5> = '1', 32 - <imm> is encoded in imm6<4:0>
// 64 64 - <imm> is encoded in imm6<5:0>
+def shr_imm8_asm_operand : ImmAsmOperand { let Name = "ShrImm8"; }
def shr_imm8 : Operand<i32> {
let EncoderMethod = "getShiftRight8Imm";
let DecoderMethod = "DecodeShiftRight8Imm";
+ let ParserMatchClass = shr_imm8_asm_operand;
}
+def shr_imm16_asm_operand : ImmAsmOperand { let Name = "ShrImm16"; }
def shr_imm16 : Operand<i32> {
let EncoderMethod = "getShiftRight16Imm";
let DecoderMethod = "DecodeShiftRight16Imm";
+ let ParserMatchClass = shr_imm16_asm_operand;
}
+def shr_imm32_asm_operand : ImmAsmOperand { let Name = "ShrImm32"; }
def shr_imm32 : Operand<i32> {
let EncoderMethod = "getShiftRight32Imm";
let DecoderMethod = "DecodeShiftRight32Imm";
+ let ParserMatchClass = shr_imm32_asm_operand;
}
+def shr_imm64_asm_operand : ImmAsmOperand { let Name = "ShrImm64"; }
def shr_imm64 : Operand<i32> {
let EncoderMethod = "getShiftRight64Imm";
let DecoderMethod = "DecodeShiftRight64Imm";
+ let ParserMatchClass = shr_imm64_asm_operand;
}
//===----------------------------------------------------------------------===//
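The encoding table in the comment above reduces to one rule. A sketch of what the getShiftRightNImm encoder callbacks compute (the width-marker bits of imm6 come from the instruction's fixed encoding bits, not from the encoder):

// For element width W in {8,16,32,64}, a right-shift amount S in
// [1,W] is stored as W - S; e.g. a 16-bit shift by 5 puts 11 in
// imm6<3:0> beneath the fixed '01' width marker.
static unsigned encodeShiftRightImm(unsigned Width, unsigned Shift) {
  return Width - Shift;
}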
@@ -231,6 +239,14 @@ class VFP2InstAlias<string Asm, dag Result, bit Emit = 0b1>
: InstAlias<Asm, Result, Emit>, Requires<[HasVFP2]>;
class VFP3InstAlias<string Asm, dag Result, bit Emit = 0b1>
: InstAlias<Asm, Result, Emit>, Requires<[HasVFP3]>;
+class NEONInstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, Requires<[HasNEON]>;
+
+
+class VFP2MnemonicAlias<string src, string dst> : MnemonicAlias<src, dst>,
+ Requires<[HasVFP2]>;
+class NEONMnemonicAlias<string src, string dst> : MnemonicAlias<src, dst>,
+ Requires<[HasNEON]>;
//===----------------------------------------------------------------------===//
// ARM Instruction templates.
@@ -1994,73 +2010,111 @@ class NEONFPPat<dag pattern, dag result> : Pat<pattern, result> {
// VFP/NEON Instruction aliases for type suffixes.
class VFPDataTypeInstAlias<string opc, string dt, string asm, dag Result> :
- InstAlias<!strconcat(opc, dt, asm), Result>;
-multiclass VFPDT8ReqInstAlias<string opc, string asm, dag Result> {
- def I8 : VFPDataTypeInstAlias<opc, ".i8", asm, Result>;
- def S8 : VFPDataTypeInstAlias<opc, ".s8", asm, Result>;
- def U8 : VFPDataTypeInstAlias<opc, ".u8", asm, Result>;
- def F8 : VFPDataTypeInstAlias<opc, ".p8", asm, Result>;
-}
-// VFPDT8ReqInstAlias plus plain ".8"
-multiclass VFPDT8InstAlias<string opc, string asm, dag Result> {
- def _8 : VFPDataTypeInstAlias<opc, ".8", asm, Result>;
- defm : VFPDT8ReqInstAlias<opc, asm, Result>;
-}
-multiclass VFPDT16ReqInstAlias<string opc, string asm, dag Result> {
- def I16 : VFPDataTypeInstAlias<opc, ".i16", asm, Result>;
- def S16 : VFPDataTypeInstAlias<opc, ".s16", asm, Result>;
- def U16 : VFPDataTypeInstAlias<opc, ".u16", asm, Result>;
- def F16 : VFPDataTypeInstAlias<opc, ".p16", asm, Result>;
-}
-// VFPDT16ReqInstAlias plus plain ".16"
-multiclass VFPDT16InstAlias<string opc, string asm, dag Result> {
- def _16 : VFPDataTypeInstAlias<opc, ".16", asm, Result>;
- defm : VFPDT16ReqInstAlias<opc, asm, Result>;
-}
-multiclass VFPDT32ReqInstAlias<string opc, string asm, dag Result> {
- def I32 : VFPDataTypeInstAlias<opc, ".i32", asm, Result>;
- def S32 : VFPDataTypeInstAlias<opc, ".s32", asm, Result>;
- def U32 : VFPDataTypeInstAlias<opc, ".u32", asm, Result>;
- def F32 : VFPDataTypeInstAlias<opc, ".f32", asm, Result>;
- def F : VFPDataTypeInstAlias<opc, ".f", asm, Result>;
-}
-// VFPDT32ReqInstAlias plus plain ".32"
-multiclass VFPDT32InstAlias<string opc, string asm, dag Result> {
- def _32 : VFPDataTypeInstAlias<opc, ".32", asm, Result>;
- defm : VFPDT32ReqInstAlias<opc, asm, Result>;
-}
-multiclass VFPDT64ReqInstAlias<string opc, string asm, dag Result> {
- def I64 : VFPDataTypeInstAlias<opc, ".i64", asm, Result>;
- def S64 : VFPDataTypeInstAlias<opc, ".s64", asm, Result>;
- def U64 : VFPDataTypeInstAlias<opc, ".u64", asm, Result>;
- def F64 : VFPDataTypeInstAlias<opc, ".f64", asm, Result>;
- def D : VFPDataTypeInstAlias<opc, ".d", asm, Result>;
-}
-// VFPDT64ReqInstAlias plus plain ".64"
-multiclass VFPDT64InstAlias<string opc, string asm, dag Result> {
- def _64 : VFPDataTypeInstAlias<opc, ".64", asm, Result>;
- defm : VFPDT64ReqInstAlias<opc, asm, Result>;
-}
-multiclass VFPDT64NoF64ReqInstAlias<string opc, string asm, dag Result> {
- def I64 : VFPDataTypeInstAlias<opc, ".i64", asm, Result>;
- def S64 : VFPDataTypeInstAlias<opc, ".s64", asm, Result>;
- def U64 : VFPDataTypeInstAlias<opc, ".u64", asm, Result>;
- def D : VFPDataTypeInstAlias<opc, ".d", asm, Result>;
-}
-// VFPDT64ReqInstAlias plus plain ".64"
-multiclass VFPDT64NoF64InstAlias<string opc, string asm, dag Result> {
- def _64 : VFPDataTypeInstAlias<opc, ".64", asm, Result>;
- defm : VFPDT64ReqInstAlias<opc, asm, Result>;
-}
+ InstAlias<!strconcat(opc, dt, "\t", asm), Result>, Requires<[HasVFP2]>;
+
multiclass VFPDTAnyInstAlias<string opc, string asm, dag Result> {
- defm : VFPDT8InstAlias<opc, asm, Result>;
- defm : VFPDT16InstAlias<opc, asm, Result>;
- defm : VFPDT32InstAlias<opc, asm, Result>;
- defm : VFPDT64InstAlias<opc, asm, Result>;
-}
-multiclass VFPDTAnyNoF64InstAlias<string opc, string asm, dag Result> {
- defm : VFPDT8InstAlias<opc, asm, Result>;
- defm : VFPDT16InstAlias<opc, asm, Result>;
- defm : VFPDT32InstAlias<opc, asm, Result>;
- defm : VFPDT64NoF64InstAlias<opc, asm, Result>;
-}
+ def : VFPDataTypeInstAlias<opc, ".8", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".16", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".32", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".64", asm, Result>;
+}
+
+// The same alias classes using AsmPseudo instead, for the more complex
+// stuff in NEON that InstAlias can't quite handle.
+// Note that we can't use anonymous defm references here like we can
+// above, as we care about the ultimate instruction enum names generated, unlike
+// for instalias defs.
+class NEONDataTypeAsmPseudoInst<string opc, string dt, string asm, dag iops> :
+ AsmPseudoInst<!strconcat(opc, dt, "\t", asm), iops>, Requires<[HasNEON]>;
+multiclass NEONDT8ReqAsmPseudoInst<string opc, string asm, dag iops> {
+ def I8 : NEONDataTypeAsmPseudoInst<opc, ".i8", asm, iops>;
+ def S8 : NEONDataTypeAsmPseudoInst<opc, ".s8", asm, iops>;
+ def U8 : NEONDataTypeAsmPseudoInst<opc, ".u8", asm, iops>;
+ def P8 : NEONDataTypeAsmPseudoInst<opc, ".p8", asm, iops>;
+}
+// NEONDT8ReqAsmPseudoInst plus plain ".8"
+multiclass NEONDT8AsmPseudoInst<string opc, string asm, dag iops> {
+ def _8 : NEONDataTypeAsmPseudoInst<opc, ".8", asm, iops>;
+ defm _ : NEONDT8ReqAsmPseudoInst<opc, asm, iops>;
+}
+multiclass NEONDT16ReqAsmPseudoInst<string opc, string asm, dag iops> {
+ def I16 : NEONDataTypeAsmPseudoInst<opc, ".i16", asm, iops>;
+ def S16 : NEONDataTypeAsmPseudoInst<opc, ".s16", asm, iops>;
+ def U16 : NEONDataTypeAsmPseudoInst<opc, ".u16", asm, iops>;
+ def P16 : NEONDataTypeAsmPseudoInst<opc, ".p16", asm, iops>;
+}
+// NEONDT16ReqAsmPseudoInst plus plain ".16"
+multiclass NEONDT16AsmPseudoInst<string opc, string asm, dag iops> {
+ def _16 : NEONDataTypeAsmPseudoInst<opc, ".16", asm, iops>;
+ defm _ : NEONDT16ReqAsmPseudoInst<opc, asm, iops>;
+}
+multiclass NEONDT32ReqAsmPseudoInst<string opc, string asm, dag iops> {
+ def I32 : NEONDataTypeAsmPseudoInst<opc, ".i32", asm, iops>;
+ def S32 : NEONDataTypeAsmPseudoInst<opc, ".s32", asm, iops>;
+ def U32 : NEONDataTypeAsmPseudoInst<opc, ".u32", asm, iops>;
+ def F32 : NEONDataTypeAsmPseudoInst<opc, ".f32", asm, iops>;
+ def F : NEONDataTypeAsmPseudoInst<opc, ".f", asm, iops>;
+}
+// NEONDT32ReqAsmPseudoInst plus plain ".32"
+multiclass NEONDT32AsmPseudoInst<string opc, string asm, dag iops> {
+ def _32 : NEONDataTypeAsmPseudoInst<opc, ".32", asm, iops>;
+ defm _ : NEONDT32ReqAsmPseudoInst<opc, asm, iops>;
+}
+multiclass NEONDT64ReqAsmPseudoInst<string opc, string asm, dag iops> {
+ def I64 : NEONDataTypeAsmPseudoInst<opc, ".i64", asm, iops>;
+ def S64 : NEONDataTypeAsmPseudoInst<opc, ".s64", asm, iops>;
+ def U64 : NEONDataTypeAsmPseudoInst<opc, ".u64", asm, iops>;
+ def F64 : NEONDataTypeAsmPseudoInst<opc, ".f64", asm, iops>;
+ def D : NEONDataTypeAsmPseudoInst<opc, ".d", asm, iops>;
+}
+// NEONDT64ReqAsmPseudoInst plus plain ".64"
+multiclass NEONDT64AsmPseudoInst<string opc, string asm, dag iops> {
+ def _64 : NEONDataTypeAsmPseudoInst<opc, ".64", asm, iops>;
+ defm _ : NEONDT64ReqAsmPseudoInst<opc, asm, iops>;
+}
+multiclass NEONDT64NoF64ReqAsmPseudoInst<string opc, string asm, dag iops> {
+ def I64 : NEONDataTypeAsmPseudoInst<opc, ".i64", asm, iops>;
+ def S64 : NEONDataTypeAsmPseudoInst<opc, ".s64", asm, iops>;
+ def U64 : NEONDataTypeAsmPseudoInst<opc, ".u64", asm, iops>;
+ def D : NEONDataTypeAsmPseudoInst<opc, ".d", asm, iops>;
+}
+// NEONDT64NoF64ReqAsmPseudoInst plus plain ".64"
+multiclass NEONDT64NoF64AsmPseudoInst<string opc, string asm, dag iops> {
+ def _64 : NEONDataTypeAsmPseudoInst<opc, ".64", asm, iops>;
+ defm _ : NEONDT64NoF64ReqAsmPseudoInst<opc, asm, iops>;
+}
+multiclass NEONDTAnyAsmPseudoInst<string opc, string asm, dag iops> {
+ defm _ : NEONDT8AsmPseudoInst<opc, asm, iops>;
+ defm _ : NEONDT16AsmPseudoInst<opc, asm, iops>;
+ defm _ : NEONDT32AsmPseudoInst<opc, asm, iops>;
+ defm _ : NEONDT64AsmPseudoInst<opc, asm, iops>;
+}
+multiclass NEONDTAnyNoF64AsmPseudoInst<string opc, string asm, dag iops> {
+ defm _ : NEONDT8AsmPseudoInst<opc, asm, iops>;
+ defm _ : NEONDT16AsmPseudoInst<opc, asm, iops>;
+ defm _ : NEONDT32AsmPseudoInst<opc, asm, iops>;
+ defm _ : NEONDT64NoF64AsmPseudoInst<opc, asm, iops>;
+}
+
+// Data type suffix token aliases. Implements Table A7-3 in the ARM ARM.
+def : TokenAlias<".s8", ".i8">;
+def : TokenAlias<".u8", ".i8">;
+def : TokenAlias<".s16", ".i16">;
+def : TokenAlias<".u16", ".i16">;
+def : TokenAlias<".s32", ".i32">;
+def : TokenAlias<".u32", ".i32">;
+def : TokenAlias<".s64", ".i64">;
+def : TokenAlias<".u64", ".i64">;
+
+def : TokenAlias<".i8", ".8">;
+def : TokenAlias<".i16", ".16">;
+def : TokenAlias<".i32", ".32">;
+def : TokenAlias<".i64", ".64">;
+
+def : TokenAlias<".p8", ".8">;
+def : TokenAlias<".p16", ".16">;
+
+def : TokenAlias<".f32", ".32">;
+def : TokenAlias<".f64", ".64">;
+def : TokenAlias<".f", ".f32">;
+def : TokenAlias<".d", ".f64">;
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index be03924..516a080 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -238,27 +238,23 @@ def so_imm_not_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(~(int)N->getZExtValue(), MVT::i32);
}]>;
-/// imm1_15 predicate - True if the 32-bit immediate is in the range [1,15].
-def imm1_15 : ImmLeaf<i32, [{
- return (int32_t)Imm >= 1 && (int32_t)Imm < 16;
-}]>;
-
/// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31].
def imm16_31 : ImmLeaf<i32, [{
return (int32_t)Imm >= 16 && (int32_t)Imm < 32;
}]>;
-def so_imm_neg :
- PatLeaf<(imm), [{
+def so_imm_neg_asmoperand : AsmOperandClass { let Name = "ARMSOImmNeg"; }
+def so_imm_neg : Operand<i32>, PatLeaf<(imm), [{
return ARM_AM::getSOImmVal(-(uint32_t)N->getZExtValue()) != -1;
- }], so_imm_neg_XFORM>;
+ }], so_imm_neg_XFORM> {
+ let ParserMatchClass = so_imm_neg_asmoperand;
+}
// Note: this pattern doesn't require an encoder method and such, as it's
// only used on aliases (Pat<> and InstAlias<>). The actual encoding
-// is handled by the destination instructions, which use t2_so_imm.
+// is handled by the destination instructions, which use so_imm.
def so_imm_not_asmoperand : AsmOperandClass { let Name = "ARMSOImmNot"; }
-def so_imm_not :
- Operand<i32>, PatLeaf<(imm), [{
+def so_imm_not : Operand<i32>, PatLeaf<(imm), [{
return ARM_AM::getSOImmVal(~(uint32_t)N->getZExtValue()) != -1;
}], so_imm_not_XFORM> {
let ParserMatchClass = so_imm_not_asmoperand;
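Both predicates lean on the ARM modified-immediate rule. A standalone sketch of the test behind ARM_AM::getSOImmVal, reduced to a yes/no answer:

#include <cstdint>

// A 32-bit value is a valid so_imm iff it is an 8-bit value rotated
// right by an even amount; so_imm_not then asks whether ~V fits, and
// so_imm_neg whether -V does.
static bool isSOImm(uint32_t V) {
  for (unsigned R = 0; R < 32; R += 2) {
    // Rotating left by R undoes a rotate-right by R.
    uint32_t Rot = R ? ((V << R) | (V >> (32 - R))) : V;
    if (Rot <= 0xff)
      return true;
  }
  return false;
}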
@@ -512,6 +508,14 @@ def arm_i32imm : PatLeaf<(imm), [{
return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
}]>;
+/// imm0_1 predicate - Immediate in the range [0,1].
+def Imm0_1AsmOperand: ImmAsmOperand { let Name = "Imm0_1"; }
+def imm0_1 : Operand<i32> { let ParserMatchClass = Imm0_1AsmOperand; }
+
+/// imm0_3 predicate - Immediate in the range [0,3].
+def Imm0_3AsmOperand: ImmAsmOperand { let Name = "Imm0_3"; }
+def imm0_3 : Operand<i32> { let ParserMatchClass = Imm0_3AsmOperand; }
+
/// imm0_7 predicate - Immediate in the range [0,7].
def Imm0_7AsmOperand: ImmAsmOperand { let Name = "Imm0_7"; }
def imm0_7 : Operand<i32>, ImmLeaf<i32, [{
@@ -520,6 +524,42 @@ def imm0_7 : Operand<i32>, ImmLeaf<i32, [{
let ParserMatchClass = Imm0_7AsmOperand;
}
+/// imm8 predicate - Immediate is exactly 8.
+def Imm8AsmOperand: ImmAsmOperand { let Name = "Imm8"; }
+def imm8 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 8; }]> {
+ let ParserMatchClass = Imm8AsmOperand;
+}
+
+/// imm16 predicate - Immediate is exactly 16.
+def Imm16AsmOperand: ImmAsmOperand { let Name = "Imm16"; }
+def imm16 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 16; }]> {
+ let ParserMatchClass = Imm16AsmOperand;
+}
+
+/// imm32 predicate - Immediate is exactly 32.
+def Imm32AsmOperand: ImmAsmOperand { let Name = "Imm32"; }
+def imm32 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 32; }]> {
+ let ParserMatchClass = Imm32AsmOperand;
+}
+
+/// imm1_7 predicate - Immediate in the range [1,7].
+def Imm1_7AsmOperand: ImmAsmOperand { let Name = "Imm1_7"; }
+def imm1_7 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 8; }]> {
+ let ParserMatchClass = Imm1_7AsmOperand;
+}
+
+/// imm1_15 predicate - Immediate in the range [1,15].
+def Imm1_15AsmOperand: ImmAsmOperand { let Name = "Imm1_15"; }
+def imm1_15 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 16; }]> {
+ let ParserMatchClass = Imm1_15AsmOperand;
+}
+
+/// imm1_31 predicate - Immediate in the range [1,31].
+def Imm1_31AsmOperand: ImmAsmOperand { let Name = "Imm1_31"; }
+def imm1_31 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 32; }]> {
+ let ParserMatchClass = Imm1_31AsmOperand;
+}
+
/// imm0_15 predicate - Immediate in the range [0,15].
def Imm0_15AsmOperand: ImmAsmOperand { let Name = "Imm0_15"; }
def imm0_15 : Operand<i32>, ImmLeaf<i32, [{
@@ -544,6 +584,14 @@ def imm0_32 : Operand<i32>, ImmLeaf<i32, [{
let ParserMatchClass = Imm0_32AsmOperand;
}
+/// imm0_63 predicate - True if the 32-bit immediate is in the range [0,63].
+def Imm0_63AsmOperand: ImmAsmOperand { let Name = "Imm0_63"; }
+def imm0_63 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 64;
+}]> {
+ let ParserMatchClass = Imm0_63AsmOperand;
+}
+
/// imm0_255 predicate - Immediate in the range [0,255].
def Imm0_255AsmOperand : ImmAsmOperand { let Name = "Imm0_255"; }
def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> {
@@ -812,6 +860,9 @@ def addrmode6dup : Operand<i32>,
let PrintMethod = "printAddrMode6Operand";
let MIOperandInfo = (ops GPR:$addr, i32imm);
let EncoderMethod = "getAddrMode6DupAddressOpValue";
+ // FIXME: This is close, but not quite right. The alignment specifier is
+ // different.
+ let ParserMatchClass = AddrMode6AsmOperand;
}
// addrmodepc := pc + reg
@@ -2753,23 +2804,25 @@ defm STRHT : AI3strT<0b1011, "strht">;
// Load / store multiple Instructions.
//
-multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
+multiclass arm_ldst_mult<string asm, string sfx, bit L_bit, bit P_bit, Format f,
InstrItinClass itin, InstrItinClass itin_upd> {
// IA is the default, so no need for an explicit suffix on the
// mnemonic here. The form without it is the canonical spelling.
def IA :
AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeNone, f, itin,
- !strconcat(asm, "${p}\t$Rn, $regs"), "", []> {
+ !strconcat(asm, "${p}\t$Rn, $regs", sfx), "", []> {
let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = P_bit;
let Inst{21} = 0; // No writeback
let Inst{20} = L_bit;
}
def IA_UPD :
AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeUpd, f, itin_upd,
- !strconcat(asm, "${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ !strconcat(asm, "${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> {
let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = P_bit;
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
@@ -2778,16 +2831,18 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
def DA :
AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeNone, f, itin,
- !strconcat(asm, "da${p}\t$Rn, $regs"), "", []> {
+ !strconcat(asm, "da${p}\t$Rn, $regs", sfx), "", []> {
let Inst{24-23} = 0b00; // Decrement After
+ let Inst{22} = P_bit;
let Inst{21} = 0; // No writeback
let Inst{20} = L_bit;
}
def DA_UPD :
AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeUpd, f, itin_upd,
- !strconcat(asm, "da${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ !strconcat(asm, "da${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> {
let Inst{24-23} = 0b00; // Decrement After
+ let Inst{22} = P_bit;
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
@@ -2796,16 +2851,18 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
def DB :
AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeNone, f, itin,
- !strconcat(asm, "db${p}\t$Rn, $regs"), "", []> {
+ !strconcat(asm, "db${p}\t$Rn, $regs", sfx), "", []> {
let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = P_bit;
let Inst{21} = 0; // No writeback
let Inst{20} = L_bit;
}
def DB_UPD :
AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeUpd, f, itin_upd,
- !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ !strconcat(asm, "db${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> {
let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = P_bit;
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
@@ -2814,16 +2871,18 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
def IB :
AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeNone, f, itin,
- !strconcat(asm, "ib${p}\t$Rn, $regs"), "", []> {
+ !strconcat(asm, "ib${p}\t$Rn, $regs", sfx), "", []> {
let Inst{24-23} = 0b11; // Increment Before
+ let Inst{22} = P_bit;
let Inst{21} = 0; // No writeback
let Inst{20} = L_bit;
}
def IB_UPD :
AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeUpd, f, itin_upd,
- !strconcat(asm, "ib${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ !strconcat(asm, "ib${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> {
let Inst{24-23} = 0b11; // Increment Before
+ let Inst{22} = P_bit;
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
@@ -2834,10 +2893,12 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
let neverHasSideEffects = 1 in {
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
-defm LDM : arm_ldst_mult<"ldm", 1, LdStMulFrm, IIC_iLoad_m, IIC_iLoad_mu>;
+defm LDM : arm_ldst_mult<"ldm", "", 1, 0, LdStMulFrm, IIC_iLoad_m,
+ IIC_iLoad_mu>;
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
-defm STM : arm_ldst_mult<"stm", 0, LdStMulFrm, IIC_iStore_m, IIC_iStore_mu>;
+defm STM : arm_ldst_mult<"stm", "", 0, 0, LdStMulFrm, IIC_iStore_m,
+ IIC_iStore_mu>;
} // neverHasSideEffects
@@ -2851,6 +2912,16 @@ def LDMIA_RET : ARMPseudoExpand<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
(LDMIA_UPD GPR:$wb, GPR:$Rn, pred:$p, reglist:$regs)>,
RegConstraint<"$Rn = $wb">;
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm sysLDM : arm_ldst_mult<"ldm", " ^", 1, 1, LdStMulFrm, IIC_iLoad_m,
+ IIC_iLoad_mu>;
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm sysSTM : arm_ldst_mult<"stm", " ^", 0, 1, LdStMulFrm, IIC_iStore_m,
+ IIC_iStore_mu>;
+
+
+
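The multiclass above wires five mode bits into each LDM/STM variant. A sketch of the field layout the 'let Inst{...}' lines assemble (bit positions as written in the patch; value names illustrative):

#include <cstdint>

// Inst{24-23} select the addressing mode (IA=0b01, IB=0b11, DA=0b00,
// DB=0b10), Inst{22} is the new P bit for the "^" user-register forms
// (sysLDM/sysSTM), Inst{21} is writeback, Inst{20} is the L bit.
uint32_t ldstMultModeBits(unsigned PU, bool UserRegs, bool Writeback,
                          bool IsLoad) {
  return (uint32_t(PU & 0x3) << 23) | (uint32_t(UserRegs) << 22) |
         (uint32_t(Writeback) << 21) | (uint32_t(IsLoad) << 20);
}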
//===----------------------------------------------------------------------===//
// Move Instructions.
//
@@ -4999,6 +5070,32 @@ def : MnemonicAlias<"usubaddx", "usax">;
// for isel.
def : ARMInstAlias<"mov${s}${p} $Rd, $imm",
(MVNi rGPR:$Rd, so_imm_not:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"mvn${s}${p} $Rd, $imm",
+ (MOVi rGPR:$Rd, so_imm_not:$imm, pred:$p, cc_out:$s)>;
+// Same for AND <--> BIC
+def : ARMInstAlias<"bic${s}${p} $Rd, $Rn, $imm",
+ (ANDri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"bic${s}${p} $Rdn, $imm",
+ (ANDri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"and${s}${p} $Rd, $Rn, $imm",
+ (BICri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"and${s}${p} $Rdn, $imm",
+ (BICri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+
+// Likewise, "add Rd, so_imm_neg" -> sub
+def : ARMInstAlias<"add${s}${p} $Rd, $Rn, $imm",
+ (SUBri GPR:$Rd, GPR:$Rn, so_imm_neg:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"add${s}${p} $Rd, $imm",
+ (SUBri GPR:$Rd, GPR:$Rd, so_imm_neg:$imm, pred:$p, cc_out:$s)>;
+// Same for CMP <--> CMN via so_imm_neg
+def : ARMInstAlias<"cmp${p} $Rd, $imm",
+ (CMNzri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>;
+def : ARMInstAlias<"cmn${p} $Rd, $imm",
+ (CMPri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>;
// The shifter forms of the MOV instruction are aliased to the ASR, LSL,
// LSR, ROR, and RRX instructions.
@@ -5056,4 +5153,8 @@ def : ARMInstAlias<"ror${s}${p} $Rn, $Rm",
// 'mul' instruction can be specified with only two operands.
def : ARMInstAlias<"mul${s}${p} $Rn, $Rm",
- (MUL rGPR:$Rn, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+ (MUL rGPR:$Rn, rGPR:$Rm, rGPR:$Rn, pred:$p, cc_out:$s)>;
+
+// "neg" is and alias for "rsb rd, rn, #0"
+def : ARMInstAlias<"neg${s}${p} $Rd, $Rm",
+ (RSBri GPR:$Rd, GPR:$Rm, 0, pred:$p, cc_out:$s)>;
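The mov/mvn, and/bic, add/sub, and cmp/cmn alias pairs above all apply the same trick. A sketch for the mov case, reusing the modified-immediate test sketched under the so_imm_not notes (illustrative, not the generated matcher):

#include <cstdint>
#include <string>

static bool isSOImm(uint32_t V) {
  for (unsigned R = 0; R < 32; R += 2)
    if ((R ? ((V << R) | (V >> (32 - R))) : V) <= 0xff)
      return true;
  return false;
}

// When the literal immediate does not encode but its complement does,
// flip the mnemonic and complement the immediate (negation plays the
// same role for add<->sub and cmp<->cmn).
std::string rewriteMovImm(uint32_t Imm) {
  if (isSOImm(Imm))  return "mov";
  if (isSOImm(~Imm)) return "mvn"; // matched via so_imm_not
  return "movw/ldr";               // needs a different strategy entirely
}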
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index f2ca963..c40860d 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -74,9 +74,11 @@ def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
let MIOperandInfo = (ops i32imm);
}
+// Register list of one D register.
def VecListOneDAsmOperand : AsmOperandClass {
let Name = "VecListOneD";
let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
}
def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
let ParserMatchClass = VecListOneDAsmOperand;
@@ -85,6 +87,7 @@ def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
def VecListTwoDAsmOperand : AsmOperandClass {
let Name = "VecListTwoD";
let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
}
def VecListTwoD : RegisterOperand<DPR, "printVectorListTwo"> {
let ParserMatchClass = VecListTwoDAsmOperand;
@@ -93,6 +96,7 @@ def VecListTwoD : RegisterOperand<DPR, "printVectorListTwo"> {
def VecListThreeDAsmOperand : AsmOperandClass {
let Name = "VecListThreeD";
let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
}
def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
let ParserMatchClass = VecListThreeDAsmOperand;
@@ -101,6 +105,7 @@ def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
def VecListFourDAsmOperand : AsmOperandClass {
let Name = "VecListFourD";
let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
}
def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
let ParserMatchClass = VecListFourDAsmOperand;
@@ -109,11 +114,92 @@ def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
def VecListTwoQAsmOperand : AsmOperandClass {
let Name = "VecListTwoQ";
let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
}
-def VecListTwoQ : RegisterOperand<DPR, "printVectorListTwo"> {
+def VecListTwoQ : RegisterOperand<DPR, "printVectorListTwoSpaced"> {
let ParserMatchClass = VecListTwoQAsmOperand;
}
+// Register list of one D register, with "all lanes" subscripting.
+def VecListOneDAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListOneDAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
+ let ParserMatchClass = VecListOneDAllLanesAsmOperand;
+}
+// Register list of two D registers, with "all lanes" subscripting.
+def VecListTwoDAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoDAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListTwoDAllLanes : RegisterOperand<DPR, "printVectorListTwoAllLanes"> {
+ let ParserMatchClass = VecListTwoDAllLanesAsmOperand;
+}
+
+// Register list of one D register, with byte lane subscripting.
+def VecListOneDByteIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListOneDByteIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListOneDByteIndexed : Operand<i32> {
+ let ParserMatchClass = VecListOneDByteIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with half-word lane subscripting.
+def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListOneDHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListOneDHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListOneDWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListOneDWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListOneDWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListOneDWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// Register list of two D registers, with byte lane subscripting.
+def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoDByteIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListTwoDByteIndexed : Operand<i32> {
+ let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with half-word lane subscripting.
+def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoDHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListTwoDHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoDWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListTwoDWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+
//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//
@@ -272,12 +358,23 @@ class VLDQWBregisterPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
(ins addrmode6:$addr, rGPR:$offset), itin,
"$addr.addr = $wb">;
+
class VLDQQPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQQWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset), itin,
"$addr.addr = $wb">;
+class VLDQQWBfixedPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr), itin,
+ "$addr.addr = $wb">;
+class VLDQQWBregisterPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, rGPR:$offset), itin,
+ "$addr.addr = $wb">;
+
+
class VLDQQQQPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,
"$src = $dst">;
@@ -462,31 +559,23 @@ defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64">;
def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;
// VLD2 : Vector Load (multiple 2-element structures)
-class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy>
+class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
+ InstrItinClass itin>
: NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
- (ins addrmode6:$Rn), IIC_VLD2,
- "vld2", Dt, "$Vd, $Rn", "", []> {
- let Rm = 0b1111;
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
-}
-class VLD2Q<bits<4> op7_4, string Dt, RegisterOperand VdTy>
- : NLdSt<0, 0b10, 0b0011, op7_4,
- (outs VdTy:$Vd),
- (ins addrmode6:$Rn), IIC_VLD2x2,
+ (ins addrmode6:$Rn), itin,
"vld2", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDInstruction";
}
-def VLD2d8 : VLD2D<0b1000, {0,0,?,?}, "8", VecListTwoD>;
-def VLD2d16 : VLD2D<0b1000, {0,1,?,?}, "16", VecListTwoD>;
-def VLD2d32 : VLD2D<0b1000, {1,0,?,?}, "32", VecListTwoD>;
+def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VLD2>;
+def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VLD2>;
+def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VLD2>;
-def VLD2q8 : VLD2Q<{0,0,?,?}, "8", VecListFourD>;
-def VLD2q16 : VLD2Q<{0,1,?,?}, "16", VecListFourD>;
-def VLD2q32 : VLD2Q<{1,0,?,?}, "32", VecListFourD>;
+def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2>;
+def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2>;
+def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2>;
def VLD2d8Pseudo : VLDQPseudo<IIC_VLD2>;
def VLD2d16Pseudo : VLDQPseudo<IIC_VLD2>;
@@ -497,47 +586,56 @@ def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;
// ...with address register writeback:
-class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy>
- : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2u,
- "vld2", Dt, "$Vd, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
-}
-class VLD2QWB<bits<4> op7_4, string Dt, RegisterOperand VdTy>
- : NLdSt<0, 0b10, 0b0011, op7_4,
- (outs VdTy:$Vd, GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2x2u,
- "vld2", Dt, "$Vd, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
+multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
+ RegisterOperand VdTy, InstrItinClass itin> {
+ def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn), itin,
+ "vld2", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm), itin,
+ "vld2", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
}
-def VLD2d8_UPD : VLD2DWB<0b1000, {0,0,?,?}, "8", VecListTwoD>;
-def VLD2d16_UPD : VLD2DWB<0b1000, {0,1,?,?}, "16", VecListTwoD>;
-def VLD2d32_UPD : VLD2DWB<0b1000, {1,0,?,?}, "32", VecListTwoD>;
+defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VLD2u>;
+defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VLD2u>;
+defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VLD2u>;
-def VLD2q8_UPD : VLD2QWB<{0,0,?,?}, "8", VecListFourD>;
-def VLD2q16_UPD : VLD2QWB<{0,1,?,?}, "16", VecListFourD>;
-def VLD2q32_UPD : VLD2QWB<{1,0,?,?}, "32", VecListFourD>;
+defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u>;
+defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u>;
+defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u>;
-def VLD2d8Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
-def VLD2d16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
-def VLD2d32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
+def VLD2d8PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD2u>;
+def VLD2d16PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD2u>;
+def VLD2d32PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD2u>;
+def VLD2d8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2u>;
+def VLD2d16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2u>;
+def VLD2d32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2u>;
-def VLD2q8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
-def VLD2q16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
-def VLD2q32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
+def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
+def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
+def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
+def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
+def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
+def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
// ...with double-spaced registers
-def VLD2b8 : VLD2D<0b1001, {0,0,?,?}, "8", VecListTwoQ>;
-def VLD2b16 : VLD2D<0b1001, {0,1,?,?}, "16", VecListTwoQ>;
-def VLD2b32 : VLD2D<0b1001, {1,0,?,?}, "32", VecListTwoQ>;
-def VLD2b8_UPD : VLD2DWB<0b1001, {0,0,?,?}, "8", VecListTwoQ>;
-def VLD2b16_UPD : VLD2DWB<0b1001, {0,1,?,?}, "16", VecListTwoQ>;
-def VLD2b32_UPD : VLD2DWB<0b1001, {1,0,?,?}, "32", VecListTwoQ>;
+def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VLD2>;
+def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VLD2>;
+def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VLD2>;
+defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VLD2u>;
+defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VLD2u>;
+defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VLD2u>;
// VLD3 : Vector Load (multiple 3-element structures)
class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -997,9 +1095,11 @@ def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
// VLD1DUP : Vector Load (single element to all lanes)
class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
- : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd), (ins addrmode6dup:$Rn),
- IIC_VLD1dup, "vld1", Dt, "\\{$Vd[]\\}, $Rn", "",
- [(set DPR:$Vd, (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd),
+ (ins addrmode6dup:$Rn),
+ IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
+ [(set VecListOneDAllLanes:$Vd,
+ (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
@@ -1025,9 +1125,9 @@ def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
class VLD1QDUP<bits<4> op7_4, string Dt>
- : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2),
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListTwoDAllLanes:$Vd),
(ins addrmode6dup:$Rn), IIC_VLD1dup,
- "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
+ "vld1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
@@ -1038,32 +1138,63 @@ def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16">;
def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32">;
// ...with address register writeback:
-class VLD1DUPWB<bits<4> op7_4, string Dt>
- : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, GPR:$wb),
- (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu,
- "vld1", Dt, "\\{$Vd[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
- let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLD1DupInstruction";
+multiclass VLD1DUPWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
+ (outs VecListOneDAllLanes:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn), IIC_VLD1dupu,
+ "vld1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<1, 0b10, 0b1100, op7_4,
+ (outs VecListOneDAllLanes:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
+ "vld1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
}
-class VLD1QDUPWB<bits<4> op7_4, string Dt>
- : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
- (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu,
- "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
- let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLD1DupInstruction";
+multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
+ (outs VecListTwoDAllLanes:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn), IIC_VLD1dupu,
+ "vld1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<1, 0b10, 0b1100, op7_4,
+ (outs VecListTwoDAllLanes:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
+ "vld1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
}
-def VLD1DUPd8_UPD : VLD1DUPWB<{0,0,0,0}, "8">;
-def VLD1DUPd16_UPD : VLD1DUPWB<{0,1,0,?}, "16">;
-def VLD1DUPd32_UPD : VLD1DUPWB<{1,0,0,?}, "32">;
+defm VLD1DUPd8wb : VLD1DUPWB<{0,0,0,0}, "8">;
+defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16">;
+defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32">;
-def VLD1DUPq8_UPD : VLD1QDUPWB<{0,0,1,0}, "8">;
-def VLD1DUPq16_UPD : VLD1QDUPWB<{0,1,1,?}, "16">;
-def VLD1DUPq32_UPD : VLD1QDUPWB<{1,0,1,?}, "32">;
+defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8">;
+defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16">;
+defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">;
-def VLD1DUPq8Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
-def VLD1DUPq16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
-def VLD1DUPq32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
+def VLD1DUPq8PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>;
+def VLD1DUPq16PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>;
+def VLD1DUPq32PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>;
+def VLD1DUPq8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>;
+def VLD1DUPq16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>;
+def VLD1DUPq32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>;
// VLD2DUP : Vector Load (single 2-element structure to all lanes)
class VLD2DUP<bits<4> op7_4, string Dt>
@@ -1329,94 +1460,109 @@ def VST1q64PseudoWB_register : VSTQWBregisterPseudo<IIC_VST1x2u>;
// ...with 3 registers
class VST1D3<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0110, op7_4, (outs),
- (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3),
- IIC_VST1x3, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
+ (ins addrmode6:$Rn, VecListThreeD:$Vd),
+ IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVSTInstruction";
}
-class VST1D3WB<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm,
- DPR:$Vd, DPR:$src2, DPR:$src3),
- IIC_VST1x3u, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVSTInstruction";
+multiclass VST1D3WB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
+                (ins addrmode6:$Rn, VecListThreeD:$Vd), IIC_VST1x3u,
+ "vst1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
+                      IIC_VST1x3u,
+ "vst1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
}
-def VST1d8T : VST1D3<{0,0,0,?}, "8">;
-def VST1d16T : VST1D3<{0,1,0,?}, "16">;
-def VST1d32T : VST1D3<{1,0,0,?}, "32">;
-def VST1d64T : VST1D3<{1,1,0,?}, "64">;
+def VST1d8T : VST1D3<{0,0,0,?}, "8">;
+def VST1d16T : VST1D3<{0,1,0,?}, "16">;
+def VST1d32T : VST1D3<{1,0,0,?}, "32">;
+def VST1d64T : VST1D3<{1,1,0,?}, "64">;
-def VST1d8T_UPD : VST1D3WB<{0,0,0,?}, "8">;
-def VST1d16T_UPD : VST1D3WB<{0,1,0,?}, "16">;
-def VST1d32T_UPD : VST1D3WB<{1,0,0,?}, "32">;
-def VST1d64T_UPD : VST1D3WB<{1,1,0,?}, "64">;
+defm VST1d8Twb : VST1D3WB<{0,0,0,?}, "8">;
+defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16">;
+defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32">;
+defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64">;
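// With the VecListThreeD operand, the braced register list is matched as a
// single $Vd operand, e.g.:
//   vst1.8 {d0, d1, d2}, [r0]!   @ stores 24 bytes, r0 += 24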
-def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>;
-def VST1d64TPseudo_UPD : VSTQQWBPseudo<IIC_VST1x3u>;
+def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>;
+def VST1d64TPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x3u>;
+def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>;
// ...with 4 registers
class VST1D4<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0010, op7_4, (outs),
- (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
- IIC_VST1x4, "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", "",
+ (ins addrmode6:$Rn, VecListFourD:$Vd),
+ IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
[]> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVSTInstruction";
}
-class VST1D4WB<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm,
- DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST1x4u,
- "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+multiclass VST1D4WB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
+                    (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VST1x4u,
+ "vst1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
+                      IIC_VST1x4u,
+ "vst1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
}
-def VST1d8Q : VST1D4<{0,0,?,?}, "8">;
-def VST1d16Q : VST1D4<{0,1,?,?}, "16">;
-def VST1d32Q : VST1D4<{1,0,?,?}, "32">;
-def VST1d64Q : VST1D4<{1,1,?,?}, "64">;
+def VST1d8Q : VST1D4<{0,0,?,?}, "8">;
+def VST1d16Q : VST1D4<{0,1,?,?}, "16">;
+def VST1d32Q : VST1D4<{1,0,?,?}, "32">;
+def VST1d64Q : VST1D4<{1,1,?,?}, "64">;
-def VST1d8Q_UPD : VST1D4WB<{0,0,?,?}, "8">;
-def VST1d16Q_UPD : VST1D4WB<{0,1,?,?}, "16">;
-def VST1d32Q_UPD : VST1D4WB<{1,0,?,?}, "32">;
-def VST1d64Q_UPD : VST1D4WB<{1,1,?,?}, "64">;
+defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8">;
+defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16">;
+defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32">;
+defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64">;
-def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>;
-def VST1d64QPseudo_UPD : VSTQQWBPseudo<IIC_VST1x4u>;
+def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>;
+def VST1d64QPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x4u>;
+def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>;
// VST2 : Vector Store (multiple 2-element structures)
-class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, op11_8, op7_4, (outs),
- (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2),
- IIC_VST2, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> {
- let Rm = 0b1111;
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
-}
-class VST2Q<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, 0b0011, op7_4, (outs),
- (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
- IIC_VST2x2, "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
- "", []> {
+class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
+ InstrItinClass itin>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, VdTy:$Vd),
+ itin, "vst2", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVSTInstruction";
}
-def VST2d8 : VST2D<0b1000, {0,0,?,?}, "8">;
-def VST2d16 : VST2D<0b1000, {0,1,?,?}, "16">;
-def VST2d32 : VST2D<0b1000, {1,0,?,?}, "32">;
+def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VST2>;
+def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VST2>;
+def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VST2>;
-def VST2q8 : VST2Q<{0,0,?,?}, "8">;
-def VST2q16 : VST2Q<{0,1,?,?}, "16">;
-def VST2q32 : VST2Q<{1,0,?,?}, "32">;
+def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2>;
+def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2>;
+def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2>;
def VST2d8Pseudo : VSTQPseudo<IIC_VST2>;
def VST2d16Pseudo : VSTQPseudo<IIC_VST2>;
@@ -1427,47 +1573,76 @@ def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;
// ...with address register writeback:
-class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2),
- IIC_VST2u, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
+ RegisterOperand VdTy> {
+ def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+                     (ins addrmode6:$Rn, VdTy:$Vd), IIC_VST2u,
+ "vst2", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+                        (ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VST2u,
+ "vst2", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
}
-class VST2QWB<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm,
- DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST2x2u,
- "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+multiclass VST2QWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
+                     (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VST2x2u,
+ "vst2", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
+                         IIC_VST2x2u,
+ "vst2", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
}
-def VST2d8_UPD : VST2DWB<0b1000, {0,0,?,?}, "8">;
-def VST2d16_UPD : VST2DWB<0b1000, {0,1,?,?}, "16">;
-def VST2d32_UPD : VST2DWB<0b1000, {1,0,?,?}, "32">;
+defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListTwoD>;
+defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListTwoD>;
+defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListTwoD>;
-def VST2q8_UPD : VST2QWB<{0,0,?,?}, "8">;
-def VST2q16_UPD : VST2QWB<{0,1,?,?}, "16">;
-def VST2q32_UPD : VST2QWB<{1,0,?,?}, "32">;
+defm VST2q8wb : VST2QWB<{0,0,?,?}, "8">;
+defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">;
+defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">;
-def VST2d8Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
-def VST2d16Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
-def VST2d32Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
+def VST2d8PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>;
+def VST2d16PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>;
+def VST2d32PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>;
+def VST2d8PseudoWB_register : VSTQWBPseudo<IIC_VST2u>;
+def VST2d16PseudoWB_register : VSTQWBPseudo<IIC_VST2u>;
+def VST2d32PseudoWB_register : VSTQWBPseudo<IIC_VST2u>;
-def VST2q8Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
-def VST2q16Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
-def VST2q32Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q8PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q16PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q32PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q8PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q16PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q32PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>;
// ...with double-spaced registers
-def VST2b8 : VST2D<0b1001, {0,0,?,?}, "8">;
-def VST2b16 : VST2D<0b1001, {0,1,?,?}, "16">;
-def VST2b32 : VST2D<0b1001, {1,0,?,?}, "32">;
-def VST2b8_UPD : VST2DWB<0b1001, {0,0,?,?}, "8">;
-def VST2b16_UPD : VST2DWB<0b1001, {0,1,?,?}, "16">;
-def VST2b32_UPD : VST2DWB<0b1001, {1,0,?,?}, "32">;
+def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VST2>;
+def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VST2>;
+def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VST2>;
+defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListTwoQ>;
+defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListTwoQ>;
+defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListTwoQ>;
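// "Double-spaced" lists skip alternate D registers, hence VecListTwoQ, e.g.:
//   vst2.16 {d0, d2}, [r0]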
// VST3 : Vector Store (multiple 3-element structures)
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1741,10 +1916,10 @@ def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
// ...with address register writeback:
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset,
- DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
- "\\{$src1[$lane], $src2[$lane]\\}, $addr$offset",
- "$addr.addr = $wb", []> {
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
+ "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVST2LN";
}
@@ -2573,9 +2748,9 @@ class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
// Long shift by immediate.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, SDNode OpNode>
+ ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode>
: N2VImm<op24, op23, op11_8, op7, op6, op4,
- (outs QPR:$Vd), (ins DPR:$Vm, i32imm:$SIMM), N2RegVShLFrm,
+ (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm,
IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
[(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm),
(i32 imm:$SIMM))))]>;
@@ -2805,14 +2980,11 @@ multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
v4i32, v4i32, OpNode, Commutable>;
}
-multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, string Dt, SDNode ShOp> {
- def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
- v4i16, ShOp>;
- def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, !strconcat(Dt,"32"),
- v2i32, ShOp>;
- def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
- v8i16, v4i16, ShOp>;
- def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, !strconcat(Dt,"32"),
+multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
+ def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>;
+ def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>;
+ def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>;
+ def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32",
v4i32, v2i32, ShOp>;
}
@@ -3477,15 +3649,15 @@ multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
bit op4, string OpcodeStr, string Dt, SDNode OpNode> {
def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
- OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode> {
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
- OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode> {
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
- OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode> {
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
}
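// For example, vshll.s8 q0, d1, #3 sign-extends each byte of d1 to 16 bits
// and shifts left by 3. The per-size operands (imm1_7/imm1_15/imm1_31)
// restrict the shift to 1..(size-1); a shift equal to the element size uses
// the separate "maximum shift" VSHLL encoding defined elsewhere in this file.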
@@ -3574,7 +3746,7 @@ def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
v2f32, v2f32, fmul, 1>;
def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
v4f32, v4f32, fmul, 1>;
-defm VMULsl : N3VSL_HS<0b1000, "vmul", "i", mul>;
+defm VMULsl : N3VSL_HS<0b1000, "vmul", mul>;
def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
v2f32, fmul>;
@@ -4285,18 +4457,18 @@ defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>;
// VSHLL : Vector Shift Left Long (with maximum shift count)
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
- ValueType OpTy, SDNode OpNode>
+ ValueType OpTy, Operand ImmTy, SDNode OpNode>
: N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
- ResTy, OpTy, OpNode> {
+ ResTy, OpTy, ImmTy, OpNode> {
let Inst{21-16} = op21_16;
let DecoderMethod = "DecodeVSHLMaxInstruction";
}
def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
- v8i16, v8i8, NEONvshlli>;
+ v8i16, v8i8, imm8, NEONvshlli>;
def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
- v4i32, v4i16, NEONvshlli>;
+ v4i32, v4i16, imm16, NEONvshlli>;
def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
- v2i64, v2i32, NEONvshlli>;
+ v2i64, v2i32, imm32, NEONvshlli>;
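// For example, vshll.i8 q0, d1, #8 (shift count == element size) selects
// this encoding rather than the generic immediate form above.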
// VSHRN : Vector Shift Right and Narrow
defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
@@ -4469,10 +4641,6 @@ def : InstAlias<"vmov${p} $Vd, $Vm",
(VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
def : InstAlias<"vmov${p} $Vd, $Vm",
(VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
-defm : VFPDTAnyNoF64InstAlias<"vmov${p}", "$Vd, $Vm",
- (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
-defm : VFPDTAnyNoF64InstAlias<"vmov${p}", "$Vd, $Vm",
- (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
// VMOV : Vector Move (Immediate)
@@ -4932,34 +5100,34 @@ def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;
// VEXT : Vector Extract
-class VEXTd<string OpcodeStr, string Dt, ValueType Ty>
+class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
: N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
- (ins DPR:$Vn, DPR:$Vm, i32imm:$index), NVExtFrm,
+ (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
[(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
- (Ty DPR:$Vm), imm:$index)))]> {
+ (Ty DPR:$Vm), imm:$index)))]> {
bits<4> index;
let Inst{11-8} = index{3-0};
}
-class VEXTq<string OpcodeStr, string Dt, ValueType Ty>
+class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
: N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
- (ins QPR:$Vn, QPR:$Vm, i32imm:$index), NVExtFrm,
+        (ins QPR:$Vn, QPR:$Vm, immTy:$index), NVExtFrm,
IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
[(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
- (Ty QPR:$Vm), imm:$index)))]> {
+ (Ty QPR:$Vm), imm:$index)))]> {
bits<4> index;
let Inst{11-8} = index{3-0};
}
-def VEXTd8 : VEXTd<"vext", "8", v8i8> {
+def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> {
let Inst{11-8} = index{3-0};
}
-def VEXTd16 : VEXTd<"vext", "16", v4i16> {
+def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
let Inst{11-9} = index{2-0};
let Inst{8} = 0b0;
}
-def VEXTd32 : VEXTd<"vext", "32", v2i32> {
+def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
let Inst{11-10} = index{1-0};
let Inst{9-8} = 0b00;
}
@@ -4968,17 +5136,21 @@ def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
(i32 imm:$index))),
(VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
-def VEXTq8 : VEXTq<"vext", "8", v16i8> {
+def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> {
let Inst{11-8} = index{3-0};
}
-def VEXTq16 : VEXTq<"vext", "16", v8i16> {
+def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
let Inst{11-9} = index{2-0};
let Inst{8} = 0b0;
}
-def VEXTq32 : VEXTq<"vext", "32", v4i32> {
+def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
let Inst{11-10} = index{1-0};
let Inst{9-8} = 0b00;
}
+def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
+ let Inst{11} = index{0};
+ let Inst{10-8} = 0b000;
+}
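// For example, vext.8 d0, d1, d2, #3 sets d0 to bytes 3..7 of d1 followed
// by bytes 0..2 of d2.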
def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
(v4f32 QPR:$Vm),
(i32 imm:$index))),
@@ -5026,17 +5198,17 @@ def VTBL1
let hasExtraSrcRegAllocReq = 1 in {
def VTBL2
: N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
- (ins DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTB2,
- "vtbl", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "", []>;
+ (ins VecListTwoD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
+ "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def VTBL3
: N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
- (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm), NVTBLFrm, IIC_VTB3,
- "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm", "", []>;
+ (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3,
+ "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def VTBL4
: N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
- (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm),
+ (ins VecListFourD:$Vn, DPR:$Vm),
NVTBLFrm, IIC_VTB4,
- "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm", "", []>;
+ "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
} // hasExtraSrcRegAllocReq = 1
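// With the VecList operands, the table registers are matched as one operand:
//   vtbl.8 d0, {d1, d2}, d4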
def VTBL2Pseudo
@@ -5056,18 +5228,18 @@ def VTBX1
let hasExtraSrcRegAllocReq = 1 in {
def VTBX2
: N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
- (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
- "vtbx", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "$orig = $Vd", []>;
+ (ins DPR:$orig, VecListTwoD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
+ "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
def VTBX3
: N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
- (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm),
+ (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
NVTBLFrm, IIC_VTBX3,
- "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm",
+ "vtbx", "8", "$Vd, $Vn, $Vm",
"$orig = $Vd", []>;
def VTBX4
- : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd), (ins DPR:$orig, DPR:$Vn,
- DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
- "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm",
+ : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd),
+ (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
+ "vtbx", "8", "$Vd, $Vn, $Vm",
"$orig = $Vd", []>;
} // hasExtraSrcRegAllocReq = 1
@@ -5207,11 +5379,83 @@ def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
// Assembler aliases
//
-// VAND/VEOR/VORR accept but do not require a type suffix.
+def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
+ (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
+def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
+ (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;
+
+
+// VADD two-operand aliases.
+def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm",
+ (VADDv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm",
+ (VADDv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm",
+ (VADDv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm",
+ (VADDv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm",
+ (VADDv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm",
+ (VADDv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm",
+ (VADDv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm",
+ (VADDv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm",
+ (VADDfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm",
+ (VADDfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
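// These let the destination double as the first source, e.g. "vadd.i16 d0, d1"
// is accepted and encoded as "vadd.i16 d0, d0, d1".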
+
+// VSUB two-operand aliases.
+def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm",
+ (VSUBv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm",
+ (VSUBv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm",
+ (VSUBv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm",
+ (VSUBv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm",
+ (VSUBv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm",
+ (VSUBv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm",
+ (VSUBv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm",
+ (VSUBv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm",
+ (VSUBfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm",
+ (VSUBfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// VADDW two-operand aliases.
+def : NEONInstAlias<"vaddw${p}.s8 $Vdn, $Vm",
+ (VADDWsv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vaddw${p}.s16 $Vdn, $Vm",
+ (VADDWsv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vaddw${p}.s32 $Vdn, $Vm",
+ (VADDWsv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vaddw${p}.u8 $Vdn, $Vm",
+ (VADDWuv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vaddw${p}.u16 $Vdn, $Vm",
+ (VADDWuv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vaddw${p}.u32 $Vdn, $Vm",
+ (VADDWuv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+// VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
defm : VFPDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
(VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : VFPDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
(VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+defm : VFPDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
+ (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : VFPDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
+ (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : VFPDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
(VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : VFPDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
@@ -5220,245 +5464,450 @@ defm : VFPDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
(VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : VFPDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
(VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
-
-// VLD1 requires a size suffix, but also accepts type specific variants.
-// Load one D register.
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d8 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d16 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d32 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d64 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>;
-// with writeback, fixed stride
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d8wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d16wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d32wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d64wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
-// with writeback, register stride
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d8wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn,
- rGPR:$Rm, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d16wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn,
- rGPR:$Rm, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d32wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn,
- rGPR:$Rm, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d64wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn,
- rGPR:$Rm, pred:$p)>;
-
-// Load two D registers.
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1q8 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1q16 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1q32 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1q64 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>;
-// with writeback, fixed stride
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1q8wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1q16wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1q32wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1q64wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
-// with writeback, register stride
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1q8wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn,
- rGPR:$Rm, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1q16wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn,
- rGPR:$Rm, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1q32wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn,
- rGPR:$Rm, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1q64wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn,
- rGPR:$Rm, pred:$p)>;
-
-// Load three D registers.
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d8T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d16T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d32T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d64T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>;
-// with writeback, fixed stride
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d8Twb_fixed VecListThreeD:$Vd, zero_reg,
- addrmode6:$Rn, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d16Twb_fixed VecListThreeD:$Vd, zero_reg,
- addrmode6:$Rn, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d32Twb_fixed VecListThreeD:$Vd, zero_reg,
- addrmode6:$Rn, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d64Twb_fixed VecListThreeD:$Vd, zero_reg,
- addrmode6:$Rn, pred:$p)>;
-// with writeback, register stride
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d8Twb_register VecListThreeD:$Vd, zero_reg,
- addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d16Twb_register VecListThreeD:$Vd, zero_reg,
- addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d32Twb_register VecListThreeD:$Vd, zero_reg,
- addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d64Twb_register VecListThreeD:$Vd, zero_reg,
- addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
-
-
-// Load four D registers.
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d8Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d16Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d32Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d64Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>;
-// with writeback, fixed stride
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d8Qwb_fixed VecListFourD:$Vd, zero_reg,
- addrmode6:$Rn, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d16Qwb_fixed VecListFourD:$Vd, zero_reg,
- addrmode6:$Rn, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d32Qwb_fixed VecListFourD:$Vd, zero_reg,
- addrmode6:$Rn, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d64Qwb_fixed VecListFourD:$Vd, zero_reg,
- addrmode6:$Rn, pred:$p)>;
-// with writeback, register stride
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d8Qwb_register VecListFourD:$Vd, zero_reg,
- addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d16Qwb_register VecListFourD:$Vd, zero_reg,
- addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d32Qwb_register VecListFourD:$Vd, zero_reg,
- addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d64Qwb_register VecListFourD:$Vd, zero_reg,
- addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
-
-// VST1 requires a size suffix, but also accepts type specific variants.
-// Store one D register.
-defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn",
- (VST1d8 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn",
- (VST1d16 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn",
- (VST1d32 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn",
- (VST1d64 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
-// with writeback, fixed stride
-defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
- (VST1d8wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
- (VST1d16wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
- (VST1d32wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
- (VST1d64wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
-// with writeback, register stride
-defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
- (VST1d8wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm,
- VecListOneD:$Vd, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
- (VST1d16wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm,
- VecListOneD:$Vd, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
- (VST1d32wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm,
- VecListOneD:$Vd, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
- (VST1d64wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm,
- VecListOneD:$Vd, pred:$p)>;
-
-// Store two D registers.
-defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn",
- (VST1q8 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn",
- (VST1q16 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn",
- (VST1q32 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn",
- (VST1q64 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
-// with writeback, fixed stride
-defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
- (VST1q8wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
- (VST1q16wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
- (VST1q32wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
- (VST1q64wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
-// with writeback, register stride
-defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
- (VST1q8wb_register zero_reg, addrmode6:$Rn,
- rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
- (VST1q16wb_register zero_reg, addrmode6:$Rn,
- rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
- (VST1q32wb_register zero_reg, addrmode6:$Rn,
- rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
- (VST1q64wb_register zero_reg, addrmode6:$Rn,
- rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>;
-
-// FIXME: The three and four register VST1 instructions haven't been moved
-// to the VecList* encoding yet, so we can't do assembly parsing support
-// for them. Uncomment these when that happens.
-// Load three D registers.
-//defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn",
-// (VST1d8T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>;
-//defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn",
-// (VST1d16T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>;
-//defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn",
-// (VST1d32T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>;
-//defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn",
-// (VST1d64T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>;
-
-// Load four D registers.
-//defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn",
-// (VST1d8Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>;
-//defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn",
-// (VST1d16Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>;
-//defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn",
-// (VST1d32Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>;
-//defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn",
-// (VST1d64Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>;
-
-
-// VTRN instructions data type suffix aliases for more-specific types.
-defm : VFPDT8ReqInstAlias <"vtrn${p}", "$Dd, $Dm",
- (VTRNd8 DPR:$Dd, DPR:$Dm, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vtrn${p}", "$Dd, $Dm",
- (VTRNd16 DPR:$Dd, DPR:$Dm, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vtrn${p}", "$Dd, $Dm",
- (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
-
-defm : VFPDT8ReqInstAlias <"vtrn${p}", "$Qd, $Qm",
- (VTRNq8 QPR:$Qd, QPR:$Qm, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vtrn${p}", "$Qd, $Qm",
- (VTRNq16 QPR:$Qd, QPR:$Qm, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vtrn${p}", "$Qd, $Qm",
- (VTRNq32 QPR:$Qd, QPR:$Qm, pred:$p)>;
+// ... two-operand aliases
+def : NEONInstAlias<"vand${p} $Vdn, $Vm",
+ (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vand${p} $Vdn, $Vm",
+ (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vbic${p} $Vdn, $Vm",
+ (VBICd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vbic${p} $Vdn, $Vm",
+ (VBICq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"veor${p} $Vdn, $Vm",
+ (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"veor${p} $Vdn, $Vm",
+ (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vorr${p} $Vdn, $Vm",
+ (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vorr${p} $Vdn, $Vm",
+ (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+defm : VFPDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
+ (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+defm : VFPDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
+ (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+defm : VFPDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
+ (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+defm : VFPDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
+ (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+defm : VFPDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
+ (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+defm : VFPDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
+ (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// VMUL two-operand aliases.
+def : NEONInstAlias<"vmul${p}.p8 $Qdn, $Qm",
+ (VMULpq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i8 $Qdn, $Qm",
+ (VMULv16i8 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Qm",
+ (VMULv8i16 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Qm",
+ (VMULv4i32 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
+
+def : NEONInstAlias<"vmul${p}.p8 $Ddn, $Dm",
+ (VMULpd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i8 $Ddn, $Dm",
+ (VMULv8i8 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm",
+ (VMULv4i16 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm",
+ (VMULv2i32 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
+
+def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Qm",
+ (VMULfq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm",
+ (VMULfd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
+
+def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm$lane",
+ (VMULslv4i16 DPR:$Ddn, DPR:$Ddn, DPR_8:$Dm,
+ VectorIndex16:$lane, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Dm$lane",
+ (VMULslv8i16 QPR:$Qdn, QPR:$Qdn, DPR_8:$Dm,
+ VectorIndex16:$lane, pred:$p)>;
+
+def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm$lane",
+ (VMULslv2i32 DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Dm$lane",
+ (VMULslv4i32 QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+
+def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm$lane",
+ (VMULslfd DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Dm$lane",
+ (VMULslfq QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+
+// VQADD (register) two-operand aliases.
+def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm",
+ (VQADDsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm",
+ (VQADDsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm",
+ (VQADDsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm",
+ (VQADDsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm",
+ (VQADDuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm",
+ (VQADDuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm",
+ (VQADDuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm",
+ (VQADDuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm",
+ (VQADDsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm",
+ (VQADDsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm",
+ (VQADDsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm",
+ (VQADDsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm",
+ (VQADDuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm",
+ (VQADDuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm",
+ (VQADDuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm",
+ (VQADDuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// VSHL (immediate) two-operand aliases.
+def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm",
+ (VSHLiv8i8 DPR:$Vdn, DPR:$Vdn, imm0_7:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm",
+ (VSHLiv4i16 DPR:$Vdn, DPR:$Vdn, imm0_15:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm",
+ (VSHLiv2i32 DPR:$Vdn, DPR:$Vdn, imm0_31:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm",
+ (VSHLiv1i64 DPR:$Vdn, DPR:$Vdn, imm0_63:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm",
+ (VSHLiv16i8 QPR:$Vdn, QPR:$Vdn, imm0_7:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm",
+ (VSHLiv8i16 QPR:$Vdn, QPR:$Vdn, imm0_15:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm",
+ (VSHLiv4i32 QPR:$Vdn, QPR:$Vdn, imm0_31:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm",
+ (VSHLiv2i64 QPR:$Vdn, QPR:$Vdn, imm0_63:$imm, pred:$p)>;
+
+// VSHL (register) two-operand aliases.
+def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm",
+ (VSHLsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm",
+ (VSHLsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm",
+ (VSHLsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm",
+ (VSHLsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm",
+ (VSHLuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm",
+ (VSHLuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm",
+ (VSHLuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm",
+ (VSHLuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm",
+ (VSHLsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm",
+ (VSHLsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm",
+ (VSHLsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm",
+ (VSHLsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm",
+ (VSHLuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm",
+ (VSHLuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm",
+ (VSHLuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm",
+ (VSHLuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// VSHR (immediate) two-operand aliases.
+def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm",
+ (VSHRsv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm",
+ (VSHRsv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm",
+ (VSHRsv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm",
+ (VSHRsv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm",
+ (VSHRsv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm",
+ (VSHRsv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm",
+ (VSHRsv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm",
+ (VSHRsv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm",
+ (VSHRuv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u16 $Vdn, $imm",
+ (VSHRuv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm",
+ (VSHRuv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm",
+ (VSHRuv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm",
+ (VSHRuv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u16 $Vdn, $imm",
+ (VSHRuv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm",
+ (VSHRuv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm",
+ (VSHRuv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>;
+
+// VLD1 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+defm VLD1LNdAsm : NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD1LNdAsm : NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD1LNdAsm : NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+defm VLD1LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr!",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD1LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr!",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD1LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr!",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD1LNdWB_register_Asm :
+ NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+defm VLD1LNdWB_register_Asm :
+ NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+defm VLD1LNdWB_register_Asm :
+ NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
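// For example, in vld1.8 {d0[3]}, [r0] the "[3]" lane index inside the list
// operand is what requires a custom converter rather than a plain InstAlias.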
+
+
+// VST1 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+defm VST1LNdAsm : NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST1LNdAsm : NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST1LNdAsm : NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+defm VST1LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr!",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST1LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr!",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST1LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr!",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST1LNdWB_register_Asm :
+ NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+defm VST1LNdWB_register_Asm :
+ NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+defm VST1LNdWB_register_Asm :
+ NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VLD2 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+defm VLD2LNdAsm : NEONDT8AsmPseudoInst<"vld2${p}", "$list, $addr",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD2LNdAsm : NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD2LNdAsm : NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+defm VLD2LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vld2${p}", "$list, $addr!",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD2LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr!",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD2LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr!",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD2LNdWB_register_Asm :
+ NEONDT8AsmPseudoInst<"vld2${p}", "$list, $addr, $Rm",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+defm VLD2LNdWB_register_Asm :
+ NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr, $Rm",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+defm VLD2LNdWB_register_Asm :
+ NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr, $Rm",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+
+// VST2 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+defm VST2LNdAsm : NEONDT8AsmPseudoInst<"vst2${p}", "$list, $addr",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST2LNdAsm : NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST2LNdAsm : NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+defm VST2LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vst2${p}", "$list, $addr!",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST2LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr!",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST2LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr!",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST2LNdWB_register_Asm :
+ NEONDT8AsmPseudoInst<"vst2${p}", "$list, $addr, $Rm",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+defm VST2LNdWB_register_Asm :
+ NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr, $Rm",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+defm VST2LNdWB_register_Asm :
+ NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr, $Rm",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VMOV takes an optional datatype suffix
+defm : VFPDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
+ (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
+defm : VFPDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
+ (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
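// For example, "vmov.i32 d0, d1" assembles as "vorr d0, d1, d1".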
+
+// VCLE (register) is an assembler alias for VCGE w/ the operands reversed.
+// D-register versions.
+def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",
+ (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm",
+ (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm",
+ (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm",
+ (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm",
+ (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
+ (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
+ (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+// Q-register versions.
+def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
+ (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm",
+ (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm",
+ (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm",
+ (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm",
+ (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
+ (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
+ (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
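// For example, "vcle.s8 d0, d1, d2" assembles as "vcge.s8 d0, d2, d1".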
+
+// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
+// D-register versions.
+def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm",
+ (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm",
+ (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm",
+ (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm",
+ (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm",
+ (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
+ (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
+ (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+// Q-register versions.
+def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
+ (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm",
+ (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm",
+ (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm",
+ (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm",
+ (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
+ (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
+ (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+
+// Two-operand variants for VEXT
+def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm",
+ (VEXTd8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_7:$imm, pred:$p)>;
+def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm",
+ (VEXTd16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_3:$imm, pred:$p)>;
+def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm",
+ (VEXTd32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_1:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm",
+ (VEXTq8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_15:$imm, pred:$p)>;
+def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm",
+ (VEXTq16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_7:$imm, pred:$p)>;
+def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm",
+ (VEXTq32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_3:$imm, pred:$p)>;
+def : NEONInstAlias<"vext${p}.64 $Vdn, $Vm, $imm",
+ (VEXTq64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_1:$imm, pred:$p)>;
+
+// Two-operand variants for VQDMULH
+def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm",
+ (VQDMULHv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm",
+ (VQDMULHv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm",
+ (VQDMULHv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm",
+ (VQDMULHv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// 'gas' compatibility aliases for quad-word instructions. Strictly speaking,
+// these should restrict to just the Q register variants, but the register
+// classes are enough to match correctly regardless, so we keep it simple
+// and just use MnemonicAlias.
+def : NEONMnemonicAlias<"vbicq", "vbic">;
+def : NEONMnemonicAlias<"vandq", "vand">;
+def : NEONMnemonicAlias<"veorq", "veor">;
+def : NEONMnemonicAlias<"vorrq", "vorr">;
+
+def : NEONMnemonicAlias<"vmovq", "vmov">;
+def : NEONMnemonicAlias<"vmvnq", "vmvn">;
+// Explicit versions for floating point so that the FPImm variants get
+// handled early. The parser gets confused otherwise.
+def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">;
+def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">;
+
+def : NEONMnemonicAlias<"vaddq", "vadd">;
+def : NEONMnemonicAlias<"vsubq", "vsub">;
+
+def : NEONMnemonicAlias<"vminq", "vmin">;
+def : NEONMnemonicAlias<"vmaxq", "vmax">;
+
+def : NEONMnemonicAlias<"vmulq", "vmul">;
+
+def : NEONMnemonicAlias<"vabsq", "vabs">;
+
+def : NEONMnemonicAlias<"vshlq", "vshl">;
+def : NEONMnemonicAlias<"vshrq", "vshr">;
+
+def : NEONMnemonicAlias<"vcvtq", "vcvt">;
+
+def : NEONMnemonicAlias<"vcleq", "vcle">;
+def : NEONMnemonicAlias<"vceqq", "vceq">;
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index c6cc98d..ac1a229 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -1131,9 +1131,6 @@ def tRSB : // A8.6.141
"rsb", "\t$Rd, $Rn, #0",
[(set tGPR:$Rd, (ineg tGPR:$Rn))]>;
-def : tInstAlias<"neg${s}${p} $Rd, $Rm",
- (tRSB tGPR:$Rd, s_cc_out:$s, tGPR:$Rm, pred:$p)>;
-
// Subtract with carry register
let Uses = [CPSR] in
def tSBC : // A8.6.151
@@ -1435,3 +1432,8 @@ def : InstAlias<"nop", (tMOVr R8, R8, 14, 0)>,Requires<[IsThumb, IsThumb1Only]>;
// nothing).
def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>;
def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>;
+
+// "neg" is and alias for "rsb rd, rn, #0"
+def : tInstAlias<"neg${s}${p} $Rd, $Rm",
+ (tRSB tGPR:$Rd, s_cc_out:$s, tGPR:$Rm, pred:$p)>;
+
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 6129fa3..981592c 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -80,18 +80,19 @@ def t2_so_imm : Operand<i32>, ImmLeaf<i32, [{
// only used on aliases (Pat<> and InstAlias<>). The actual encoding
// is handled by the destination instructions, which use t2_so_imm.
def t2_so_imm_not_asmoperand : AsmOperandClass { let Name = "T2SOImmNot"; }
-def t2_so_imm_not : Operand<i32>,
- PatLeaf<(imm), [{
+def t2_so_imm_not : Operand<i32>, PatLeaf<(imm), [{
return ARM_AM::getT2SOImmVal(~((uint32_t)N->getZExtValue())) != -1;
}], t2_so_imm_not_XFORM> {
let ParserMatchClass = t2_so_imm_not_asmoperand;
}
// t2_so_imm_neg - Match an immediate that is a negation of a t2_so_imm.
-def t2_so_imm_neg : Operand<i32>,
- PatLeaf<(imm), [{
+def t2_so_imm_neg_asmoperand : AsmOperandClass { let Name = "T2SOImmNeg"; }
+def t2_so_imm_neg : Operand<i32>, PatLeaf<(imm), [{
return ARM_AM::getT2SOImmVal(-((uint32_t)N->getZExtValue())) != -1;
-}], t2_so_imm_neg_XFORM>;
+}], t2_so_imm_neg_XFORM> {
+ let ParserMatchClass = t2_so_imm_neg_asmoperand;
+}
/// imm0_4095 predicate - True if the 32-bit immediate is in the range [0,4095].
def imm0_4095 : Operand<i32>,
@@ -1333,7 +1334,7 @@ def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
let mayStore = 1, neverHasSideEffects = 1 in {
def t2STR_PRE : T2Ipreldst<0, 0b10, 0, 1, (outs GPRnopc:$Rn_wb),
- (ins rGPR:$Rt, t2addrmode_imm8:$addr),
+ (ins GPRnopc:$Rt, t2addrmode_imm8:$addr),
AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
"str", "\t$Rt, $addr!",
"$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> {
@@ -1357,13 +1358,13 @@ def t2STRB_PRE : T2Ipreldst<0, 0b00, 0, 1, (outs GPRnopc:$Rn_wb),
} // mayStore = 1, neverHasSideEffects = 1
def t2STR_POST : T2Ipostldst<0, 0b10, 0, 0, (outs GPRnopc:$Rn_wb),
- (ins rGPR:$Rt, addr_offset_none:$Rn,
+ (ins GPRnopc:$Rt, addr_offset_none:$Rn,
t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePost, IIC_iStore_iu,
"str", "\t$Rt, $Rn$offset",
"$Rn = $Rn_wb,@earlyclobber $Rn_wb",
[(set GPRnopc:$Rn_wb,
- (post_store rGPR:$Rt, addr_offset_none:$Rn,
+ (post_store GPRnopc:$Rt, addr_offset_none:$Rn,
t2am_imm8_offset:$offset))]>;
def t2STRH_POST : T2Ipostldst<0, 0b01, 0, 0, (outs GPRnopc:$Rn_wb),
@@ -3971,6 +3972,18 @@ def : t2InstAlias<"push${p} $regs", (t2STMDB_UPD SP, pred:$p, reglist:$regs)>;
def : t2InstAlias<"pop${p}.w $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>;
def : t2InstAlias<"pop${p} $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>;
+// STMIA/STMIA_UPD aliases w/o the optional .w suffix
+def : t2InstAlias<"stm${p} $Rn, $regs",
+ (t2STMIA GPR:$Rn, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"stm${p} $Rn!, $regs",
+ (t2STMIA_UPD GPR:$Rn, pred:$p, reglist:$regs)>;
+
+// LDMIA/LDMIA_UPD aliases w/o the optional .w suffix
+def : t2InstAlias<"ldm${p} $Rn, $regs",
+ (t2LDMIA GPR:$Rn, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"ldm${p} $Rn!, $regs",
+ (t2LDMIA_UPD GPR:$Rn, pred:$p, reglist:$regs)>;
+
// STMDB/STMDB_UPD aliases w/ the optional .w suffix
def : t2InstAlias<"stmdb${p}.w $Rn, $regs",
(t2STMDB GPR:$Rn, pred:$p, reglist:$regs)>;
@@ -4084,8 +4097,50 @@ def : t2InstAlias<"sxth${p} $Rd, $Rm$rot",
// for isel.
def : t2InstAlias<"mov${p} $Rd, $imm",
(t2MVNi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>;
+def : t2InstAlias<"mvn${p} $Rd, $imm",
+ (t2MOVi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>;
+// Same for AND <--> BIC
+def : t2InstAlias<"bic${s}${p} $Rd, $Rn, $imm",
+ (t2ANDri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstAlias<"bic${s}${p} $Rdn, $imm",
+ (t2ANDri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstAlias<"and${s}${p} $Rd, $Rn, $imm",
+ (t2BICri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstAlias<"and${s}${p} $Rdn, $imm",
+ (t2BICri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+// Likewise, "add Rd, t2_so_imm_neg" -> sub
+def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm",
+ (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstAlias<"add${s}${p} $Rd, $imm",
+ (t2SUBri GPRnopc:$Rd, GPRnopc:$Rd, t2_so_imm_neg:$imm,
+ pred:$p, cc_out:$s)>;
+// Same for CMP <--> CMN via t2_so_imm_neg
+def : t2InstAlias<"cmp${p} $Rd, $imm",
+ (t2CMNzri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>;
+def : t2InstAlias<"cmn${p} $Rd, $imm",
+ (t2CMPri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>;
// Wide 'mul' encoding can be specified with only two operands.
def : t2InstAlias<"mul${p} $Rn, $Rm",
- (t2MUL rGPR:$Rn, rGPR:$Rn, rGPR:$Rm, pred:$p)>;
+ (t2MUL rGPR:$Rn, rGPR:$Rm, rGPR:$Rn, pred:$p)>;
+
+// "neg" is and alias for "rsb rd, rn, #0"
+def : t2InstAlias<"neg${s}${p} $Rd, $Rm",
+ (t2RSBri rGPR:$Rd, rGPR:$Rm, 0, pred:$p, cc_out:$s)>;
+
+// MOV so_reg assembler pseudos. InstAlias isn't expressive enough for
+// these, unfortunately.
+def t2MOVsi: t2AsmPseudo<"mov${p} $Rd, $shift",
+ (ins rGPR:$Rd, t2_so_reg:$shift, pred:$p)>;
+def t2MOVSsi: t2AsmPseudo<"movs${p} $Rd, $shift",
+ (ins rGPR:$Rd, t2_so_reg:$shift, pred:$p)>;
+
+// ADR w/o the .w suffix
+def : t2InstAlias<"adr${p} $Rd, $addr",
+ (t2ADR rGPR:$Rd, t2adrlabel:$addr, pred:$p)>;
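
The immediate-rewriting aliases above (mvn/bic/and/add/cmp/cmn) work because the operand classes are deliberately narrow: t2_so_imm_neg, for instance, only admits immediates whose negation has a valid Thumb2 modified-immediate encoding, and the matcher re-negates the value when building the MCInst (see addT2SOImmNegOperands in ARMAsmParser.cpp below). A small sketch of the acceptance test, reusing the existing ARM_AM helper:

    // Can "add Rd, Rn, #Imm" be matched as "sub Rd, Rn, #-Imm"?
    // ARM_AM::getT2SOImmVal() returns -1 when no encoding exists.
    static bool addFoldsToSub(int32_t Imm) {
      return ARM_AM::getT2SOImmVal(-(uint32_t)Imm) != -1;
    }
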
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index e420135..5d43556 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -1160,18 +1160,64 @@ def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm),
//===----------------------------------------------------------------------===//
// Assembler aliases.
//
+// A few mnemonic aliases for pre-unified syntax. We don't guarantee to
+// support them all, but supporting at least some of the basics is
+// good for compatibility with existing assembly.
+def : VFP2MnemonicAlias<"flds", "vldr">;
+def : VFP2MnemonicAlias<"fldd", "vldr">;
+def : VFP2MnemonicAlias<"fmrs", "vmov">;
+def : VFP2MnemonicAlias<"fmsr", "vmov">;
+def : VFP2MnemonicAlias<"fsqrts", "vsqrt">;
+def : VFP2MnemonicAlias<"fsqrtd", "vsqrt">;
+def : VFP2MnemonicAlias<"fadds", "vadd.f32">;
+def : VFP2MnemonicAlias<"faddd", "vadd.f64">;
+def : VFP2MnemonicAlias<"fmrdd", "vmov">;
+def : VFP2MnemonicAlias<"fmrds", "vmov">;
+def : VFP2MnemonicAlias<"fmrrd", "vmov">;
+def : VFP2MnemonicAlias<"fmdrr", "vmov">;
+def : VFP2MnemonicAlias<"fmuld", "vmul.f64">;
+def : VFP2MnemonicAlias<"fnegs", "vneg.f32">;
+def : VFP2MnemonicAlias<"fnegd", "vneg.f64">;
+def : VFP2MnemonicAlias<"ftosizd", "vcvt.s32.f64">;
+def : VFP2MnemonicAlias<"ftosid", "vcvtr.s32.f64">;
+def : VFP2MnemonicAlias<"ftosizs", "vcvt.s32.f32">;
+def : VFP2MnemonicAlias<"ftosis", "vcvtr.s32.f32">;
+def : VFP2MnemonicAlias<"ftouizd", "vcvt.u32.f64">;
+def : VFP2MnemonicAlias<"ftouid", "vcvtr.u32.f64">;
+def : VFP2MnemonicAlias<"ftouizs", "vcvt.u32.f32">;
+def : VFP2MnemonicAlias<"ftouis", "vcvtr.u32.f32">;
+def : VFP2MnemonicAlias<"fsitod", "vcvt.f64.s32">;
+def : VFP2MnemonicAlias<"fsitos", "vcvt.f32.s32">;
+def : VFP2MnemonicAlias<"fuitod", "vcvt.f64.u32">;
+def : VFP2MnemonicAlias<"fuitos", "vcvt.f32.u32">;
+def : VFP2MnemonicAlias<"fsts", "vstr">;
+def : VFP2MnemonicAlias<"fstd", "vstr">;
+def : VFP2MnemonicAlias<"fmacd", "vmla.f64">;
+def : VFP2MnemonicAlias<"fmacs", "vmla.f32">;
def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>;
+def : VFP2InstAlias<"fadds${p} $Sd, $Sn, $Sm",
+ (VADDS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>;
+def : VFP2InstAlias<"faddd${p} $Dd, $Dn, $Dm",
+ (VADDD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>;
+def : VFP2InstAlias<"fsubs${p} $Sd, $Sn, $Sm",
+ (VSUBS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>;
+def : VFP2InstAlias<"fsubd${p} $Dd, $Dn, $Dm",
+ (VSUBD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>;
+
+// No need for the size suffix on VSQRT. It's implied by the register classes.
+def : VFP2InstAlias<"vsqrt${p} $Sd, $Sm", (VSQRTS SPR:$Sd, SPR:$Sm, pred:$p)>;
+def : VFP2InstAlias<"vsqrt${p} $Dd, $Dm", (VSQRTD DPR:$Dd, DPR:$Dm, pred:$p)>;
// VLDR/VSTR accept an optional type suffix.
-defm : VFPDT32InstAlias<"vldr${p}", "$Sd, $addr",
- (VLDRS SPR:$Sd, addrmode5:$addr, pred:$p)>;
-defm : VFPDT32InstAlias<"vstr${p}", "$Sd, $addr",
- (VSTRS SPR:$Sd, addrmode5:$addr, pred:$p)>;
-defm : VFPDT64InstAlias<"vldr${p}", "$Dd, $addr",
- (VLDRD DPR:$Dd, addrmode5:$addr, pred:$p)>;
-defm : VFPDT64InstAlias<"vstr${p}", "$Dd, $addr",
- (VSTRD DPR:$Dd, addrmode5:$addr, pred:$p)>;
+def : VFP2InstAlias<"vldr${p}.32 $Sd, $addr",
+ (VLDRS SPR:$Sd, addrmode5:$addr, pred:$p)>;
+def : VFP2InstAlias<"vstr${p}.32 $Sd, $addr",
+ (VSTRS SPR:$Sd, addrmode5:$addr, pred:$p)>;
+def : VFP2InstAlias<"vldr${p}.64 $Dd, $addr",
+ (VLDRD DPR:$Dd, addrmode5:$addr, pred:$p)>;
+def : VFP2InstAlias<"vstr${p}.64 $Dd, $addr",
+ (VSTRD DPR:$Dd, addrmode5:$addr, pred:$p)>;
// VMUL has a two-operand form (implied destination operand)
def : VFP2InstAlias<"vmul${p}.f64 $Dn, $Dm",
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index c8728f4..6712fb6 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -33,6 +33,7 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -1471,19 +1472,18 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
while (++I != E) {
if (I->isDebugValue() || MemOps.count(&*I))
continue;
- const MCInstrDesc &MCID = I->getDesc();
- if (MCID.isCall() || MCID.isTerminator() || I->hasUnmodeledSideEffects())
+ if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
return false;
- if (isLd && MCID.mayStore())
+ if (isLd && I->mayStore())
return false;
if (!isLd) {
- if (MCID.mayLoad())
+ if (I->mayLoad())
return false;
// It's not safe to move the first 'str' down.
// str r1, [r0]
// strh r5, [r0]
// str r4, [r0, #+4]
- if (MCID.mayStore())
+ if (I->mayStore())
return false;
}
for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
@@ -1773,8 +1773,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
while (MBBI != E) {
for (; MBBI != E; ++MBBI) {
MachineInstr *MI = MBBI;
- const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.isCall() || MCID.isTerminator()) {
+ if (MI->isCall() || MI->isTerminator()) {
// Stop at barriers.
++MBBI;
break;
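
The MCID-to-MachineInstr rewrites in this file are a mechanical cleanup: MachineInstr provides thin forwarders for the common MCInstrDesc queries, so callers can drop the getDesc() indirection. Roughly (a sketch; the real inline definitions live in include/llvm/CodeGen/MachineInstr.h):

    // Convenience forwarders this hunk relies on:
    bool MachineInstr::isCall()       const { return getDesc().isCall(); }
    bool MachineInstr::isTerminator() const { return getDesc().isTerminator(); }
    bool MachineInstr::mayLoad()      const { return getDesc().mayLoad(); }
    bool MachineInstr::mayStore()     const { return getDesc().mayStore(); }
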
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 6cbb24b..61b75cb 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -38,22 +38,25 @@ extern "C" void LLVMInitializeARMTarget() {
///
ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS),
JITInfo(),
InstrItins(Subtarget.getInstrItineraryData()) {
// Default to soft float ABI
- if (FloatABIType == FloatABI::Default)
- FloatABIType = FloatABI::Soft;
+ if (Options.FloatABIType == FloatABI::Default)
+ this->Options.FloatABIType = FloatABI::Soft;
}
ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM, OL), InstrInfo(Subtarget),
+ : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ InstrInfo(Subtarget),
DataLayout(Subtarget.isAPCS_ABI() ?
std::string("e-p:32:32-f64:32:64-i64:32:64-"
"v128:32:128-v64:32:64-n32-S32") :
@@ -73,9 +76,10 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM, OL),
+ : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
InstrInfo(Subtarget.hasThumb2()
? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget))
: ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))),
@@ -143,10 +147,16 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM) {
}
bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM) {
- if (Subtarget.isThumb2() && !Subtarget.prefers32BitThumb())
- PM.add(createThumb2SizeReductionPass());
+ if (Subtarget.isThumb2()) {
+ if (!Subtarget.prefers32BitThumb())
+ PM.add(createThumb2SizeReductionPass());
+
+    // The constant island pass works on unbundled instructions.
+ PM.add(createUnpackMachineBundlesPass());
+ }
PM.add(createARMConstantIslandPass());
+
return true;
}
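
For callers, the new TargetOptions parameter slots in between the feature string and the relocation model. A hypothetical construction site (triple and CPU strings are illustrative only):

    TargetOptions Opts;   // FloatABIType starts as FloatABI::Default.
    ARMTargetMachine TM(TheARMTarget, "armv7-unknown-linux-gnueabi",
                        "cortex-a8", "", Opts, Reloc::PIC_,
                        CodeModel::Default, CodeGenOpt::Default);
    // The base constructor rewrites FloatABI::Default to FloatABI::Soft,
    // so TM.Options.FloatABIType == FloatABI::Soft from here on.
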
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index a1f517b..cd77822 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -41,6 +41,7 @@ private:
public:
ARMBaseTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
@@ -71,6 +72,7 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
public:
ARMTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
@@ -112,6 +114,7 @@ class ThumbTargetMachine : public ARMBaseTargetMachine {
public:
ThumbTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp
index 19defa1..721a225 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -36,6 +36,7 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
ELF::SHF_WRITE |
ELF::SHF_ALLOC,
SectionKind::getDataRel());
+ StructorOutputOrder = Structors::PriorityOrder;
LSDASection = NULL;
}
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index bb83e5e..cd86065 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -39,10 +39,15 @@ namespace {
class ARMOperand;
+enum VectorLaneTy { NoLanes, AllLanes, IndexedLane };
+
class ARMAsmParser : public MCTargetAsmParser {
MCSubtargetInfo &STI;
MCAsmParser &Parser;
+  // Map of register aliases registered via the .req directive.
+ StringMap<unsigned> RegisterReqs;
+
struct {
ARMCC::CondCodes Cond; // Condition for IT block.
unsigned Mask:4; // Condition mask for instructions.
@@ -90,9 +95,12 @@ class ARMAsmParser : public MCTargetAsmParser {
unsigned &ShiftAmount);
bool parseDirectiveWord(unsigned Size, SMLoc L);
bool parseDirectiveThumb(SMLoc L);
+ bool parseDirectiveARM(SMLoc L);
bool parseDirectiveThumbFunc(SMLoc L);
bool parseDirectiveCode(SMLoc L);
bool parseDirectiveSyntax(SMLoc L);
+ bool parseDirectiveReq(StringRef Name, SMLoc L);
+ bool parseDirectiveUnreq(SMLoc L);
StringRef splitMnemonic(StringRef Mnemonic, unsigned &PredicationCode,
bool &CarrySetting, unsigned &ProcessorIMod,
@@ -161,6 +169,7 @@ class ARMAsmParser : public MCTargetAsmParser {
OperandMatchResultTy parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*>&);
OperandMatchResultTy parseFPImm(SmallVectorImpl<MCParsedAsmOperand*>&);
OperandMatchResultTy parseVectorList(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index);
// Asm Match Converter Methods
bool cvtT2LdrdPre(MCInst &Inst, unsigned Opcode,
@@ -271,6 +280,8 @@ class ARMOperand : public MCParsedAsmOperand {
k_DPRRegisterList,
k_SPRRegisterList,
k_VectorList,
+ k_VectorListAllLanes,
+ k_VectorListIndexed,
k_ShiftedRegister,
k_ShiftedImmediate,
k_ShifterImmediate,
@@ -324,6 +335,8 @@ class ARMOperand : public MCParsedAsmOperand {
struct {
unsigned RegNum;
unsigned Count;
+ unsigned LaneIndex;
+ bool isDoubleSpaced;
} VectorList;
struct {
@@ -409,6 +422,8 @@ public:
Registers = o.Registers;
break;
case k_VectorList:
+ case k_VectorListAllLanes:
+ case k_VectorListIndexed:
VectorList = o.VectorList;
break;
case k_CoprocNum:
@@ -562,6 +577,22 @@ public:
int64_t Value = CE->getValue();
return Value >= 0 && Value < 256;
}
+ bool isImm0_1() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 2;
+ }
+ bool isImm0_3() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 4;
+ }
bool isImm0_7() const {
if (Kind != k_Immediate)
return false;
@@ -586,6 +617,94 @@ public:
int64_t Value = CE->getValue();
return Value >= 0 && Value < 32;
}
+ bool isImm0_63() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 64;
+ }
+ bool isImm8() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value == 8;
+ }
+ bool isImm16() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value == 16;
+ }
+ bool isImm32() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value == 32;
+ }
+ bool isShrImm8() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= 8;
+ }
+ bool isShrImm16() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= 16;
+ }
+ bool isShrImm32() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= 32;
+ }
+ bool isShrImm64() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= 64;
+ }
+ bool isImm1_7() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value < 8;
+ }
+ bool isImm1_15() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value < 16;
+ }
+ bool isImm1_31() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value < 32;
+ }
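
All of the isImmN_M/isShrImmN predicates added here follow a single closed-interval pattern; a shared helper (hypothetical, not part of this patch) would collapse them to:

    // Inside ARMOperand: every predicate above reduces to this check.
    bool isImmInRange(int64_t Lo, int64_t Hi) const {
      if (Kind != k_Immediate)
        return false;
      const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
      if (!CE) return false;
      int64_t Value = CE->getValue();
      return Value >= Lo && Value <= Hi;
    }
    // e.g. isImm0_63()  == isImmInRange(0, 63)
    //      isShrImm16() == isImmInRange(1, 16)
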
bool isImm1_16() const {
if (Kind != k_Immediate)
return false;
@@ -676,6 +795,14 @@ public:
int64_t Value = CE->getValue();
return ARM_AM::getSOImmVal(~Value) != -1;
}
+ bool isARMSOImmNeg() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ARM_AM::getSOImmVal(-Value) != -1;
+ }
bool isT2SOImm() const {
if (Kind != k_Immediate)
return false;
@@ -692,6 +819,14 @@ public:
int64_t Value = CE->getValue();
return ARM_AM::getT2SOImmVal(~Value) != -1;
}
+ bool isT2SOImmNeg() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ARM_AM::getT2SOImmVal(-Value) != -1;
+ }
bool isSetEndImm() const {
if (Kind != k_Immediate)
return false;
@@ -892,9 +1027,9 @@ public:
if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
// Immediate offset in range [-255, -1].
- if (!Memory.OffsetImm) return true;
+ if (!Memory.OffsetImm) return false;
int64_t Val = Memory.OffsetImm->getValue();
- return Val > -256 && Val < 0;
+ return (Val == INT32_MIN) || (Val > -256 && Val < 0);
}
bool isMemUImm12Offset() const {
if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
@@ -940,31 +1075,75 @@ public:
bool isProcIFlags() const { return Kind == k_ProcIFlags; }
// NEON operands.
+ bool isSingleSpacedVectorList() const {
+ return Kind == k_VectorList && !VectorList.isDoubleSpaced;
+ }
+ bool isDoubleSpacedVectorList() const {
+ return Kind == k_VectorList && VectorList.isDoubleSpaced;
+ }
bool isVecListOneD() const {
- if (Kind != k_VectorList) return false;
+ if (!isSingleSpacedVectorList()) return false;
return VectorList.Count == 1;
}
bool isVecListTwoD() const {
- if (Kind != k_VectorList) return false;
+ if (!isSingleSpacedVectorList()) return false;
return VectorList.Count == 2;
}
bool isVecListThreeD() const {
- if (Kind != k_VectorList) return false;
+ if (!isSingleSpacedVectorList()) return false;
return VectorList.Count == 3;
}
bool isVecListFourD() const {
- if (Kind != k_VectorList) return false;
+ if (!isSingleSpacedVectorList()) return false;
return VectorList.Count == 4;
}
bool isVecListTwoQ() const {
- if (Kind != k_VectorList) return false;
- //FIXME: We haven't taught the parser to handle by-two register lists
- // yet, so don't pretend to know one.
- return VectorList.Count == 2 && false;
+ if (!isDoubleSpacedVectorList()) return false;
+ return VectorList.Count == 2;
+ }
+
+ bool isVecListOneDAllLanes() const {
+ if (Kind != k_VectorListAllLanes) return false;
+ return VectorList.Count == 1;
+ }
+
+ bool isVecListTwoDAllLanes() const {
+ if (Kind != k_VectorListAllLanes) return false;
+ return VectorList.Count == 2;
+ }
+
+ bool isVecListOneDByteIndexed() const {
+ if (Kind != k_VectorListIndexed) return false;
+ return VectorList.Count == 1 && VectorList.LaneIndex <= 7;
+ }
+
+ bool isVecListOneDHWordIndexed() const {
+ if (Kind != k_VectorListIndexed) return false;
+ return VectorList.Count == 1 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListOneDWordIndexed() const {
+ if (Kind != k_VectorListIndexed) return false;
+ return VectorList.Count == 1 && VectorList.LaneIndex <= 1;
+ }
+
+ bool isVecListTwoDByteIndexed() const {
+ if (Kind != k_VectorListIndexed) return false;
+ return VectorList.Count == 2 && VectorList.LaneIndex <= 7;
+ }
+
+ bool isVecListTwoDHWordIndexed() const {
+ if (Kind != k_VectorListIndexed) return false;
+ return VectorList.Count == 2 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListTwoDWordIndexed() const {
+ if (Kind != k_VectorListIndexed) return false;
+ return VectorList.Count == 2 && VectorList.LaneIndex <= 1;
}
bool isVectorIndex8() const {
@@ -1233,6 +1412,14 @@ public:
Inst.addOperand(MCOperand::CreateImm(~CE->getValue()));
}
+ void addT2SOImmNegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The operand is actually a t2_so_imm, but we have its
+ // negation in the assembly source, so twiddle it here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(-CE->getValue()));
+ }
+
void addARMSOImmNotOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// The operand is actually a so_imm, but we have its bitwise
@@ -1241,6 +1428,14 @@ public:
Inst.addOperand(MCOperand::CreateImm(~CE->getValue()));
}
+ void addARMSOImmNegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The operand is actually a so_imm, but we have its
+ // negation in the assembly source, so twiddle it here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(-CE->getValue()));
+ }
+
void addMemBarrierOptOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt())));
@@ -1527,37 +1722,15 @@ public:
Inst.addOperand(MCOperand::CreateImm(unsigned(getProcIFlags())));
}
- void addVecListOneDOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum));
- }
-
- void addVecListTwoDOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- // Only the first register actually goes on the instruction. The rest
- // are implied by the opcode.
- Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum));
- }
-
- void addVecListThreeDOperands(MCInst &Inst, unsigned N) const {
+ void addVecListOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- // Only the first register actually goes on the instruction. The rest
- // are implied by the opcode.
Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum));
}
- void addVecListFourDOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- // Only the first register actually goes on the instruction. The rest
- // are implied by the opcode.
- Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum));
- }
-
- void addVecListTwoQOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- // Only the first register actually goes on the instruction. The rest
- // are implied by the opcode.
+ void addVecListIndexedOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum));
+ Inst.addOperand(MCOperand::CreateImm(VectorList.LaneIndex));
}
void addVectorIndex8Operands(MCInst &Inst, unsigned N) const {
@@ -1780,10 +1953,32 @@ public:
}
static ARMOperand *CreateVectorList(unsigned RegNum, unsigned Count,
- SMLoc S, SMLoc E) {
+ bool isDoubleSpaced, SMLoc S, SMLoc E) {
ARMOperand *Op = new ARMOperand(k_VectorList);
Op->VectorList.RegNum = RegNum;
Op->VectorList.Count = Count;
+ Op->VectorList.isDoubleSpaced = isDoubleSpaced;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateVectorListAllLanes(unsigned RegNum, unsigned Count,
+ SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_VectorListAllLanes);
+ Op->VectorList.RegNum = RegNum;
+ Op->VectorList.Count = Count;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateVectorListIndexed(unsigned RegNum, unsigned Count,
+ unsigned Index, SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_VectorListIndexed);
+ Op->VectorList.RegNum = RegNum;
+ Op->VectorList.Count = Count;
+ Op->VectorList.LaneIndex = Index;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
@@ -1982,6 +2177,14 @@ void ARMOperand::print(raw_ostream &OS) const {
OS << "<vector_list " << VectorList.Count << " * "
<< VectorList.RegNum << ">";
break;
+ case k_VectorListAllLanes:
+ OS << "<vector_list(all lanes) " << VectorList.Count << " * "
+ << VectorList.RegNum << ">";
+ break;
+ case k_VectorListIndexed:
+ OS << "<vector_list(lane " << VectorList.LaneIndex << ") "
+ << VectorList.Count << " * " << VectorList.RegNum << ">";
+ break;
case k_Token:
OS << "'" << getToken() << "'";
break;
@@ -2000,7 +2203,9 @@ static unsigned MatchRegisterName(StringRef Name);
bool ARMAsmParser::ParseRegister(unsigned &RegNo,
SMLoc &StartLoc, SMLoc &EndLoc) {
+ StartLoc = Parser.getTok().getLoc();
RegNo = tryParseRegister();
+ EndLoc = Parser.getTok().getLoc();
return (RegNo == (unsigned)-1);
}
@@ -2013,8 +2218,6 @@ int ARMAsmParser::tryParseRegister() {
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier)) return -1;
- // FIXME: Validate register for the current architecture; we have to do
- // validation later, so maybe there is no need for this here.
std::string lowerCase = Tok.getString().lower();
unsigned RegNum = MatchRegisterName(lowerCase);
if (!RegNum) {
@@ -2023,9 +2226,34 @@ int ARMAsmParser::tryParseRegister() {
.Case("r14", ARM::LR)
.Case("r15", ARM::PC)
.Case("ip", ARM::R12)
+ // Additional register name aliases for 'gas' compatibility.
+ .Case("a1", ARM::R0)
+ .Case("a2", ARM::R1)
+ .Case("a3", ARM::R2)
+ .Case("a4", ARM::R3)
+ .Case("v1", ARM::R4)
+ .Case("v2", ARM::R5)
+ .Case("v3", ARM::R6)
+ .Case("v4", ARM::R7)
+ .Case("v5", ARM::R8)
+ .Case("v6", ARM::R9)
+ .Case("v7", ARM::R10)
+ .Case("v8", ARM::R11)
+ .Case("sb", ARM::R9)
+ .Case("sl", ARM::R10)
+ .Case("fp", ARM::R11)
.Default(0);
}
- if (!RegNum) return -1;
+ if (!RegNum) {
+ // Check for aliases registered via .req.
+ StringMap<unsigned>::const_iterator Entry =
+ RegisterReqs.find(Tok.getIdentifier());
+ // If no match, return failure.
+ if (Entry == RegisterReqs.end())
+ return -1;
+ Parser.Lex(); // Eat identifier token.
+ return Entry->getValue();
+ }
Parser.Lex(); // Eat identifier token.
@@ -2045,6 +2273,7 @@ int ARMAsmParser::tryParseShiftRegister(
std::string lowerCase = Tok.getString().lower();
ARM_AM::ShiftOpc ShiftTy = StringSwitch<ARM_AM::ShiftOpc>(lowerCase)
+ .Case("asl", ARM_AM::lsl)
.Case("lsl", ARM_AM::lsl)
.Case("lsr", ARM_AM::lsr)
.Case("asr", ARM_AM::asr)
@@ -2073,7 +2302,8 @@ int ARMAsmParser::tryParseShiftRegister(
ShiftReg = SrcReg;
} else {
// Figure out if this is shifted by a constant or a register (for non-RRX).
- if (Parser.getTok().is(AsmToken::Hash)) {
+ if (Parser.getTok().is(AsmToken::Hash) ||
+ Parser.getTok().is(AsmToken::Dollar)) {
Parser.Lex(); // Eat hash.
SMLoc ImmLoc = Parser.getTok().getLoc();
const MCExpr *ShiftExpr = 0;
@@ -2446,6 +2676,7 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat the comma.
RegLoc = Parser.getTok().getLoc();
int OldReg = Reg;
+ const AsmToken RegTok = Parser.getTok();
Reg = tryParseRegister();
if (Reg == -1)
return Error(RegLoc, "register expected");
@@ -2459,8 +2690,13 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (!RC->contains(Reg))
return Error(RegLoc, "invalid register in register list");
// List must be monotonically increasing.
- if (getARMRegisterNumbering(Reg) <= getARMRegisterNumbering(OldReg))
+ if (getARMRegisterNumbering(Reg) < getARMRegisterNumbering(OldReg))
return Error(RegLoc, "register list not in ascending order");
+ if (getARMRegisterNumbering(Reg) == getARMRegisterNumbering(OldReg)) {
+ Warning(RegLoc, "duplicated register (" + RegTok.getString() +
+ ") in register list");
+ continue;
+ }
// VFP register lists must also be contiguous.
// It's OK to use the enumeration values directly here, as the
// VFP register classes have the enum sorted properly.
@@ -2477,13 +2713,55 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return Error(E, "'}' expected");
Parser.Lex(); // Eat '}' token.
+ // Push the register list operand.
Operands.push_back(ARMOperand::CreateRegList(Registers, S, E));
+
+ // The ARM system instruction variants for LDM/STM have a '^' token here.
+ if (Parser.getTok().is(AsmToken::Caret)) {
+ Operands.push_back(ARMOperand::CreateToken("^",Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat '^' token.
+ }
+
return false;
}
+// Helper function to parse the lane index for vector lists.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index) {
+ Index = 0; // Always return a defined index value.
+ if (Parser.getTok().is(AsmToken::LBrac)) {
+ Parser.Lex(); // Eat the '['.
+ if (Parser.getTok().is(AsmToken::RBrac)) {
+ // "Dn[]" is the 'all lanes' syntax.
+ LaneKind = AllLanes;
+ Parser.Lex(); // Eat the ']'.
+ return MatchOperand_Success;
+ }
+ if (Parser.getTok().is(AsmToken::Integer)) {
+ int64_t Val = Parser.getTok().getIntVal();
+      // FIXME: Make this range check context sensitive for .8, .16, .32.
+      if (Val < 0 || Val > 7)
+        Error(Parser.getTok().getLoc(), "lane index out of range");
+      Index = Val;
+      LaneKind = IndexedLane;
+      Parser.Lex(); // Eat the token.
+ if (Parser.getTok().isNot(AsmToken::RBrac))
+ Error(Parser.getTok().getLoc(), "']' expected");
+ Parser.Lex(); // Eat the ']'.
+ return MatchOperand_Success;
+ }
+ Error(Parser.getTok().getLoc(), "lane index must be empty or an integer");
+ return MatchOperand_ParseFail;
+ }
+ LaneKind = NoLanes;
+ return MatchOperand_Success;
+}
+
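
The three outcomes parseVectorLane() distinguishes map one-to-one onto the new operand kinds. A standalone illustration of the classification, assuming the lane suffix has already been lexed into a string:

    // "" -> NoLanes, "[]" -> AllLanes, "[n]" -> IndexedLane.
    static VectorLaneTy classifyLaneSuffix(StringRef S, unsigned &Index) {
      Index = 0;
      if (S.empty())
        return NoLanes;
      if (S == "[]")
        return AllLanes;
      // Strip the brackets and parse the index; the real parser emits
      // proper diagnostics for malformed or out-of-range indices.
      S.slice(1, S.size() - 1).getAsInteger(10, Index);
      return IndexedLane;
    }
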
// parse a vector register list
ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ VectorLaneTy LaneKind;
+ unsigned LaneIndex;
SMLoc S = Parser.getTok().getLoc();
// As an extension (to match gas), support a plain D register or Q register
// (without enclosing curly braces) as a single or double entry list,
@@ -2494,12 +2772,48 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_NoMatch;
SMLoc E = Parser.getTok().getLoc();
if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) {
- Operands.push_back(ARMOperand::CreateVectorList(Reg, 1, S, E));
+ OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex);
+ if (Res != MatchOperand_Success)
+ return Res;
+ switch (LaneKind) {
+ default:
+ assert(0 && "unexpected lane kind!");
+ case NoLanes:
+ E = Parser.getTok().getLoc();
+ Operands.push_back(ARMOperand::CreateVectorList(Reg, 1, false, S, E));
+ break;
+ case AllLanes:
+ E = Parser.getTok().getLoc();
+ Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 1, S, E));
+ break;
+ case IndexedLane:
+ Operands.push_back(ARMOperand::CreateVectorListIndexed(Reg, 1,
+ LaneIndex, S,E));
+ break;
+ }
return MatchOperand_Success;
}
if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
Reg = getDRegFromQReg(Reg);
- Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, S, E));
+ OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex);
+ if (Res != MatchOperand_Success)
+ return Res;
+ switch (LaneKind) {
+ default:
+ assert(0 && "unexpected lane kind!");
+ case NoLanes:
+ E = Parser.getTok().getLoc();
+ Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, false, S, E));
+ break;
+ case AllLanes:
+ E = Parser.getTok().getLoc();
+ Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 2, S, E));
+ break;
+ case IndexedLane:
+ Operands.push_back(ARMOperand::CreateVectorListIndexed(Reg, 2,
+ LaneIndex, S,E));
+ break;
+ }
return MatchOperand_Success;
}
Error(S, "vector register expected");
@@ -2518,18 +2832,30 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_ParseFail;
}
unsigned Count = 1;
+ int Spacing = 0;
unsigned FirstReg = Reg;
// The list is of D registers, but we also allow Q regs and just interpret
// them as the two D sub-registers.
if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
FirstReg = Reg = getDRegFromQReg(Reg);
+    Spacing = 1; // double-spacing requires explicit D registers, otherwise
+                 // it's ambiguous with a four-register single-spaced list.
++Reg;
++Count;
}
+ if (parseVectorLane(LaneKind, LaneIndex) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
while (Parser.getTok().is(AsmToken::Comma) ||
Parser.getTok().is(AsmToken::Minus)) {
if (Parser.getTok().is(AsmToken::Minus)) {
+ if (!Spacing)
+ Spacing = 1; // Register range implies a single spaced list.
+ else if (Spacing == 2) {
+ Error(Parser.getTok().getLoc(),
+            "sequential registers in double-spaced list");
+ return MatchOperand_ParseFail;
+ }
Parser.Lex(); // Eat the minus.
SMLoc EndLoc = Parser.getTok().getLoc();
int EndReg = tryParseRegister();
@@ -2554,6 +2880,16 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Error(EndLoc, "bad range in register list");
return MatchOperand_ParseFail;
}
+ // Parse the lane specifier if present.
+ VectorLaneTy NextLaneKind;
+ unsigned NextLaneIndex;
+ if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+ if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
+ Error(EndLoc, "mismatched lane index in register list");
+ return MatchOperand_ParseFail;
+ }
+ EndLoc = Parser.getTok().getLoc();
// Add all the registers in the range to the register list.
Count += EndReg - Reg;
@@ -2575,6 +2911,13 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// The list is of D registers, but we also allow Q regs and just interpret
// them as the two D sub-registers.
if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
+ if (!Spacing)
+ Spacing = 1; // Register range implies a single spaced list.
+ else if (Spacing == 2) {
+ Error(RegLoc,
+            "invalid register in double-spaced list (must be 'D' register)");
+ return MatchOperand_ParseFail;
+ }
Reg = getDRegFromQReg(Reg);
if (Reg != OldReg + 1) {
Error(RegLoc, "non-contiguous register range");
@@ -2582,14 +2925,45 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
++Reg;
Count += 2;
+ // Parse the lane specifier if present.
+ VectorLaneTy NextLaneKind;
+ unsigned NextLaneIndex;
+ SMLoc EndLoc = Parser.getTok().getLoc();
+ if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+ if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
+ Error(EndLoc, "mismatched lane index in register list");
+ return MatchOperand_ParseFail;
+ }
continue;
}
- // Normal D register. Just check that it's contiguous and keep going.
- if (Reg != OldReg + 1) {
+ // Normal D register.
+ // Figure out the register spacing (single or double) of the list if
+ // we don't know it already.
+ if (!Spacing)
+ Spacing = 1 + (Reg == OldReg + 2);
+
+ // Just check that it's contiguous and keep going.
+ if (Reg != OldReg + Spacing) {
Error(RegLoc, "non-contiguous register range");
return MatchOperand_ParseFail;
}
++Count;
+ // Parse the lane specifier if present.
+ VectorLaneTy NextLaneKind;
+ unsigned NextLaneIndex;
+ SMLoc EndLoc = Parser.getTok().getLoc();
+ if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+ if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
+ Error(EndLoc, "mismatched lane index in register list");
+ return MatchOperand_ParseFail;
+ }
+ if (Spacing == 2 && LaneKind != NoLanes) {
+ Error(EndLoc,
+            "lane index specifier invalid in double-spaced register list");
+ return MatchOperand_ParseFail;
+ }
}
SMLoc E = Parser.getTok().getLoc();
@@ -2599,7 +2973,22 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
Parser.Lex(); // Eat '}' token.
- Operands.push_back(ARMOperand::CreateVectorList(FirstReg, Count, S, E));
+ switch (LaneKind) {
+ default:
+ assert(0 && "unexpected lane kind in register list.");
+ case NoLanes:
+ Operands.push_back(ARMOperand::CreateVectorList(FirstReg, Count,
+ (Spacing == 2), S, E));
+ break;
+ case AllLanes:
+ Operands.push_back(ARMOperand::CreateVectorListAllLanes(FirstReg, Count,
+ S, E));
+ break;
+ case IndexedLane:
+ Operands.push_back(ARMOperand::CreateVectorListIndexed(FirstReg, Count,
+ LaneIndex, S, E));
+ break;
+ }
return MatchOperand_Success;
}
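
The spacing inference in parseVectorList() boils down to: the step between the first two D registers fixes the list stride (1 for {d0,d1,...}, 2 for {d0,d2,...}), and every subsequent register must keep that stride. A compact sketch of the same invariant:

    // Spacing comes out as 1 or 2; returns false for the
    // "non-contiguous register range" case the parser diagnoses.
    static bool checkVectorListStride(ArrayRef<unsigned> Regs, int &Spacing) {
      Spacing = 0;
      for (unsigned i = 1, e = Regs.size(); i != e; ++i) {
        if (!Spacing)
          Spacing = 1 + (Regs[i] == Regs[i - 1] + 2);
        if (Regs[i] != Regs[i - 1] + Spacing)
          return false;
      }
      return true;
    }
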
@@ -2786,7 +3175,8 @@ parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op,
Parser.Lex(); // Eat shift type token.
// There must be a '#' and a shift amount.
- if (Parser.getTok().isNot(AsmToken::Hash)) {
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
Error(Parser.getTok().getLoc(), "'#' expected");
return MatchOperand_ParseFail;
}
@@ -2864,7 +3254,8 @@ parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat the operator.
// A '#' and a shift amount.
- if (Parser.getTok().isNot(AsmToken::Hash)) {
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
Error(Parser.getTok().getLoc(), "'#' expected");
return MatchOperand_ParseFail;
}
@@ -2924,7 +3315,8 @@ parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat the operator.
// A '#' and a rotate amount.
- if (Parser.getTok().isNot(AsmToken::Hash)) {
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
Error(Parser.getTok().getLoc(), "'#' expected");
return MatchOperand_ParseFail;
}
@@ -2961,7 +3353,8 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
// The bitfield descriptor is really two operands, the LSB and the width.
- if (Parser.getTok().isNot(AsmToken::Hash)) {
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
Error(Parser.getTok().getLoc(), "'#' expected");
return MatchOperand_ParseFail;
}
@@ -2993,7 +3386,8 @@ parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_ParseFail;
}
Parser.Lex(); // Eat hash token.
- if (Parser.getTok().isNot(AsmToken::Hash)) {
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
Error(Parser.getTok().getLoc(), "'#' expected");
return MatchOperand_ParseFail;
}
@@ -3087,7 +3481,8 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Tok.getLoc();
// Do immediates first, as we always parse those if we have a '#'.
- if (Parser.getTok().is(AsmToken::Hash)) {
+ if (Parser.getTok().is(AsmToken::Hash) ||
+ Parser.getTok().is(AsmToken::Dollar)) {
Parser.Lex(); // Eat the '#'.
// Explicitly look for a '-', as we need to encode negative zero
// differently.
@@ -3444,7 +3839,7 @@ bool ARMAsmParser::
cvtVLDwbFixed(MCInst &Inst, unsigned Opcode,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Vd
- ((ARMOperand*)Operands[3])->addVecListTwoDOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
// Create a writeback register dummy placeholder.
Inst.addOperand(MCOperand::CreateImm(0));
// Vn
@@ -3458,7 +3853,7 @@ bool ARMAsmParser::
cvtVLDwbRegister(MCInst &Inst, unsigned Opcode,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Vd
- ((ARMOperand*)Operands[3])->addVecListTwoDOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
// Create a writeback register dummy placeholder.
Inst.addOperand(MCOperand::CreateImm(0));
// Vn
@@ -3478,7 +3873,7 @@ cvtVSTwbFixed(MCInst &Inst, unsigned Opcode,
// Vn
((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2);
// Vt
- ((ARMOperand*)Operands[3])->addVecListTwoDOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
// pred
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
return true;
@@ -3494,7 +3889,7 @@ cvtVSTwbRegister(MCInst &Inst, unsigned Opcode,
// Vm
((ARMOperand*)Operands[5])->addRegOperands(Inst, 1);
// Vt
- ((ARMOperand*)Operands[3])->addVecListTwoDOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
// pred
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
return true;
@@ -3591,8 +3986,9 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// offset. Be friendly and also accept a plain integer (without a leading
// hash) for gas compatibility.
if (Parser.getTok().is(AsmToken::Hash) ||
+ Parser.getTok().is(AsmToken::Dollar) ||
Parser.getTok().is(AsmToken::Integer)) {
- if (Parser.getTok().is(AsmToken::Hash))
+ if (Parser.getTok().isNot(AsmToken::Integer))
Parser.Lex(); // Eat the '#'.
E = Parser.getTok().getLoc();
@@ -3690,7 +4086,8 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
if (Tok.isNot(AsmToken::Identifier))
return true;
StringRef ShiftName = Tok.getString();
- if (ShiftName == "lsl" || ShiftName == "LSL")
+ if (ShiftName == "lsl" || ShiftName == "LSL" ||
+ ShiftName == "asl" || ShiftName == "ASL")
St = ARM_AM::lsl;
else if (ShiftName == "lsr" || ShiftName == "LSR")
St = ARM_AM::lsr;
@@ -3710,7 +4107,8 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
Loc = Parser.getTok().getLoc();
// A '#' and a shift amount.
const AsmToken &HashTok = Parser.getTok();
- if (HashTok.isNot(AsmToken::Hash))
+ if (HashTok.isNot(AsmToken::Hash) &&
+ HashTok.isNot(AsmToken::Dollar))
return Error(HashTok.getLoc(), "'#' expected");
Parser.Lex(); // Eat hash token.
@@ -3739,7 +4137,8 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
- if (Parser.getTok().isNot(AsmToken::Hash))
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar))
return MatchOperand_NoMatch;
// Disambiguate the VMOV forms that can accept an FP immediate.
@@ -3852,6 +4251,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
return parseMemory(Operands);
case AsmToken::LCurly:
return parseRegisterList(Operands);
+ case AsmToken::Dollar:
case AsmToken::Hash: {
// #42 -> immediate.
// TODO: ":lower16:" and ":upper16:" modifiers after # before immediate
@@ -3990,7 +4390,9 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
Mnemonic == "mrs" || Mnemonic == "smmls" || Mnemonic == "vabs" ||
Mnemonic == "vcls" || Mnemonic == "vmls" || Mnemonic == "vmrs" ||
Mnemonic == "vnmls" || Mnemonic == "vqabs" || Mnemonic == "vrecps" ||
- Mnemonic == "vrsqrts" || Mnemonic == "srs" ||
+ Mnemonic == "vrsqrts" || Mnemonic == "srs" || Mnemonic == "flds" ||
+ Mnemonic == "fmrs" || Mnemonic == "fsqrts" || Mnemonic == "fsubs" ||
+ Mnemonic == "fsts" ||
(Mnemonic == "movs" && isThumb()))) {
Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1);
CarrySetting = true;
@@ -4206,9 +4608,27 @@ static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) {
return Mnemonic.startswith("vldm") || Mnemonic.startswith("vstm");
}
+static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features);
/// Parse an arm instruction mnemonic followed by its operands.
bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Apply mnemonic aliases before doing anything else, as the destination
+  // mnemonic may include suffixes and we want to handle them normally.
+ // The generic tblgen'erated code does this later, at the start of
+ // MatchInstructionImpl(), but that's too late for aliases that include
+ // any sort of suffix.
+ unsigned AvailableFeatures = getAvailableFeatures();
+ applyMnemonicAliases(Name, AvailableFeatures);
+
+ // First check for the ARM-specific .req directive.
+ if (Parser.getTok().is(AsmToken::Identifier) &&
+ Parser.getTok().getIdentifier() == ".req") {
+ parseDirectiveReq(Name, NameLoc);
+ // We always return 'error' for this, as we're done with this
+    // statement and don't need to match the instruction.
+ return true;
+ }
+
// Create the leading tokens for the mnemonic, split by '.' characters.
size_t Start = 0, Next = Name.find('.');
StringRef Mnemonic = Name.slice(Start, Next);
@@ -4400,12 +4820,21 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
}
}
// Similarly, the Thumb1 "RSB" instruction has a literal "#0" on the
- // end. Convert it to a token here.
+ // end. Convert it to a token here. Take care not to convert those
+ // that should hit the Thumb2 encoding.
if (Mnemonic == "rsb" && isThumb() && Operands.size() == 6 &&
+ static_cast<ARMOperand*>(Operands[3])->isReg() &&
+ static_cast<ARMOperand*>(Operands[4])->isReg() &&
static_cast<ARMOperand*>(Operands[5])->isImm()) {
ARMOperand *Op = static_cast<ARMOperand*>(Operands[5]);
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
- if (CE && CE->getValue() == 0) {
+ if (CE && CE->getValue() == 0 &&
+ (isThumbOne() ||
+ // The cc_out operand matches the IT block.
+ ((inITBlock() != CarrySetting) &&
+ // Neither register operand is a high register.
+ (isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) &&
+ isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()))))){
Operands.erase(Operands.begin() + 5);
Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc()));
delete Op;
@@ -4605,11 +5034,495 @@ validateInstruction(MCInst &Inst,
return false;
}
+static unsigned getRealVSTLNOpcode(unsigned Opc) {
+ switch(Opc) {
+ default: assert(0 && "unexpected opcode!");
+ // VST1LN
+ case ARM::VST1LNdWB_fixed_Asm_8: case ARM::VST1LNdWB_fixed_Asm_P8:
+ case ARM::VST1LNdWB_fixed_Asm_I8: case ARM::VST1LNdWB_fixed_Asm_S8:
+ case ARM::VST1LNdWB_fixed_Asm_U8:
+ return ARM::VST1LNd8_UPD;
+ case ARM::VST1LNdWB_fixed_Asm_16: case ARM::VST1LNdWB_fixed_Asm_P16:
+ case ARM::VST1LNdWB_fixed_Asm_I16: case ARM::VST1LNdWB_fixed_Asm_S16:
+ case ARM::VST1LNdWB_fixed_Asm_U16:
+ return ARM::VST1LNd16_UPD;
+ case ARM::VST1LNdWB_fixed_Asm_32: case ARM::VST1LNdWB_fixed_Asm_F:
+ case ARM::VST1LNdWB_fixed_Asm_F32: case ARM::VST1LNdWB_fixed_Asm_I32:
+ case ARM::VST1LNdWB_fixed_Asm_S32: case ARM::VST1LNdWB_fixed_Asm_U32:
+ return ARM::VST1LNd32_UPD;
+ case ARM::VST1LNdWB_register_Asm_8: case ARM::VST1LNdWB_register_Asm_P8:
+ case ARM::VST1LNdWB_register_Asm_I8: case ARM::VST1LNdWB_register_Asm_S8:
+ case ARM::VST1LNdWB_register_Asm_U8:
+ return ARM::VST1LNd8_UPD;
+ case ARM::VST1LNdWB_register_Asm_16: case ARM::VST1LNdWB_register_Asm_P16:
+ case ARM::VST1LNdWB_register_Asm_I16: case ARM::VST1LNdWB_register_Asm_S16:
+ case ARM::VST1LNdWB_register_Asm_U16:
+ return ARM::VST1LNd16_UPD;
+ case ARM::VST1LNdWB_register_Asm_32: case ARM::VST1LNdWB_register_Asm_F:
+ case ARM::VST1LNdWB_register_Asm_F32: case ARM::VST1LNdWB_register_Asm_I32:
+ case ARM::VST1LNdWB_register_Asm_S32: case ARM::VST1LNdWB_register_Asm_U32:
+ return ARM::VST1LNd32_UPD;
+ case ARM::VST1LNdAsm_8: case ARM::VST1LNdAsm_P8:
+ case ARM::VST1LNdAsm_I8: case ARM::VST1LNdAsm_S8:
+ case ARM::VST1LNdAsm_U8:
+ return ARM::VST1LNd8;
+ case ARM::VST1LNdAsm_16: case ARM::VST1LNdAsm_P16:
+ case ARM::VST1LNdAsm_I16: case ARM::VST1LNdAsm_S16:
+ case ARM::VST1LNdAsm_U16:
+ return ARM::VST1LNd16;
+ case ARM::VST1LNdAsm_32: case ARM::VST1LNdAsm_F:
+ case ARM::VST1LNdAsm_F32: case ARM::VST1LNdAsm_I32:
+ case ARM::VST1LNdAsm_S32: case ARM::VST1LNdAsm_U32:
+ return ARM::VST1LNd32;
+
+ // VST2LN
+ case ARM::VST2LNdWB_fixed_Asm_8: case ARM::VST2LNdWB_fixed_Asm_P8:
+ case ARM::VST2LNdWB_fixed_Asm_I8: case ARM::VST2LNdWB_fixed_Asm_S8:
+ case ARM::VST2LNdWB_fixed_Asm_U8:
+ return ARM::VST2LNd8_UPD;
+ case ARM::VST2LNdWB_fixed_Asm_16: case ARM::VST2LNdWB_fixed_Asm_P16:
+ case ARM::VST2LNdWB_fixed_Asm_I16: case ARM::VST2LNdWB_fixed_Asm_S16:
+ case ARM::VST2LNdWB_fixed_Asm_U16:
+ return ARM::VST2LNd16_UPD;
+ case ARM::VST2LNdWB_fixed_Asm_32: case ARM::VST2LNdWB_fixed_Asm_F:
+ case ARM::VST2LNdWB_fixed_Asm_F32: case ARM::VST2LNdWB_fixed_Asm_I32:
+ case ARM::VST2LNdWB_fixed_Asm_S32: case ARM::VST2LNdWB_fixed_Asm_U32:
+ return ARM::VST2LNd32_UPD;
+ case ARM::VST2LNdWB_register_Asm_8: case ARM::VST2LNdWB_register_Asm_P8:
+ case ARM::VST2LNdWB_register_Asm_I8: case ARM::VST2LNdWB_register_Asm_S8:
+ case ARM::VST2LNdWB_register_Asm_U8:
+ return ARM::VST2LNd8_UPD;
+ case ARM::VST2LNdWB_register_Asm_16: case ARM::VST2LNdWB_register_Asm_P16:
+ case ARM::VST2LNdWB_register_Asm_I16: case ARM::VST2LNdWB_register_Asm_S16:
+ case ARM::VST2LNdWB_register_Asm_U16:
+ return ARM::VST2LNd16_UPD;
+ case ARM::VST2LNdWB_register_Asm_32: case ARM::VST2LNdWB_register_Asm_F:
+ case ARM::VST2LNdWB_register_Asm_F32: case ARM::VST2LNdWB_register_Asm_I32:
+ case ARM::VST2LNdWB_register_Asm_S32: case ARM::VST2LNdWB_register_Asm_U32:
+ return ARM::VST2LNd32_UPD;
+ case ARM::VST2LNdAsm_8: case ARM::VST2LNdAsm_P8:
+ case ARM::VST2LNdAsm_I8: case ARM::VST2LNdAsm_S8:
+ case ARM::VST2LNdAsm_U8:
+ return ARM::VST2LNd8;
+ case ARM::VST2LNdAsm_16: case ARM::VST2LNdAsm_P16:
+ case ARM::VST2LNdAsm_I16: case ARM::VST2LNdAsm_S16:
+ case ARM::VST2LNdAsm_U16:
+ return ARM::VST2LNd16;
+ case ARM::VST2LNdAsm_32: case ARM::VST2LNdAsm_F:
+ case ARM::VST2LNdAsm_F32: case ARM::VST2LNdAsm_I32:
+ case ARM::VST2LNdAsm_S32: case ARM::VST2LNdAsm_U32:
+ return ARM::VST2LNd32;
+ }
+}
+
+static unsigned getRealVLDLNOpcode(unsigned Opc) {
+ switch(Opc) {
+ default: assert(0 && "unexpected opcode!");
+ // VLD1LN
+ case ARM::VLD1LNdWB_fixed_Asm_8: case ARM::VLD1LNdWB_fixed_Asm_P8:
+ case ARM::VLD1LNdWB_fixed_Asm_I8: case ARM::VLD1LNdWB_fixed_Asm_S8:
+ case ARM::VLD1LNdWB_fixed_Asm_U8:
+ return ARM::VLD1LNd8_UPD;
+ case ARM::VLD1LNdWB_fixed_Asm_16: case ARM::VLD1LNdWB_fixed_Asm_P16:
+ case ARM::VLD1LNdWB_fixed_Asm_I16: case ARM::VLD1LNdWB_fixed_Asm_S16:
+ case ARM::VLD1LNdWB_fixed_Asm_U16:
+ return ARM::VLD1LNd16_UPD;
+ case ARM::VLD1LNdWB_fixed_Asm_32: case ARM::VLD1LNdWB_fixed_Asm_F:
+ case ARM::VLD1LNdWB_fixed_Asm_F32: case ARM::VLD1LNdWB_fixed_Asm_I32:
+ case ARM::VLD1LNdWB_fixed_Asm_S32: case ARM::VLD1LNdWB_fixed_Asm_U32:
+ return ARM::VLD1LNd32_UPD;
+ case ARM::VLD1LNdWB_register_Asm_8: case ARM::VLD1LNdWB_register_Asm_P8:
+ case ARM::VLD1LNdWB_register_Asm_I8: case ARM::VLD1LNdWB_register_Asm_S8:
+ case ARM::VLD1LNdWB_register_Asm_U8:
+ return ARM::VLD1LNd8_UPD;
+ case ARM::VLD1LNdWB_register_Asm_16: case ARM::VLD1LNdWB_register_Asm_P16:
+ case ARM::VLD1LNdWB_register_Asm_I16: case ARM::VLD1LNdWB_register_Asm_S16:
+ case ARM::VLD1LNdWB_register_Asm_U16:
+ return ARM::VLD1LNd16_UPD;
+ case ARM::VLD1LNdWB_register_Asm_32: case ARM::VLD1LNdWB_register_Asm_F:
+ case ARM::VLD1LNdWB_register_Asm_F32: case ARM::VLD1LNdWB_register_Asm_I32:
+ case ARM::VLD1LNdWB_register_Asm_S32: case ARM::VLD1LNdWB_register_Asm_U32:
+ return ARM::VLD1LNd32_UPD;
+ case ARM::VLD1LNdAsm_8: case ARM::VLD1LNdAsm_P8:
+ case ARM::VLD1LNdAsm_I8: case ARM::VLD1LNdAsm_S8:
+ case ARM::VLD1LNdAsm_U8:
+ return ARM::VLD1LNd8;
+ case ARM::VLD1LNdAsm_16: case ARM::VLD1LNdAsm_P16:
+ case ARM::VLD1LNdAsm_I16: case ARM::VLD1LNdAsm_S16:
+ case ARM::VLD1LNdAsm_U16:
+ return ARM::VLD1LNd16;
+ case ARM::VLD1LNdAsm_32: case ARM::VLD1LNdAsm_F:
+ case ARM::VLD1LNdAsm_F32: case ARM::VLD1LNdAsm_I32:
+ case ARM::VLD1LNdAsm_S32: case ARM::VLD1LNdAsm_U32:
+ return ARM::VLD1LNd32;
+
+ // VLD2LN
+ case ARM::VLD2LNdWB_fixed_Asm_8: case ARM::VLD2LNdWB_fixed_Asm_P8:
+ case ARM::VLD2LNdWB_fixed_Asm_I8: case ARM::VLD2LNdWB_fixed_Asm_S8:
+ case ARM::VLD2LNdWB_fixed_Asm_U8:
+ return ARM::VLD2LNd8_UPD;
+ case ARM::VLD2LNdWB_fixed_Asm_16: case ARM::VLD2LNdWB_fixed_Asm_P16:
+ case ARM::VLD2LNdWB_fixed_Asm_I16: case ARM::VLD2LNdWB_fixed_Asm_S16:
+ case ARM::VLD2LNdWB_fixed_Asm_U16:
+ return ARM::VLD2LNd16_UPD;
+ case ARM::VLD2LNdWB_fixed_Asm_32: case ARM::VLD2LNdWB_fixed_Asm_F:
+ case ARM::VLD2LNdWB_fixed_Asm_F32: case ARM::VLD2LNdWB_fixed_Asm_I32:
+ case ARM::VLD2LNdWB_fixed_Asm_S32: case ARM::VLD2LNdWB_fixed_Asm_U32:
+ return ARM::VLD2LNd32_UPD;
+ case ARM::VLD2LNdWB_register_Asm_8: case ARM::VLD2LNdWB_register_Asm_P8:
+ case ARM::VLD2LNdWB_register_Asm_I8: case ARM::VLD2LNdWB_register_Asm_S8:
+ case ARM::VLD2LNdWB_register_Asm_U8:
+ return ARM::VLD2LNd8_UPD;
+ case ARM::VLD2LNdWB_register_Asm_16: case ARM::VLD2LNdWB_register_Asm_P16:
+ case ARM::VLD2LNdWB_register_Asm_I16: case ARM::VLD2LNdWB_register_Asm_S16:
+ case ARM::VLD2LNdWB_register_Asm_U16:
+ return ARM::VLD2LNd16_UPD;
+ case ARM::VLD2LNdWB_register_Asm_32: case ARM::VLD2LNdWB_register_Asm_F:
+ case ARM::VLD2LNdWB_register_Asm_F32: case ARM::VLD2LNdWB_register_Asm_I32:
+ case ARM::VLD2LNdWB_register_Asm_S32: case ARM::VLD2LNdWB_register_Asm_U32:
+ return ARM::VLD2LNd32_UPD;
+ case ARM::VLD2LNdAsm_8: case ARM::VLD2LNdAsm_P8:
+ case ARM::VLD2LNdAsm_I8: case ARM::VLD2LNdAsm_S8:
+ case ARM::VLD2LNdAsm_U8:
+ return ARM::VLD2LNd8;
+ case ARM::VLD2LNdAsm_16: case ARM::VLD2LNdAsm_P16:
+ case ARM::VLD2LNdAsm_I16: case ARM::VLD2LNdAsm_S16:
+ case ARM::VLD2LNdAsm_U16:
+ return ARM::VLD2LNd16;
+ case ARM::VLD2LNdAsm_32: case ARM::VLD2LNdAsm_F:
+ case ARM::VLD2LNdAsm_F32: case ARM::VLD2LNdAsm_I32:
+ case ARM::VLD2LNdAsm_S32: case ARM::VLD2LNdAsm_U32:
+ return ARM::VLD2LNd32;
+ }
+}
+
bool ARMAsmParser::
processInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
switch (Inst.getOpcode()) {
- // Handle the MOV complex aliases.
+ // Handle NEON VST complex aliases.
+ case ARM::VST1LNdWB_register_Asm_8: case ARM::VST1LNdWB_register_Asm_P8:
+ case ARM::VST1LNdWB_register_Asm_I8: case ARM::VST1LNdWB_register_Asm_S8:
+ case ARM::VST1LNdWB_register_Asm_U8: case ARM::VST1LNdWB_register_Asm_16:
+ case ARM::VST1LNdWB_register_Asm_P16: case ARM::VST1LNdWB_register_Asm_I16:
+ case ARM::VST1LNdWB_register_Asm_S16: case ARM::VST1LNdWB_register_Asm_U16:
+ case ARM::VST1LNdWB_register_Asm_32: case ARM::VST1LNdWB_register_Asm_F:
+ case ARM::VST1LNdWB_register_Asm_F32: case ARM::VST1LNdWB_register_Asm_I32:
+ case ARM::VST1LNdWB_register_Asm_S32: case ARM::VST1LNdWB_register_Asm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
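+    // For example (illustrative operands): for an alias such as
+    // "vst1.32 {d4[1]}, [r3], r5" the parsed order is (Vd, lane, Rn,
+    // alignment, Rm, pred), while the real VST1LNd32_UPD expects
+    // (Rn_wb, Rn, alignment, Rm, Vd, lane, pred), as built below.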
+ TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST2LNdWB_register_Asm_8: case ARM::VST2LNdWB_register_Asm_P8:
+ case ARM::VST2LNdWB_register_Asm_I8: case ARM::VST2LNdWB_register_Asm_S8:
+ case ARM::VST2LNdWB_register_Asm_U8: case ARM::VST2LNdWB_register_Asm_16:
+ case ARM::VST2LNdWB_register_Asm_P16: case ARM::VST2LNdWB_register_Asm_I16:
+ case ARM::VST2LNdWB_register_Asm_S16: case ARM::VST2LNdWB_register_Asm_U16:
+ case ARM::VST2LNdWB_register_Asm_32: case ARM::VST2LNdWB_register_Asm_F:
+ case ARM::VST2LNdWB_register_Asm_F32: case ARM::VST2LNdWB_register_Asm_I32:
+ case ARM::VST2LNdWB_register_Asm_S32: case ARM::VST2LNdWB_register_Asm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::VST1LNdWB_fixed_Asm_8: case ARM::VST1LNdWB_fixed_Asm_P8:
+ case ARM::VST1LNdWB_fixed_Asm_I8: case ARM::VST1LNdWB_fixed_Asm_S8:
+ case ARM::VST1LNdWB_fixed_Asm_U8: case ARM::VST1LNdWB_fixed_Asm_16:
+ case ARM::VST1LNdWB_fixed_Asm_P16: case ARM::VST1LNdWB_fixed_Asm_I16:
+ case ARM::VST1LNdWB_fixed_Asm_S16: case ARM::VST1LNdWB_fixed_Asm_U16:
+ case ARM::VST1LNdWB_fixed_Asm_32: case ARM::VST1LNdWB_fixed_Asm_F:
+ case ARM::VST1LNdWB_fixed_Asm_F32: case ARM::VST1LNdWB_fixed_Asm_I32:
+ case ARM::VST1LNdWB_fixed_Asm_S32: case ARM::VST1LNdWB_fixed_Asm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST2LNdWB_fixed_Asm_8: case ARM::VST2LNdWB_fixed_Asm_P8:
+ case ARM::VST2LNdWB_fixed_Asm_I8: case ARM::VST2LNdWB_fixed_Asm_S8:
+ case ARM::VST2LNdWB_fixed_Asm_U8: case ARM::VST2LNdWB_fixed_Asm_16:
+ case ARM::VST2LNdWB_fixed_Asm_P16: case ARM::VST2LNdWB_fixed_Asm_I16:
+ case ARM::VST2LNdWB_fixed_Asm_S16: case ARM::VST2LNdWB_fixed_Asm_U16:
+ case ARM::VST2LNdWB_fixed_Asm_32: case ARM::VST2LNdWB_fixed_Asm_F:
+ case ARM::VST2LNdWB_fixed_Asm_F32: case ARM::VST2LNdWB_fixed_Asm_I32:
+ case ARM::VST2LNdWB_fixed_Asm_S32: case ARM::VST2LNdWB_fixed_Asm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::VST1LNdAsm_8: case ARM::VST1LNdAsm_P8: case ARM::VST1LNdAsm_I8:
+ case ARM::VST1LNdAsm_S8: case ARM::VST1LNdAsm_U8: case ARM::VST1LNdAsm_16:
+ case ARM::VST1LNdAsm_P16: case ARM::VST1LNdAsm_I16: case ARM::VST1LNdAsm_S16:
+ case ARM::VST1LNdAsm_U16: case ARM::VST1LNdAsm_32: case ARM::VST1LNdAsm_F:
+ case ARM::VST1LNdAsm_F32: case ARM::VST1LNdAsm_I32: case ARM::VST1LNdAsm_S32:
+ case ARM::VST1LNdAsm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST2LNdAsm_8: case ARM::VST2LNdAsm_P8: case ARM::VST2LNdAsm_I8:
+ case ARM::VST2LNdAsm_S8: case ARM::VST2LNdAsm_U8: case ARM::VST2LNdAsm_16:
+ case ARM::VST2LNdAsm_P16: case ARM::VST2LNdAsm_I16: case ARM::VST2LNdAsm_S16:
+ case ARM::VST2LNdAsm_U16: case ARM::VST2LNdAsm_32: case ARM::VST2LNdAsm_F:
+ case ARM::VST2LNdAsm_F32: case ARM::VST2LNdAsm_I32: case ARM::VST2LNdAsm_S32:
+ case ARM::VST2LNdAsm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+ // Handle NEON VLD complex aliases.
+ case ARM::VLD1LNdWB_register_Asm_8: case ARM::VLD1LNdWB_register_Asm_P8:
+ case ARM::VLD1LNdWB_register_Asm_I8: case ARM::VLD1LNdWB_register_Asm_S8:
+ case ARM::VLD1LNdWB_register_Asm_U8: case ARM::VLD1LNdWB_register_Asm_16:
+ case ARM::VLD1LNdWB_register_Asm_P16: case ARM::VLD1LNdWB_register_Asm_I16:
+ case ARM::VLD1LNdWB_register_Asm_S16: case ARM::VLD1LNdWB_register_Asm_U16:
+ case ARM::VLD1LNdWB_register_Asm_32: case ARM::VLD1LNdWB_register_Asm_F:
+ case ARM::VLD1LNdWB_register_Asm_F32: case ARM::VLD1LNdWB_register_Asm_I32:
+ case ARM::VLD1LNdWB_register_Asm_S32: case ARM::VLD1LNdWB_register_Asm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD2LNdWB_register_Asm_8: case ARM::VLD2LNdWB_register_Asm_P8:
+ case ARM::VLD2LNdWB_register_Asm_I8: case ARM::VLD2LNdWB_register_Asm_S8:
+ case ARM::VLD2LNdWB_register_Asm_U8: case ARM::VLD2LNdWB_register_Asm_16:
+ case ARM::VLD2LNdWB_register_Asm_P16: case ARM::VLD2LNdWB_register_Asm_I16:
+ case ARM::VLD2LNdWB_register_Asm_S16: case ARM::VLD2LNdWB_register_Asm_U16:
+ case ARM::VLD2LNdWB_register_Asm_32: case ARM::VLD2LNdWB_register_Asm_F:
+ case ARM::VLD2LNdWB_register_Asm_F32: case ARM::VLD2LNdWB_register_Asm_I32:
+ case ARM::VLD2LNdWB_register_Asm_S32: case ARM::VLD2LNdWB_register_Asm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD1LNdWB_fixed_Asm_8: case ARM::VLD1LNdWB_fixed_Asm_P8:
+ case ARM::VLD1LNdWB_fixed_Asm_I8: case ARM::VLD1LNdWB_fixed_Asm_S8:
+ case ARM::VLD1LNdWB_fixed_Asm_U8: case ARM::VLD1LNdWB_fixed_Asm_16:
+ case ARM::VLD1LNdWB_fixed_Asm_P16: case ARM::VLD1LNdWB_fixed_Asm_I16:
+ case ARM::VLD1LNdWB_fixed_Asm_S16: case ARM::VLD1LNdWB_fixed_Asm_U16:
+ case ARM::VLD1LNdWB_fixed_Asm_32: case ARM::VLD1LNdWB_fixed_Asm_F:
+ case ARM::VLD1LNdWB_fixed_Asm_F32: case ARM::VLD1LNdWB_fixed_Asm_I32:
+ case ARM::VLD1LNdWB_fixed_Asm_S32: case ARM::VLD1LNdWB_fixed_Asm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD2LNdWB_fixed_Asm_8: case ARM::VLD2LNdWB_fixed_Asm_P8:
+ case ARM::VLD2LNdWB_fixed_Asm_I8: case ARM::VLD2LNdWB_fixed_Asm_S8:
+ case ARM::VLD2LNdWB_fixed_Asm_U8: case ARM::VLD2LNdWB_fixed_Asm_16:
+ case ARM::VLD2LNdWB_fixed_Asm_P16: case ARM::VLD2LNdWB_fixed_Asm_I16:
+ case ARM::VLD2LNdWB_fixed_Asm_S16: case ARM::VLD2LNdWB_fixed_Asm_U16:
+ case ARM::VLD2LNdWB_fixed_Asm_32: case ARM::VLD2LNdWB_fixed_Asm_F:
+ case ARM::VLD2LNdWB_fixed_Asm_F32: case ARM::VLD2LNdWB_fixed_Asm_I32:
+ case ARM::VLD2LNdWB_fixed_Asm_S32: case ARM::VLD2LNdWB_fixed_Asm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD1LNdAsm_8: case ARM::VLD1LNdAsm_P8: case ARM::VLD1LNdAsm_I8:
+ case ARM::VLD1LNdAsm_S8: case ARM::VLD1LNdAsm_U8: case ARM::VLD1LNdAsm_16:
+ case ARM::VLD1LNdAsm_P16: case ARM::VLD1LNdAsm_I16: case ARM::VLD1LNdAsm_S16:
+ case ARM::VLD1LNdAsm_U16: case ARM::VLD1LNdAsm_32: case ARM::VLD1LNdAsm_F:
+ case ARM::VLD1LNdAsm_F32: case ARM::VLD1LNdAsm_I32: case ARM::VLD1LNdAsm_S32:
+ case ARM::VLD1LNdAsm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD2LNdAsm_8: case ARM::VLD2LNdAsm_P8: case ARM::VLD2LNdAsm_I8:
+ case ARM::VLD2LNdAsm_S8: case ARM::VLD2LNdAsm_U8: case ARM::VLD2LNdAsm_16:
+ case ARM::VLD2LNdAsm_P16: case ARM::VLD2LNdAsm_I16: case ARM::VLD2LNdAsm_S16:
+ case ARM::VLD2LNdAsm_U16: case ARM::VLD2LNdAsm_32: case ARM::VLD2LNdAsm_F:
+ case ARM::VLD2LNdAsm_F32: case ARM::VLD2LNdAsm_I32: case ARM::VLD2LNdAsm_S32:
+ case ARM::VLD2LNdAsm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+ // Handle the Thumb2 mode MOV complex aliases.
+ case ARM::t2MOVsi:
+ case ARM::t2MOVSsi: {
+    // When the register operands are low registers, which instruction we
+    // expand to depends on the CCOut operand and whether we're inside an
+    // IT block.
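+    // For example (illustrative): "movs r0, r1, lsl #2" outside an IT
+    // block can use the narrow tLSLri, which sets flags there, whereas
+    // "mov r0, r1, lsl #2" outside an IT block needs the wide t2LSLri.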
+ bool isNarrow = false;
+ if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ inITBlock() == (Inst.getOpcode() == ARM::t2MOVsi))
+ isNarrow = true;
+ MCInst TmpInst;
+ unsigned newOpc;
+ switch(ARM_AM::getSORegShOp(Inst.getOperand(2).getImm())) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM_AM::asr: newOpc = isNarrow ? ARM::tASRri : ARM::t2ASRri; break;
+ case ARM_AM::lsr: newOpc = isNarrow ? ARM::tLSRri : ARM::t2LSRri; break;
+ case ARM_AM::lsl: newOpc = isNarrow ? ARM::tLSLri : ARM::t2LSLri; break;
+ case ARM_AM::ror: newOpc = ARM::t2RORri; isNarrow = false; break;
+ }
+    unsigned Amount = ARM_AM::getSORegOffset(Inst.getOperand(2).getImm());
+    if (Amount == 32) Amount = 0;
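+    // A source-level shift of #32 (valid for asr/lsr) is encoded with an
+    // immediate of 0, hence the wrap above.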
+ TmpInst.setOpcode(newOpc);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rd
+ if (isNarrow)
+ TmpInst.addOperand(MCOperand::CreateReg(
+ Inst.getOpcode() == ARM::t2MOVSsi ? ARM::CPSR : 0));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+    TmpInst.addOperand(MCOperand::CreateImm(Amount));
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ if (!isNarrow)
+ TmpInst.addOperand(MCOperand::CreateReg(
+ Inst.getOpcode() == ARM::t2MOVSsi ? ARM::CPSR : 0));
+ Inst = TmpInst;
+ return true;
+ }
+ // Handle the ARM mode MOV complex aliases.
case ARM::ASRr:
case ARM::LSRr:
case ARM::LSLr:
@@ -4743,6 +5656,24 @@ processInstruction(MCInst &Inst,
Inst = TmpInst;
}
break;
+ case ARM::t2ADDri12:
+ // If the immediate fits for encoding T3 (t2ADDri) and the generic "add"
+ // mnemonic was used (not "addw"), encoding T3 is preferred.
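+    // e.g. an "add" that matched t2ADDri12 but whose immediate is also a
+    // valid Thumb2 modified immediate is re-encoded as t2ADDri; "addw"
+    // explicitly requests encoding T4 and is left alone.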
+ if (static_cast<ARMOperand*>(Operands[0])->getToken() != "add" ||
+ ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1)
+ break;
+ Inst.setOpcode(ARM::t2ADDri);
+ Inst.addOperand(MCOperand::CreateReg(0)); // cc_out
+ break;
+ case ARM::t2SUBri12:
+ // If the immediate fits for encoding T3 (t2SUBri) and the generic "sub"
+ // mnemonic was used (not "subw"), encoding T3 is preferred.
+ if (static_cast<ARMOperand*>(Operands[0])->getToken() != "sub" ||
+ ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1)
+ break;
+ Inst.setOpcode(ARM::t2SUBri);
+ Inst.addOperand(MCOperand::CreateReg(0)); // cc_out
+ break;
case ARM::tADDi8:
// If the immediate is in the range 0-7, we want tADDi3 iff Rd was
// explicitly specified. From the ARM ARM: "Encoding T1 is preferred
@@ -4763,6 +5694,26 @@ processInstruction(MCInst &Inst,
return true;
}
break;
+ case ARM::t2ADDrr: {
+ // If the destination and first source operand are the same, and
+ // there's no setting of the flags, use encoding T2 instead of T3.
+ // Note that this is only for ADD, not SUB. This mirrors the system
+ // 'as' behaviour. Make sure the wide encoding wasn't explicit.
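+    // e.g. "add r4, r4, r5" can become the 16-bit tADDhirr, while
+    // "add.w r4, r4, r5" and flag-setting forms stay as t2ADDrr.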
+ if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() ||
+ Inst.getOperand(5).getReg() != 0 ||
+ (static_cast<ARMOperand*>(Operands[3])->isToken() &&
+ static_cast<ARMOperand*>(Operands[3])->getToken() == ".w"))
+ break;
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tADDhirr);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
case ARM::tB:
// A Thumb conditional branch outside of an IT block is a tBcc.
if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock()) {
@@ -5079,12 +6030,16 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
return parseDirectiveWord(4, DirectiveID.getLoc());
else if (IDVal == ".thumb")
return parseDirectiveThumb(DirectiveID.getLoc());
+ else if (IDVal == ".arm")
+ return parseDirectiveARM(DirectiveID.getLoc());
else if (IDVal == ".thumb_func")
return parseDirectiveThumbFunc(DirectiveID.getLoc());
else if (IDVal == ".code")
return parseDirectiveCode(DirectiveID.getLoc());
else if (IDVal == ".syntax")
return parseDirectiveSyntax(DirectiveID.getLoc());
+ else if (IDVal == ".unreq")
+ return parseDirectiveUnreq(DirectiveID.getLoc());
return true;
}
@@ -5120,9 +6075,22 @@ bool ARMAsmParser::parseDirectiveThumb(SMLoc L) {
return Error(L, "unexpected token in directive");
Parser.Lex();
- // TODO: set thumb mode
- // TODO: tell the MC streamer the mode
- // getParser().getStreamer().Emit???();
+ if (!isThumb())
+ SwitchMode();
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
+ return false;
+}
+
+/// parseDirectiveARM
+/// ::= .arm
+bool ARMAsmParser::parseDirectiveARM(SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+
+ if (isThumb())
+ SwitchMode();
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
return false;
}
@@ -5212,6 +6180,45 @@ bool ARMAsmParser::parseDirectiveCode(SMLoc L) {
return false;
}
+/// parseDirectiveReq
+/// ::= name .req registername
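+///     e.g. "fp .req r11"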
+bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
+ Parser.Lex(); // Eat the '.req' token.
+ unsigned Reg;
+ SMLoc SRegLoc, ERegLoc;
+ if (ParseRegister(Reg, SRegLoc, ERegLoc)) {
+ Parser.EatToEndOfStatement();
+ return Error(SRegLoc, "register name expected");
+ }
+
+ // Shouldn't be anything else.
+ if (Parser.getTok().isNot(AsmToken::EndOfStatement)) {
+ Parser.EatToEndOfStatement();
+ return Error(Parser.getTok().getLoc(),
+ "unexpected input in .req directive.");
+ }
+
+ Parser.Lex(); // Consume the EndOfStatement
+
+ if (RegisterReqs.GetOrCreateValue(Name, Reg).getValue() != Reg)
+ return Error(SRegLoc, "redefinition of '" + Name +
+ "' does not match original.");
+
+ return false;
+}
+
+/// parseDirectiveUnreq
+/// ::= .unreq registername
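+///     e.g. ".unreq fp"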
+bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) {
+ if (Parser.getTok().isNot(AsmToken::Identifier)) {
+ Parser.EatToEndOfStatement();
+ return Error(L, "unexpected input in .unreq directive.");
+ }
+ RegisterReqs.erase(Parser.getTok().getIdentifier());
+ Parser.Lex(); // Eat the identifier.
+ return false;
+}
+
extern "C" void LLVMInitializeARMAsmLexer();
/// Force static initialization.
diff --git a/lib/Target/ARM/AsmParser/CMakeLists.txt b/lib/Target/ARM/AsmParser/CMakeLists.txt
index 3f5ad39..e24a1b1 100644
--- a/lib/Target/ARM/AsmParser/CMakeLists.txt
+++ b/lib/Target/ARM/AsmParser/CMakeLists.txt
@@ -6,11 +6,3 @@ add_llvm_library(LLVMARMAsmParser
)
add_dependencies(LLVMARMAsmParser ARMCommonTableGen)
-
-add_llvm_library_dependencies(LLVMARMAsmParser
- LLVMARMDesc
- LLVMARMInfo
- LLVMMC
- LLVMMCParser
- LLVMSupport
- )
diff --git a/lib/Target/ARM/AsmParser/LLVMBuild.txt b/lib/Target/ARM/AsmParser/LLVMBuild.txt
index cbf9b4b..f0184b6 100644
--- a/lib/Target/ARM/AsmParser/LLVMBuild.txt
+++ b/lib/Target/ARM/AsmParser/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = ARMAsmParser
parent = ARM
required_libraries = ARMDesc ARMInfo MC MCParser Support
add_to_library_groups = ARM
-
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 511932e..04cdf55 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -48,20 +48,6 @@ add_llvm_target(ARMCodeGen
Thumb2SizeReduction.cpp
)
-add_llvm_library_dependencies(LLVMARMCodeGen
- LLVMARMAsmPrinter
- LLVMARMDesc
- LLVMARMInfo
- LLVMAnalysis
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- )
-
# workaround for hanging compilation on MSVC9, 10
if( MSVC_VERSION EQUAL 1600 OR MSVC_VERSION EQUAL 1500 )
set_property(
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index ad250ab..49c64fd 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -2085,15 +2085,24 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn,
case ARM::VLD1d32Qwb_register:
case ARM::VLD1d64Qwb_fixed:
case ARM::VLD1d64Qwb_register:
- case ARM::VLD2d8_UPD:
- case ARM::VLD2d16_UPD:
- case ARM::VLD2d32_UPD:
- case ARM::VLD2q8_UPD:
- case ARM::VLD2q16_UPD:
- case ARM::VLD2q32_UPD:
- case ARM::VLD2b8_UPD:
- case ARM::VLD2b16_UPD:
- case ARM::VLD2b32_UPD:
+ case ARM::VLD2d8wb_fixed:
+ case ARM::VLD2d16wb_fixed:
+ case ARM::VLD2d32wb_fixed:
+ case ARM::VLD2q8wb_fixed:
+ case ARM::VLD2q16wb_fixed:
+ case ARM::VLD2q32wb_fixed:
+ case ARM::VLD2d8wb_register:
+ case ARM::VLD2d16wb_register:
+ case ARM::VLD2d32wb_register:
+ case ARM::VLD2q8wb_register:
+ case ARM::VLD2q16wb_register:
+ case ARM::VLD2q32wb_register:
+ case ARM::VLD2b8wb_fixed:
+ case ARM::VLD2b16wb_fixed:
+ case ARM::VLD2b32wb_fixed:
+ case ARM::VLD2b8wb_register:
+ case ARM::VLD2b16wb_register:
+ case ARM::VLD2b32wb_register:
case ARM::VLD3d8_UPD:
case ARM::VLD3d16_UPD:
case ARM::VLD3d32_UPD:
@@ -2196,23 +2205,40 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
case ARM::VST1q16wb_register:
case ARM::VST1q32wb_register:
case ARM::VST1q64wb_register:
- case ARM::VST1d8T_UPD:
- case ARM::VST1d16T_UPD:
- case ARM::VST1d32T_UPD:
- case ARM::VST1d64T_UPD:
- case ARM::VST1d8Q_UPD:
- case ARM::VST1d16Q_UPD:
- case ARM::VST1d32Q_UPD:
- case ARM::VST1d64Q_UPD:
- case ARM::VST2d8_UPD:
- case ARM::VST2d16_UPD:
- case ARM::VST2d32_UPD:
- case ARM::VST2q8_UPD:
- case ARM::VST2q16_UPD:
- case ARM::VST2q32_UPD:
- case ARM::VST2b8_UPD:
- case ARM::VST2b16_UPD:
- case ARM::VST2b32_UPD:
+ case ARM::VST1d8Twb_fixed:
+ case ARM::VST1d16Twb_fixed:
+ case ARM::VST1d32Twb_fixed:
+ case ARM::VST1d64Twb_fixed:
+ case ARM::VST1d8Twb_register:
+ case ARM::VST1d16Twb_register:
+ case ARM::VST1d32Twb_register:
+ case ARM::VST1d64Twb_register:
+ case ARM::VST1d8Qwb_fixed:
+ case ARM::VST1d16Qwb_fixed:
+ case ARM::VST1d32Qwb_fixed:
+ case ARM::VST1d64Qwb_fixed:
+ case ARM::VST1d8Qwb_register:
+ case ARM::VST1d16Qwb_register:
+ case ARM::VST1d32Qwb_register:
+ case ARM::VST1d64Qwb_register:
+ case ARM::VST2d8wb_fixed:
+ case ARM::VST2d16wb_fixed:
+ case ARM::VST2d32wb_fixed:
+ case ARM::VST2d8wb_register:
+ case ARM::VST2d16wb_register:
+ case ARM::VST2d32wb_register:
+ case ARM::VST2q8wb_fixed:
+ case ARM::VST2q16wb_fixed:
+ case ARM::VST2q32wb_fixed:
+ case ARM::VST2q8wb_register:
+ case ARM::VST2q16wb_register:
+ case ARM::VST2q32wb_register:
+ case ARM::VST2b8wb_fixed:
+ case ARM::VST2b16wb_fixed:
+ case ARM::VST2b32wb_fixed:
+ case ARM::VST2b8wb_register:
+ case ARM::VST2b16wb_register:
+ case ARM::VST2b32wb_register:
case ARM::VST3d8_UPD:
case ARM::VST3d16_UPD:
case ARM::VST3d32_UPD:
@@ -2264,34 +2290,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
// Second input register
switch (Inst.getOpcode()) {
- case ARM::VST1d8T:
- case ARM::VST1d16T:
- case ARM::VST1d32T:
- case ARM::VST1d64T:
- case ARM::VST1d8T_UPD:
- case ARM::VST1d16T_UPD:
- case ARM::VST1d32T_UPD:
- case ARM::VST1d64T_UPD:
- case ARM::VST1d8Q:
- case ARM::VST1d16Q:
- case ARM::VST1d32Q:
- case ARM::VST1d64Q:
- case ARM::VST1d8Q_UPD:
- case ARM::VST1d16Q_UPD:
- case ARM::VST1d32Q_UPD:
- case ARM::VST1d64Q_UPD:
- case ARM::VST2d8:
- case ARM::VST2d16:
- case ARM::VST2d32:
- case ARM::VST2d8_UPD:
- case ARM::VST2d16_UPD:
- case ARM::VST2d32_UPD:
- case ARM::VST2q8:
- case ARM::VST2q16:
- case ARM::VST2q32:
- case ARM::VST2q8_UPD:
- case ARM::VST2q16_UPD:
- case ARM::VST2q32_UPD:
case ARM::VST3d8:
case ARM::VST3d16:
case ARM::VST3d32:
@@ -2307,12 +2305,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder)))
return MCDisassembler::Fail;
break;
- case ARM::VST2b8:
- case ARM::VST2b16:
- case ARM::VST2b32:
- case ARM::VST2b8_UPD:
- case ARM::VST2b16_UPD:
- case ARM::VST2b32_UPD:
case ARM::VST3q8:
case ARM::VST3q16:
case ARM::VST3q32:
@@ -2334,28 +2326,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
// Third input register
switch (Inst.getOpcode()) {
- case ARM::VST1d8T:
- case ARM::VST1d16T:
- case ARM::VST1d32T:
- case ARM::VST1d64T:
- case ARM::VST1d8T_UPD:
- case ARM::VST1d16T_UPD:
- case ARM::VST1d32T_UPD:
- case ARM::VST1d64T_UPD:
- case ARM::VST1d8Q:
- case ARM::VST1d16Q:
- case ARM::VST1d32Q:
- case ARM::VST1d64Q:
- case ARM::VST1d8Q_UPD:
- case ARM::VST1d16Q_UPD:
- case ARM::VST1d32Q_UPD:
- case ARM::VST1d64Q_UPD:
- case ARM::VST2q8:
- case ARM::VST2q16:
- case ARM::VST2q32:
- case ARM::VST2q8_UPD:
- case ARM::VST2q16_UPD:
- case ARM::VST2q32_UPD:
case ARM::VST3d8:
case ARM::VST3d16:
case ARM::VST3d32:
@@ -2392,20 +2362,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
// Fourth input register
switch (Inst.getOpcode()) {
- case ARM::VST1d8Q:
- case ARM::VST1d16Q:
- case ARM::VST1d32Q:
- case ARM::VST1d64Q:
- case ARM::VST1d8Q_UPD:
- case ARM::VST1d16Q_UPD:
- case ARM::VST1d32Q_UPD:
- case ARM::VST1d64Q_UPD:
- case ARM::VST2q8:
- case ARM::VST2q16:
- case ARM::VST2q32:
- case ARM::VST2q8_UPD:
- case ARM::VST2q16_UPD:
- case ARM::VST2q32_UPD:
case ARM::VST4d8:
case ARM::VST4d16:
case ARM::VST4d32:
@@ -2441,16 +2397,11 @@ static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn,
unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
unsigned align = fieldFromInstruction32(Insn, 4, 1);
unsigned size = fieldFromInstruction32(Insn, 6, 2);
- unsigned regs = fieldFromInstruction32(Insn, 5, 1) + 1;
align *= (1 << size);
if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
- if (regs == 2) {
- if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder)))
- return MCDisassembler::Fail;
- }
if (Rm != 0xF) {
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
@@ -2460,12 +2411,12 @@ static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::CreateImm(align));
- if (Rm == 0xD)
- Inst.addOperand(MCOperand::CreateReg(0));
- else if (Rm != 0xF) {
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
- return MCDisassembler::Fail;
- }
+ // The fixed offset post-increment encodes Rm == 0xd. The no-writeback
+ // variant encodes Rm == 0xf. Anything else is a register offset post-
+ // increment and we need to add the register operand to the instruction.
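+  // Illustratively: "vld1.8 {d0[]}, [r1]" leaves Rm == 0xf,
+  // "vld1.8 {d0[]}, [r1]!" encodes Rm == 0xd, and
+  // "vld1.8 {d0[]}, [r1], r2" carries r2 in Rm.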
+ if (Rm != 0xD && Rm != 0xF &&
+ !Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
return S;
}
@@ -2693,7 +2644,6 @@ static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn,
unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
Rm |= fieldFromInstruction32(Insn, 5, 1) << 4;
unsigned op = fieldFromInstruction32(Insn, 6, 1);
- unsigned length = fieldFromInstruction32(Insn, 8, 2) + 1;
if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
@@ -2702,10 +2652,8 @@ static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail; // Writeback
}
- for (unsigned i = 0; i < length; ++i) {
- if (!Check(S, DecodeDPRRegisterClass(Inst, (Rn+i)%32, Address, Decoder)))
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
- }
if (!Check(S, DecodeDPRRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
@@ -4138,4 +4086,3 @@ static DecodeStatus DecodeVCVTQ(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-
diff --git a/lib/Target/ARM/Disassembler/CMakeLists.txt b/lib/Target/ARM/Disassembler/CMakeLists.txt
index da87751..9de6e5c 100644
--- a/lib/Target/ARM/Disassembler/CMakeLists.txt
+++ b/lib/Target/ARM/Disassembler/CMakeLists.txt
@@ -11,11 +11,3 @@ set_property(
)
endif()
add_dependencies(LLVMARMDisassembler ARMCommonTableGen)
-
-add_llvm_library_dependencies(LLVMARMDisassembler
- LLVMARMCodeGen
- LLVMARMDesc
- LLVMARMInfo
- LLVMMC
- LLVMSupport
- )
diff --git a/lib/Target/ARM/Disassembler/LLVMBuild.txt b/lib/Target/ARM/Disassembler/LLVMBuild.txt
index baa9bc3..94075a9 100644
--- a/lib/Target/ARM/Disassembler/LLVMBuild.txt
+++ b/lib/Target/ARM/Disassembler/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = ARMDisassembler
parent = ARM
required_libraries = ARMCodeGen ARMDesc ARMInfo MC Support
add_to_library_groups = ARM
-
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 6c6c021..662097a 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -1029,3 +1029,29 @@ void ARMInstPrinter::printVectorListFour(const MCInst *MI, unsigned OpNum,
<< getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", "
<< getRegisterName(MI->getOperand(OpNum).getReg() + 3) << "}";
}
+
+void ARMInstPrinter::printVectorListOneAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[]}";
+}
+
+void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
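+  // e.g. an operand of D3 prints as "{d3[], d4[]}".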
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[]}";
+}
+
+void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+  // Normally, it's not safe to use register enum values directly with
+  // addition to get other registers in the list, but for VFP registers,
+  // the sort order is guaranteed because they're all of the form D<n>.
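+  // e.g. an operand of D3 prints as "{d3, d5}".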
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "}";
+}
+
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index 3f38f1a..05db2d2 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -133,6 +133,12 @@ public:
void printVectorListTwo(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVectorListThree(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVectorListFour(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVectorListOneAllLanes(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListTwoAllLanes(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
};
} // end namespace llvm
diff --git a/lib/Target/ARM/InstPrinter/CMakeLists.txt b/lib/Target/ARM/InstPrinter/CMakeLists.txt
index fa0b495..e2d4819 100644
--- a/lib/Target/ARM/InstPrinter/CMakeLists.txt
+++ b/lib/Target/ARM/InstPrinter/CMakeLists.txt
@@ -5,8 +5,3 @@ add_llvm_library(LLVMARMAsmPrinter
)
add_dependencies(LLVMARMAsmPrinter ARMCommonTableGen)
-
-add_llvm_library_dependencies(LLVMARMAsmPrinter
- LLVMMC
- LLVMSupport
- )
diff --git a/lib/Target/ARM/InstPrinter/LLVMBuild.txt b/lib/Target/ARM/InstPrinter/LLVMBuild.txt
index b34aab4..6f4fa36 100644
--- a/lib/Target/ARM/InstPrinter/LLVMBuild.txt
+++ b/lib/Target/ARM/InstPrinter/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = ARMAsmPrinter
parent = ARM
required_libraries = MC Support
add_to_library_groups = ARM
-
diff --git a/lib/Target/ARM/LLVMBuild.txt b/lib/Target/ARM/LLVMBuild.txt
index 9082539..fd4b3a3 100644
--- a/lib/Target/ARM/LLVMBuild.txt
+++ b/lib/Target/ARM/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo
+
[component_0]
type = TargetGroup
name = ARM
@@ -30,4 +33,3 @@ name = ARMCodeGen
parent = ARM
required_libraries = ARMAsmPrinter ARMDesc ARMInfo Analysis AsmPrinter CodeGen Core MC SelectionDAG Support Target
add_to_library_groups = ARM
-
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 62d04c4..bf1f0e8 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -102,6 +102,11 @@ public:
bool MayNeedRelaxation(const MCInst &Inst) const;
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const;
+
void RelaxInstruction(const MCInst &Inst, MCInst &Res) const;
bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
@@ -124,14 +129,49 @@ public:
};
} // end anonymous namespace
+static unsigned getRelaxedOpcode(unsigned Op) {
+ switch (Op) {
+ default: return Op;
+ case ARM::tBcc: return ARM::t2Bcc;
+ }
+}
+
bool ARMAsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
- // FIXME: Thumb targets, different move constant targets..
+ if (getRelaxedOpcode(Inst.getOpcode()) != Inst.getOpcode())
+ return true;
return false;
}
+bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const {
+ // Relaxing tBcc to t2Bcc. tBcc has a signed 9-bit displacement with the
+ // low bit being an implied zero. There's an implied +4 offset for the
+ // branch, so we adjust the other way here to determine what's
+ // encodable.
+ //
+  // Relax if the adjusted offset is outside the encodable [-256, 254]
+  // range (a signed imm8 scaled by 2).
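+  // For instance, a fixup value of 258 yields an offset of 254 and still
+  // fits, while a value of 260 (offset 256) forces relaxation to t2Bcc.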
+ int64_t Offset = int64_t(Value) - 4;
+ return Offset > 254 || Offset < -256;
+}
+
void ARMAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
- assert(0 && "ARMAsmBackend::RelaxInstruction() unimplemented");
- return;
+ unsigned RelaxedOp = getRelaxedOpcode(Inst.getOpcode());
+
+ // Sanity check w/ diagnostic if we get here w/ a bogus instruction.
+ if (RelaxedOp == Inst.getOpcode()) {
+ SmallString<256> Tmp;
+ raw_svector_ostream OS(Tmp);
+ Inst.dump_pretty(OS);
+ OS << "\n";
+ report_fatal_error("unexpected instruction to relax: " + OS.str());
+ }
+
+ // The instructions we're relaxing have (so far) the same operands.
+ // We just need to update to the proper opcode.
+ Res = Inst;
+ Res.setOpcode(RelaxedOp);
}
bool ARMAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 865c3e2..c38a882 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -1412,7 +1412,7 @@ getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups) const {
const MCOperand &MO = MI.getOperand(Op);
if (MO.getReg() == 0) return 0x0D;
- return MO.getReg();
+ return getARMRegisterNumbering(MO.getReg());
}
unsigned ARMMCCodeEmitter::
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 352c73e..f394b4f 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -16,6 +16,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCMachOSymbolFlags.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/ErrorHandling.h"
@@ -178,9 +179,16 @@ RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
case ARM::fixup_arm_movt_hi16:
case ARM::fixup_arm_movt_hi16_pcrel:
MovtBit = 1;
+ // The thumb bit shouldn't be set in the 'other-half' bit of the
+ // relocation, but it will be set in FixedValue if the base symbol
+ // is a thumb function. Clear it out here.
+ if (A_SD->getFlags() & SF_ThumbFunc)
+ FixedValue &= 0xfffffffe;
break;
case ARM::fixup_t2_movt_hi16:
case ARM::fixup_t2_movt_hi16_pcrel:
+ if (A_SD->getFlags() & SF_ThumbFunc)
+ FixedValue &= 0xfffffffe;
MovtBit = 1;
// Fallthrough
case ARM::fixup_t2_movw_lo16:
@@ -189,7 +197,6 @@ RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
break;
}
-
if (Type == macho::RIT_ARM_HalfDifference) {
uint32_t OtherHalf = MovtBit
? (FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16);
diff --git a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
index f529314..f2cf78a 100644
--- a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
@@ -10,10 +10,3 @@ add_dependencies(LLVMARMDesc ARMCommonTableGen)
# Hack: we need to include 'main' target directory to grab private headers
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
-
-add_llvm_library_dependencies(LLVMARMDesc
- LLVMARMAsmPrinter
- LLVMARMInfo
- LLVMMC
- LLVMSupport
- )
diff --git a/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt b/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt
index 46b11c7..2a7fe61 100644
--- a/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = ARMDesc
parent = ARM
required_libraries = ARMAsmPrinter ARMInfo MC Support
add_to_library_groups = ARM
-
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index 2df0053..000a37f 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -139,7 +139,7 @@ bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
// FIXME: Detect integer instructions properly.
const MCInstrDesc &MCID = MI->getDesc();
unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
- if (MCID.mayStore())
+ if (MI->mayStore())
return false;
unsigned Opcode = MCID.getOpcode();
if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
@@ -222,14 +222,14 @@ MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
const MCInstrDesc &MCID2 = TII->get(AddSubOpc);
unsigned TmpReg = MRI->createVirtualRegister(TII->getRegClass(MCID1, 0, TRI));
- MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), MCID1, TmpReg)
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID1, TmpReg)
.addReg(Src1Reg, getKillRegState(Src1Kill))
.addReg(Src2Reg, getKillRegState(Src2Kill));
if (HasLane)
MIB.addImm(LaneImm);
MIB.addImm(Pred).addReg(PredReg);
- MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), MCID2)
+ MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID2)
.addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead));
if (NegAcc) {
@@ -274,7 +274,7 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
}
const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.isBarrier()) {
+ if (MI->isBarrier()) {
clearStack();
Skip = 0;
++MII;
diff --git a/lib/Target/ARM/TargetInfo/CMakeLists.txt b/lib/Target/ARM/TargetInfo/CMakeLists.txt
index 8b38b13..533e747 100644
--- a/lib/Target/ARM/TargetInfo/CMakeLists.txt
+++ b/lib/Target/ARM/TargetInfo/CMakeLists.txt
@@ -5,9 +5,3 @@ add_llvm_library(LLVMARMInfo
)
add_dependencies(LLVMARMInfo ARMCommonTableGen)
-
-add_llvm_library_dependencies(LLVMARMInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
diff --git a/lib/Target/ARM/TargetInfo/LLVMBuild.txt b/lib/Target/ARM/TargetInfo/LLVMBuild.txt
index 046c1fc..a07a940 100644
--- a/lib/Target/ARM/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/ARM/TargetInfo/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = ARMInfo
parent = ARM
required_libraries = MC Support Target
add_to_library_groups = ARM
-
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index e8ed482..e61c0a7 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -643,14 +643,13 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
assert(Offset && "This code isn't needed if offset already handled!");
unsigned Opcode = MI.getOpcode();
- const MCInstrDesc &Desc = MI.getDesc();
// Remove predicate first.
int PIdx = MI.findFirstPredOperandIdx();
if (PIdx != -1)
removeOperands(MI, PIdx);
- if (Desc.mayLoad()) {
+ if (MI.mayLoad()) {
// Use the destination register to materialize sp + offset.
unsigned TmpReg = MI.getOperand(0).getReg();
bool UseRR = false;
@@ -673,7 +672,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Use [reg, reg] addrmode. Replace the immediate operand w/ the frame
// register. The offset is already handled in the vreg value.
MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false);
- } else if (Desc.mayStore()) {
+ } else if (MI.mayStore()) {
VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass);
bool UseRR = false;
@@ -699,7 +698,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
// Add predicate back if it's needed.
- if (MI.getDesc().isPredicable()) {
+ if (MI.isPredicable()) {
MachineInstrBuilder MIB(&MI);
AddDefaultPred(MIB);
}
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index b627400..55b4d30 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -13,6 +13,7 @@
#include "Thumb2InstrInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
@@ -141,7 +142,7 @@ Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI,
// rsb r2, 0
//
const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.hasOptionalDef() &&
+ if (MI->hasOptionalDef() &&
MI->getOperand(MCID.getNumOperands() - 1).getReg() == ARM::CPSR)
return false;
@@ -198,7 +199,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
// Branches, including tricky ones like LDM_RET, need to end an IT
// block so check the instruction we just put in the block.
for (; MBBI != E && Pos &&
- (!MI->getDesc().isBranch() && !MI->getDesc().isReturn()) ; ++MBBI) {
+ (!MI->isBranch() && !MI->isReturn()) ; ++MBBI) {
if (MBBI->isDebugValue())
continue;
@@ -237,6 +238,9 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
// Last instruction in IT block kills ITSTATE.
LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill();
+ // Finalize the bundle.
+ FinalizeBundle(MBB, InsertPos.getInstrIterator(), LastITMI);
+
Modified = true;
++NumITs;
}
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index e5fc8b4..e206288 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -452,7 +452,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
// Add the 16-bit load / store instruction.
DebugLoc dl = MI->getDebugLoc();
- MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Opc));
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc));
if (!isLdStMul) {
MIB.addOperand(MI->getOperand(0));
MIB.addOperand(MI->getOperand(1));
@@ -478,7 +478,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
- MBB.erase(MI);
+ MBB.erase_instr(MI);
++NumLdSts;
return true;
}
@@ -513,7 +513,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR)
return false;
- MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(),
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(),
TII->get(ARM::tADDrSPi))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
@@ -525,7 +525,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " <<*MIB);
- MBB.erase(MI);
+ MBB.erase_instr(MI);
++NumNarrows;
return true;
}
@@ -533,8 +533,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
if (Entry.LowRegs1 && !VerifyLowRegs(MI))
return false;
- const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.mayLoad() || MCID.mayStore())
+ if (MI->mayLoad() || MI->mayStore())
return ReduceLoadStore(MBB, MI, Entry);
switch (Opc) {
@@ -654,7 +653,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
// Add the 16-bit instruction.
DebugLoc dl = MI->getDebugLoc();
- MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID);
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
MIB.addOperand(MI->getOperand(0));
if (NewMCID.hasOptionalDef()) {
if (HasCC)
@@ -678,7 +677,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
- MBB.erase(MI);
+ MBB.erase_instr(MI);
++Num2Addrs;
return true;
}
@@ -745,7 +744,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
// Add the 16-bit instruction.
DebugLoc dl = MI->getDebugLoc();
- MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID);
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
MIB.addOperand(MI->getOperand(0));
if (NewMCID.hasOptionalDef()) {
if (HasCC)
@@ -785,7 +784,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
- MBB.erase(MI);
+ MBB.erase_instr(MI);
++NumNarrows;
return true;
}
@@ -830,16 +829,22 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
// Yes, CPSR could be livein.
bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
MachineInstr *CPSRDef = 0;
+ MachineInstr *BundleMI = 0;
// If this BB loops back to itself, conservatively avoid narrowing the
// first instruction that does partial flag update.
bool IsSelfLoop = MBB.isSuccessor(&MBB);
- MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
- MachineBasicBlock::iterator NextMII;
+ MachineBasicBlock::instr_iterator MII = MBB.instr_begin(), E = MBB.instr_end();
+ MachineBasicBlock::instr_iterator NextMII;
for (; MII != E; MII = NextMII) {
NextMII = llvm::next(MII);
MachineInstr *MI = &*MII;
+ if (MI->isBundle()) {
+ BundleMI = MI;
+ continue;
+ }
+
LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);
unsigned Opcode = MI->getOpcode();
@@ -850,7 +855,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
if (Entry.Special) {
if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
Modified = true;
- MachineBasicBlock::iterator I = prior(NextMII);
+ MachineBasicBlock::instr_iterator I = prior(NextMII);
MI = &*I;
}
goto ProcessNext;
@@ -860,7 +865,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
if (Entry.NarrowOpc2 &&
ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
Modified = true;
- MachineBasicBlock::iterator I = prior(NextMII);
+ MachineBasicBlock::instr_iterator I = prior(NextMII);
MI = &*I;
goto ProcessNext;
}
@@ -869,15 +874,24 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
if (Entry.NarrowOpc1 &&
ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
Modified = true;
- MachineBasicBlock::iterator I = prior(NextMII);
+ MachineBasicBlock::instr_iterator I = prior(NextMII);
MI = &*I;
}
}
ProcessNext:
+ if (LiveCPSR &&
+ NextMII != E && MI->isInsideBundle() && !NextMII->isInsideBundle() &&
+ BundleMI->killsRegister(ARM::CPSR))
+ // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
+ // marker is only on the BUNDLE instruction. Process the BUNDLE
+ // instruction as we finish with the bundled instruction to work around
+ // the inconsistency.
+ LiveCPSR = false;
+
bool DefCPSR = false;
LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
- if (MI->getDesc().isCall()) {
+ if (MI->isCall()) {
// Calls don't really set CPSR.
CPSRDef = 0;
IsSelfLoop = false;