aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target/ARM/ARMBaseInstrInfo.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/ARM/ARMBaseInstrInfo.cpp')
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp349
1 files changed, 227 insertions, 122 deletions
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 29033e5..e2f0d7d 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -683,7 +683,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Handle register classes that require multiple instructions.
unsigned BeginIdx = 0;
unsigned SubRegs = 0;
- unsigned Spacing = 1;
+ int Spacing = 1;
// Use VORRq when possible.
if (ARM::QQPRRegClass.contains(DestReg, SrcReg))
@@ -705,27 +705,38 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg))
Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4, Spacing = 2;
- if (Opc) {
- const TargetRegisterInfo *TRI = &getRegisterInfo();
- MachineInstrBuilder Mov;
- for (unsigned i = 0; i != SubRegs; ++i) {
- unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i*Spacing);
- unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i*Spacing);
- assert(Dst && Src && "Bad sub-register");
- Mov = AddDefaultPred(BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst)
- .addReg(Src));
- // VORR takes two source operands.
- if (Opc == ARM::VORRq)
- Mov.addReg(Src);
- }
- // Add implicit super-register defs and kills to the last instruction.
- Mov->addRegisterDefined(DestReg, TRI);
- if (KillSrc)
- Mov->addRegisterKilled(SrcReg, TRI);
- return;
- }
+ assert(Opc && "Impossible reg-to-reg copy");
- llvm_unreachable("Impossible reg-to-reg copy");
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ MachineInstrBuilder Mov;
+
+ // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
+ if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
+ BeginIdx = BeginIdx + ((SubRegs-1)*Spacing);
+ Spacing = -Spacing;
+ }
+#ifndef NDEBUG
+ SmallSet<unsigned, 4> DstRegs;
+#endif
+ for (unsigned i = 0; i != SubRegs; ++i) {
+ unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i*Spacing);
+ unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i*Spacing);
+ assert(Dst && Src && "Bad sub-register");
+#ifndef NDEBUG
+ assert(!DstRegs.count(Src) && "destructive vector copy");
+ DstRegs.insert(Dst);
+#endif
+ Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst)
+ .addReg(Src);
+ // VORR takes two source operands.
+ if (Opc == ARM::VORRq)
+ Mov.addReg(Src);
+ Mov = AddDefaultPred(Mov);
+ }
+ // Add implicit super-register defs and kills to the last instruction.
+ Mov->addRegisterDefined(DestReg, TRI);
+ if (KillSrc)
+ Mov->addRegisterKilled(SrcReg, TRI);
}
static const
@@ -1569,16 +1580,20 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
}
/// Identify instructions that can be folded into a MOVCC instruction, and
-/// return the corresponding opcode for the predicated pseudo-instruction.
-static unsigned canFoldIntoMOVCC(unsigned Reg, MachineInstr *&MI,
- const MachineRegisterInfo &MRI) {
+/// return the defining instruction.
+static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
+ const MachineRegisterInfo &MRI,
+ const TargetInstrInfo *TII) {
if (!TargetRegisterInfo::isVirtualRegister(Reg))
return 0;
if (!MRI.hasOneNonDBGUse(Reg))
return 0;
- MI = MRI.getVRegDef(Reg);
+ MachineInstr *MI = MRI.getVRegDef(Reg);
if (!MI)
return 0;
+ // MI is folded into the MOVCC by predicating it.
+ if (!MI->isPredicable())
+ return 0;
// Check if MI has any non-dead defs or physreg uses. This also detects
// predicated instructions which will be reading CPSR.
for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
@@ -1588,55 +1603,18 @@ static unsigned canFoldIntoMOVCC(unsigned Reg, MachineInstr *&MI,
return 0;
if (!MO.isReg())
continue;
+ // MI can't have any tied operands, that would conflict with predication.
+ if (MO.isTied())
+ return 0;
if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
return 0;
if (MO.isDef() && !MO.isDead())
return 0;
}
- switch (MI->getOpcode()) {
- default: return 0;
- case ARM::ANDri: return ARM::ANDCCri;
- case ARM::ANDrr: return ARM::ANDCCrr;
- case ARM::ANDrsi: return ARM::ANDCCrsi;
- case ARM::ANDrsr: return ARM::ANDCCrsr;
- case ARM::t2ANDri: return ARM::t2ANDCCri;
- case ARM::t2ANDrr: return ARM::t2ANDCCrr;
- case ARM::t2ANDrs: return ARM::t2ANDCCrs;
- case ARM::EORri: return ARM::EORCCri;
- case ARM::EORrr: return ARM::EORCCrr;
- case ARM::EORrsi: return ARM::EORCCrsi;
- case ARM::EORrsr: return ARM::EORCCrsr;
- case ARM::t2EORri: return ARM::t2EORCCri;
- case ARM::t2EORrr: return ARM::t2EORCCrr;
- case ARM::t2EORrs: return ARM::t2EORCCrs;
- case ARM::ORRri: return ARM::ORRCCri;
- case ARM::ORRrr: return ARM::ORRCCrr;
- case ARM::ORRrsi: return ARM::ORRCCrsi;
- case ARM::ORRrsr: return ARM::ORRCCrsr;
- case ARM::t2ORRri: return ARM::t2ORRCCri;
- case ARM::t2ORRrr: return ARM::t2ORRCCrr;
- case ARM::t2ORRrs: return ARM::t2ORRCCrs;
-
- // ARM ADD/SUB
- case ARM::ADDri: return ARM::ADDCCri;
- case ARM::ADDrr: return ARM::ADDCCrr;
- case ARM::ADDrsi: return ARM::ADDCCrsi;
- case ARM::ADDrsr: return ARM::ADDCCrsr;
- case ARM::SUBri: return ARM::SUBCCri;
- case ARM::SUBrr: return ARM::SUBCCrr;
- case ARM::SUBrsi: return ARM::SUBCCrsi;
- case ARM::SUBrsr: return ARM::SUBCCrsr;
-
- // Thumb2 ADD/SUB
- case ARM::t2ADDri: return ARM::t2ADDCCri;
- case ARM::t2ADDri12: return ARM::t2ADDCCri12;
- case ARM::t2ADDrr: return ARM::t2ADDCCrr;
- case ARM::t2ADDrs: return ARM::t2ADDCCrs;
- case ARM::t2SUBri: return ARM::t2SUBCCri;
- case ARM::t2SUBri12: return ARM::t2SUBCCri12;
- case ARM::t2SUBrr: return ARM::t2SUBCCrr;
- case ARM::t2SUBrs: return ARM::t2SUBCCrs;
- }
+ bool DontMoveAcrossStores = true;
+ if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ 0, DontMoveAcrossStores))
+ return 0;
+ return MI;
}
bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI,
@@ -1665,19 +1643,18 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
"Unknown select instruction");
const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
- MachineInstr *DefMI = 0;
- unsigned Opc = canFoldIntoMOVCC(MI->getOperand(2).getReg(), DefMI, MRI);
- bool Invert = !Opc;
- if (!Opc)
- Opc = canFoldIntoMOVCC(MI->getOperand(1).getReg(), DefMI, MRI);
- if (!Opc)
+ MachineInstr *DefMI = canFoldIntoMOVCC(MI->getOperand(2).getReg(), MRI, this);
+ bool Invert = !DefMI;
+ if (!DefMI)
+ DefMI = canFoldIntoMOVCC(MI->getOperand(1).getReg(), MRI, this);
+ if (!DefMI)
return 0;
// Create a new predicated version of DefMI.
// Rfalse is the first use.
MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
- get(Opc), MI->getOperand(0).getReg())
- .addOperand(MI->getOperand(Invert ? 2 : 1));
+ DefMI->getDesc(),
+ MI->getOperand(0).getReg());
// Copy all the DefMI operands, excluding its (null) predicate.
const MCInstrDesc &DefDesc = DefMI->getDesc();
@@ -1696,6 +1673,15 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
if (NewMI->hasOptionalDef())
AddDefaultCC(NewMI);
+ // The output register value when the predicate is false is an implicit
+ // register operand tied to the first def.
+ // The tie makes the register allocator ensure the FalseReg is allocated the
+ // same register as operand 0.
+ MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1);
+ FalseReg.setImplicit();
+ NewMI->addOperand(FalseReg);
+ NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
+
// The caller will erase MI, but not DefMI.
DefMI->eraseFromParent();
return NewMI;
@@ -2042,13 +2028,14 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
// Masked compares sometimes use the same register as the corresponding 'and'.
if (CmpMask != ~0) {
- if (!isSuitableForMask(MI, SrcReg, CmpMask, false)) {
+ if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(MI)) {
MI = 0;
for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg),
UE = MRI->use_end(); UI != UE; ++UI) {
if (UI->getParent() != CmpInstr->getParent()) continue;
MachineInstr *PotentialAND = &*UI;
- if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true))
+ if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
+ isPredicated(PotentialAND))
continue;
MI = PotentialAND;
break;
@@ -2114,6 +2101,10 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
// The single candidate is called MI.
if (!MI) MI = Sub;
+ // We can't use a predicated instruction - it doesn't always write the flags.
+ if (isPredicated(MI))
+ return false;
+
switch (MI->getOpcode()) {
default: break;
case ARM::RSBrr:
@@ -2220,6 +2211,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
// Toggle the optional operand to CPSR.
MI->getOperand(5).setReg(ARM::CPSR);
MI->getOperand(5).setIsDef(true);
+ assert(!isPredicated(MI) && "Can't use flags from predicated instruction");
CmpInstr->eraseFromParent();
// Modify the condition code of operands in OperandsToUpdate.
@@ -3366,7 +3358,8 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
// converted.
if (Subtarget.isCortexA9() && !isPredicated(MI) &&
(MI->getOpcode() == ARM::VMOVRS ||
- MI->getOpcode() == ARM::VMOVSR))
+ MI->getOpcode() == ARM::VMOVSR ||
+ MI->getOpcode() == ARM::VMOVS))
return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
// No other instructions can be swizzled, so just determine their domain.
@@ -3386,13 +3379,28 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
return std::make_pair(ExeGeneric, 0);
}
+static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
+ unsigned SReg, unsigned &Lane) {
+ unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
+ Lane = 0;
+
+ if (DReg != ARM::NoRegister)
+ return DReg;
+
+ Lane = 1;
+ DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
+
+ assert(DReg && "S-register with no D super-register?");
+ return DReg;
+}
+
+
void
ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
unsigned DstReg, SrcReg, DReg;
unsigned Lane;
MachineInstrBuilder MIB(MI);
const TargetRegisterInfo *TRI = &getRegisterInfo();
- bool isKill;
switch (MI->getOpcode()) {
default:
llvm_unreachable("cannot handle opcode!");
@@ -3403,78 +3411,175 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
// Zap the predicate operands.
assert(!isPredicated(MI) && "Cannot predicate a VORRd");
- MI->RemoveOperand(3);
- MI->RemoveOperand(2);
- // Change to a VORRd which requires two identical use operands.
- MI->setDesc(get(ARM::VORRd));
+ // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
- // Add the extra source operand and new predicates.
- // This will go before any implicit ops.
- AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1)));
+ for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
+ MI->RemoveOperand(i-1);
+
+ // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
+ MI->setDesc(get(ARM::VORRd));
+ AddDefaultPred(MIB.addReg(DstReg, RegState::Define)
+ .addReg(SrcReg)
+ .addReg(SrcReg));
break;
case ARM::VMOVRS:
if (Domain != ExeNEON)
break;
assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
+ // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
DstReg = MI->getOperand(0).getReg();
SrcReg = MI->getOperand(1).getReg();
- DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_0, &ARM::DPRRegClass);
- Lane = 0;
- if (DReg == ARM::NoRegister) {
- DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_1, &ARM::DPRRegClass);
- Lane = 1;
- assert(DReg && "S-register with no D super-register?");
- }
+ for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
+ MI->RemoveOperand(i-1);
- MI->RemoveOperand(3);
- MI->RemoveOperand(2);
- MI->RemoveOperand(1);
+ DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
+ // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
+ // Note that DSrc has been widened and the other lane may be undef, which
+ // contaminates the entire register.
MI->setDesc(get(ARM::VGETLNi32));
- MIB.addReg(DReg);
- MIB.addImm(Lane);
+ AddDefaultPred(MIB.addReg(DstReg, RegState::Define)
+ .addReg(DReg, RegState::Undef)
+ .addImm(Lane));
- MIB->getOperand(1).setIsUndef();
+ // The old source should be an implicit use, otherwise we might think it
+ // was dead before here.
MIB.addReg(SrcReg, RegState::Implicit);
-
- AddDefaultPred(MIB);
break;
case ARM::VMOVSR:
if (Domain != ExeNEON)
break;
assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
+ // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
DstReg = MI->getOperand(0).getReg();
SrcReg = MI->getOperand(1).getReg();
- DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_0, &ARM::DPRRegClass);
- Lane = 0;
- if (DReg == ARM::NoRegister) {
- DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_1, &ARM::DPRRegClass);
- Lane = 1;
- assert(DReg && "S-register with no D super-register?");
- }
- isKill = MI->getOperand(0).isKill();
- MI->RemoveOperand(3);
- MI->RemoveOperand(2);
- MI->RemoveOperand(1);
- MI->RemoveOperand(0);
+ DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
+ // If we insert both a novel <def> and an <undef> on the DReg, we break
+ // any existing dependency chain on the unused lane. Either already being
+ // present means this instruction is in that chain anyway so we can make
+ // the transformation.
+ if (!MI->definesRegister(DReg, TRI) && !MI->readsRegister(DReg, TRI))
+ break;
+
+ for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
+ MI->RemoveOperand(i-1);
+
+ // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
+ // Again DDst may be undefined at the beginning of this instruction.
MI->setDesc(get(ARM::VSETLNi32));
- MIB.addReg(DReg, RegState::Define);
- MIB.addReg(DReg, RegState::Undef);
- MIB.addReg(SrcReg);
- MIB.addImm(Lane);
+ MIB.addReg(DReg, RegState::Define)
+ .addReg(DReg, getUndefRegState(!MI->readsRegister(DReg, TRI)))
+ .addReg(SrcReg)
+ .addImm(Lane);
+ AddDefaultPred(MIB);
- if (isKill)
- MIB->addRegisterKilled(DstReg, TRI, true);
- MIB->addRegisterDefined(DstReg, TRI);
+ // The narrower destination must be marked as set to keep previous chains
+ // in place.
+ MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
+ break;
+ case ARM::VMOVS: {
+ if (Domain != ExeNEON)
+ break;
+ // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+
+ unsigned DstLane = 0, SrcLane = 0, DDst, DSrc;
+ DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
+ DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
+
+ // If we insert both a novel <def> and an <undef> on the DReg, we break
+ // any existing dependency chain on the unused lane. Either already being
+ // present means this instruction is in that chain anyway so we can make
+ // the transformation.
+ if (!MI->definesRegister(DDst, TRI) && !MI->readsRegister(DDst, TRI))
+ break;
+
+ for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
+ MI->RemoveOperand(i-1);
+
+ if (DSrc == DDst) {
+ // Destination can be:
+ // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
+ MI->setDesc(get(ARM::VDUPLN32d));
+ MIB.addReg(DDst, RegState::Define)
+ .addReg(DDst, getUndefRegState(!MI->readsRegister(DDst, TRI)))
+ .addImm(SrcLane);
+ AddDefaultPred(MIB);
+
+ // Neither the source or the destination are naturally represented any
+ // more, so add them in manually.
+ MIB.addReg(DstReg, RegState::Implicit | RegState::Define);
+ MIB.addReg(SrcReg, RegState::Implicit);
+ break;
+ }
+
+ // In general there's no single instruction that can perform an S <-> S
+ // move in NEON space, but a pair of VEXT instructions *can* do the
+ // job. It turns out that the VEXTs needed will only use DSrc once, with
+ // the position based purely on the combination of lane-0 and lane-1
+ // involved. For example
+ // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1
+ // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1
+ // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1
+ // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1
+ //
+ // Pattern of the MachineInstrs is:
+ // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
+ MachineInstrBuilder NewMIB;
+ NewMIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ get(ARM::VEXTd32), DDst);
+
+ // On the first instruction, both DSrc and DDst may be <undef> if present.
+ // Specifically when the original instruction didn't have them as an
+ // <imp-use>.
+ unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
+ bool CurUndef = !MI->readsRegister(CurReg, TRI);
+ NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
+
+ CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
+ CurUndef = !MI->readsRegister(CurReg, TRI);
+ NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
+
+ NewMIB.addImm(1);
+ AddDefaultPred(NewMIB);
+
+ if (SrcLane == DstLane)
+ NewMIB.addReg(SrcReg, RegState::Implicit);
+
+ MI->setDesc(get(ARM::VEXTd32));
+ MIB.addReg(DDst, RegState::Define);
+
+ // On the second instruction, DDst has definitely been defined above, so
+ // it is not <undef>. DSrc, if present, can be <undef> as above.
+ CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
+ CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI);
+ MIB.addReg(CurReg, getUndefRegState(CurUndef));
+
+ CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
+ CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI);
+ MIB.addReg(CurReg, getUndefRegState(CurUndef));
+
+ MIB.addImm(1);
AddDefaultPred(MIB);
+
+ if (SrcLane != DstLane)
+ MIB.addReg(SrcReg, RegState::Implicit);
+
+ // As before, the original destination is no longer represented, add it
+ // implicitly.
+ MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
break;
+ }
}
}