diff options
Diffstat (limited to 'lib/Target/ARM/ARMBaseInstrInfo.cpp')
-rw-r--r-- | lib/Target/ARM/ARMBaseInstrInfo.cpp | 349 |
1 files changed, 227 insertions, 122 deletions
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 29033e5..e2f0d7d 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -683,7 +683,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Handle register classes that require multiple instructions. unsigned BeginIdx = 0; unsigned SubRegs = 0; - unsigned Spacing = 1; + int Spacing = 1; // Use VORRq when possible. if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) @@ -705,27 +705,38 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4, Spacing = 2; - if (Opc) { - const TargetRegisterInfo *TRI = &getRegisterInfo(); - MachineInstrBuilder Mov; - for (unsigned i = 0; i != SubRegs; ++i) { - unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i*Spacing); - unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i*Spacing); - assert(Dst && Src && "Bad sub-register"); - Mov = AddDefaultPred(BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst) - .addReg(Src)); - // VORR takes two source operands. - if (Opc == ARM::VORRq) - Mov.addReg(Src); - } - // Add implicit super-register defs and kills to the last instruction. - Mov->addRegisterDefined(DestReg, TRI); - if (KillSrc) - Mov->addRegisterKilled(SrcReg, TRI); - return; - } + assert(Opc && "Impossible reg-to-reg copy"); - llvm_unreachable("Impossible reg-to-reg copy"); + const TargetRegisterInfo *TRI = &getRegisterInfo(); + MachineInstrBuilder Mov; + + // Copy register tuples backward when the first Dest reg overlaps with SrcReg. + if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) { + BeginIdx = BeginIdx + ((SubRegs-1)*Spacing); + Spacing = -Spacing; + } +#ifndef NDEBUG + SmallSet<unsigned, 4> DstRegs; +#endif + for (unsigned i = 0; i != SubRegs; ++i) { + unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i*Spacing); + unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i*Spacing); + assert(Dst && Src && "Bad sub-register"); +#ifndef NDEBUG + assert(!DstRegs.count(Src) && "destructive vector copy"); + DstRegs.insert(Dst); +#endif + Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst) + .addReg(Src); + // VORR takes two source operands. + if (Opc == ARM::VORRq) + Mov.addReg(Src); + Mov = AddDefaultPred(Mov); + } + // Add implicit super-register defs and kills to the last instruction. + Mov->addRegisterDefined(DestReg, TRI); + if (KillSrc) + Mov->addRegisterKilled(SrcReg, TRI); } static const @@ -1569,16 +1580,20 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { } /// Identify instructions that can be folded into a MOVCC instruction, and -/// return the corresponding opcode for the predicated pseudo-instruction. -static unsigned canFoldIntoMOVCC(unsigned Reg, MachineInstr *&MI, - const MachineRegisterInfo &MRI) { +/// return the defining instruction. +static MachineInstr *canFoldIntoMOVCC(unsigned Reg, + const MachineRegisterInfo &MRI, + const TargetInstrInfo *TII) { if (!TargetRegisterInfo::isVirtualRegister(Reg)) return 0; if (!MRI.hasOneNonDBGUse(Reg)) return 0; - MI = MRI.getVRegDef(Reg); + MachineInstr *MI = MRI.getVRegDef(Reg); if (!MI) return 0; + // MI is folded into the MOVCC by predicating it. + if (!MI->isPredicable()) + return 0; // Check if MI has any non-dead defs or physreg uses. This also detects // predicated instructions which will be reading CPSR. for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { @@ -1588,55 +1603,18 @@ static unsigned canFoldIntoMOVCC(unsigned Reg, MachineInstr *&MI, return 0; if (!MO.isReg()) continue; + // MI can't have any tied operands, that would conflict with predication. + if (MO.isTied()) + return 0; if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) return 0; if (MO.isDef() && !MO.isDead()) return 0; } - switch (MI->getOpcode()) { - default: return 0; - case ARM::ANDri: return ARM::ANDCCri; - case ARM::ANDrr: return ARM::ANDCCrr; - case ARM::ANDrsi: return ARM::ANDCCrsi; - case ARM::ANDrsr: return ARM::ANDCCrsr; - case ARM::t2ANDri: return ARM::t2ANDCCri; - case ARM::t2ANDrr: return ARM::t2ANDCCrr; - case ARM::t2ANDrs: return ARM::t2ANDCCrs; - case ARM::EORri: return ARM::EORCCri; - case ARM::EORrr: return ARM::EORCCrr; - case ARM::EORrsi: return ARM::EORCCrsi; - case ARM::EORrsr: return ARM::EORCCrsr; - case ARM::t2EORri: return ARM::t2EORCCri; - case ARM::t2EORrr: return ARM::t2EORCCrr; - case ARM::t2EORrs: return ARM::t2EORCCrs; - case ARM::ORRri: return ARM::ORRCCri; - case ARM::ORRrr: return ARM::ORRCCrr; - case ARM::ORRrsi: return ARM::ORRCCrsi; - case ARM::ORRrsr: return ARM::ORRCCrsr; - case ARM::t2ORRri: return ARM::t2ORRCCri; - case ARM::t2ORRrr: return ARM::t2ORRCCrr; - case ARM::t2ORRrs: return ARM::t2ORRCCrs; - - // ARM ADD/SUB - case ARM::ADDri: return ARM::ADDCCri; - case ARM::ADDrr: return ARM::ADDCCrr; - case ARM::ADDrsi: return ARM::ADDCCrsi; - case ARM::ADDrsr: return ARM::ADDCCrsr; - case ARM::SUBri: return ARM::SUBCCri; - case ARM::SUBrr: return ARM::SUBCCrr; - case ARM::SUBrsi: return ARM::SUBCCrsi; - case ARM::SUBrsr: return ARM::SUBCCrsr; - - // Thumb2 ADD/SUB - case ARM::t2ADDri: return ARM::t2ADDCCri; - case ARM::t2ADDri12: return ARM::t2ADDCCri12; - case ARM::t2ADDrr: return ARM::t2ADDCCrr; - case ARM::t2ADDrs: return ARM::t2ADDCCrs; - case ARM::t2SUBri: return ARM::t2SUBCCri; - case ARM::t2SUBri12: return ARM::t2SUBCCri12; - case ARM::t2SUBrr: return ARM::t2SUBCCrr; - case ARM::t2SUBrs: return ARM::t2SUBCCrs; - } + bool DontMoveAcrossStores = true; + if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ 0, DontMoveAcrossStores)) + return 0; + return MI; } bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI, @@ -1665,19 +1643,18 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) && "Unknown select instruction"); const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); - MachineInstr *DefMI = 0; - unsigned Opc = canFoldIntoMOVCC(MI->getOperand(2).getReg(), DefMI, MRI); - bool Invert = !Opc; - if (!Opc) - Opc = canFoldIntoMOVCC(MI->getOperand(1).getReg(), DefMI, MRI); - if (!Opc) + MachineInstr *DefMI = canFoldIntoMOVCC(MI->getOperand(2).getReg(), MRI, this); + bool Invert = !DefMI; + if (!DefMI) + DefMI = canFoldIntoMOVCC(MI->getOperand(1).getReg(), MRI, this); + if (!DefMI) return 0; // Create a new predicated version of DefMI. // Rfalse is the first use. MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), - get(Opc), MI->getOperand(0).getReg()) - .addOperand(MI->getOperand(Invert ? 2 : 1)); + DefMI->getDesc(), + MI->getOperand(0).getReg()); // Copy all the DefMI operands, excluding its (null) predicate. const MCInstrDesc &DefDesc = DefMI->getDesc(); @@ -1696,6 +1673,15 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, if (NewMI->hasOptionalDef()) AddDefaultCC(NewMI); + // The output register value when the predicate is false is an implicit + // register operand tied to the first def. + // The tie makes the register allocator ensure the FalseReg is allocated the + // same register as operand 0. + MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1); + FalseReg.setImplicit(); + NewMI->addOperand(FalseReg); + NewMI->tieOperands(0, NewMI->getNumOperands() - 1); + // The caller will erase MI, but not DefMI. DefMI->eraseFromParent(); return NewMI; @@ -2042,13 +2028,14 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // Masked compares sometimes use the same register as the corresponding 'and'. if (CmpMask != ~0) { - if (!isSuitableForMask(MI, SrcReg, CmpMask, false)) { + if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(MI)) { MI = 0; for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg), UE = MRI->use_end(); UI != UE; ++UI) { if (UI->getParent() != CmpInstr->getParent()) continue; MachineInstr *PotentialAND = &*UI; - if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true)) + if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) || + isPredicated(PotentialAND)) continue; MI = PotentialAND; break; @@ -2114,6 +2101,10 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // The single candidate is called MI. if (!MI) MI = Sub; + // We can't use a predicated instruction - it doesn't always write the flags. + if (isPredicated(MI)) + return false; + switch (MI->getOpcode()) { default: break; case ARM::RSBrr: @@ -2220,6 +2211,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // Toggle the optional operand to CPSR. MI->getOperand(5).setReg(ARM::CPSR); MI->getOperand(5).setIsDef(true); + assert(!isPredicated(MI) && "Can't use flags from predicated instruction"); CmpInstr->eraseFromParent(); // Modify the condition code of operands in OperandsToUpdate. @@ -3366,7 +3358,8 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const { // converted. if (Subtarget.isCortexA9() && !isPredicated(MI) && (MI->getOpcode() == ARM::VMOVRS || - MI->getOpcode() == ARM::VMOVSR)) + MI->getOpcode() == ARM::VMOVSR || + MI->getOpcode() == ARM::VMOVS)) return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON)); // No other instructions can be swizzled, so just determine their domain. @@ -3386,13 +3379,28 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const { return std::make_pair(ExeGeneric, 0); } +static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI, + unsigned SReg, unsigned &Lane) { + unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass); + Lane = 0; + + if (DReg != ARM::NoRegister) + return DReg; + + Lane = 1; + DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass); + + assert(DReg && "S-register with no D super-register?"); + return DReg; +} + + void ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { unsigned DstReg, SrcReg, DReg; unsigned Lane; MachineInstrBuilder MIB(MI); const TargetRegisterInfo *TRI = &getRegisterInfo(); - bool isKill; switch (MI->getOpcode()) { default: llvm_unreachable("cannot handle opcode!"); @@ -3403,78 +3411,175 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { // Zap the predicate operands. assert(!isPredicated(MI) && "Cannot predicate a VORRd"); - MI->RemoveOperand(3); - MI->RemoveOperand(2); - // Change to a VORRd which requires two identical use operands. - MI->setDesc(get(ARM::VORRd)); + // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); - // Add the extra source operand and new predicates. - // This will go before any implicit ops. - AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1))); + for (unsigned i = MI->getDesc().getNumOperands(); i; --i) + MI->RemoveOperand(i-1); + + // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits) + MI->setDesc(get(ARM::VORRd)); + AddDefaultPred(MIB.addReg(DstReg, RegState::Define) + .addReg(SrcReg) + .addReg(SrcReg)); break; case ARM::VMOVRS: if (Domain != ExeNEON) break; assert(!isPredicated(MI) && "Cannot predicate a VGETLN"); + // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits) DstReg = MI->getOperand(0).getReg(); SrcReg = MI->getOperand(1).getReg(); - DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_0, &ARM::DPRRegClass); - Lane = 0; - if (DReg == ARM::NoRegister) { - DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_1, &ARM::DPRRegClass); - Lane = 1; - assert(DReg && "S-register with no D super-register?"); - } + for (unsigned i = MI->getDesc().getNumOperands(); i; --i) + MI->RemoveOperand(i-1); - MI->RemoveOperand(3); - MI->RemoveOperand(2); - MI->RemoveOperand(1); + DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane); + // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps) + // Note that DSrc has been widened and the other lane may be undef, which + // contaminates the entire register. MI->setDesc(get(ARM::VGETLNi32)); - MIB.addReg(DReg); - MIB.addImm(Lane); + AddDefaultPred(MIB.addReg(DstReg, RegState::Define) + .addReg(DReg, RegState::Undef) + .addImm(Lane)); - MIB->getOperand(1).setIsUndef(); + // The old source should be an implicit use, otherwise we might think it + // was dead before here. MIB.addReg(SrcReg, RegState::Implicit); - - AddDefaultPred(MIB); break; case ARM::VMOVSR: if (Domain != ExeNEON) break; assert(!isPredicated(MI) && "Cannot predicate a VSETLN"); + // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits) DstReg = MI->getOperand(0).getReg(); SrcReg = MI->getOperand(1).getReg(); - DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_0, &ARM::DPRRegClass); - Lane = 0; - if (DReg == ARM::NoRegister) { - DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_1, &ARM::DPRRegClass); - Lane = 1; - assert(DReg && "S-register with no D super-register?"); - } - isKill = MI->getOperand(0).isKill(); - MI->RemoveOperand(3); - MI->RemoveOperand(2); - MI->RemoveOperand(1); - MI->RemoveOperand(0); + DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane); + // If we insert both a novel <def> and an <undef> on the DReg, we break + // any existing dependency chain on the unused lane. Either already being + // present means this instruction is in that chain anyway so we can make + // the transformation. + if (!MI->definesRegister(DReg, TRI) && !MI->readsRegister(DReg, TRI)) + break; + + for (unsigned i = MI->getDesc().getNumOperands(); i; --i) + MI->RemoveOperand(i-1); + + // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps) + // Again DDst may be undefined at the beginning of this instruction. MI->setDesc(get(ARM::VSETLNi32)); - MIB.addReg(DReg, RegState::Define); - MIB.addReg(DReg, RegState::Undef); - MIB.addReg(SrcReg); - MIB.addImm(Lane); + MIB.addReg(DReg, RegState::Define) + .addReg(DReg, getUndefRegState(!MI->readsRegister(DReg, TRI))) + .addReg(SrcReg) + .addImm(Lane); + AddDefaultPred(MIB); - if (isKill) - MIB->addRegisterKilled(DstReg, TRI, true); - MIB->addRegisterDefined(DstReg, TRI); + // The narrower destination must be marked as set to keep previous chains + // in place. + MIB.addReg(DstReg, RegState::Define | RegState::Implicit); + break; + case ARM::VMOVS: { + if (Domain != ExeNEON) + break; + // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + + unsigned DstLane = 0, SrcLane = 0, DDst, DSrc; + DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane); + DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane); + + // If we insert both a novel <def> and an <undef> on the DReg, we break + // any existing dependency chain on the unused lane. Either already being + // present means this instruction is in that chain anyway so we can make + // the transformation. + if (!MI->definesRegister(DDst, TRI) && !MI->readsRegister(DDst, TRI)) + break; + + for (unsigned i = MI->getDesc().getNumOperands(); i; --i) + MI->RemoveOperand(i-1); + + if (DSrc == DDst) { + // Destination can be: + // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits) + MI->setDesc(get(ARM::VDUPLN32d)); + MIB.addReg(DDst, RegState::Define) + .addReg(DDst, getUndefRegState(!MI->readsRegister(DDst, TRI))) + .addImm(SrcLane); + AddDefaultPred(MIB); + + // Neither the source or the destination are naturally represented any + // more, so add them in manually. + MIB.addReg(DstReg, RegState::Implicit | RegState::Define); + MIB.addReg(SrcReg, RegState::Implicit); + break; + } + + // In general there's no single instruction that can perform an S <-> S + // move in NEON space, but a pair of VEXT instructions *can* do the + // job. It turns out that the VEXTs needed will only use DSrc once, with + // the position based purely on the combination of lane-0 and lane-1 + // involved. For example + // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1 + // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1 + // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1 + // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1 + // + // Pattern of the MachineInstrs is: + // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits) + MachineInstrBuilder NewMIB; + NewMIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + get(ARM::VEXTd32), DDst); + + // On the first instruction, both DSrc and DDst may be <undef> if present. + // Specifically when the original instruction didn't have them as an + // <imp-use>. + unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst; + bool CurUndef = !MI->readsRegister(CurReg, TRI); + NewMIB.addReg(CurReg, getUndefRegState(CurUndef)); + + CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst; + CurUndef = !MI->readsRegister(CurReg, TRI); + NewMIB.addReg(CurReg, getUndefRegState(CurUndef)); + + NewMIB.addImm(1); + AddDefaultPred(NewMIB); + + if (SrcLane == DstLane) + NewMIB.addReg(SrcReg, RegState::Implicit); + + MI->setDesc(get(ARM::VEXTd32)); + MIB.addReg(DDst, RegState::Define); + + // On the second instruction, DDst has definitely been defined above, so + // it is not <undef>. DSrc, if present, can be <undef> as above. + CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst; + CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI); + MIB.addReg(CurReg, getUndefRegState(CurUndef)); + + CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst; + CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI); + MIB.addReg(CurReg, getUndefRegState(CurUndef)); + + MIB.addImm(1); AddDefaultPred(MIB); + + if (SrcLane != DstLane) + MIB.addReg(SrcReg, RegState::Implicit); + + // As before, the original destination is no longer represented, add it + // implicitly. + MIB.addReg(DstReg, RegState::Define | RegState::Implicit); break; + } } } |