diff options
Diffstat (limited to 'lib/Target/ARM')
-rw-r--r-- | lib/Target/ARM/ARMBaseInstrInfo.cpp | 349 | ||||
-rw-r--r-- | lib/Target/ARM/ARMCodeEmitter.cpp | 2 | ||||
-rw-r--r-- | lib/Target/ARM/ARMConstantIslandPass.cpp | 5 | ||||
-rw-r--r-- | lib/Target/ARM/ARMExpandPseudoInsts.cpp | 51 | ||||
-rw-r--r-- | lib/Target/ARM/ARMFastISel.cpp | 74 | ||||
-rw-r--r-- | lib/Target/ARM/ARMISelDAGToDAG.cpp | 32 | ||||
-rw-r--r-- | lib/Target/ARM/ARMISelLowering.cpp | 236 | ||||
-rw-r--r-- | lib/Target/ARM/ARMISelLowering.h | 8 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrInfo.td | 108 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrNEON.td | 36 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrThumb.td | 1 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrThumb2.td | 90 | ||||
-rw-r--r-- | lib/Target/ARM/ARMJITInfo.cpp | 2 | ||||
-rw-r--r-- | lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 198 | ||||
-rw-r--r-- | lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 52 | ||||
-rw-r--r-- | lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp | 4 | ||||
-rw-r--r-- | lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp | 1 | ||||
-rw-r--r-- | lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 2 | ||||
-rw-r--r-- | lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp | 2 |
19 files changed, 806 insertions, 447 deletions
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 29033e5..e2f0d7d 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -683,7 +683,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Handle register classes that require multiple instructions. unsigned BeginIdx = 0; unsigned SubRegs = 0; - unsigned Spacing = 1; + int Spacing = 1; // Use VORRq when possible. if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) @@ -705,27 +705,38 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4, Spacing = 2; - if (Opc) { - const TargetRegisterInfo *TRI = &getRegisterInfo(); - MachineInstrBuilder Mov; - for (unsigned i = 0; i != SubRegs; ++i) { - unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i*Spacing); - unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i*Spacing); - assert(Dst && Src && "Bad sub-register"); - Mov = AddDefaultPred(BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst) - .addReg(Src)); - // VORR takes two source operands. - if (Opc == ARM::VORRq) - Mov.addReg(Src); - } - // Add implicit super-register defs and kills to the last instruction. - Mov->addRegisterDefined(DestReg, TRI); - if (KillSrc) - Mov->addRegisterKilled(SrcReg, TRI); - return; - } + assert(Opc && "Impossible reg-to-reg copy"); - llvm_unreachable("Impossible reg-to-reg copy"); + const TargetRegisterInfo *TRI = &getRegisterInfo(); + MachineInstrBuilder Mov; + + // Copy register tuples backward when the first Dest reg overlaps with SrcReg. + if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) { + BeginIdx = BeginIdx + ((SubRegs-1)*Spacing); + Spacing = -Spacing; + } +#ifndef NDEBUG + SmallSet<unsigned, 4> DstRegs; +#endif + for (unsigned i = 0; i != SubRegs; ++i) { + unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i*Spacing); + unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i*Spacing); + assert(Dst && Src && "Bad sub-register"); +#ifndef NDEBUG + assert(!DstRegs.count(Src) && "destructive vector copy"); + DstRegs.insert(Dst); +#endif + Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst) + .addReg(Src); + // VORR takes two source operands. + if (Opc == ARM::VORRq) + Mov.addReg(Src); + Mov = AddDefaultPred(Mov); + } + // Add implicit super-register defs and kills to the last instruction. + Mov->addRegisterDefined(DestReg, TRI); + if (KillSrc) + Mov->addRegisterKilled(SrcReg, TRI); } static const @@ -1569,16 +1580,20 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { } /// Identify instructions that can be folded into a MOVCC instruction, and -/// return the corresponding opcode for the predicated pseudo-instruction. -static unsigned canFoldIntoMOVCC(unsigned Reg, MachineInstr *&MI, - const MachineRegisterInfo &MRI) { +/// return the defining instruction. +static MachineInstr *canFoldIntoMOVCC(unsigned Reg, + const MachineRegisterInfo &MRI, + const TargetInstrInfo *TII) { if (!TargetRegisterInfo::isVirtualRegister(Reg)) return 0; if (!MRI.hasOneNonDBGUse(Reg)) return 0; - MI = MRI.getVRegDef(Reg); + MachineInstr *MI = MRI.getVRegDef(Reg); if (!MI) return 0; + // MI is folded into the MOVCC by predicating it. + if (!MI->isPredicable()) + return 0; // Check if MI has any non-dead defs or physreg uses. This also detects // predicated instructions which will be reading CPSR. for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { @@ -1588,55 +1603,18 @@ static unsigned canFoldIntoMOVCC(unsigned Reg, MachineInstr *&MI, return 0; if (!MO.isReg()) continue; + // MI can't have any tied operands, that would conflict with predication. + if (MO.isTied()) + return 0; if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) return 0; if (MO.isDef() && !MO.isDead()) return 0; } - switch (MI->getOpcode()) { - default: return 0; - case ARM::ANDri: return ARM::ANDCCri; - case ARM::ANDrr: return ARM::ANDCCrr; - case ARM::ANDrsi: return ARM::ANDCCrsi; - case ARM::ANDrsr: return ARM::ANDCCrsr; - case ARM::t2ANDri: return ARM::t2ANDCCri; - case ARM::t2ANDrr: return ARM::t2ANDCCrr; - case ARM::t2ANDrs: return ARM::t2ANDCCrs; - case ARM::EORri: return ARM::EORCCri; - case ARM::EORrr: return ARM::EORCCrr; - case ARM::EORrsi: return ARM::EORCCrsi; - case ARM::EORrsr: return ARM::EORCCrsr; - case ARM::t2EORri: return ARM::t2EORCCri; - case ARM::t2EORrr: return ARM::t2EORCCrr; - case ARM::t2EORrs: return ARM::t2EORCCrs; - case ARM::ORRri: return ARM::ORRCCri; - case ARM::ORRrr: return ARM::ORRCCrr; - case ARM::ORRrsi: return ARM::ORRCCrsi; - case ARM::ORRrsr: return ARM::ORRCCrsr; - case ARM::t2ORRri: return ARM::t2ORRCCri; - case ARM::t2ORRrr: return ARM::t2ORRCCrr; - case ARM::t2ORRrs: return ARM::t2ORRCCrs; - - // ARM ADD/SUB - case ARM::ADDri: return ARM::ADDCCri; - case ARM::ADDrr: return ARM::ADDCCrr; - case ARM::ADDrsi: return ARM::ADDCCrsi; - case ARM::ADDrsr: return ARM::ADDCCrsr; - case ARM::SUBri: return ARM::SUBCCri; - case ARM::SUBrr: return ARM::SUBCCrr; - case ARM::SUBrsi: return ARM::SUBCCrsi; - case ARM::SUBrsr: return ARM::SUBCCrsr; - - // Thumb2 ADD/SUB - case ARM::t2ADDri: return ARM::t2ADDCCri; - case ARM::t2ADDri12: return ARM::t2ADDCCri12; - case ARM::t2ADDrr: return ARM::t2ADDCCrr; - case ARM::t2ADDrs: return ARM::t2ADDCCrs; - case ARM::t2SUBri: return ARM::t2SUBCCri; - case ARM::t2SUBri12: return ARM::t2SUBCCri12; - case ARM::t2SUBrr: return ARM::t2SUBCCrr; - case ARM::t2SUBrs: return ARM::t2SUBCCrs; - } + bool DontMoveAcrossStores = true; + if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ 0, DontMoveAcrossStores)) + return 0; + return MI; } bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI, @@ -1665,19 +1643,18 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) && "Unknown select instruction"); const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); - MachineInstr *DefMI = 0; - unsigned Opc = canFoldIntoMOVCC(MI->getOperand(2).getReg(), DefMI, MRI); - bool Invert = !Opc; - if (!Opc) - Opc = canFoldIntoMOVCC(MI->getOperand(1).getReg(), DefMI, MRI); - if (!Opc) + MachineInstr *DefMI = canFoldIntoMOVCC(MI->getOperand(2).getReg(), MRI, this); + bool Invert = !DefMI; + if (!DefMI) + DefMI = canFoldIntoMOVCC(MI->getOperand(1).getReg(), MRI, this); + if (!DefMI) return 0; // Create a new predicated version of DefMI. // Rfalse is the first use. MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), - get(Opc), MI->getOperand(0).getReg()) - .addOperand(MI->getOperand(Invert ? 2 : 1)); + DefMI->getDesc(), + MI->getOperand(0).getReg()); // Copy all the DefMI operands, excluding its (null) predicate. const MCInstrDesc &DefDesc = DefMI->getDesc(); @@ -1696,6 +1673,15 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, if (NewMI->hasOptionalDef()) AddDefaultCC(NewMI); + // The output register value when the predicate is false is an implicit + // register operand tied to the first def. + // The tie makes the register allocator ensure the FalseReg is allocated the + // same register as operand 0. + MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1); + FalseReg.setImplicit(); + NewMI->addOperand(FalseReg); + NewMI->tieOperands(0, NewMI->getNumOperands() - 1); + // The caller will erase MI, but not DefMI. DefMI->eraseFromParent(); return NewMI; @@ -2042,13 +2028,14 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // Masked compares sometimes use the same register as the corresponding 'and'. if (CmpMask != ~0) { - if (!isSuitableForMask(MI, SrcReg, CmpMask, false)) { + if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(MI)) { MI = 0; for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg), UE = MRI->use_end(); UI != UE; ++UI) { if (UI->getParent() != CmpInstr->getParent()) continue; MachineInstr *PotentialAND = &*UI; - if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true)) + if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) || + isPredicated(PotentialAND)) continue; MI = PotentialAND; break; @@ -2114,6 +2101,10 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // The single candidate is called MI. if (!MI) MI = Sub; + // We can't use a predicated instruction - it doesn't always write the flags. + if (isPredicated(MI)) + return false; + switch (MI->getOpcode()) { default: break; case ARM::RSBrr: @@ -2220,6 +2211,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // Toggle the optional operand to CPSR. MI->getOperand(5).setReg(ARM::CPSR); MI->getOperand(5).setIsDef(true); + assert(!isPredicated(MI) && "Can't use flags from predicated instruction"); CmpInstr->eraseFromParent(); // Modify the condition code of operands in OperandsToUpdate. @@ -3366,7 +3358,8 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const { // converted. if (Subtarget.isCortexA9() && !isPredicated(MI) && (MI->getOpcode() == ARM::VMOVRS || - MI->getOpcode() == ARM::VMOVSR)) + MI->getOpcode() == ARM::VMOVSR || + MI->getOpcode() == ARM::VMOVS)) return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON)); // No other instructions can be swizzled, so just determine their domain. @@ -3386,13 +3379,28 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const { return std::make_pair(ExeGeneric, 0); } +static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI, + unsigned SReg, unsigned &Lane) { + unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass); + Lane = 0; + + if (DReg != ARM::NoRegister) + return DReg; + + Lane = 1; + DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass); + + assert(DReg && "S-register with no D super-register?"); + return DReg; +} + + void ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { unsigned DstReg, SrcReg, DReg; unsigned Lane; MachineInstrBuilder MIB(MI); const TargetRegisterInfo *TRI = &getRegisterInfo(); - bool isKill; switch (MI->getOpcode()) { default: llvm_unreachable("cannot handle opcode!"); @@ -3403,78 +3411,175 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { // Zap the predicate operands. assert(!isPredicated(MI) && "Cannot predicate a VORRd"); - MI->RemoveOperand(3); - MI->RemoveOperand(2); - // Change to a VORRd which requires two identical use operands. - MI->setDesc(get(ARM::VORRd)); + // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); - // Add the extra source operand and new predicates. - // This will go before any implicit ops. - AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1))); + for (unsigned i = MI->getDesc().getNumOperands(); i; --i) + MI->RemoveOperand(i-1); + + // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits) + MI->setDesc(get(ARM::VORRd)); + AddDefaultPred(MIB.addReg(DstReg, RegState::Define) + .addReg(SrcReg) + .addReg(SrcReg)); break; case ARM::VMOVRS: if (Domain != ExeNEON) break; assert(!isPredicated(MI) && "Cannot predicate a VGETLN"); + // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits) DstReg = MI->getOperand(0).getReg(); SrcReg = MI->getOperand(1).getReg(); - DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_0, &ARM::DPRRegClass); - Lane = 0; - if (DReg == ARM::NoRegister) { - DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_1, &ARM::DPRRegClass); - Lane = 1; - assert(DReg && "S-register with no D super-register?"); - } + for (unsigned i = MI->getDesc().getNumOperands(); i; --i) + MI->RemoveOperand(i-1); - MI->RemoveOperand(3); - MI->RemoveOperand(2); - MI->RemoveOperand(1); + DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane); + // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps) + // Note that DSrc has been widened and the other lane may be undef, which + // contaminates the entire register. MI->setDesc(get(ARM::VGETLNi32)); - MIB.addReg(DReg); - MIB.addImm(Lane); + AddDefaultPred(MIB.addReg(DstReg, RegState::Define) + .addReg(DReg, RegState::Undef) + .addImm(Lane)); - MIB->getOperand(1).setIsUndef(); + // The old source should be an implicit use, otherwise we might think it + // was dead before here. MIB.addReg(SrcReg, RegState::Implicit); - - AddDefaultPred(MIB); break; case ARM::VMOVSR: if (Domain != ExeNEON) break; assert(!isPredicated(MI) && "Cannot predicate a VSETLN"); + // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits) DstReg = MI->getOperand(0).getReg(); SrcReg = MI->getOperand(1).getReg(); - DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_0, &ARM::DPRRegClass); - Lane = 0; - if (DReg == ARM::NoRegister) { - DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_1, &ARM::DPRRegClass); - Lane = 1; - assert(DReg && "S-register with no D super-register?"); - } - isKill = MI->getOperand(0).isKill(); - MI->RemoveOperand(3); - MI->RemoveOperand(2); - MI->RemoveOperand(1); - MI->RemoveOperand(0); + DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane); + // If we insert both a novel <def> and an <undef> on the DReg, we break + // any existing dependency chain on the unused lane. Either already being + // present means this instruction is in that chain anyway so we can make + // the transformation. + if (!MI->definesRegister(DReg, TRI) && !MI->readsRegister(DReg, TRI)) + break; + + for (unsigned i = MI->getDesc().getNumOperands(); i; --i) + MI->RemoveOperand(i-1); + + // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps) + // Again DDst may be undefined at the beginning of this instruction. MI->setDesc(get(ARM::VSETLNi32)); - MIB.addReg(DReg, RegState::Define); - MIB.addReg(DReg, RegState::Undef); - MIB.addReg(SrcReg); - MIB.addImm(Lane); + MIB.addReg(DReg, RegState::Define) + .addReg(DReg, getUndefRegState(!MI->readsRegister(DReg, TRI))) + .addReg(SrcReg) + .addImm(Lane); + AddDefaultPred(MIB); - if (isKill) - MIB->addRegisterKilled(DstReg, TRI, true); - MIB->addRegisterDefined(DstReg, TRI); + // The narrower destination must be marked as set to keep previous chains + // in place. + MIB.addReg(DstReg, RegState::Define | RegState::Implicit); + break; + case ARM::VMOVS: { + if (Domain != ExeNEON) + break; + // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + + unsigned DstLane = 0, SrcLane = 0, DDst, DSrc; + DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane); + DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane); + + // If we insert both a novel <def> and an <undef> on the DReg, we break + // any existing dependency chain on the unused lane. Either already being + // present means this instruction is in that chain anyway so we can make + // the transformation. + if (!MI->definesRegister(DDst, TRI) && !MI->readsRegister(DDst, TRI)) + break; + + for (unsigned i = MI->getDesc().getNumOperands(); i; --i) + MI->RemoveOperand(i-1); + + if (DSrc == DDst) { + // Destination can be: + // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits) + MI->setDesc(get(ARM::VDUPLN32d)); + MIB.addReg(DDst, RegState::Define) + .addReg(DDst, getUndefRegState(!MI->readsRegister(DDst, TRI))) + .addImm(SrcLane); + AddDefaultPred(MIB); + + // Neither the source or the destination are naturally represented any + // more, so add them in manually. + MIB.addReg(DstReg, RegState::Implicit | RegState::Define); + MIB.addReg(SrcReg, RegState::Implicit); + break; + } + + // In general there's no single instruction that can perform an S <-> S + // move in NEON space, but a pair of VEXT instructions *can* do the + // job. It turns out that the VEXTs needed will only use DSrc once, with + // the position based purely on the combination of lane-0 and lane-1 + // involved. For example + // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1 + // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1 + // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1 + // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1 + // + // Pattern of the MachineInstrs is: + // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits) + MachineInstrBuilder NewMIB; + NewMIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + get(ARM::VEXTd32), DDst); + + // On the first instruction, both DSrc and DDst may be <undef> if present. + // Specifically when the original instruction didn't have them as an + // <imp-use>. + unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst; + bool CurUndef = !MI->readsRegister(CurReg, TRI); + NewMIB.addReg(CurReg, getUndefRegState(CurUndef)); + + CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst; + CurUndef = !MI->readsRegister(CurReg, TRI); + NewMIB.addReg(CurReg, getUndefRegState(CurUndef)); + + NewMIB.addImm(1); + AddDefaultPred(NewMIB); + + if (SrcLane == DstLane) + NewMIB.addReg(SrcReg, RegState::Implicit); + + MI->setDesc(get(ARM::VEXTd32)); + MIB.addReg(DDst, RegState::Define); + + // On the second instruction, DDst has definitely been defined above, so + // it is not <undef>. DSrc, if present, can be <undef> as above. + CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst; + CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI); + MIB.addReg(CurReg, getUndefRegState(CurUndef)); + + CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst; + CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI); + MIB.addReg(CurReg, getUndefRegState(CurUndef)); + + MIB.addImm(1); AddDefaultPred(MIB); + + if (SrcLane != DstLane) + MIB.addReg(SrcReg, RegState::Implicit); + + // As before, the original destination is no longer represented, add it + // implicitly. + MIB.addReg(DstReg, RegState::Define | RegState::Implicit); break; + } } } diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 132b81f..89cbc84 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -410,7 +410,7 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) { do { DEBUG(errs() << "JITTing function '" - << MF.getFunction()->getName() << "'\n"); + << MF.getName() << "'\n"); MCE.startFunction(MF); for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); MBB != E; ++MBB) { diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index a953985..dd05f0c 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -1388,10 +1388,9 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) { // If the original WaterList entry was "new water" on this iteration, // propagate that to the new island. This is just keeping NewWaterList // updated to match the WaterList, which will be updated below. - if (NewWaterList.count(WaterBB)) { - NewWaterList.erase(WaterBB); + if (NewWaterList.erase(WaterBB)) NewWaterList.insert(NewIsland); - } + // The new CPE goes before the following block (NewMBB). NewMBB = llvm::next(MachineFunction::iterator(WaterBB)); diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 15bb32e..8ed6b75 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -1208,6 +1208,57 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, ExpandLaneOp(MBBI); return true; + case ARM::VSETLNi8Q: + case ARM::VSETLNi16Q: { + // Expand VSETLNs acting on a Q register to equivalent VSETLNs acting + // on the respective D register. + + unsigned QReg = MI.getOperand(1).getReg(); + unsigned QLane = MI.getOperand(3).getImm(); + + unsigned NewOpcode, DLane, DSubReg; + switch (Opcode) { + default: llvm_unreachable("Invalid opcode!"); + case ARM::VSETLNi8Q: + // 4 possible 8-bit lanes per DPR: + NewOpcode = ARM::VSETLNi8; + DLane = QLane % 8; + DSubReg = (QLane / 8) ? ARM::dsub_1 : ARM::dsub_0; + break; + case ARM::VSETLNi16Q: + // 4 possible 16-bit lanes per DPR. + NewOpcode = ARM::VSETLNi16; + DLane = QLane % 4; + DSubReg = (QLane / 4) ? ARM::dsub_1 : ARM::dsub_0; + break; + } + + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpcode)); + + unsigned DReg = TRI->getSubReg(QReg, DSubReg); + + MIB.addReg(DReg, RegState::Define); // Output DPR + MIB.addReg(DReg); // Input DPR + MIB.addOperand(MI.getOperand(2)); // Input GPR + MIB.addImm(DLane); // Lane + + // Add the predicate operands. + MIB.addOperand(MI.getOperand(4)); + MIB.addOperand(MI.getOperand(5)); + + if (MI.getOperand(1).isKill()) // Add an implicit kill for the Q register. + MIB->addRegisterKilled(QReg, TRI, true); + // And an implicit def of the output register (which should always be the + // same as the input register). + MIB->addRegisterDefined(QReg, TRI); + + TransferImpOps(MI, MIB, MIB); + + MI.eraseFromParent(); + return true; + } + case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true; case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true; case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true; diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 5a5ca1b..045d904 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -617,10 +617,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) { if (VT != MVT::i32) return 0; Reloc::Model RelocM = TM.getRelocationModel(); - - // TODO: Need more magic for ARM PIC. - if (!isThumb2 && (RelocM == Reloc::PIC_)) return 0; - + bool IsIndirect = Subtarget->GVIsIndirectSymbol(GV, RelocM); unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); // Use movw+movt when possible, it avoids constant pool entries. @@ -668,17 +665,30 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) { .addConstantPoolIndex(Idx); if (RelocM == Reloc::PIC_) MIB.addImm(Id); + AddOptionalDefs(MIB); } else { // The extra immediate is for addrmode2. MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp), DestReg) .addConstantPoolIndex(Idx) .addImm(0); + AddOptionalDefs(MIB); + + if (RelocM == Reloc::PIC_) { + unsigned Opc = IsIndirect ? ARM::PICLDR : ARM::PICADD; + unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT)); + + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, + DL, TII.get(Opc), NewDestReg) + .addReg(DestReg) + .addImm(Id); + AddOptionalDefs(MIB); + return NewDestReg; + } } - AddOptionalDefs(MIB); } - if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) { + if (IsIndirect) { MachineInstrBuilder MIB; unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT)); if (isThumb2) @@ -2212,25 +2222,17 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { unsigned CallOpc = ARMSelectCallOp(EnableARMLongCalls); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc)); - if (isThumb2) { - // Explicitly adding the predicate here. + // BL / BLX don't take a predicate, but tBL / tBLX do. + if (isThumb2) AddDefaultPred(MIB); - if (EnableARMLongCalls) - MIB.addReg(CalleeReg); - else - MIB.addExternalSymbol(TLI.getLibcallName(Call)); - } else { - if (EnableARMLongCalls) - MIB.addReg(CalleeReg); - else - MIB.addExternalSymbol(TLI.getLibcallName(Call)); + if (EnableARMLongCalls) + MIB.addReg(CalleeReg); + else + MIB.addExternalSymbol(TLI.getLibcallName(Call)); - // Explicitly adding the predicate here. - AddDefaultPred(MIB); - } // Add implicit physical register uses to the call. for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) - MIB.addReg(RegArgs[i]); + MIB.addReg(RegArgs[i], RegState::Implicit); // Add a register mask with the call-preserved registers. // Proper defs for return values will be added by setPhysRegsDeadExcept(). @@ -2358,30 +2360,20 @@ bool ARMFastISel::SelectCall(const Instruction *I, unsigned CallOpc = ARMSelectCallOp(UseReg); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc)); - if(isThumb2) { - // Explicitly adding the predicate here. - AddDefaultPred(MIB); - if (UseReg) - MIB.addReg(CalleeReg); - else if (!IntrMemName) - MIB.addGlobalAddress(GV, 0, 0); - else - MIB.addExternalSymbol(IntrMemName, 0); - } else { - if (UseReg) - MIB.addReg(CalleeReg); - else if (!IntrMemName) - MIB.addGlobalAddress(GV, 0, 0); - else - MIB.addExternalSymbol(IntrMemName, 0); - // Explicitly adding the predicate here. + // ARM calls don't take a predicate, but tBL / tBLX do. + if(isThumb2) AddDefaultPred(MIB); - } + if (UseReg) + MIB.addReg(CalleeReg); + else if (!IntrMemName) + MIB.addGlobalAddress(GV, 0, 0); + else + MIB.addExternalSymbol(IntrMemName, 0); // Add implicit physical register uses to the call. for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) - MIB.addReg(RegArgs[i]); + MIB.addReg(RegArgs[i], RegState::Implicit); // Add a register mask with the call-preserved registers. // Proper defs for return values will be added by setPhysRegsDeadExcept(). @@ -2650,7 +2642,7 @@ bool ARMFastISel::SelectShift(const Instruction *I, unsigned Reg1 = getRegForValue(Src1Value); if (Reg1 == 0) return false; - unsigned Reg2; + unsigned Reg2 = 0; if (Opc == ARM::MOVsr) { Reg2 = getRegForValue(Src2Value); if (Reg2 == 0) return false; diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index c6f9d15..1406620 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -2637,6 +2637,38 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { dl, MVT::i32, MVT::i32, Ops, 5); } } + case ARMISD::UMLAL:{ + if (Subtarget->isThumb()) { + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), + N->getOperand(3), getAL(CurDAG), + CurDAG->getRegister(0, MVT::i32)}; + return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops, 6); + }else{ + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), + N->getOperand(3), getAL(CurDAG), + CurDAG->getRegister(0, MVT::i32), + CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->getMachineNode(Subtarget->hasV6Ops() ? + ARM::UMLAL : ARM::UMLALv5, + dl, MVT::i32, MVT::i32, Ops, 7); + } + } + case ARMISD::SMLAL:{ + if (Subtarget->isThumb()) { + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), + N->getOperand(3), getAL(CurDAG), + CurDAG->getRegister(0, MVT::i32)}; + return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops, 6); + }else{ + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), + N->getOperand(3), getAL(CurDAG), + CurDAG->getRegister(0, MVT::i32), + CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->getMachineNode(Subtarget->hasV6Ops() ? + ARM::SMLAL : ARM::SMLALv5, + dl, MVT::i32, MVT::i32, Ops, 7); + } + } case ISD::LOAD: { SDNode *ResNode = 0; if (Subtarget->isThumb() && Subtarget->hasThumb2()) diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index df4039b..e51315e 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -514,6 +514,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FLOG10, MVT::v4f32, Expand); setOperationAction(ISD::FEXP, MVT::v4f32, Expand); setOperationAction(ISD::FEXP2, MVT::v4f32, Expand); + setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand); // Neon does not support some operations on v1i64 and v2i64 types. setOperationAction(ISD::MUL, MVT::v1i64, Expand); @@ -566,6 +567,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } } + // ARM and Thumb2 support UMLAL/SMLAL. + if (!Subtarget->isThumb1Only()) + setTargetDAGCombine(ISD::ADDC); + + computeRegisterProperties(); // ARM does not have f32 extending load. @@ -791,12 +797,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setTargetDAGCombine(ISD::ADD); setTargetDAGCombine(ISD::SUB); setTargetDAGCombine(ISD::MUL); - - if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON()) { - setTargetDAGCombine(ISD::AND); - setTargetDAGCombine(ISD::OR); - setTargetDAGCombine(ISD::XOR); - } + setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::OR); + setTargetDAGCombine(ISD::XOR); if (Subtarget->hasV6Ops()) setTargetDAGCombine(ISD::SRL); @@ -981,6 +984,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VTBL2: return "ARMISD::VTBL2"; case ARMISD::VMULLs: return "ARMISD::VMULLs"; case ARMISD::VMULLu: return "ARMISD::VMULLu"; + case ARMISD::UMLAL: return "ARMISD::UMLAL"; + case ARMISD::SMLAL: return "ARMISD::SMLAL"; case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; case ARMISD::FMAX: return "ARMISD::FMAX"; case ARMISD::FMIN: return "ARMISD::FMIN"; @@ -4154,10 +4159,21 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, } // Scan through the operands to see if only one value is used. + // + // As an optimisation, even if more than one value is used it may be more + // profitable to splat with one value then change some lanes. + // + // Heuristically we decide to do this if the vector has a "dominant" value, + // defined as splatted to more than half of the lanes. unsigned NumElts = VT.getVectorNumElements(); bool isOnlyLowElement = true; bool usesOnlyOneValue = true; + bool hasDominantValue = false; bool isConstant = true; + + // Map of the number of times a particular SDValue appears in the + // element list. + DenseMap<SDValue, unsigned> ValueCounts; SDValue Value; for (unsigned i = 0; i < NumElts; ++i) { SDValue V = Op.getOperand(i); @@ -4168,13 +4184,21 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V)) isConstant = false; - if (!Value.getNode()) + ValueCounts.insert(std::make_pair(V, 0)); + unsigned &Count = ValueCounts[V]; + + // Is this value dominant? (takes up more than half of the lanes) + if (++Count > (NumElts / 2)) { + hasDominantValue = true; Value = V; - else if (V != Value) - usesOnlyOneValue = false; + } } + if (ValueCounts.size() != 1) + usesOnlyOneValue = false; + if (!Value.getNode() && ValueCounts.size() > 0) + Value = ValueCounts.begin()->first; - if (!Value.getNode()) + if (ValueCounts.size() == 0) return DAG.getUNDEF(VT); if (isOnlyLowElement) @@ -4184,9 +4208,34 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, // Use VDUP for non-constant splats. For f32 constant splats, reduce to // i32 and try again. - if (usesOnlyOneValue && EltSize <= 32) { - if (!isConstant) - return DAG.getNode(ARMISD::VDUP, dl, VT, Value); + if (hasDominantValue && EltSize <= 32) { + if (!isConstant) { + SDValue N; + + // If we are VDUPing a value that comes directly from a vector, that will + // cause an unnecessary move to and from a GPR, where instead we could + // just use VDUPLANE. + if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT) + N = DAG.getNode(ARMISD::VDUPLANE, dl, VT, + Value->getOperand(0), Value->getOperand(1)); + else + N = DAG.getNode(ARMISD::VDUP, dl, VT, Value); + + if (!usesOnlyOneValue) { + // The dominant value was splatted as 'N', but we now have to insert + // all differing elements. + for (unsigned I = 0; I < NumElts; ++I) { + if (Op.getOperand(I) == Value) + continue; + SmallVector<SDValue, 3> Ops; + Ops.push_back(N); + Ops.push_back(Op.getOperand(I)); + Ops.push_back(DAG.getConstant(I, MVT::i32)); + N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, &Ops[0], 3); + } + } + return N; + } if (VT.getVectorElementType().isFloatingPoint()) { SmallVector<SDValue, 8> Ops; for (unsigned i = 0; i < NumElts; ++i) @@ -4198,9 +4247,11 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, if (Val.getNode()) return DAG.getNode(ISD::BITCAST, dl, VT, Val); } - SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl); - if (Val.getNode()) - return DAG.getNode(ARMISD::VDUP, dl, VT, Val); + if (usesOnlyOneValue) { + SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl); + if (isConstant && Val.getNode()) + return DAG.getNode(ARMISD::VDUP, dl, VT, Val); + } } // If all elements are constants and the case above didn't get hit, fall back @@ -5418,7 +5469,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, exitMBB->transferSuccessorsAndUpdatePHIs(BB); const TargetRegisterClass *TRC = isThumb2 ? - (const TargetRegisterClass*)&ARM::tGPRRegClass : + (const TargetRegisterClass*)&ARM::rGPRRegClass : (const TargetRegisterClass*)&ARM::GPRRegClass; unsigned scratch = MRI.createVirtualRegister(TRC); unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC); @@ -5529,7 +5580,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, exitMBB->transferSuccessorsAndUpdatePHIs(BB); const TargetRegisterClass *TRC = isThumb2 ? - (const TargetRegisterClass*)&ARM::tGPRRegClass : + (const TargetRegisterClass*)&ARM::rGPRRegClass : (const TargetRegisterClass*)&ARM::GPRRegClass; unsigned scratch = MRI.createVirtualRegister(TRC); unsigned scratch2 = MRI.createVirtualRegister(TRC); @@ -7193,6 +7244,154 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, tmp); } +static SDValue findMUL_LOHI(SDValue V) { + if (V->getOpcode() == ISD::UMUL_LOHI || + V->getOpcode() == ISD::SMUL_LOHI) + return V; + return SDValue(); +} + +static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + + if (Subtarget->isThumb1Only()) return SDValue(); + + // Only perform the checks after legalize when the pattern is available. + if (DCI.isBeforeLegalize()) return SDValue(); + + // Look for multiply add opportunities. + // The pattern is a ISD::UMUL_LOHI followed by two add nodes, where + // each add nodes consumes a value from ISD::UMUL_LOHI and there is + // a glue link from the first add to the second add. + // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by + // a S/UMLAL instruction. + // loAdd UMUL_LOHI + // \ / :lo \ :hi + // \ / \ [no multiline comment] + // ADDC | hiAdd + // \ :glue / / + // \ / / + // ADDE + // + assert(AddcNode->getOpcode() == ISD::ADDC && "Expect an ADDC"); + SDValue AddcOp0 = AddcNode->getOperand(0); + SDValue AddcOp1 = AddcNode->getOperand(1); + + // Check if the two operands are from the same mul_lohi node. + if (AddcOp0.getNode() == AddcOp1.getNode()) + return SDValue(); + + assert(AddcNode->getNumValues() == 2 && + AddcNode->getValueType(0) == MVT::i32 && + AddcNode->getValueType(1) == MVT::Glue && + "Expect ADDC with two result values: i32, glue"); + + // Check that the ADDC adds the low result of the S/UMUL_LOHI. + if (AddcOp0->getOpcode() != ISD::UMUL_LOHI && + AddcOp0->getOpcode() != ISD::SMUL_LOHI && + AddcOp1->getOpcode() != ISD::UMUL_LOHI && + AddcOp1->getOpcode() != ISD::SMUL_LOHI) + return SDValue(); + + // Look for the glued ADDE. + SDNode* AddeNode = AddcNode->getGluedUser(); + if (AddeNode == NULL) + return SDValue(); + + // Make sure it is really an ADDE. + if (AddeNode->getOpcode() != ISD::ADDE) + return SDValue(); + + assert(AddeNode->getNumOperands() == 3 && + AddeNode->getOperand(2).getValueType() == MVT::Glue && + "ADDE node has the wrong inputs"); + + // Check for the triangle shape. + SDValue AddeOp0 = AddeNode->getOperand(0); + SDValue AddeOp1 = AddeNode->getOperand(1); + + // Make sure that the ADDE operands are not coming from the same node. + if (AddeOp0.getNode() == AddeOp1.getNode()) + return SDValue(); + + // Find the MUL_LOHI node walking up ADDE's operands. + bool IsLeftOperandMUL = false; + SDValue MULOp = findMUL_LOHI(AddeOp0); + if (MULOp == SDValue()) + MULOp = findMUL_LOHI(AddeOp1); + else + IsLeftOperandMUL = true; + if (MULOp == SDValue()) + return SDValue(); + + // Figure out the right opcode. + unsigned Opc = MULOp->getOpcode(); + unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL; + + // Figure out the high and low input values to the MLAL node. + SDValue* HiMul = &MULOp; + SDValue* HiAdd = NULL; + SDValue* LoMul = NULL; + SDValue* LowAdd = NULL; + + if (IsLeftOperandMUL) + HiAdd = &AddeOp1; + else + HiAdd = &AddeOp0; + + + if (AddcOp0->getOpcode() == Opc) { + LoMul = &AddcOp0; + LowAdd = &AddcOp1; + } + if (AddcOp1->getOpcode() == Opc) { + LoMul = &AddcOp1; + LowAdd = &AddcOp0; + } + + if (LoMul == NULL) + return SDValue(); + + if (LoMul->getNode() != HiMul->getNode()) + return SDValue(); + + // Create the merged node. + SelectionDAG &DAG = DCI.DAG; + + // Build operand list. + SmallVector<SDValue, 8> Ops; + Ops.push_back(LoMul->getOperand(0)); + Ops.push_back(LoMul->getOperand(1)); + Ops.push_back(*LowAdd); + Ops.push_back(*HiAdd); + + SDValue MLALNode = DAG.getNode(FinalOpc, AddcNode->getDebugLoc(), + DAG.getVTList(MVT::i32, MVT::i32), + &Ops[0], Ops.size()); + + // Replace the ADDs' nodes uses by the MLA node's values. + SDValue HiMLALResult(MLALNode.getNode(), 1); + DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult); + + SDValue LoMLALResult(MLALNode.getNode(), 0); + DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult); + + // Return original node to notify the driver to stop replacing. + SDValue resNode(AddcNode, 0); + return resNode; +} + +/// PerformADDCCombine - Target-specific dag combine transform from +/// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL. +static SDValue PerformADDCCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + + return AddCombineTo64bitMLAL(N, DCI, Subtarget); + +} + /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with /// operands N0 and N1. This is a helper for PerformADDCombine that is /// called with the default operands, and if that fails, with commuted @@ -8764,6 +8963,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { switch (N->getOpcode()) { default: break; + case ISD::ADDC: return PerformADDCCombine(N, DCI, Subtarget); case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget); case ISD::SUB: return PerformSUBCombine(N, DCI); case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 13b83de..2b8f382 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -173,6 +173,9 @@ namespace llvm { VMULLs, // ...signed VMULLu, // ...unsigned + UMLAL, // 64bit Unsigned Accumulate Multiply + SMLAL, // 64bit Signed Accumulate Multiply + // Operands of the standard BUILD_VECTOR node are not legalized, which // is fine if BUILD_VECTORs are always lowered to shuffles or other // operations, but for ARM some BUILD_VECTORs are legal as-is and their @@ -257,6 +260,11 @@ namespace llvm { virtual const char *getTargetNodeName(unsigned Opcode) const; + virtual bool isSelectSupported(SelectSupportKind Kind) const { + // ARM does not support scalar condition selects on vectors. + return (Kind != ScalarCondVectorVal); + } + /// getSetCCResultType - Return the value type to use for ISD::SETCC. virtual EVT getSetCCResultType(EVT VT) const; diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 992aba5..e23989e 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -83,6 +83,13 @@ def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3, SDTCisInt<0>, SDTCisVT<1, i32>, SDTCisVT<4, i32>]>; + +def SDT_ARM64bitmlal : SDTypeProfile<2,4, [ SDTCisVT<0, i32>, SDTCisVT<1, i32>, + SDTCisVT<2, i32>, SDTCisVT<3, i32>, + SDTCisVT<4, i32>, SDTCisVT<5, i32> ] >; +def ARMUmlal : SDNode<"ARMISD::UMLAL", SDT_ARM64bitmlal>; +def ARMSmlal : SDNode<"ARMISD::SMLAL", SDT_ARM64bitmlal>; + // Node definitions. def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; def ARMWrapperDYN : SDNode<"ARMISD::WrapperDYN", SDTIntUnaryOp>; @@ -90,9 +97,10 @@ def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>; def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>; def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart, - [SDNPHasChain, SDNPOutGlue]>; + [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>; def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeqEnd, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + [SDNPHasChain, SDNPSideEffect, + SDNPOptInGlue, SDNPOutGlue]>; def ARMcopystructbyval : SDNode<"ARMISD::COPY_STRUCT_BYVAL" , SDT_ARMStructByVal, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, @@ -148,14 +156,16 @@ def ARMsube : SDNode<"ARMISD::SUBE", SDTBinaryArithWithFlagsInOut>; def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>; def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", - SDT_ARMEH_SJLJ_Setjmp, [SDNPHasChain]>; + SDT_ARMEH_SJLJ_Setjmp, + [SDNPHasChain, SDNPSideEffect]>; def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP", - SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain]>; + SDT_ARMEH_SJLJ_Longjmp, + [SDNPHasChain, SDNPSideEffect]>; def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER, - [SDNPHasChain]>; + [SDNPHasChain, SDNPSideEffect]>; def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER, - [SDNPHasChain]>; + [SDNPHasChain, SDNPSideEffect]>; def ARMPreload : SDNode<"ARMISD::PRELOAD", SDT_ARMPREFETCH, [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>; @@ -275,7 +285,7 @@ def imm16_31 : ImmLeaf<i32, [{ def so_imm_neg_asmoperand : AsmOperandClass { let Name = "ARMSOImmNeg"; } def so_imm_neg : Operand<i32>, PatLeaf<(imm), [{ - int64_t Value = -(int)N->getZExtValue(); + unsigned Value = -(unsigned)N->getZExtValue(); return Value && ARM_AM::getSOImmVal(Value) != -1; }], imm_neg_XFORM> { let ParserMatchClass = so_imm_neg_asmoperand; @@ -1791,12 +1801,15 @@ def ADR : AI1<{0,?,?,0}, (outs GPR:$Rd), (ins adrlabel:$label), let Inst{15-12} = Rd; let Inst{11-0} = label{11-0}; } + +let hasSideEffects = 1 in { def LEApcrel : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, pred:$p), 4, IIC_iALUi, []>; def LEApcrelJT : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, nohash_imm:$id, pred:$p), 4, IIC_iALUi, []>; +} //===----------------------------------------------------------------------===// // Control Flow Instructions. @@ -3399,6 +3412,18 @@ class AsMul1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin, let Inst{11-8} = Rm; let Inst{3-0} = Rn; } +class AsMla1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : AsMul1I<opcod, oops, iops, itin, opc, asm, pattern> { + bits<4> RdLo; + bits<4> RdHi; + bits<4> Rm; + bits<4> Rn; + let Inst{19-16} = RdHi; + let Inst{15-12} = RdLo; + let Inst{11-8} = Rm; + let Inst{3-0} = Rn; +} // FIXME: The v5 pseudos are only necessary for the additional Constraint // property. Remove them when it's possible to add those properties @@ -3481,14 +3506,14 @@ def UMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), } // Multiply + accumulate -def SMLAL : AsMul1I64<0b0000111, (outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64, +def SMLAL : AsMla1I64<0b0000111, (outs GPR:$RdLo, GPR:$RdHi), + (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), IIC_iMAC64, "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - Requires<[IsARM, HasV6]>; -def UMLAL : AsMul1I64<0b0000101, (outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64, + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>; +def UMLAL : AsMla1I64<0b0000101, (outs GPR:$RdLo, GPR:$RdHi), + (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), IIC_iMAC64, "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - Requires<[IsARM, HasV6]>; + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>; def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64, @@ -3504,17 +3529,22 @@ def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi), let Inst{3-0} = Rn; } -let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in { +let Constraints = "$RLo = $RdLo,$RHi = $RdHi" in { def SMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), + (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, pred:$p, cc_out:$s), 4, IIC_iMAC64, [], - (SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, + (SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, + pred:$p, cc_out:$s)>, Requires<[IsARM, NoV6]>; def UMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), + (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, pred:$p, cc_out:$s), 4, IIC_iMAC64, [], - (UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, + (UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, + pred:$p, cc_out:$s)>, Requires<[IsARM, NoV6]>; +} + +let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in { def UMAALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm, pred:$p), 4, IIC_iMAC64, [], @@ -3986,48 +4016,6 @@ def MVNCCi : ARMPseudoInst<(outs GPR:$Rd), [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $Rd">; -// Conditional instructions -multiclass AsI1_bincc_irs<Instruction iri, Instruction irr, Instruction irsi, - Instruction irsr, - InstrItinClass iii, InstrItinClass iir, - InstrItinClass iis> { - def ri : ARMPseudoExpand<(outs GPR:$Rd), - (ins GPR:$Rfalse, GPR:$Rn, so_imm:$imm, - pred:$p, cc_out:$s), - 4, iii, [], - (iri GPR:$Rd, GPR:$Rn, so_imm:$imm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; - def rr : ARMPseudoExpand<(outs GPR:$Rd), - (ins GPR:$Rfalse, GPR:$Rn, GPR:$Rm, - pred:$p, cc_out:$s), - 4, iir, [], - (irr GPR:$Rd, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; - def rsi : ARMPseudoExpand<(outs GPR:$Rd), - (ins GPR:$Rfalse, GPR:$Rn, so_reg_imm:$shift, - pred:$p, cc_out:$s), - 4, iis, [], - (irsi GPR:$Rd, GPR:$Rn, so_reg_imm:$shift, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; - def rsr : ARMPseudoExpand<(outs GPRnopc:$Rd), - (ins GPRnopc:$Rfalse, GPRnopc:$Rn, so_reg_reg:$shift, - pred:$p, cc_out:$s), - 4, iis, [], - (irsr GPR:$Rd, GPR:$Rn, so_reg_reg:$shift, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; -} - -defm ANDCC : AsI1_bincc_irs<ANDri, ANDrr, ANDrsi, ANDrsr, - IIC_iBITi, IIC_iBITr, IIC_iBITsr>; -defm ORRCC : AsI1_bincc_irs<ORRri, ORRrr, ORRrsi, ORRrsr, - IIC_iBITi, IIC_iBITr, IIC_iBITsr>; -defm EORCC : AsI1_bincc_irs<EORri, EORrr, EORrsi, EORrsr, - IIC_iBITi, IIC_iBITr, IIC_iBITsr>; -defm ADDCC : AsI1_bincc_irs<ADDri, ADDrr, ADDrsi, ADDrsr, - IIC_iBITi, IIC_iBITr, IIC_iBITsr>; -defm SUBCC : AsI1_bincc_irs<SUBri, SUBrr, SUBrsi, SUBrsr, - IIC_iBITi, IIC_iBITr, IIC_iBITsr>; - } // neverHasSideEffects diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 048d340..8158a11 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -1980,7 +1980,7 @@ def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8, def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16, NEONvgetlaneu, addrmode6> { let Inst{7-6} = lane{1-0}; - let Inst{4} = Rn{5}; + let Inst{4} = Rn{4}; } def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt, @@ -2023,7 +2023,7 @@ def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8, def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16, NEONvgetlaneu, addrmode6> { let Inst{7-6} = lane{1-0}; - let Inst{4} = Rn{5}; + let Inst{4} = Rn{4}; } def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store, extractelt, addrmode6oneL32> { @@ -5045,25 +5045,23 @@ def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V), GPR:$R, imm:$lane))]> { let Inst{21} = lane{0}; } + +def VSETLNi8Q : PseudoNeonI<(outs QPR:$V), + (ins QPR:$src1, GPR:$R, VectorIndex8:$lane), + IIC_VMOVISL, "", + [(set QPR:$V, (vector_insert (v16i8 QPR:$src1), + GPR:$R, imm:$lane))]>; +def VSETLNi16Q : PseudoNeonI<(outs QPR:$V), + (ins QPR:$src1, GPR:$R, VectorIndex16:$lane), + IIC_VMOVISL, "", + [(set QPR:$V, (vector_insert (v8i16 QPR:$src1), + GPR:$R, imm:$lane))]>; } -def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane), - (v16i8 (INSERT_SUBREG QPR:$src1, - (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1, - (DSubReg_i8_reg imm:$lane))), - GPR:$src2, (SubReg_i8_lane imm:$lane))), - (DSubReg_i8_reg imm:$lane)))>; -def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane), - (v8i16 (INSERT_SUBREG QPR:$src1, - (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1, - (DSubReg_i16_reg imm:$lane))), - GPR:$src2, (SubReg_i16_lane imm:$lane))), - (DSubReg_i16_reg imm:$lane)))>; + def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane), - (v4i32 (INSERT_SUBREG QPR:$src1, - (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1, - (DSubReg_i32_reg imm:$lane))), - GPR:$src2, (SubReg_i32_lane imm:$lane))), - (DSubReg_i32_reg imm:$lane)))>; + (v4i32 (INSERT_SUBREG QPR:$src1, + GPR:$src2, + (SSubReg_f32_reg imm:$lane)))>; def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)), (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)), diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 554f6d9..e171f8b 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -1200,6 +1200,7 @@ let neverHasSideEffects = 1, isReMaterializable = 1 in def tLEApcrel : tPseudoInst<(outs tGPR:$Rd), (ins i32imm:$label, pred:$p), 2, IIC_iALUi, []>; +let hasSideEffects = 1 in def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd), (ins i32imm:$label, nohash_imm:$id, pred:$p), 2, IIC_iALUi, []>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 8ecf009..f1a6cce 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -523,6 +523,23 @@ class T2MulLong<bits<3> opc22_20, bits<4> opc7_4, let Inst{7-4} = opc7_4; let Inst{3-0} = Rm; } +class T2MlaLong<bits<3> opc22_20, bits<4> opc7_4, + dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : T2I<oops, iops, itin, opc, asm, pattern> { + bits<4> RdLo; + bits<4> RdHi; + bits<4> Rn; + bits<4> Rm; + + let Inst{31-23} = 0b111110111; + let Inst{22-20} = opc22_20; + let Inst{19-16} = Rn; + let Inst{15-12} = RdLo; + let Inst{11-8} = RdHi; + let Inst{7-4} = opc7_4; + let Inst{3-0} = Rm; +} /// T2I_bin_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a @@ -757,33 +774,6 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, let Inst{24} = 1; let Inst{23-21} = op23_21; } - - // Predicated versions. - def CCri : t2PseudoExpand<(outs GPRnopc:$Rd), - (ins GPRnopc:$Rfalse, GPRnopc:$Rn, t2_so_imm:$imm, - pred:$p, cc_out:$s), 4, IIC_iALUi, [], - (!cast<Instruction>(NAME#ri) GPRnopc:$Rd, - GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; - def CCri12 : t2PseudoExpand<(outs GPRnopc:$Rd), - (ins GPRnopc:$Rfalse, GPR:$Rn, imm0_4095:$imm, - pred:$p), - 4, IIC_iALUi, [], - (!cast<Instruction>(NAME#ri12) GPRnopc:$Rd, - GPR:$Rn, imm0_4095:$imm, pred:$p)>, - RegConstraint<"$Rfalse = $Rd">; - def CCrr : t2PseudoExpand<(outs GPRnopc:$Rd), - (ins GPRnopc:$Rfalse, GPRnopc:$Rn, rGPR:$Rm, - pred:$p, cc_out:$s), 4, IIC_iALUr, [], - (!cast<Instruction>(NAME#rr) GPRnopc:$Rd, - GPRnopc:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; - def CCrs : t2PseudoExpand<(outs GPRnopc:$Rd), - (ins GPRnopc:$Rfalse, GPRnopc:$Rn, t2_so_reg:$Rm, - pred:$p, cc_out:$s), 4, IIC_iALUsi, [], - (!cast<Instruction>(NAME#rs) GPRnopc:$Rd, - GPRnopc:$Rn, t2_so_reg:$Rm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; } /// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns @@ -1200,6 +1190,7 @@ def t2ADR : T2PCOneRegImm<(outs rGPR:$Rd), let neverHasSideEffects = 1, isReMaterializable = 1 in def t2LEApcrel : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, pred:$p), 4, IIC_iALUi, []>; +let hasSideEffects = 1 in def t2LEApcrelJT : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, nohash_imm:$id, pred:$p), 4, IIC_iALUi, @@ -2437,15 +2428,17 @@ def t2UMULL : T2MulLong<0b010, 0b0000, } // isCommutable // Multiply + accumulate -def t2SMLAL : T2MulLong<0b100, 0b0000, +def t2SMLAL : T2MlaLong<0b100, 0b0000, (outs rGPR:$RdLo, rGPR:$RdHi), - (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64, - "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>; + (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64, + "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">; -def t2UMLAL : T2MulLong<0b110, 0b0000, +def t2UMLAL : T2MlaLong<0b110, 0b0000, (outs rGPR:$RdLo, rGPR:$RdHi), - (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64, - "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>; + (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64, + "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">; def t2UMAAL : T2MulLong<0b110, 0b0110, (outs rGPR:$RdLo, rGPR:$RdHi), @@ -3049,37 +3042,6 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd), RegConstraint<"$false = $Rd">; } // isCodeGenOnly = 1 -multiclass T2I_bincc_irs<Instruction iri, Instruction irr, Instruction irs, - InstrItinClass iii, InstrItinClass iir, InstrItinClass iis> { - // shifted imm - def ri : t2PseudoExpand<(outs rGPR:$Rd), - (ins rGPR:$Rfalse, rGPR:$Rn, t2_so_imm:$imm, - pred:$p, cc_out:$s), - 4, iii, [], - (iri rGPR:$Rd, rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; - // register - def rr : t2PseudoExpand<(outs rGPR:$Rd), - (ins rGPR:$Rfalse, rGPR:$Rn, rGPR:$Rm, - pred:$p, cc_out:$s), - 4, iir, [], - (irr rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; - // shifted register - def rs : t2PseudoExpand<(outs rGPR:$Rd), - (ins rGPR:$Rfalse, rGPR:$Rn, t2_so_reg:$ShiftedRm, - pred:$p, cc_out:$s), - 4, iis, [], - (irs rGPR:$Rd, rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; -} // T2I_bincc_irs - -defm t2ANDCC : T2I_bincc_irs<t2ANDri, t2ANDrr, t2ANDrs, - IIC_iBITi, IIC_iBITr, IIC_iBITsi>; -defm t2ORRCC : T2I_bincc_irs<t2ORRri, t2ORRrr, t2ORRrs, - IIC_iBITi, IIC_iBITr, IIC_iBITsi>; -defm t2EORCC : T2I_bincc_irs<t2EORri, t2EORrr, t2EORrs, - IIC_iBITi, IIC_iBITr, IIC_iBITsi>; } // neverHasSideEffects //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp index a812e21..2dc49a9 100644 --- a/lib/Target/ARM/ARMJITInfo.cpp +++ b/lib/Target/ARM/ARMJITInfo.cpp @@ -169,7 +169,7 @@ void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn, intptr_t LazyPtr = getIndirectSymAddr(Fn); if (!LazyPtr) { // In PIC mode, the function stub is loading a lazy-ptr. - LazyPtr= (intptr_t)emitGlobalValueIndirectSym((GlobalValue*)F, Fn, JCE); + LazyPtr= (intptr_t)emitGlobalValueIndirectSym((const GlobalValue*)F, Fn, JCE); DEBUG(if (F) errs() << "JIT: Indirect symbol emitted at [" << LazyPtr << "] for GV '" << F->getName() << "'\n"; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 3a5957b..e1e2f6e 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -181,49 +181,44 @@ class ARMAsmParser : public MCTargetAsmParser { OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index); // Asm Match Converter Methods - bool cvtT2LdrdPre(MCInst &Inst, unsigned Opcode, - const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtT2StrdPre(MCInst &Inst, unsigned Opcode, - const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode, + void cvtT2LdrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); + void cvtT2StrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); + void cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode, + void cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, + void cvtLdWriteBackRegAddrMode2(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode, + void cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtStWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode, + void cvtStWriteBackRegAddrModeImm12(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, + void cvtStWriteBackRegAddrMode2(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, + void cvtStWriteBackRegAddrMode3(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtLdExtTWriteBackImm(MCInst &Inst, unsigned Opcode, + void cvtLdExtTWriteBackImm(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtLdExtTWriteBackReg(MCInst &Inst, unsigned Opcode, + void cvtLdExtTWriteBackReg(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtStExtTWriteBackImm(MCInst &Inst, unsigned Opcode, + void cvtStExtTWriteBackImm(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtStExtTWriteBackReg(MCInst &Inst, unsigned Opcode, + void cvtStExtTWriteBackReg(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtLdrdPre(MCInst &Inst, unsigned Opcode, - const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtStrdPre(MCInst &Inst, unsigned Opcode, - const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, + void cvtLdrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); + void cvtStrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); + void cvtLdWriteBackRegAddrMode3(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtThumbMultiply(MCInst &Inst, unsigned Opcode, + void cvtThumbMultiply(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtVLDwbFixed(MCInst &Inst, unsigned Opcode, + void cvtVLDwbFixed(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtVLDwbRegister(MCInst &Inst, unsigned Opcode, + void cvtVLDwbRegister(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtVSTwbFixed(MCInst &Inst, unsigned Opcode, + void cvtVSTwbFixed(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtVSTwbRegister(MCInst &Inst, unsigned Opcode, + void cvtVSTwbRegister(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool validateInstruction(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Ops); bool processInstruction(MCInst &Inst, @@ -267,6 +262,12 @@ public: bool MatchAndEmitInstruction(SMLoc IDLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCStreamer &Out); + + unsigned getMCInstOperandNum(unsigned Kind, MCInst &Inst, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands, + unsigned OperandNum, unsigned &NumMCOperands) { + return getMCInstOperandNumImpl(Kind, Inst, Operands, OperandNum, NumMCOperands); + } }; } // end anonymous namespace @@ -3880,8 +3881,8 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { /// cvtT2LdrdPre - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtT2LdrdPre(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtT2LdrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Rt, Rt2 ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); @@ -3892,14 +3893,13 @@ cvtT2LdrdPre(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[4])->addMemImm8s4OffsetOperands(Inst, 2); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtT2StrdPre - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtT2StrdPre(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtT2StrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateReg(0)); @@ -3910,14 +3910,13 @@ cvtT2StrdPre(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[4])->addMemImm8s4OffsetOperands(Inst, 2); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtLdWriteBackRegT2AddrModeImm8 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); @@ -3926,28 +3925,26 @@ cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[3])->addMemImm8OffsetOperands(Inst, 2); ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtStWriteBackRegT2AddrModeImm8 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); ((ARMOperand*)Operands[3])->addMemImm8OffsetOperands(Inst, 2); ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtLdWriteBackRegAddrMode2 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtLdWriteBackRegAddrMode2(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); @@ -3956,14 +3953,13 @@ cvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[3])->addAddrMode2Operands(Inst, 3); ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtLdWriteBackRegAddrModeImm12 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); @@ -3972,57 +3968,53 @@ cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[3])->addMemImm12OffsetOperands(Inst, 2); ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtStWriteBackRegAddrModeImm12 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtStWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtStWriteBackRegAddrModeImm12(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); ((ARMOperand*)Operands[3])->addMemImm12OffsetOperands(Inst, 2); ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtStWriteBackRegAddrMode2 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtStWriteBackRegAddrMode2(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); ((ARMOperand*)Operands[3])->addAddrMode2Operands(Inst, 3); ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtStWriteBackRegAddrMode3 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtStWriteBackRegAddrMode3(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); ((ARMOperand*)Operands[3])->addAddrMode3Operands(Inst, 3); ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtLdExtTWriteBackImm - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtLdExtTWriteBackImm(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtLdExtTWriteBackImm(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Rt ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); @@ -4034,14 +4026,13 @@ cvtLdExtTWriteBackImm(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[4])->addPostIdxImm8Operands(Inst, 1); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtLdExtTWriteBackReg - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtLdExtTWriteBackReg(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtLdExtTWriteBackReg(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Rt ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); @@ -4053,14 +4044,13 @@ cvtLdExtTWriteBackReg(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[4])->addPostIdxRegOperands(Inst, 2); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtStExtTWriteBackImm - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtStExtTWriteBackImm(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtStExtTWriteBackImm(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); @@ -4072,14 +4062,13 @@ cvtStExtTWriteBackImm(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[4])->addPostIdxImm8Operands(Inst, 1); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtStExtTWriteBackReg - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtStExtTWriteBackReg(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtStExtTWriteBackReg(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); @@ -4091,14 +4080,13 @@ cvtStExtTWriteBackReg(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[4])->addPostIdxRegOperands(Inst, 2); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtLdrdPre - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtLdrdPre(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtLdrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Rt, Rt2 ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); @@ -4109,14 +4097,13 @@ cvtLdrdPre(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[4])->addAddrMode3Operands(Inst, 3); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtStrdPre - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtStrdPre(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtStrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); @@ -4127,40 +4114,27 @@ cvtStrdPre(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[4])->addAddrMode3Operands(Inst, 3); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtLdWriteBackRegAddrMode3 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtLdWriteBackRegAddrMode3(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); ((ARMOperand*)Operands[3])->addAddrMode3Operands(Inst, 3); ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } -/// cvtThumbMultiple- Convert parsed operands to MCInst. +/// cvtThumbMultiply - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtThumbMultiply(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtThumbMultiply(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // The second source operand must be the same register as the destination - // operand. - if (Operands.size() == 6 && - (((ARMOperand*)Operands[3])->getReg() != - ((ARMOperand*)Operands[5])->getReg()) && - (((ARMOperand*)Operands[3])->getReg() != - ((ARMOperand*)Operands[4])->getReg())) { - Error(Operands[3]->getStartLoc(), - "destination register must match source register"); - return false; - } ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1); ((ARMOperand*)Operands[1])->addCCOutOperands(Inst, 1); // If we have a three-operand form, make sure to set Rn to be the operand @@ -4173,12 +4147,10 @@ cvtThumbMultiply(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[RegOp])->addRegOperands(Inst, 1); Inst.addOperand(Inst.getOperand(0)); ((ARMOperand*)Operands[2])->addCondCodeOperands(Inst, 2); - - return true; } -bool ARMAsmParser:: -cvtVLDwbFixed(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtVLDwbFixed(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Vd ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1); @@ -4188,11 +4160,10 @@ cvtVLDwbFixed(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } -bool ARMAsmParser:: -cvtVLDwbRegister(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtVLDwbRegister(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Vd ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1); @@ -4204,11 +4175,10 @@ cvtVLDwbRegister(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[5])->addRegOperands(Inst, 1); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } -bool ARMAsmParser:: -cvtVSTwbFixed(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtVSTwbFixed(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); @@ -4218,11 +4188,10 @@ cvtVSTwbFixed(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } -bool ARMAsmParser:: -cvtVSTwbRegister(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtVSTwbRegister(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); @@ -4234,7 +4203,6 @@ cvtVSTwbRegister(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// Parse an ARM memory expression, return false if successful else return true @@ -5377,6 +5345,25 @@ validateInstruction(MCInst &Inst, "in register list"); break; } + case ARM::tMUL: { + // The second source operand must be the same register as the destination + // operand. + // + // In this case, we must directly check the parsed operands because the + // cvtThumbMultiply() function is written in such a way that it guarantees + // this first statement is always true for the new Inst. Essentially, the + // destination is unconditionally copied into the second source operand + // without checking to see if it matches what we actually parsed. + if (Operands.size() == 6 && + (((ARMOperand*)Operands[3])->getReg() != + ((ARMOperand*)Operands[5])->getReg()) && + (((ARMOperand*)Operands[3])->getReg() != + ((ARMOperand*)Operands[4])->getReg())) { + return Error(Operands[3]->getStartLoc(), + "destination register must match source register"); + } + break; + } // Like for ldm/stm, push and pop have hi-reg handling version in Thumb2, // so only issue a diagnostic for thumb1. The instructions will be // switched to the t2 encodings in processInstruction() if necessary. @@ -7475,9 +7462,11 @@ MatchAndEmitInstruction(SMLoc IDLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCStreamer &Out) { MCInst Inst; + unsigned Kind; unsigned ErrorInfo; unsigned MatchResult; - MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo); + + MatchResult = MatchInstructionImpl(Operands, Kind, Inst, ErrorInfo); switch (MatchResult) { default: break; case Match_Success: @@ -7540,9 +7529,6 @@ MatchAndEmitInstruction(SMLoc IDLoc, case Match_MnemonicFail: return Error(IDLoc, "invalid instruction", ((ARMOperand*)Operands[0])->getLocRange()); - case Match_ConversionFail: - // The converter function will have already emitted a diagnostic. - return true; case Match_RequiresNotITBlock: return Error(IDLoc, "flag setting instruction only valid outside IT block"); case Match_RequiresITBlock: diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index c90751d..57642e1 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -2701,6 +2701,8 @@ static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Insn, unsigned align = fieldFromInstruction(Insn, 4, 1); unsigned size = fieldFromInstruction(Insn, 6, 2); + if (size == 0 && align == 1) + return MCDisassembler::Fail; align *= (1 << size); switch (Inst.getOpcode()) { @@ -2831,6 +2833,8 @@ static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn, unsigned align = fieldFromInstruction(Insn, 4, 1); if (size == 0x3) { + if (align == 0) + return MCDisassembler::Fail; size = 4; align = 16; } else { @@ -3170,7 +3174,7 @@ static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val, int imm = Val & 0xFF; if (!(Val & 0x100)) imm *= -1; - Inst.addOperand(MCOperand::CreateImm(imm << 2)); + Inst.addOperand(MCOperand::CreateImm(imm * 4)); } return MCDisassembler::Success; @@ -3710,8 +3714,16 @@ static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn, if (fieldFromInstruction(Insn, 6, 1)) return MCDisassembler::Fail; // UNDEFINED index = fieldFromInstruction(Insn, 7, 1); - if (fieldFromInstruction(Insn, 4, 2) != 0) - align = 4; + + switch (fieldFromInstruction(Insn, 4, 2)) { + case 0 : + align = 0; break; + case 3: + align = 4; break; + default: + return MCDisassembler::Fail; + } + break; } if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) @@ -3769,8 +3781,16 @@ static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn, if (fieldFromInstruction(Insn, 6, 1)) return MCDisassembler::Fail; // UNDEFINED index = fieldFromInstruction(Insn, 7, 1); - if (fieldFromInstruction(Insn, 4, 2) != 0) - align = 4; + + switch (fieldFromInstruction(Insn, 4, 2)) { + case 0: + align = 0; break; + case 3: + align = 4; break; + default: + return MCDisassembler::Fail; + } + break; } if (Rm != 0xF) { // Writeback @@ -4090,8 +4110,15 @@ static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn, inc = 2; break; case 2: - if (fieldFromInstruction(Insn, 4, 2)) - align = 4 << fieldFromInstruction(Insn, 4, 2); + switch (fieldFromInstruction(Insn, 4, 2)) { + case 0: + align = 0; break; + case 3: + return MCDisassembler::Fail; + default: + align = 4 << fieldFromInstruction(Insn, 4, 2); break; + } + index = fieldFromInstruction(Insn, 7, 1); if (fieldFromInstruction(Insn, 6, 1)) inc = 2; @@ -4164,8 +4191,15 @@ static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn, inc = 2; break; case 2: - if (fieldFromInstruction(Insn, 4, 2)) - align = 4 << fieldFromInstruction(Insn, 4, 2); + switch (fieldFromInstruction(Insn, 4, 2)) { + case 0: + align = 0; break; + case 3: + return MCDisassembler::Fail; + default: + align = 4 << fieldFromInstruction(Insn, 4, 2); break; + } + index = fieldFromInstruction(Insn, 7, 1); if (fieldFromInstruction(Insn, 6, 1)) inc = 2; diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index 7d6acbc..b53da3b 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -194,6 +194,10 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case ARM::fixup_arm_uncondbranch: Type = ELF::R_ARM_JUMP24; break; + case ARM::fixup_t2_condbranch: + case ARM::fixup_t2_uncondbranch: + Type = ELF::R_ARM_THM_JUMP24; + break; case ARM::fixup_arm_movt_hi16: case ARM::fixup_arm_movt_hi16_pcrel: Type = ELF::R_ARM_MOVT_PREL; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp index d32805e..c1aab9c 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp @@ -50,7 +50,6 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo() { Code32Directive = ".code\t32"; WeakRefDirective = "\t.weak\t"; - LCOMMDirectiveType = LCOMM::NoAlignment; HasLEB128 = true; SupportsDebugInformation = true; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 94f1082..1917564 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -783,7 +783,7 @@ getT2Imm8s4OpValue(const MCInst &MI, unsigned OpIdx, // Immediate is always encoded as positive. The 'U' bit controls add vs sub. if (Imm8 < 0) - Imm8 = -Imm8; + Imm8 = -(uint32_t)Imm8; // Scaled by 4. Imm8 /= 4; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index a51e0fa..95640f7 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -410,7 +410,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, if (Type == macho::RIT_ARM_Half) { // The other-half value only gets populated for the movt and movw // relocation entries. - uint32_t Value = 0;; + uint32_t Value = 0; switch ((unsigned)Fixup.getKind()) { default: break; case ARM::fixup_arm_movw_lo16: |