diff options
author | Evan Cheng <evan.cheng@apple.com> | 2011-01-20 08:34:58 +0000 |
---|---|---|
committer | Evan Cheng <evan.cheng@apple.com> | 2011-01-20 08:34:58 +0000 |
commit | 9fe2009956fc40f3aea46fb3c38dcfb61c4aca46 (patch) | |
tree | fe7f9182beb36b75873546f190e5a2caa6b49a14 /lib/Target | |
parent | 59315d1d54de6d9a8d721542c9ae251a00cff1ed (diff) | |
download | external_llvm-9fe2009956fc40f3aea46fb3c38dcfb61c4aca46.zip external_llvm-9fe2009956fc40f3aea46fb3c38dcfb61c4aca46.tar.gz external_llvm-9fe2009956fc40f3aea46fb3c38dcfb61c4aca46.tar.bz2 |
Sorry, several patches in one.
TargetInstrInfo:
Change produceSameValue() to take MachineRegisterInfo as an optional argument.
When in SSA form, targets can use it to make more aggressive equality analysis.
Machine LICM:
1. Eliminate isLoadFromConstantMemory, use MI.isInvariantLoad instead.
2. Fix a bug which prevents CSE of instructions which are not re-materializable.
3. Use improved form of produceSameValue.
ARM:
1. Teach ARM produceSameValue to look past some PIC labels.
2. Look for operands from different loads of different constant pool entries
which have same values.
3. Re-implement PIC GA materialization using movw + movt. Combine the pair with
an "add pc" or "ldr [pc]" to form pseudo instructions. This makes it possible
to re-materialize the instruction, allows machine LICM to hoist the set of
instructions out of the loop, and makes it possible to CSE them. It's a bit
hacky, but it significantly improves code quality.
4. Some minor bug fixes as well.
With the fixes, using movw + movt to materialize GAs significantly outperforms the
load-from-constant-pool method. 186.crafty and 255.vortex improved > 20%, 254.gap
and 176.gcc ~10%.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@123905 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target')
-rw-r--r-- | lib/Target/ARM/ARMBaseInstrInfo.cpp | 50 | ||||
-rw-r--r-- | lib/Target/ARM/ARMBaseInstrInfo.h | 3 | ||||
-rw-r--r-- | lib/Target/ARM/ARMExpandPseudoInsts.cpp | 313 | ||||
-rw-r--r-- | lib/Target/ARM/ARMISelDAGToDAG.cpp | 4 | ||||
-rw-r--r-- | lib/Target/ARM/ARMISelLowering.cpp | 7 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrInfo.td | 23 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrThumb2.td | 17 | ||||
-rw-r--r-- | lib/Target/ARM/ARMRegisterInfo.td | 2 | ||||
-rw-r--r-- | lib/Target/ARM/ARMSchedule.td | 2 | ||||
-rw-r--r-- | lib/Target/ARM/ARMScheduleA8.td | 6 | ||||
-rw-r--r-- | lib/Target/ARM/ARMScheduleA9.td | 10 | ||||
-rw-r--r-- | lib/Target/ARM/ARMScheduleV6.td | 6 |
12 files changed, 275 insertions, 168 deletions
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 925569e..ef7458d 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -568,7 +568,6 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { return 4; case ARM::MOVi32imm: case ARM::t2MOVi32imm: - case ARM::MOV_pic_ga: return 8; case ARM::CONSTPOOL_ENTRY: // If this machine instr is a constant pool entry, its size is recorded as @@ -1053,12 +1052,16 @@ ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const { } bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, - const MachineInstr *MI1) const { + const MachineInstr *MI1, + const MachineRegisterInfo *MRI) const { int Opcode = MI0->getOpcode(); - if (Opcode == ARM::t2LDRpci || + if (Opcode == ARM::LDRi12 || + Opcode == ARM::t2LDRpci || Opcode == ARM::t2LDRpci_pic || Opcode == ARM::tLDRpci || - Opcode == ARM::tLDRpci_pic) { + Opcode == ARM::tLDRpci_pic || + Opcode == ARM::MOV_pic_ga_add_pc || + Opcode == ARM::t2MOV_pic_ga_add_pc) { if (MI1->getOpcode() != Opcode) return false; if (MI0->getNumOperands() != MI1->getNumOperands()) @@ -1066,9 +1069,17 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, const MachineOperand &MO0 = MI0->getOperand(1); const MachineOperand &MO1 = MI1->getOperand(1); + if (Opcode == ARM::LDRi12 && (!MO0.isCPI() || !MO1.isCPI())) + return false; + if (MO0.getOffset() != MO1.getOffset()) return false; + if (Opcode == ARM::MOV_pic_ga_add_pc || + Opcode == ARM::t2MOV_pic_ga_add_pc) + // Ignore the PC labels. 
+ return MO0.getGlobal() == MO1.getGlobal(); + const MachineFunction *MF = MI0->getParent()->getParent(); const MachineConstantPool *MCP = MF->getConstantPool(); int CPI0 = MO0.getIndex(); @@ -1080,6 +1091,37 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, ARMConstantPoolValue *ACPV1 = static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal); return ACPV0->hasSameValue(ACPV1); + } else if (Opcode == ARM::PICLDR) { + if (MI1->getOpcode() != Opcode) + return false; + if (MI0->getNumOperands() != MI1->getNumOperands()) + return false; + + unsigned Addr0 = MI0->getOperand(1).getReg(); + unsigned Addr1 = MI1->getOperand(1).getReg(); + if (Addr0 != Addr1) { + if (!MRI || + !TargetRegisterInfo::isVirtualRegister(Addr0) || + !TargetRegisterInfo::isVirtualRegister(Addr1)) + return false; + + // This assumes SSA form. + MachineInstr *Def0 = MRI->getVRegDef(Addr0); + MachineInstr *Def1 = MRI->getVRegDef(Addr1); + // Check if the loaded value, e.g. a constantpool of a global address, are + // the same. 
+ if (!produceSameValue(Def0, Def1, MRI)) + return false; + } + + for (unsigned i = 3, e = MI0->getNumOperands(); i != e; ++i) { + // %vreg12<def> = PICLDR %vreg11, 0, pred:14, pred:%noreg + const MachineOperand &MO0 = MI0->getOperand(i); + const MachineOperand &MO1 = MI1->getOperand(i); + if (!MO0.isIdenticalTo(MO1)) + return false; + } + return true; } return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs); diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 0ea8a96..1fb8872 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -298,7 +298,8 @@ public: MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const; virtual bool produceSameValue(const MachineInstr *MI0, - const MachineInstr *MI1) const; + const MachineInstr *MI1, + const MachineRegisterInfo *MRI) const; /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to /// determine if two loads are loading from the same base address. 
It should diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 2dde617..29d4e1c 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -38,6 +38,7 @@ namespace { const ARMBaseInstrInfo *TII; const TargetRegisterInfo *TRI; const ARMSubtarget *STI; + ARMFunctionInfo *AFI; virtual bool runOnMachineFunction(MachineFunction &Fn); @@ -48,12 +49,16 @@ namespace { private: void TransferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI); + bool ExpandMI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI); bool ExpandMBB(MachineBasicBlock &MBB); void ExpandVLD(MachineBasicBlock::iterator &MBBI); void ExpandVST(MachineBasicBlock::iterator &MBBI); void ExpandLaneOp(MachineBasicBlock::iterator &MBBI); void ExpandVTBL(MachineBasicBlock::iterator &MBBI, unsigned Opc, bool IsExt, unsigned NumRegs); + void ExpandMOV32BitImm(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI); }; char ARMExpandPseudo::ID = 0; } @@ -612,21 +617,85 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI, MI.eraseFromParent(); } -bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { - bool Modified = false; +void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI) { + MachineInstr &MI = *MBBI; + unsigned Opcode = MI.getOpcode(); + unsigned PredReg = 0; + ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg); + unsigned DstReg = MI.getOperand(0).getReg(); + bool DstIsDead = MI.getOperand(0).isDead(); + bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm; + const MachineOperand &MO = MI.getOperand(isCC ? 2 : 1); + MachineInstrBuilder LO16, HI16; + + if (!STI->hasV6T2Ops() && + (Opcode == ARM::MOVi32imm || Opcode == ARM::MOVCCi32imm)) { + // Expand into a movi + orr. 
+ LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg); + HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri)) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg); + + assert (MO.isImm() && "MOVi32imm w/ non-immediate source operand!"); + unsigned ImmVal = (unsigned)MO.getImm(); + unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal); + unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal); + LO16 = LO16.addImm(SOImmValV1); + HI16 = HI16.addImm(SOImmValV2); + (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + LO16.addImm(Pred).addReg(PredReg).addReg(0); + HI16.addImm(Pred).addReg(PredReg).addReg(0); + TransferImpOps(MI, LO16, HI16); + MI.eraseFromParent(); + return; + } - MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); - while (MBBI != E) { - MachineInstr &MI = *MBBI; - MachineBasicBlock::iterator NMBBI = llvm::next(MBBI); + unsigned LO16Opc = 0; + unsigned HI16Opc = 0; + if (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm) { + LO16Opc = ARM::t2MOVi16; + HI16Opc = ARM::t2MOVTi16; + } else { + LO16Opc = ARM::MOVi16; + HI16Opc = ARM::MOVTi16; + } - bool ModifiedOp = true; - unsigned Opcode = MI.getOpcode(); - switch (Opcode) { - default: - ModifiedOp = false; - break; + LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg); + HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc)) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg); + + if (MO.isImm()) { + unsigned Imm = MO.getImm(); + unsigned Lo16 = Imm & 0xffff; + unsigned Hi16 = (Imm >> 16) & 0xffff; + LO16 = LO16.addImm(Lo16); + HI16 = HI16.addImm(Hi16); + } else { + const GlobalValue *GV = MO.getGlobal(); + unsigned TF = MO.getTargetFlags(); + LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16); + HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | 
ARMII::MO_HI16); + } + (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + LO16.addImm(Pred).addReg(PredReg); + HI16.addImm(Pred).addReg(PredReg); + + TransferImpOps(MI, LO16, HI16); + MI.eraseFromParent(); +} + +bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) { + MachineInstr &MI = *MBBI; + unsigned Opcode = MI.getOpcode(); + switch (Opcode) { + default: + return false; case ARM::Int_eh_sjlj_dispatchsetup: { MachineFunction &MF = *MI.getParent()->getParent(); const ARMBaseInstrInfo *AII = @@ -636,7 +705,6 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { // pointer) here since eh.sjlj.setjmp and eh.sjlj.longjmp don't do it // for us. Otherwise, expand to nothing. if (RI.hasBasePointer(MF)) { - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); int32_t NumBytes = AFI->getFramePtrSpillOffset(); unsigned FramePtr = RI.getFrameRegister(MF); assert(MF.getTarget().getFrameLowering()->hasFP(MF) && @@ -670,7 +738,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { } MI.eraseFromParent(); - break; + return true; } case ARM::MOVsrl_flag: @@ -678,26 +746,26 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { // These are just fancy MOVs insructions. AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs), MI.getOperand(0).getReg()) - .addOperand(MI.getOperand(1)) - .addReg(0) - .addImm(ARM_AM::getSORegOpc((Opcode == ARM::MOVsrl_flag ? ARM_AM::lsr - : ARM_AM::asr), 1))) - .addReg(ARM::CPSR, RegState::Define); + .addOperand(MI.getOperand(1)) + .addReg(0) + .addImm(ARM_AM::getSORegOpc((Opcode == ARM::MOVsrl_flag ? 
ARM_AM::lsr + : ARM_AM::asr), 1))) + .addReg(ARM::CPSR, RegState::Define); MI.eraseFromParent(); - break; + return true; } case ARM::RRX: { // This encodes as "MOVs Rd, Rm, rrx MachineInstrBuilder MIB = AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs), MI.getOperand(0).getReg()) - .addOperand(MI.getOperand(1)) - .addOperand(MI.getOperand(1)) - .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0))) + .addOperand(MI.getOperand(1)) + .addOperand(MI.getOperand(1)) + .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0))) .addReg(0); TransferImpOps(MI, MIB, MIB); MI.eraseFromParent(); - break; + return true; } case ARM::TPsoft: { MachineInstrBuilder MIB = @@ -708,7 +776,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { (*MIB).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); TransferImpOps(MI, MIB, MIB); MI.eraseFromParent(); - break; + return true; } case ARM::t2LDRHpci: case ARM::t2LDRBpci: @@ -733,13 +801,14 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { MachineInstrBuilder MIB = AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewLdOpc), DstReg) - .addReg(ARM::PC) - .addOperand(MI.getOperand(1))); + .addReg(ARM::PC) + .addOperand(MI.getOperand(1))); (*MIB).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); TransferImpOps(MI, MIB, MIB); MI.eraseFromParent(); - break; + return true; } + case ARM::tLDRpci_pic: case ARM::t2LDRpci_pic: { unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic) @@ -748,7 +817,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { bool DstIsDead = MI.getOperand(0).isDead(); MachineInstrBuilder MIB1 = BuildMI(MBB, MBBI, MI.getDebugLoc(), - TII->get(NewLdOpc), DstReg); + TII->get(NewLdOpc), DstReg); if (Opcode == ARM::t2LDRpci_pic) MIB1.addReg(ARM::PC); MIB1.addOperand(MI.getOperand(1)); AddDefaultPred(MIB1); @@ -760,103 +829,56 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { .addOperand(MI.getOperand(2)); TransferImpOps(MI, MIB1, MIB2); MI.eraseFromParent(); - break; + 
return true; } - case ARM::MOVi32imm: - case ARM::MOVCCi32imm: - case ARM::MOV_pic_ga: - case ARM::t2MOVi32imm: - case ARM::t2MOVCCi32imm: - case ARM::t2MOV_pic_ga: { - unsigned PredReg = 0; - ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg); + case ARM::MOV_pic_ga_add_pc: + case ARM::MOV_pic_ga_ldr: + case ARM::t2MOV_pic_ga_add_pc: { + // Expand into movw + movw + add pc / ldr [pc] + unsigned LabelId = AFI->createPICLabelUId(); unsigned DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); - bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm; - bool isPIC_GA = (Opcode == ARM::t2MOV_pic_ga || Opcode == ARM::MOV_pic_ga); - const MachineOperand &MO = MI.getOperand(isCC ? 2 : 1); - MachineInstrBuilder LO16, HI16; - - if (!STI->hasV6T2Ops() && - (Opcode == ARM::MOVi32imm || Opcode == ARM::MOVCCi32imm)) { - // Expand into a movi + orr. - LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg); - HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri)) - .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstReg); - - assert (MO.isImm() && "MOVi32imm w/ non-immediate source operand!"); - unsigned ImmVal = (unsigned)MO.getImm(); - unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal); - unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal); - LO16 = LO16.addImm(SOImmValV1); - HI16 = HI16.addImm(SOImmValV2); - (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); - (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); - LO16.addImm(Pred).addReg(PredReg).addReg(0); - HI16.addImm(Pred).addReg(PredReg).addReg(0); - TransferImpOps(MI, LO16, HI16); - MI.eraseFromParent(); - break; - } - - unsigned LO16Opc = 0; - unsigned HI16Opc = 0; - if (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm) { - LO16Opc = ARM::t2MOVi16; - HI16Opc = ARM::t2MOVTi16; - } else if (Opcode == ARM::MOV_pic_ga) { - LO16Opc = ARM::MOVi16_pic_ga; - HI16Opc = 
ARM::MOVTi16_pic_ga; - } else if (Opcode == ARM::t2MOV_pic_ga) { - LO16Opc = ARM::t2MOVi16_pic_ga; - HI16Opc = ARM::t2MOVTi16_pic_ga; - } else { - LO16Opc = ARM::MOVi16; - HI16Opc = ARM::MOVTi16; - } - - LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg); - HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc)) + const MachineOperand &MO1 = MI.getOperand(1); + const GlobalValue *GV = MO1.getGlobal(); + unsigned TF = MO1.getTargetFlags(); + bool isARM = Opcode != ARM::t2MOV_pic_ga_add_pc; + unsigned LO16Opc = isARM ? ARM::MOVi16_pic_ga : ARM::t2MOVi16_pic_ga; + unsigned HI16Opc = isARM ? ARM::MOVTi16_pic_ga : ARM::t2MOVTi16_pic_ga; + unsigned PICAddOpc = isARM + ? (Opcode == ARM::MOV_pic_ga_ldr ? ARM::PICLDR : ARM::PICADD) + : ARM::tPICADD; + MachineInstrBuilder MIB1 = BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(LO16Opc), DstReg) + .addGlobalAddress(GV, MO1.getOffset(), + TF | ARMII::MO_LO16_NONLAZY_PIC) + .addImm(LabelId); + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc), DstReg) + .addReg(DstReg) + .addGlobalAddress(GV, MO1.getOffset(), + TF | ARMII::MO_HI16_NONLAZY_PIC) + .addImm(LabelId); + MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(PICAddOpc)) .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstReg); - - if (MO.isImm()) { - unsigned Imm = MO.getImm(); - unsigned Lo16 = Imm & 0xffff; - unsigned Hi16 = (Imm >> 16) & 0xffff; - LO16 = LO16.addImm(Lo16); - HI16 = HI16.addImm(Hi16); - } else if (isPIC_GA) { - unsigned LabelId = MI.getOperand(2).getImm(); - const GlobalValue *GV = MO.getGlobal(); - unsigned TF = MO.getTargetFlags(); - LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), - TF | ARMII::MO_LO16_NONLAZY_PIC) - .addImm(LabelId); - HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), - TF | ARMII::MO_HI16_NONLAZY_PIC) - .addImm(LabelId); - } else { - const GlobalValue *GV = MO.getGlobal(); - unsigned TF = MO.getTargetFlags(); - LO16 = LO16.addGlobalAddress(GV, 
MO.getOffset(), TF | ARMII::MO_LO16); - HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16); - } - - (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); - (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); - - if (!isPIC_GA) { - LO16.addImm(Pred).addReg(PredReg); - HI16.addImm(Pred).addReg(PredReg); + .addReg(DstReg).addImm(LabelId); + if (isARM) { + AddDefaultPred(MIB2); + if (Opcode == ARM::MOV_pic_ga_ldr) + (*MIB2).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); } - TransferImpOps(MI, LO16, HI16); + TransferImpOps(MI, MIB1, MIB2); MI.eraseFromParent(); - break; + return true; } + case ARM::MOVi32imm: + case ARM::MOVCCi32imm: + case ARM::t2MOVi32imm: + case ARM::t2MOVCCi32imm: + ExpandMOV32BitImm(MBB, MBBI); + return true; + case ARM::VMOVQQ: { unsigned DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); @@ -869,18 +891,18 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { MachineInstrBuilder Even = AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::VMOVQ)) - .addReg(EvenDst, - RegState::Define | getDeadRegState(DstIsDead)) - .addReg(EvenSrc, getKillRegState(SrcIsKill))); + .addReg(EvenDst, + RegState::Define | getDeadRegState(DstIsDead)) + .addReg(EvenSrc, getKillRegState(SrcIsKill))); MachineInstrBuilder Odd = AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::VMOVQ)) - .addReg(OddDst, - RegState::Define | getDeadRegState(DstIsDead)) - .addReg(OddSrc, getKillRegState(SrcIsKill))); + .addReg(OddDst, + RegState::Define | getDeadRegState(DstIsDead)) + .addReg(OddSrc, getKillRegState(SrcIsKill))); TransferImpOps(MI, Even, Odd); MI.eraseFromParent(); - break; + return true; } case ARM::VLDMQIA: @@ -911,7 +933,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead)); TransferImpOps(MI, MIB, MIB); MI.eraseFromParent(); - break; + return true; } case ARM::VSTMQIA: @@ 
-943,7 +965,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { TransferImpOps(MI, MIB, MIB); MI.eraseFromParent(); - break; + return true; } case ARM::VDUPfqf: case ARM::VDUPfdf:{ @@ -954,7 +976,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { unsigned SrcReg = MI.getOperand(1).getReg(); unsigned Lane = getARMRegisterNumbering(SrcReg) & 1; unsigned DReg = TRI->getMatchingSuperReg(SrcReg, - Lane & 1 ? ARM::ssub_1 : ARM::ssub_0, &ARM::DPR_VFP2RegClass); + Lane & 1 ? ARM::ssub_1 : ARM::ssub_0, &ARM::DPR_VFP2RegClass); // The lane is [0,1] for the containing DReg superregister. // Copy the dst/src register operands. MIB.addOperand(MI.getOperand(OpIdx++)); @@ -968,7 +990,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { TransferImpOps(MI, MIB, MIB); MI.eraseFromParent(); - break; + return true; } case ARM::VLD1q8Pseudo: @@ -1044,7 +1066,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { case ARM::VLD4DUPd16Pseudo_UPD: case ARM::VLD4DUPd32Pseudo_UPD: ExpandVLD(MBBI); - break; + return true; case ARM::VST1q8Pseudo: case ARM::VST1q16Pseudo: @@ -1095,7 +1117,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { case ARM::VST4q16oddPseudo_UPD: case ARM::VST4q32oddPseudo_UPD: ExpandVST(MBBI); - break; + return true; case ARM::VLD1LNq8Pseudo: case ARM::VLD1LNq16Pseudo: @@ -1170,18 +1192,26 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { case ARM::VST4LNq16Pseudo_UPD: case ARM::VST4LNq32Pseudo_UPD: ExpandLaneOp(MBBI); - break; - - case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false, 2); break; - case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false, 3); break; - case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false, 4); break; - case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true, 2); break; - case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true, 3); break; - case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true, 4); break; - } + return true; + + case ARM::VTBL2Pseudo: 
ExpandVTBL(MBBI, ARM::VTBL2, false, 2); return true; + case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false, 3); return true; + case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false, 4); return true; + case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true, 2); return true; + case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true, 3); return true; + case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true, 4); return true; + } + + return false; +} - if (ModifiedOp) - Modified = true; +bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { + bool Modified = false; + + MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + while (MBBI != E) { + MachineBasicBlock::iterator NMBBI = llvm::next(MBBI); + Modified |= ExpandMI(MBB, MBBI); MBBI = NMBBI; } @@ -1192,6 +1222,7 @@ bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) { TII = static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); TRI = MF.getTarget().getRegisterInfo(); STI = &MF.getTarget().getSubtarget<ARMSubtarget>(); + AFI = MF.getInfo<ARMFunctionInfo>(); bool Modified = false; for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E; diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 299174b..7e1f046 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -880,8 +880,8 @@ bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N, if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) { Offset = N.getOperand(0); SDValue N1 = N.getOperand(1); - Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(), - MVT::i32); + Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(), + MVT::i32); return true; } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index f4d16fc..a6e1fe7 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -2003,13 +2003,8 @@ SDValue 
ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, DAG.getTargetGlobalAddress(GV, dl, PtrVT)); - // FIXME: Not a constant pool! - unsigned PICLabelIndex = AFI->createPICLabelUId(); - SDValue PICLabel = DAG.getConstant(PICLabelIndex, MVT::i32); SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, - DAG.getTargetGlobalAddress(GV, dl, PtrVT), - PICLabel); - Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); + DAG.getTargetGlobalAddress(GV, dl, PtrVT)); if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, MachinePointerInfo::getGOT(), false, false, 0); diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 8018fe7..b5ac6f8 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -70,7 +70,7 @@ def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, // Node definitions. def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>; -def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntBinOp>; +def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>; def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart, [SDNPHasChain, SDNPOutGlue]>; @@ -3408,11 +3408,22 @@ def MOVi32imm : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVix2, [(set GPR:$dst, (arm_i32imm:$src))]>, Requires<[IsARM]>; -let isReMaterializable = 1 in -def MOV_pic_ga : PseudoInst<(outs GPR:$dst), - (ins i32imm:$addr, pclabel:$id), IIC_iMOVix2, - [(set GPR:$dst, (ARMWrapperPIC tglobaladdr:$addr, imm:$id))]>, - Requires<[IsARM, UseMovt]>; +// Pseudo instruction that combines movw + movt + add pc. +// It also makes it possible to rematerialize the instructions. +// FIXME: Remove this when we can do generalized remat and when machine licm +// can properly the instructions. 
+let isReMaterializable = 1 in { +def MOV_pic_ga_add_pc : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), + IIC_iMOVix2addpc, + [(set GPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>, + Requires<[IsARM, UseMovt]>; + +let AddedComplexity = 10 in +def MOV_pic_ga_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), + IIC_iMOVix2ld, + [(set GPR:$dst, (load (ARMWrapperPIC tglobaladdr:$addr)))]>, + Requires<[IsARM, UseMovt]>; +} // isReMaterializable // ConstantPool, GlobalAddress, and JumpTable def : ARMPat<(ARMWrapper tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>, diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index e9c7513..4954a7c 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -3245,12 +3245,15 @@ def t2MOVi32imm : PseudoInst<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVix2, [(set rGPR:$dst, (i32 imm:$src))]>, Requires<[IsThumb, HasV6T2]>; -// Materialize GA with movw + movt. +// Pseudo instruction that combines movw + movt + add pc. +// It also makes it possible to rematerialize the instructions. +// FIXME: Remove this when we can do generalized remat and when machine licm +// can properly the instructions. let isReMaterializable = 1 in -def t2MOV_pic_ga : PseudoInst<(outs rGPR:$dst), - (ins i32imm:$addr, pclabel:$id), IIC_iMOVix2, - [(set rGPR:$dst, (ARMWrapperPIC tglobaladdr:$addr, imm:$id))]>, - Requires<[IsThumb2, UseMovt]>; +def t2MOV_pic_ga_add_pc : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr), + IIC_iMOVix2, + [(set rGPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>, + Requires<[IsThumb2, UseMovt]>; // ConstantPool, GlobalAddress, and JumpTable def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>, @@ -3266,9 +3269,9 @@ def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id), // be expanded into two instructions late to allow if-conversion and // scheduling. 
let canFoldAsLoad = 1, isReMaterializable = 1 in -def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp), +def t2LDRpci_pic : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr, pclabel:$cp), IIC_iLoadiALU, - [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)), + [(set rGPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)), imm:$cp))]>, Requires<[IsThumb2]>; diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 8e8587a..22d15b5 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -260,7 +260,7 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, // restricted GPR register class. Many Thumb2 instructions allow the full // register range for operands, but have undefined behaviours when PC -// or SP (R13 or R15) are used. The ARM ARM refers to these operands +// or SP (R13 or R15) are used. The ARM ISA refers to these operands // via the BadReg() pseudo-code description. 
def rGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR]> { diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index 61dd3be..958c5c6 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -38,6 +38,8 @@ def IIC_iMOVr : InstrItinClass; def IIC_iMOVsi : InstrItinClass; def IIC_iMOVsr : InstrItinClass; def IIC_iMOVix2 : InstrItinClass; +def IIC_iMOVix2addpc : InstrItinClass; +def IIC_iMOVix2ld : InstrItinClass; def IIC_iMVNi : InstrItinClass; def IIC_iMVNr : InstrItinClass; def IIC_iMVNsi : InstrItinClass; diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td index 33ba683..8d86c01 100644 --- a/lib/Target/ARM/ARMScheduleA8.td +++ b/lib/Target/ARM/ARMScheduleA8.td @@ -71,6 +71,12 @@ def CortexA8Itineraries : ProcessorItineraries< InstrItinData<IIC_iMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>, InstrItinData<IIC_iMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, + InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [3]>, + InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LSPipe]>], [5]>, // // Move instructions, conditional InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index a9632be..82c6735 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -50,6 +50,16 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_ALU0, A9_ALU1]>, InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>, + InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_ALU0, A9_ALU1]>, + InstrStage<1, [A9_ALU0, 
A9_ALU1]>, + InstrStage<1, [A9_ALU0, A9_ALU1]>], [3]>, + InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_ALU0, A9_ALU1]>, + InstrStage<1, [A9_ALU0, A9_ALU1]>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_AGU], 0>, + InstrStage<1, [A9_LSUnit]>], [5]>, // // MVN instructions InstrItinData<IIC_iMVNi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td index 4c0e496..c1880a7 100644 --- a/lib/Target/ARM/ARMScheduleV6.td +++ b/lib/Target/ARM/ARMScheduleV6.td @@ -64,6 +64,12 @@ def ARMV6Itineraries : ProcessorItineraries< InstrItinData<IIC_iMOVsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>, InstrItinData<IIC_iMOVix2 , [InstrStage<1, [V6_Pipe]>, InstrStage<1, [V6_Pipe]>], [2]>, + InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [V6_Pipe]>, + InstrStage<1, [V6_Pipe]>, + InstrStage<1, [V6_Pipe]>], [3]>, + InstrItinData<IIC_iMOVix2ld , [InstrStage<1, [V6_Pipe]>, + InstrStage<1, [V6_Pipe]>, + InstrStage<1, [V6_Pipe]>], [5]>, // // Move instructions, conditional InstrItinData<IIC_iCMOVi , [InstrStage<1, [V6_Pipe]>], [3]>, |