aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target
diff options
context:
space:
mode:
authorEvan Cheng <evan.cheng@apple.com>2011-01-20 08:34:58 +0000
committerEvan Cheng <evan.cheng@apple.com>2011-01-20 08:34:58 +0000
commit9fe2009956fc40f3aea46fb3c38dcfb61c4aca46 (patch)
treefe7f9182beb36b75873546f190e5a2caa6b49a14 /lib/Target
parent59315d1d54de6d9a8d721542c9ae251a00cff1ed (diff)
downloadexternal_llvm-9fe2009956fc40f3aea46fb3c38dcfb61c4aca46.zip
external_llvm-9fe2009956fc40f3aea46fb3c38dcfb61c4aca46.tar.gz
external_llvm-9fe2009956fc40f3aea46fb3c38dcfb61c4aca46.tar.bz2
Sorry, several patches in one.
TargetInstrInfo: Change produceSameValue() to take MachineRegisterInfo as an optional argument. When in SSA form, targets can use it to make more aggressive equality analysis. Machine LICM: 1. Eliminate isLoadFromConstantMemory, use MI.isInvariantLoad instead. 2. Fix a bug which prevent CSE of instructions which are not re-materializable. 3. Use improved form of produceSameValue. ARM: 1. Teach ARM produceSameValue to look pass some PIC labels. 2. Look for operands from different loads of different constant pool entries which have same values. 3. Re-implement PIC GA materialization using movw + movt. Combine the pair with a "add pc" or "ldr [pc]" to form pseudo instructions. This makes it possible to re-materialize the instruction, allow machine LICM to hoist the set of instructions out of the loop and make it possible to CSE them. It's a bit hacky, but it significantly improve code quality. 4. Some minor bug fixes as well. With the fixes, using movw + movt to materialize GAs significantly outperform the load from constantpool method. 186.crafty and 255.vortex improved > 20%, 254.gap and 176.gcc ~10%. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@123905 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target')
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp50
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.h3
-rw-r--r--lib/Target/ARM/ARMExpandPseudoInsts.cpp313
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp4
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp7
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td23
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td17
-rw-r--r--lib/Target/ARM/ARMRegisterInfo.td2
-rw-r--r--lib/Target/ARM/ARMSchedule.td2
-rw-r--r--lib/Target/ARM/ARMScheduleA8.td6
-rw-r--r--lib/Target/ARM/ARMScheduleA9.td10
-rw-r--r--lib/Target/ARM/ARMScheduleV6.td6
12 files changed, 275 insertions, 168 deletions
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 925569e..ef7458d 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -568,7 +568,6 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
return 4;
case ARM::MOVi32imm:
case ARM::t2MOVi32imm:
- case ARM::MOV_pic_ga:
return 8;
case ARM::CONSTPOOL_ENTRY:
// If this machine instr is a constant pool entry, its size is recorded as
@@ -1053,12 +1052,16 @@ ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const {
}
bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
- const MachineInstr *MI1) const {
+ const MachineInstr *MI1,
+ const MachineRegisterInfo *MRI) const {
int Opcode = MI0->getOpcode();
- if (Opcode == ARM::t2LDRpci ||
+ if (Opcode == ARM::LDRi12 ||
+ Opcode == ARM::t2LDRpci ||
Opcode == ARM::t2LDRpci_pic ||
Opcode == ARM::tLDRpci ||
- Opcode == ARM::tLDRpci_pic) {
+ Opcode == ARM::tLDRpci_pic ||
+ Opcode == ARM::MOV_pic_ga_add_pc ||
+ Opcode == ARM::t2MOV_pic_ga_add_pc) {
if (MI1->getOpcode() != Opcode)
return false;
if (MI0->getNumOperands() != MI1->getNumOperands())
@@ -1066,9 +1069,17 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
const MachineOperand &MO0 = MI0->getOperand(1);
const MachineOperand &MO1 = MI1->getOperand(1);
+ if (Opcode == ARM::LDRi12 && (!MO0.isCPI() || !MO1.isCPI()))
+ return false;
+
if (MO0.getOffset() != MO1.getOffset())
return false;
+ if (Opcode == ARM::MOV_pic_ga_add_pc ||
+ Opcode == ARM::t2MOV_pic_ga_add_pc)
+ // Ignore the PC labels.
+ return MO0.getGlobal() == MO1.getGlobal();
+
const MachineFunction *MF = MI0->getParent()->getParent();
const MachineConstantPool *MCP = MF->getConstantPool();
int CPI0 = MO0.getIndex();
@@ -1080,6 +1091,37 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
ARMConstantPoolValue *ACPV1 =
static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
return ACPV0->hasSameValue(ACPV1);
+ } else if (Opcode == ARM::PICLDR) {
+ if (MI1->getOpcode() != Opcode)
+ return false;
+ if (MI0->getNumOperands() != MI1->getNumOperands())
+ return false;
+
+ unsigned Addr0 = MI0->getOperand(1).getReg();
+ unsigned Addr1 = MI1->getOperand(1).getReg();
+ if (Addr0 != Addr1) {
+ if (!MRI ||
+ !TargetRegisterInfo::isVirtualRegister(Addr0) ||
+ !TargetRegisterInfo::isVirtualRegister(Addr1))
+ return false;
+
+ // This assumes SSA form.
+ MachineInstr *Def0 = MRI->getVRegDef(Addr0);
+ MachineInstr *Def1 = MRI->getVRegDef(Addr1);
+ // Check if the loaded value, e.g. a constantpool of a global address, are
+ // the same.
+ if (!produceSameValue(Def0, Def1, MRI))
+ return false;
+ }
+
+ for (unsigned i = 3, e = MI0->getNumOperands(); i != e; ++i) {
+ // %vreg12<def> = PICLDR %vreg11, 0, pred:14, pred:%noreg
+ const MachineOperand &MO0 = MI0->getOperand(i);
+ const MachineOperand &MO1 = MI1->getOperand(i);
+ if (!MO0.isIdenticalTo(MO1))
+ return false;
+ }
+ return true;
}
return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 0ea8a96..1fb8872 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -298,7 +298,8 @@ public:
MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const;
virtual bool produceSameValue(const MachineInstr *MI0,
- const MachineInstr *MI1) const;
+ const MachineInstr *MI1,
+ const MachineRegisterInfo *MRI) const;
/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
/// determine if two loads are loading from the same base address. It should
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 2dde617..29d4e1c 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -38,6 +38,7 @@ namespace {
const ARMBaseInstrInfo *TII;
const TargetRegisterInfo *TRI;
const ARMSubtarget *STI;
+ ARMFunctionInfo *AFI;
virtual bool runOnMachineFunction(MachineFunction &Fn);
@@ -48,12 +49,16 @@ namespace {
private:
void TransferImpOps(MachineInstr &OldMI,
MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
+ bool ExpandMI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
bool ExpandMBB(MachineBasicBlock &MBB);
void ExpandVLD(MachineBasicBlock::iterator &MBBI);
void ExpandVST(MachineBasicBlock::iterator &MBBI);
void ExpandLaneOp(MachineBasicBlock::iterator &MBBI);
void ExpandVTBL(MachineBasicBlock::iterator &MBBI,
unsigned Opc, bool IsExt, unsigned NumRegs);
+ void ExpandMOV32BitImm(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI);
};
char ARMExpandPseudo::ID = 0;
}
@@ -612,21 +617,85 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
MI.eraseFromParent();
}
-bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
- bool Modified = false;
+void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned Opcode = MI.getOpcode();
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg);
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm;
+ const MachineOperand &MO = MI.getOperand(isCC ? 2 : 1);
+ MachineInstrBuilder LO16, HI16;
+
+ if (!STI->hasV6T2Ops() &&
+ (Opcode == ARM::MOVi32imm || Opcode == ARM::MOVCCi32imm)) {
+ // Expand into a movi + orr.
+ LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg);
+ HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg);
+
+ assert (MO.isImm() && "MOVi32imm w/ non-immediate source operand!");
+ unsigned ImmVal = (unsigned)MO.getImm();
+ unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal);
+ unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
+ LO16 = LO16.addImm(SOImmValV1);
+ HI16 = HI16.addImm(SOImmValV2);
+ (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ LO16.addImm(Pred).addReg(PredReg).addReg(0);
+ HI16.addImm(Pred).addReg(PredReg).addReg(0);
+ TransferImpOps(MI, LO16, HI16);
+ MI.eraseFromParent();
+ return;
+ }
- MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
- while (MBBI != E) {
- MachineInstr &MI = *MBBI;
- MachineBasicBlock::iterator NMBBI = llvm::next(MBBI);
+ unsigned LO16Opc = 0;
+ unsigned HI16Opc = 0;
+ if (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm) {
+ LO16Opc = ARM::t2MOVi16;
+ HI16Opc = ARM::t2MOVTi16;
+ } else {
+ LO16Opc = ARM::MOVi16;
+ HI16Opc = ARM::MOVTi16;
+ }
- bool ModifiedOp = true;
- unsigned Opcode = MI.getOpcode();
- switch (Opcode) {
- default:
- ModifiedOp = false;
- break;
+ LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg);
+ HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg);
+
+ if (MO.isImm()) {
+ unsigned Imm = MO.getImm();
+ unsigned Lo16 = Imm & 0xffff;
+ unsigned Hi16 = (Imm >> 16) & 0xffff;
+ LO16 = LO16.addImm(Lo16);
+ HI16 = HI16.addImm(Hi16);
+ } else {
+ const GlobalValue *GV = MO.getGlobal();
+ unsigned TF = MO.getTargetFlags();
+ LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16);
+ HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
+ }
+ (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ LO16.addImm(Pred).addReg(PredReg);
+ HI16.addImm(Pred).addReg(PredReg);
+
+ TransferImpOps(MI, LO16, HI16);
+ MI.eraseFromParent();
+}
+
+bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ default:
+ return false;
case ARM::Int_eh_sjlj_dispatchsetup: {
MachineFunction &MF = *MI.getParent()->getParent();
const ARMBaseInstrInfo *AII =
@@ -636,7 +705,6 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
// pointer) here since eh.sjlj.setjmp and eh.sjlj.longjmp don't do it
// for us. Otherwise, expand to nothing.
if (RI.hasBasePointer(MF)) {
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
int32_t NumBytes = AFI->getFramePtrSpillOffset();
unsigned FramePtr = RI.getFrameRegister(MF);
assert(MF.getTarget().getFrameLowering()->hasFP(MF) &&
@@ -670,7 +738,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
}
MI.eraseFromParent();
- break;
+ return true;
}
case ARM::MOVsrl_flag:
@@ -678,26 +746,26 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
// These are just fancy MOVs insructions.
AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs),
MI.getOperand(0).getReg())
- .addOperand(MI.getOperand(1))
- .addReg(0)
- .addImm(ARM_AM::getSORegOpc((Opcode == ARM::MOVsrl_flag ? ARM_AM::lsr
- : ARM_AM::asr), 1)))
- .addReg(ARM::CPSR, RegState::Define);
+ .addOperand(MI.getOperand(1))
+ .addReg(0)
+ .addImm(ARM_AM::getSORegOpc((Opcode == ARM::MOVsrl_flag ? ARM_AM::lsr
+ : ARM_AM::asr), 1)))
+ .addReg(ARM::CPSR, RegState::Define);
MI.eraseFromParent();
- break;
+ return true;
}
case ARM::RRX: {
// This encodes as "MOVs Rd, Rm, rrx
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs),
MI.getOperand(0).getReg())
- .addOperand(MI.getOperand(1))
- .addOperand(MI.getOperand(1))
- .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0)))
+ .addOperand(MI.getOperand(1))
+ .addOperand(MI.getOperand(1))
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0)))
.addReg(0);
TransferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
- break;
+ return true;
}
case ARM::TPsoft: {
MachineInstrBuilder MIB =
@@ -708,7 +776,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
(*MIB).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
TransferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
- break;
+ return true;
}
case ARM::t2LDRHpci:
case ARM::t2LDRBpci:
@@ -733,13 +801,14 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(NewLdOpc), DstReg)
- .addReg(ARM::PC)
- .addOperand(MI.getOperand(1)));
+ .addReg(ARM::PC)
+ .addOperand(MI.getOperand(1)));
(*MIB).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
TransferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
- break;
+ return true;
}
+
case ARM::tLDRpci_pic:
case ARM::t2LDRpci_pic: {
unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic)
@@ -748,7 +817,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
bool DstIsDead = MI.getOperand(0).isDead();
MachineInstrBuilder MIB1 =
BuildMI(MBB, MBBI, MI.getDebugLoc(),
- TII->get(NewLdOpc), DstReg);
+ TII->get(NewLdOpc), DstReg);
if (Opcode == ARM::t2LDRpci_pic) MIB1.addReg(ARM::PC);
MIB1.addOperand(MI.getOperand(1));
AddDefaultPred(MIB1);
@@ -760,103 +829,56 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
.addOperand(MI.getOperand(2));
TransferImpOps(MI, MIB1, MIB2);
MI.eraseFromParent();
- break;
+ return true;
}
- case ARM::MOVi32imm:
- case ARM::MOVCCi32imm:
- case ARM::MOV_pic_ga:
- case ARM::t2MOVi32imm:
- case ARM::t2MOVCCi32imm:
- case ARM::t2MOV_pic_ga: {
- unsigned PredReg = 0;
- ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg);
+ case ARM::MOV_pic_ga_add_pc:
+ case ARM::MOV_pic_ga_ldr:
+ case ARM::t2MOV_pic_ga_add_pc: {
+ // Expand into movw + movw + add pc / ldr [pc]
+ unsigned LabelId = AFI->createPICLabelUId();
unsigned DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
- bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm;
- bool isPIC_GA = (Opcode == ARM::t2MOV_pic_ga || Opcode == ARM::MOV_pic_ga);
- const MachineOperand &MO = MI.getOperand(isCC ? 2 : 1);
- MachineInstrBuilder LO16, HI16;
-
- if (!STI->hasV6T2Ops() &&
- (Opcode == ARM::MOVi32imm || Opcode == ARM::MOVCCi32imm)) {
- // Expand into a movi + orr.
- LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg);
- HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri))
- .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstReg);
-
- assert (MO.isImm() && "MOVi32imm w/ non-immediate source operand!");
- unsigned ImmVal = (unsigned)MO.getImm();
- unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal);
- unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
- LO16 = LO16.addImm(SOImmValV1);
- HI16 = HI16.addImm(SOImmValV2);
- (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- LO16.addImm(Pred).addReg(PredReg).addReg(0);
- HI16.addImm(Pred).addReg(PredReg).addReg(0);
- TransferImpOps(MI, LO16, HI16);
- MI.eraseFromParent();
- break;
- }
-
- unsigned LO16Opc = 0;
- unsigned HI16Opc = 0;
- if (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm) {
- LO16Opc = ARM::t2MOVi16;
- HI16Opc = ARM::t2MOVTi16;
- } else if (Opcode == ARM::MOV_pic_ga) {
- LO16Opc = ARM::MOVi16_pic_ga;
- HI16Opc = ARM::MOVTi16_pic_ga;
- } else if (Opcode == ARM::t2MOV_pic_ga) {
- LO16Opc = ARM::t2MOVi16_pic_ga;
- HI16Opc = ARM::t2MOVTi16_pic_ga;
- } else {
- LO16Opc = ARM::MOVi16;
- HI16Opc = ARM::MOVTi16;
- }
-
- LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg);
- HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc))
+ const MachineOperand &MO1 = MI.getOperand(1);
+ const GlobalValue *GV = MO1.getGlobal();
+ unsigned TF = MO1.getTargetFlags();
+ bool isARM = Opcode != ARM::t2MOV_pic_ga_add_pc;
+ unsigned LO16Opc = isARM ? ARM::MOVi16_pic_ga : ARM::t2MOVi16_pic_ga;
+ unsigned HI16Opc = isARM ? ARM::MOVTi16_pic_ga : ARM::t2MOVTi16_pic_ga;
+ unsigned PICAddOpc = isARM
+ ? (Opcode == ARM::MOV_pic_ga_ldr ? ARM::PICLDR : ARM::PICADD)
+ : ARM::tPICADD;
+ MachineInstrBuilder MIB1 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(LO16Opc), DstReg)
+ .addGlobalAddress(GV, MO1.getOffset(),
+ TF | ARMII::MO_LO16_NONLAZY_PIC)
+ .addImm(LabelId);
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc), DstReg)
+ .addReg(DstReg)
+ .addGlobalAddress(GV, MO1.getOffset(),
+ TF | ARMII::MO_HI16_NONLAZY_PIC)
+ .addImm(LabelId);
+ MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(PICAddOpc))
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstReg);
-
- if (MO.isImm()) {
- unsigned Imm = MO.getImm();
- unsigned Lo16 = Imm & 0xffff;
- unsigned Hi16 = (Imm >> 16) & 0xffff;
- LO16 = LO16.addImm(Lo16);
- HI16 = HI16.addImm(Hi16);
- } else if (isPIC_GA) {
- unsigned LabelId = MI.getOperand(2).getImm();
- const GlobalValue *GV = MO.getGlobal();
- unsigned TF = MO.getTargetFlags();
- LO16 = LO16.addGlobalAddress(GV, MO.getOffset(),
- TF | ARMII::MO_LO16_NONLAZY_PIC)
- .addImm(LabelId);
- HI16 = HI16.addGlobalAddress(GV, MO.getOffset(),
- TF | ARMII::MO_HI16_NONLAZY_PIC)
- .addImm(LabelId);
- } else {
- const GlobalValue *GV = MO.getGlobal();
- unsigned TF = MO.getTargetFlags();
- LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16);
- HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
- }
-
- (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
-
- if (!isPIC_GA) {
- LO16.addImm(Pred).addReg(PredReg);
- HI16.addImm(Pred).addReg(PredReg);
+ .addReg(DstReg).addImm(LabelId);
+ if (isARM) {
+ AddDefaultPred(MIB2);
+ if (Opcode == ARM::MOV_pic_ga_ldr)
+ (*MIB2).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
}
- TransferImpOps(MI, LO16, HI16);
+ TransferImpOps(MI, MIB1, MIB2);
MI.eraseFromParent();
- break;
+ return true;
}
+ case ARM::MOVi32imm:
+ case ARM::MOVCCi32imm:
+ case ARM::t2MOVi32imm:
+ case ARM::t2MOVCCi32imm:
+ ExpandMOV32BitImm(MBB, MBBI);
+ return true;
+
case ARM::VMOVQQ: {
unsigned DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
@@ -869,18 +891,18 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
MachineInstrBuilder Even =
AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(ARM::VMOVQ))
- .addReg(EvenDst,
- RegState::Define | getDeadRegState(DstIsDead))
- .addReg(EvenSrc, getKillRegState(SrcIsKill)));
+ .addReg(EvenDst,
+ RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(EvenSrc, getKillRegState(SrcIsKill)));
MachineInstrBuilder Odd =
AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(ARM::VMOVQ))
- .addReg(OddDst,
- RegState::Define | getDeadRegState(DstIsDead))
- .addReg(OddSrc, getKillRegState(SrcIsKill)));
+ .addReg(OddDst,
+ RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(OddSrc, getKillRegState(SrcIsKill)));
TransferImpOps(MI, Even, Odd);
MI.eraseFromParent();
- break;
+ return true;
}
case ARM::VLDMQIA:
@@ -911,7 +933,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
TransferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
- break;
+ return true;
}
case ARM::VSTMQIA:
@@ -943,7 +965,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
TransferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
- break;
+ return true;
}
case ARM::VDUPfqf:
case ARM::VDUPfdf:{
@@ -954,7 +976,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
unsigned SrcReg = MI.getOperand(1).getReg();
unsigned Lane = getARMRegisterNumbering(SrcReg) & 1;
unsigned DReg = TRI->getMatchingSuperReg(SrcReg,
- Lane & 1 ? ARM::ssub_1 : ARM::ssub_0, &ARM::DPR_VFP2RegClass);
+ Lane & 1 ? ARM::ssub_1 : ARM::ssub_0, &ARM::DPR_VFP2RegClass);
// The lane is [0,1] for the containing DReg superregister.
// Copy the dst/src register operands.
MIB.addOperand(MI.getOperand(OpIdx++));
@@ -968,7 +990,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
TransferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
- break;
+ return true;
}
case ARM::VLD1q8Pseudo:
@@ -1044,7 +1066,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
case ARM::VLD4DUPd16Pseudo_UPD:
case ARM::VLD4DUPd32Pseudo_UPD:
ExpandVLD(MBBI);
- break;
+ return true;
case ARM::VST1q8Pseudo:
case ARM::VST1q16Pseudo:
@@ -1095,7 +1117,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
case ARM::VST4q16oddPseudo_UPD:
case ARM::VST4q32oddPseudo_UPD:
ExpandVST(MBBI);
- break;
+ return true;
case ARM::VLD1LNq8Pseudo:
case ARM::VLD1LNq16Pseudo:
@@ -1170,18 +1192,26 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
case ARM::VST4LNq16Pseudo_UPD:
case ARM::VST4LNq32Pseudo_UPD:
ExpandLaneOp(MBBI);
- break;
-
- case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false, 2); break;
- case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false, 3); break;
- case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false, 4); break;
- case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true, 2); break;
- case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true, 3); break;
- case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true, 4); break;
- }
+ return true;
+
+ case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false, 2); return true;
+ case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false, 3); return true;
+ case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false, 4); return true;
+ case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true, 2); return true;
+ case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true, 3); return true;
+ case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true, 4); return true;
+ }
+
+ return false;
+}
- if (ModifiedOp)
- Modified = true;
+bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ MachineBasicBlock::iterator NMBBI = llvm::next(MBBI);
+ Modified |= ExpandMI(MBB, MBBI);
MBBI = NMBBI;
}
@@ -1192,6 +1222,7 @@ bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
TII = static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
TRI = MF.getTarget().getRegisterInfo();
STI = &MF.getTarget().getSubtarget<ARMSubtarget>();
+ AFI = MF.getInfo<ARMFunctionInfo>();
bool Modified = false;
for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E;
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 299174b..7e1f046 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -880,8 +880,8 @@ bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
Offset = N.getOperand(0);
SDValue N1 = N.getOperand(1);
- Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
- MVT::i32);
+ Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
+ MVT::i32);
return true;
}
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index f4d16fc..a6e1fe7 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -2003,13 +2003,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
DAG.getTargetGlobalAddress(GV, dl, PtrVT));
- // FIXME: Not a constant pool!
- unsigned PICLabelIndex = AFI->createPICLabelUId();
- SDValue PICLabel = DAG.getConstant(PICLabelIndex, MVT::i32);
SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT,
- DAG.getTargetGlobalAddress(GV, dl, PtrVT),
- PICLabel);
- Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT));
if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
MachinePointerInfo::getGOT(), false, false, 0);
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 8018fe7..b5ac6f8 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -70,7 +70,7 @@ def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
// Node definitions.
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>;
-def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntBinOp>;
+def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>;
def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart,
[SDNPHasChain, SDNPOutGlue]>;
@@ -3408,11 +3408,22 @@ def MOVi32imm : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVix2,
[(set GPR:$dst, (arm_i32imm:$src))]>,
Requires<[IsARM]>;
-let isReMaterializable = 1 in
-def MOV_pic_ga : PseudoInst<(outs GPR:$dst),
- (ins i32imm:$addr, pclabel:$id), IIC_iMOVix2,
- [(set GPR:$dst, (ARMWrapperPIC tglobaladdr:$addr, imm:$id))]>,
- Requires<[IsARM, UseMovt]>;
+// Pseudo instruction that combines movw + movt + add pc.
+// It also makes it possible to rematerialize the instructions.
+// FIXME: Remove this when we can do generalized remat and when machine licm
+// can properly the instructions.
+let isReMaterializable = 1 in {
+def MOV_pic_ga_add_pc : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2addpc,
+ [(set GPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>,
+ Requires<[IsARM, UseMovt]>;
+
+let AddedComplexity = 10 in
+def MOV_pic_ga_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2ld,
+ [(set GPR:$dst, (load (ARMWrapperPIC tglobaladdr:$addr)))]>,
+ Requires<[IsARM, UseMovt]>;
+} // isReMaterializable
// ConstantPool, GlobalAddress, and JumpTable
def : ARMPat<(ARMWrapper tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>,
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index e9c7513..4954a7c 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -3245,12 +3245,15 @@ def t2MOVi32imm : PseudoInst<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVix2,
[(set rGPR:$dst, (i32 imm:$src))]>,
Requires<[IsThumb, HasV6T2]>;
-// Materialize GA with movw + movt.
+// Pseudo instruction that combines movw + movt + add pc.
+// It also makes it possible to rematerialize the instructions.
+// FIXME: Remove this when we can do generalized remat and when machine licm
+// can properly the instructions.
let isReMaterializable = 1 in
-def t2MOV_pic_ga : PseudoInst<(outs rGPR:$dst),
- (ins i32imm:$addr, pclabel:$id), IIC_iMOVix2,
- [(set rGPR:$dst, (ARMWrapperPIC tglobaladdr:$addr, imm:$id))]>,
- Requires<[IsThumb2, UseMovt]>;
+def t2MOV_pic_ga_add_pc : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2,
+ [(set rGPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>,
+ Requires<[IsThumb2, UseMovt]>;
// ConstantPool, GlobalAddress, and JumpTable
def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>,
@@ -3266,9 +3269,9 @@ def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
// be expanded into two instructions late to allow if-conversion and
// scheduling.
let canFoldAsLoad = 1, isReMaterializable = 1 in
-def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
+def t2LDRpci_pic : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr, pclabel:$cp),
IIC_iLoadiALU,
- [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
+ [(set rGPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
imm:$cp))]>,
Requires<[IsThumb2]>;
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index 8e8587a..22d15b5 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -260,7 +260,7 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
// restricted GPR register class. Many Thumb2 instructions allow the full
// register range for operands, but have undefined behaviours when PC
-// or SP (R13 or R15) are used. The ARM ARM refers to these operands
+// or SP (R13 or R15) are used. The ARM ISA refers to these operands
// via the BadReg() pseudo-code description.
def rGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
R7, R8, R9, R10, R11, R12, LR]> {
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
index 61dd3be..958c5c6 100644
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -38,6 +38,8 @@ def IIC_iMOVr : InstrItinClass;
def IIC_iMOVsi : InstrItinClass;
def IIC_iMOVsr : InstrItinClass;
def IIC_iMOVix2 : InstrItinClass;
+def IIC_iMOVix2addpc : InstrItinClass;
+def IIC_iMOVix2ld : InstrItinClass;
def IIC_iMVNi : InstrItinClass;
def IIC_iMVNr : InstrItinClass;
def IIC_iMVNsi : InstrItinClass;
diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td
index 33ba683..8d86c01 100644
--- a/lib/Target/ARM/ARMScheduleA8.td
+++ b/lib/Target/ARM/ARMScheduleA8.td
@@ -71,6 +71,12 @@ def CortexA8Itineraries : ProcessorItineraries<
InstrItinData<IIC_iMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
InstrItinData<IIC_iMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
+ InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [3]>,
+ InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LSPipe]>], [5]>,
//
// Move instructions, conditional
InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index a9632be..82c6735 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -50,6 +50,16 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_ALU0, A9_ALU1]>,
InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
+ InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [3]>,
+ InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>], [5]>,
//
// MVN instructions
InstrItinData<IIC_iMVNi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td
index 4c0e496..c1880a7 100644
--- a/lib/Target/ARM/ARMScheduleV6.td
+++ b/lib/Target/ARM/ARMScheduleV6.td
@@ -64,6 +64,12 @@ def ARMV6Itineraries : ProcessorItineraries<
InstrItinData<IIC_iMOVsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
InstrItinData<IIC_iMOVix2 , [InstrStage<1, [V6_Pipe]>,
InstrStage<1, [V6_Pipe]>], [2]>,
+ InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [3]>,
+ InstrItinData<IIC_iMOVix2ld , [InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [5]>,
//
// Move instructions, conditional
InstrItinData<IIC_iCMOVi , [InstrStage<1, [V6_Pipe]>], [3]>,