aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp2
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp11
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp76
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp130
-rw-r--r--lib/Target/ARM/ARMISelLowering.h7
-rw-r--r--lib/Target/ARM/ARMInstrThumb.td30
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td38
-rw-r--r--lib/Target/ARM/ARMLoadStoreOptimizer.cpp20
-rw-r--r--lib/Target/ARM/README-Thumb2.txt4
-rw-r--r--lib/Target/ARM/Thumb2InstrInfo.cpp98
-rw-r--r--test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll24
-rw-r--r--test/CodeGen/Thumb2/large-stack.ll6
12 files changed, 373 insertions, 73 deletions
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 9c5f3aa..911b84d 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -688,7 +688,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
unsigned OpNum = Ops[0];
unsigned Opc = MI->getOpcode();
MachineInstr *NewMI = NULL;
- if (Opc == ARM::MOVr || Opc == ARM::t2MOVr) {
+ if (Opc == ARM::MOVr || Opc == ARM::t2MOVr) { // FIXME: tMOVgpr2gpr etc.?
// If it is updating CPSR, then it cannot be folded.
if (MI->getOperand(4).getReg() != ARM::CPSR || MI->getOperand(4).isDead()) {
unsigned Pred = MI->getOperand(2).getImm();
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 97b9ad1..a72e9dd 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -1289,11 +1289,12 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
AFI->getDPRCalleeSavedAreaOffset()||
hasFP(MF)) {
if (NumBytes) {
- unsigned SUBriOpc = isARM ? ARM::SUBri : ARM::t2SUBri;
- BuildMI(MBB, MBBI, dl, TII.get(SUBriOpc), ARM::SP)
- .addReg(FramePtr)
- .addImm(NumBytes)
- .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ if (isARM)
+ emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
+ ARMCC::AL, 0, TII);
+ else
+ emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
+ ARMCC::AL, 0, TII);
} else {
// Thumb2 or ARM.
unsigned MOVrOpc = isARM ? ARM::MOVr : ARM::t2MOVr;
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index d996b24..04f4dd3 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -122,6 +122,8 @@ private:
SDNode *SelectARMIndexedLoad(SDValue Op);
SDNode *SelectT2IndexedLoad(SDValue Op);
+ /// SelectDYN_ALLOC - Select dynamic alloc for Thumb.
+ SDNode *SelectDYN_ALLOC(SDValue Op);
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
@@ -868,6 +870,59 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDValue Op) {
return NULL;
}
+SDNode *ARMDAGToDAGISel::SelectDYN_ALLOC(SDValue Op) {
+ SDNode *N = Op.getNode();
+ DebugLoc dl = N->getDebugLoc();
+ MVT VT = Op.getValueType();
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ SDValue Align = Op.getOperand(2);
+ SDValue SP = CurDAG->getRegister(ARM::SP, MVT::i32);
+ int32_t AlignVal = cast<ConstantSDNode>(Align)->getSExtValue();
+ if (AlignVal < 0)
+ // We need to align the stack. Use Thumb1 tAND which is the only thumb
+ // instruction that can read and write SP. This matches to a pseudo
+ // instruction that has a chain to ensure the result is written back to
+ // the stack pointer.
+ SP = SDValue(CurDAG->getTargetNode(ARM::tANDsp, dl, VT, SP, Align), 0);
+
+ bool isC = isa<ConstantSDNode>(Size);
+ uint32_t C = isC ? cast<ConstantSDNode>(Size)->getZExtValue() : ~0UL;
+ // Handle the most common case for both Thumb1 and Thumb2:
+ // tSUBspi - immediate is between 0 ... 508 inclusive.
+ if (C <= 508 && ((C & 3) == 0))
+ // FIXME: tSUBspi encode scale 4 implicitly.
+ return CurDAG->SelectNodeTo(N, ARM::tSUBspi_, VT, MVT::Other, SP,
+ CurDAG->getTargetConstant(C/4, MVT::i32),
+ Chain);
+
+ if (Subtarget->isThumb1Only()) {
+ // Use tADDrSPr since Thumb1 does not have a sub r, sp, r. ARMISelLowering
+ // should have negated the size operand already. FIXME: We can't insert
+ // new target independent node at this stage so we are forced to negate
+ // it earlier. Is there a better solution?
+ return CurDAG->SelectNodeTo(N, ARM::tADDspr_, VT, MVT::Other, SP, Size,
+ Chain);
+ } else if (Subtarget->isThumb2()) {
+ if (isC && Predicate_t2_so_imm(Size.getNode())) {
+ // t2SUBrSPi
+ SDValue Ops[] = { SP, CurDAG->getTargetConstant(C, MVT::i32), Chain };
+ return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPi_, VT, MVT::Other, Ops, 3);
+ } else if (isC && Predicate_imm0_4095(Size.getNode())) {
+ // t2SUBrSPi12
+ SDValue Ops[] = { SP, CurDAG->getTargetConstant(C, MVT::i32), Chain };
+ return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPi12_, VT, MVT::Other, Ops, 3);
+ } else {
+ // t2SUBrSPs
+ SDValue Ops[] = { SP, Size,
+ getI32Imm(ARM_AM::getSORegOpc(ARM_AM::lsl,0)), Chain };
+ return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPs_, VT, MVT::Other, Ops, 4);
+ }
+ }
+
+ // FIXME: Add ADD / SUB sp instructions for ARM.
+ return 0;
+}
SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
SDNode *N = Op.getNode();
@@ -941,25 +996,8 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
}
}
- case ISD::ADD: {
- if (!Subtarget->isThumb1Only())
- break;
- // Select add sp, c to tADDhirr.
- SDValue N0 = Op.getOperand(0);
- SDValue N1 = Op.getOperand(1);
- RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(Op.getOperand(0));
- RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(Op.getOperand(1));
- if (LHSR && LHSR->getReg() == ARM::SP) {
- std::swap(N0, N1);
- std::swap(LHSR, RHSR);
- }
- if (RHSR && RHSR->getReg() == ARM::SP) {
- SDValue Val = SDValue(CurDAG->getTargetNode(ARM::tMOVtgpr2gpr, dl,
- Op.getValueType(), N0, N0),0);
- return CurDAG->SelectNodeTo(N, ARM::tADDhirr, Op.getValueType(), Val, N1);
- }
- break;
- }
+ case ARMISD::DYN_ALLOC:
+ return SelectDYN_ALLOC(Op);
case ISD::MUL:
if (Subtarget->isThumb1Only())
break;
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 0bfe213..4922f7d 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -307,7 +307,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::VAEND, MVT::Other, Expand);
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+ if (Subtarget->isThumb())
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+ else
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
if (!Subtarget->hasV6Ops() && !Subtarget->isThumb2()) {
@@ -435,6 +438,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
+ case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
+
case ARMISD::VCEQ: return "ARMISD::VCEQ";
case ARMISD::VCGE: return "ARMISD::VCGE";
case ARMISD::VCGEU: return "ARMISD::VCGEU";
@@ -1399,6 +1404,53 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
}
SDValue
+ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) {
+ SDNode *Node = Op.getNode();
+ DebugLoc dl = Node->getDebugLoc();
+ MVT VT = Node->getValueType(0);
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ SDValue Align = Op.getOperand(2);
+
+ // Chain the dynamic stack allocation so that it doesn't modify the stack
+ // pointer when other instructions are using the stack.
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true));
+
+ unsigned AlignVal = cast<ConstantSDNode>(Align)->getZExtValue();
+ unsigned StackAlign = getTargetMachine().getFrameInfo()->getStackAlignment();
+ if (AlignVal > StackAlign)
+ // Do this now since selection pass cannot introduce new target
+ // independent node.
+ Align = DAG.getConstant(-(uint64_t)AlignVal, VT);
+
+ // In Thumb1 mode, there isn't a "sub r, sp, r" instruction, we will end up
+ // using a "add r, sp, r" instead. Negate the size now so we don't have to
+ // do even more horrible hack later.
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ if (AFI->isThumb1OnlyFunction()) {
+ bool Negate = true;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Size);
+ if (C) {
+ uint32_t Val = C->getZExtValue();
+ if (Val <= 508 && ((Val & 3) == 0))
+ Negate = false;
+ }
+ if (Negate)
+ Size = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, VT), Size);
+ }
+
+ SDVTList VTList = DAG.getVTList(VT, MVT::Other);
+ SDValue Ops1[] = { Chain, Size, Align };
+ SDValue Res = DAG.getNode(ARMISD::DYN_ALLOC, dl, VTList, Ops1, 3);
+ Chain = Res.getValue(1);
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true),
+ DAG.getIntPtrConstant(0, true), SDValue());
+ SDValue Ops2[] = { Res, Chain };
+ return DAG.getMergeValues(Ops2, 2, dl);
+}
+
+SDValue
ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
SDValue &Root, SelectionDAG &DAG,
DebugLoc dl) {
@@ -2396,6 +2448,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG, Subtarget);
case ISD::BR_CC: return LowerBR_CC(Op, DAG, Subtarget);
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
@@ -2454,7 +2507,8 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
switch (MI->getOpcode()) {
- default: assert(false && "Unexpected instr type to insert");
+ default:
+ llvm_unreachable("Unexpected instr type to insert");
case ARM::tMOVCCr: {
// To "insert" a SELECT_CC instruction, we actually have to insert the
// diamond control-flow pattern. The incoming instruction knows the
@@ -2509,6 +2563,78 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
return BB;
}
+
+ case ARM::tANDsp:
+ case ARM::tADDspr_:
+ case ARM::tSUBspi_:
+ case ARM::t2SUBrSPi_:
+ case ARM::t2SUBrSPi12_:
+ case ARM::t2SUBrSPs_: {
+ MachineFunction *MF = BB->getParent();
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ bool DstIsDead = MI->getOperand(0).isDead();
+ bool SrcIsKill = MI->getOperand(1).isKill();
+
+ if (SrcReg != ARM::SP) {
+ // Copy the source to SP from virtual register.
+ const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(SrcReg);
+ unsigned CopyOpc = (RC == ARM::tGPRRegisterClass)
+ ? ARM::tMOVtgpr2gpr : ARM::tMOVgpr2gpr;
+ BuildMI(BB, dl, TII->get(CopyOpc), ARM::SP)
+ .addReg(SrcReg, getKillRegState(SrcIsKill));
+ }
+
+ unsigned OpOpc = 0;
+ bool NeedPred = false, NeedCC = false, NeedOp3 = false;
+ switch (MI->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected pseudo instruction!");
+ case ARM::tANDsp:
+ OpOpc = ARM::tAND;
+ NeedPred = true;
+ break;
+ case ARM::tADDspr_:
+ OpOpc = ARM::tADDspr;
+ break;
+ case ARM::tSUBspi_:
+ OpOpc = ARM::tSUBspi;
+ break;
+ case ARM::t2SUBrSPi_:
+ OpOpc = ARM::t2SUBrSPi;
+ NeedPred = true; NeedCC = true;
+ break;
+ case ARM::t2SUBrSPi12_:
+ OpOpc = ARM::t2SUBrSPi12;
+ NeedPred = true;
+ break;
+ case ARM::t2SUBrSPs_:
+ OpOpc = ARM::t2SUBrSPs;
+ NeedPred = true; NeedCC = true; NeedOp3 = true;
+ break;
+ }
+ MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(OpOpc), ARM::SP);
+ if (OpOpc == ARM::tAND)
+ AddDefaultT1CC(MIB);
+ MIB.addReg(ARM::SP);
+ MIB.addOperand(MI->getOperand(2));
+ if (NeedOp3)
+ MIB.addOperand(MI->getOperand(3));
+ if (NeedPred)
+ AddDefaultPred(MIB);
+ if (NeedCC)
+ AddDefaultCC(MIB);
+
+ // Copy the result from SP to virtual register.
+ const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(DstReg);
+ unsigned CopyOpc = (RC == ARM::tGPRRegisterClass)
+ ? ARM::tMOVgpr2tgpr : ARM::tMOVgpr2gpr;
+ BuildMI(BB, dl, TII->get(CopyOpc))
+ .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead))
+ .addReg(ARM::SP);
+ MF->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ return BB;
+ }
}
}
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 4fe4d8b..4649c18 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -65,11 +65,13 @@ namespace llvm {
FMRRD, // double to two gprs.
FMDRR, // Two gprs to double.
- EH_SJLJ_SETJMP, // SjLj exception handling setjmp
- EH_SJLJ_LONGJMP, // SjLj exception handling longjmp
+ EH_SJLJ_SETJMP, // SjLj exception handling setjmp.
+ EH_SJLJ_LONGJMP, // SjLj exception handling longjmp.
THREAD_POINTER,
+ DYN_ALLOC, // Dynamic allocation on the stack.
+
VCEQ, // Vector compare equal.
VCGE, // Vector compare greater than or equal.
VCGEU, // Vector compare unsigned greater than or equal.
@@ -255,6 +257,7 @@ namespace llvm {
SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG);
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG);
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG);
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
SDValue Chain,
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 5dfc4fc..9b54e67 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -138,18 +138,37 @@ def tADDrPCi : T1I<(outs tGPR:$dst), (ins i32imm:$rhs), IIC_iALU,
"add $dst, pc, $rhs * 4", []>;
// ADD rd, sp, #imm8
-// FIXME: hard code sp?
def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, i32imm:$rhs), IIC_iALU,
"add $dst, $sp, $rhs * 4 @ addrspi", []>;
// ADD sp, sp, #imm7
-// FIXME: hard code sp?
-def tADDspi : T1It<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALU,
+def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALU,
"add $dst, $rhs * 4", []>;
-// FIXME: Make use of the following?
+// SUB sp, sp, #imm7
+def tSUBspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALU,
+ "sub $dst, $rhs * 4", []>;
+
// ADD rm, sp, rm
+def tADDrSPr : TI<(outs GPR:$dst), (ins GPR:$sp, GPR:$rhs), IIC_iALU,
+ "add $dst, $sp, $rhs", []>;
+
// ADD sp, rm
+def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALU,
+ "add $dst, $rhs", []>;
+
+// Pseudo instruction that will expand into a tSUBspi + a copy.
+let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
+def tSUBspi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
+ NoItinerary, "@ sub $dst, $rhs * 4", []>;
+
+def tADDspr_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
+ NoItinerary, "@ add $dst, $rhs", []>;
+
+let Defs = [CPSR] in
+def tANDsp : PseudoInst<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+ NoItinerary, "@ and $dst, $rhs", []>;
+} // usesCustomDAGSchedInserter
//===----------------------------------------------------------------------===//
// Control Flow Instructions.
@@ -549,9 +568,6 @@ def tSUBrr : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALU,
// TODO: A7-96: STMIA - store multiple.
-def tSUBspi : T1It<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALU,
- "sub $dst, $rhs * 4", []>;
-
// sign-extend byte
def tSXTB : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iALU,
"sxtb", " $dst, $src",
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 9305c8a..11b0454 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -75,7 +75,8 @@ def imm1_31 : PatLeaf<(i32 imm), [{
}]>;
/// imm0_4095 predicate - True if the 32-bit immediate is in the range [0.4095].
-def imm0_4095 : PatLeaf<(i32 imm), [{
+def imm0_4095 : Operand<i32>,
+ PatLeaf<(i32 imm), [{
return (uint32_t)N->getZExtValue() < 4096;
}]>;
@@ -239,7 +240,7 @@ multiclass T2I_bin_ii12rs<string opc, PatFrag opnode, bit Commutable = 0> {
opc, ".w $dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>;
// 12-bit imm
- def ri12 : T2sI<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALU,
+ def ri12 : T2sI<(outs GPR:$dst), (ins GPR:$lhs, imm0_4095:$rhs), IIC_iALU,
!strconcat(opc, "w"), " $dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, imm0_4095:$rhs))]>;
// register
@@ -431,6 +432,39 @@ def t2LEApcrelJT : T2XI<(outs GPR:$dst),
(ins i32imm:$label, i32imm:$id, pred:$p), IIC_iALU,
"adr$p.w $dst, #${label}_${id:no_hash}", []>;
+
+// ADD r, sp, {so_imm|i12}
+def t2ADDrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), IIC_iALU,
+ "add", ".w $dst, $sp, $imm", []>;
+def t2ADDrSPi12 : T2I<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm), IIC_iALU,
+ "addw", " $dst, $sp, $imm", []>;
+
+// ADD r, sp, so_reg
+def t2ADDrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), IIC_iALU,
+ "add", ".w $dst, $sp, $rhs", []>;
+
+// SUB r, sp, {so_imm|i12}
+def t2SUBrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), IIC_iALU,
+ "sub", ".w $dst, $sp, $imm", []>;
+def t2SUBrSPi12 : T2I<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm), IIC_iALU,
+ "subw", " $dst, $sp, $imm", []>;
+
+// SUB r, sp, so_reg
+def t2SUBrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), IIC_iALU,
+ "sub", " $dst, $sp, $rhs", []>;
+
+
+// Pseudo instruction that will expand into a t2SUBrSPi + a copy.
+let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
+def t2SUBrSPi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm),
+ NoItinerary, "@ sub.w $dst, $sp, $imm", []>;
+def t2SUBrSPi12_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm),
+ NoItinerary, "@ subw $dst, $sp, $imm", []>;
+def t2SUBrSPs_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs),
+ NoItinerary, "@ sub $dst, $sp, $rhs", []>;
+} // usesCustomDAGSchedInserter
+
+
//===----------------------------------------------------------------------===//
// Load / store Instructions.
//
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index a81b790..ea80e47 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -206,9 +206,13 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
if (NewBase == 0)
return false;
}
- int BaseOpc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
+ int BaseOpc = !isThumb2
+ ? ARM::ADDri
+ : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
if (Offset < 0) {
- BaseOpc = isThumb2 ? ARM::t2SUBri : ARM::SUBri;
+ BaseOpc = !isThumb2
+ ? ARM::SUBri
+ : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
Offset = - Offset;
}
int ImmedOffset = isThumb2
@@ -329,6 +333,9 @@ static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
if (!MI)
return false;
if (MI->getOpcode() != ARM::t2SUBri &&
+ MI->getOpcode() != ARM::t2SUBrSPi &&
+ MI->getOpcode() != ARM::t2SUBrSPi12 &&
+ MI->getOpcode() != ARM::tSUBspi &&
MI->getOpcode() != ARM::SUBri)
return false;
@@ -336,9 +343,10 @@ static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
if (Bytes <= 0 || (Limit && Bytes >= Limit))
return false;
+ unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
return (MI->getOperand(0).getReg() == Base &&
MI->getOperand(1).getReg() == Base &&
- MI->getOperand(2).getImm() == Bytes &&
+ (MI->getOperand(2).getImm()*Scale) == Bytes &&
getInstrPredicate(MI, MyPredReg) == Pred &&
MyPredReg == PredReg);
}
@@ -350,6 +358,9 @@ static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
if (!MI)
return false;
if (MI->getOpcode() != ARM::t2ADDri &&
+ MI->getOpcode() != ARM::t2ADDrSPi &&
+ MI->getOpcode() != ARM::t2ADDrSPi12 &&
+ MI->getOpcode() != ARM::tADDspi &&
MI->getOpcode() != ARM::ADDri)
return false;
@@ -357,9 +368,10 @@ static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
// Make sure the offset fits in 8 bits.
return false;
+ unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
return (MI->getOperand(0).getReg() == Base &&
MI->getOperand(1).getReg() == Base &&
- MI->getOperand(2).getImm() == Bytes &&
+ (MI->getOperand(2).getImm()*Scale) == Bytes &&
getInstrPredicate(MI, MyPredReg) == Pred &&
MyPredReg == PredReg);
}
diff --git a/lib/Target/ARM/README-Thumb2.txt b/lib/Target/ARM/README-Thumb2.txt
index 651d393..48b5278 100644
--- a/lib/Target/ARM/README-Thumb2.txt
+++ b/lib/Target/ARM/README-Thumb2.txt
@@ -1,3 +1,7 @@
//===---------------------------------------------------------------------===//
// Random ideas for the ARM backend (Thumb2 specific).
//===---------------------------------------------------------------------===//
+
+We should be using ADD / SUB rd, sp, rm <shift> instructions.
+
+copyRegToReg should use tMOVgpr2gpr instead of t2MOVr?
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index cf2d099..2b329e0 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -65,11 +65,15 @@ Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
if (DestRC == ARM::GPRRegisterClass &&
SrcRC == ARM::GPRRegisterClass) {
- AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2MOVr),
- DestReg).addReg(SrcReg)));
+ // FIXME: Just use tMOVgpr2gpr since it's shorter?
+ if (SrcReg == ARM::SP || DestReg == ARM::SP)
+ BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg);
+ else
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2MOVr),
+ DestReg).addReg(SrcReg)));
return true;
} else if (DestRC == ARM::GPRRegisterClass &&
- SrcRC == ARM::tGPRRegisterClass) {
+ SrcRC == ARM::tGPRRegisterClass) {
BuildMI(MBB, I, DL, get(ARM::tMOVtgpr2gpr), DestReg).addReg(SrcReg);
return true;
} else if (DestRC == ARM::tGPRRegisterClass &&
@@ -162,26 +166,62 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
}
while (NumBytes) {
- unsigned Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri;
unsigned ThisVal = NumBytes;
- if (ARM_AM::getT2SOImmVal(NumBytes) != -1) {
- NumBytes = 0;
- } else if (ThisVal < 4096) {
- Opc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12;
- NumBytes = 0;
+ unsigned Opc = 0;
+ if (DestReg == ARM::SP && BaseReg != ARM::SP) {
+ // mov sp, rn. Note t2MOVr cannot be used.
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr),DestReg).addReg(BaseReg);
+ BaseReg = ARM::SP;
+ continue;
+ }
+
+ if (BaseReg == ARM::SP) {
+ // sub sp, sp, #imm7
+ if (DestReg == ARM::SP && (ThisVal < ((1 << 7)-1) * 4)) {
+ assert((ThisVal & 3) == 0 && "Stack update is not multiple of 4?");
+ Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
+ // FIXME: Fix Thumb1 immediate encoding.
+ BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(BaseReg).addImm(ThisVal/4);
+ NumBytes = 0;
+ continue;
+ }
+
+ // sub rd, sp, so_imm
+ Opc = isSub ? ARM::t2SUBrSPi : ARM::t2ADDrSPi;
+ if (ARM_AM::getT2SOImmVal(NumBytes) != -1) {
+ NumBytes = 0;
+ } else {
+ // FIXME: Move this to ARMAddressingModes.h?
+ unsigned RotAmt = CountLeadingZeros_32(ThisVal);
+ ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
+ NumBytes &= ~ThisVal;
+ assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
+ "Bit extraction didn't work?");
+ }
} else {
- // FIXME: Move this to ARMAddressingModes.h?
- unsigned RotAmt = CountLeadingZeros_32(ThisVal);
- ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
- NumBytes &= ~ThisVal;
- assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
- "Bit extraction didn't work?");
+ assert(DestReg != ARM::SP && BaseReg != ARM::SP);
+ Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri;
+ if (ARM_AM::getT2SOImmVal(NumBytes) != -1) {
+ NumBytes = 0;
+ } else if (ThisVal < 4096) {
+ Opc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12;
+ NumBytes = 0;
+ } else {
+ // FIXME: Move this to ARMAddressingModes.h?
+ unsigned RotAmt = CountLeadingZeros_32(ThisVal);
+ ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
+ NumBytes &= ~ThisVal;
+ assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
+ "Bit extraction didn't work?");
+ }
}
// Build the new ADD / SUB.
- BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
- .addReg(BaseReg, RegState::Kill).addImm(ThisVal)
- .addImm((unsigned)Pred).addReg(PredReg).addReg(0);
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(BaseReg, RegState::Kill)
+ .addImm(ThisVal)));
+
BaseReg = DestReg;
}
}
@@ -288,7 +328,6 @@ int llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
unsigned FrameReg, int Offset,
const ARMBaseInstrInfo &TII) {
unsigned Opcode = MI.getOpcode();
- unsigned NewOpc = Opcode;
const TargetInstrDesc &Desc = MI.getDesc();
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
bool isSub = false;
@@ -299,9 +338,12 @@ int llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
if (Opcode == ARM::t2ADDri || Opcode == ARM::t2ADDri12) {
Offset += MI.getOperand(FrameRegIdx+1).getImm();
+
+ bool isSP = FrameReg == ARM::SP;
if (Offset == 0) {
// Turn it into a move.
- MI.setDesc(TII.get(ARM::t2MOVr));
+ unsigned NewOpc = isSP ? ARM::tMOVgpr2gpr : ARM::t2MOVr;
+ MI.setDesc(TII.get(NewOpc));
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
MI.RemoveOperand(FrameRegIdx+1);
return 0;
@@ -310,23 +352,23 @@ int llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
if (Offset < 0) {
Offset = -Offset;
isSub = true;
- MI.setDesc(TII.get(ARM::t2SUBri));
+ MI.setDesc(TII.get(isSP ? ARM::t2SUBrSPi : ARM::t2SUBri));
+ } else {
+ MI.setDesc(TII.get(isSP ? ARM::t2ADDrSPi : ARM::t2ADDri));
}
// Common case: small offset, fits into instruction.
if (ARM_AM::getT2SOImmVal(Offset) != -1) {
- NewOpc = isSub ? ARM::t2SUBri : ARM::t2ADDri;
- if (NewOpc != Opcode)
- MI.setDesc(TII.get(NewOpc));
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
return 0;
}
// Another common case: imm12.
if (Offset < 4096) {
- NewOpc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12;
- if (NewOpc != Opcode)
- MI.setDesc(TII.get(NewOpc));
+ unsigned NewOpc = isSP
+ ? (isSub ? ARM::t2SUBrSPi12 : ARM::t2ADDrSPi12)
+ : (isSub ? ARM::t2SUBri12 : ARM::t2ADDri12);
+ MI.setDesc(TII.get(NewOpc));
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
return 0;
@@ -346,7 +388,7 @@ int llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
} else {
// AddrModeT2_so cannot handle any offset. If there is no offset
// register then we change to an immediate version.
- NewOpc = Opcode;
+ unsigned NewOpc = Opcode;
if (AddrMode == ARMII::AddrModeT2_so) {
unsigned OffsetReg = MI.getOperand(FrameRegIdx+1).getReg();
if (OffsetReg != 0) {
diff --git a/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll b/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll
new file mode 100644
index 0000000..bdc23ba
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll
@@ -0,0 +1,24 @@
+; RUN: llvm-as < %s | llc -mtriple=thumbv7-none-linux-gnueabi | FileCheck %s
+; PR4659
+; PR4682
+
+define hidden arm_aapcscc i32 @__gcov_execlp(i8* %path, i8* %arg, ...) nounwind {
+entry:
+; CHECK: __gcov_execlp:
+; CHECK: mov sp, r7
+; CHECK: sub sp, #1 * 4
+ call arm_aapcscc void @__gcov_flush() nounwind
+ br i1 undef, label %bb5, label %bb
+
+bb: ; preds = %bb, %entry
+ br i1 undef, label %bb5, label %bb
+
+bb5: ; preds = %bb, %entry
+ %0 = alloca i8*, i32 undef, align 4 ; <i8**> [#uses=1]
+ %1 = call arm_aapcscc i32 @execvp(i8* %path, i8** %0) nounwind ; <i32> [#uses=1]
+ ret i32 %1
+}
+
+declare hidden arm_aapcscc void @__gcov_flush()
+
+declare arm_aapcscc i32 @execvp(i8*, i8**) nounwind
diff --git a/test/CodeGen/Thumb2/large-stack.ll b/test/CodeGen/Thumb2/large-stack.ll
index 60604f0..d183da4 100644
--- a/test/CodeGen/Thumb2/large-stack.ll
+++ b/test/CodeGen/Thumb2/large-stack.ll
@@ -2,7 +2,7 @@
define void @test1() {
; CHECK: test1:
-; CHECK: sub.w sp, sp, #256
+; CHECK: sub sp, #64 * 4
%tmp = alloca [ 64 x i32 ] , align 4
ret void
}
@@ -10,7 +10,7 @@ define void @test1() {
define void @test2() {
; CHECK: test2:
; CHECK: sub.w sp, sp, #4160
-; CHECK: sub.w sp, sp, #8
+; CHECK: sub sp, #2 * 4
%tmp = alloca [ 4168 x i8 ] , align 4
ret void
}
@@ -18,7 +18,7 @@ define void @test2() {
define i32 @test3() {
; CHECK: test3:
; CHECK: sub.w sp, sp, #805306368
-; CHECK: sub.w sp, sp, #16
+; CHECK: sub sp, #4 * 4
%retval = alloca i32, align 4
%tmp = alloca i32, align 4
%a = alloca [805306369 x i8], align 16