diff options
-rw-r--r-- | lib/Target/ARM/ARMISelDAGToDAG.cpp | 15 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrThumb.td | 10 | ||||
-rw-r--r-- | lib/Target/ARM/README-Thumb.txt | 9 | ||||
-rw-r--r-- | test/CodeGen/Thumb/ldr_ext.ll | 17 |
4 files changed, 27 insertions, 24 deletions
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index ce2a170..75e956b 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -454,16 +454,11 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue Op, SDValue N, // FIXME dl should come from the parent load or store, not the address DebugLoc dl = Op.getDebugLoc(); if (N.getOpcode() != ISD::ADD) { - Base = N; - // We must materialize a zero in a reg! Returning a constant here - // wouldn't work without additional code to position the node within - // ISel's topological ordering in a place where ISel will process it - // normally. Instead, just explicitly issue a tMOVri8 node! - SDValue CC = CurDAG->getRegister(ARM::CPSR, MVT::i32); - SDValue Pred = CurDAG->getTargetConstant(0xEULL, MVT::i32); - SDValue PredReg = CurDAG->getRegister(0, MVT::i32); - SDValue Ops[] = { CC, CurDAG->getTargetConstant(0, MVT::i32), Pred, PredReg }; - Offset = SDValue(CurDAG->getTargetNode(ARM::tMOVi8, dl, MVT::i32, Ops,4),0); + ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N); + if (!NC || NC->getZExtValue() != 0) + return false; + + Base = Offset = N; return true; } diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index a473a20..f2c5a46 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -224,10 +224,12 @@ def tLDRH : T1pI2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr), "ldrh", " $dst, $addr", [(set tGPR:$dst, (zextloadi16 t_addrmode_s2:$addr))]>; +let AddedComplexity = 10 in def tLDRSB : T1pI1<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), "ldrsb", " $dst, $addr", [(set tGPR:$dst, (sextloadi8 t_addrmode_rr:$addr))]>; +let AddedComplexity = 10 in def tLDRSH : T1pI2<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), "ldrsh", " $dst, $addr", [(set tGPR:$dst, (sextloadi16 t_addrmode_rr:$addr))]>; @@ -620,6 +622,14 @@ def : T1Pat<(extloadi1 t_addrmode_s1:$addr), (tLDRB t_addrmode_s1:$addr)>; def : T1Pat<(extloadi8 
t_addrmode_s1:$addr), (tLDRB t_addrmode_s1:$addr)>; def : T1Pat<(extloadi16 t_addrmode_s2:$addr), (tLDRH t_addrmode_s2:$addr)>; +// If it's impossible to use [r,r] address mode for sextload, select to +// ldr{b|h} + sxt{b|h} instead. +def : TPat<(sextloadi8 t_addrmode_s1:$addr), + (tSXTB (tLDRB t_addrmode_s1:$addr))>; +def : TPat<(sextloadi16 t_addrmode_s2:$addr), + (tSXTH (tLDRH t_addrmode_s2:$addr))>; + + // Large immediate handling. // Two piece imms. diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt index 8ffe0b1..cc01794 100644 --- a/lib/Target/ARM/README-Thumb.txt +++ b/lib/Target/ARM/README-Thumb.txt @@ -244,12 +244,3 @@ to toggle the 's' bit since they do not set CPSR when they are inside IT blocks. Make use of hi register variants of cmp: tCMPhir / tCMPZhir. //===---------------------------------------------------------------------===// - -Rather than generating ldrsb, sometimes it's better to select to ldrb + sxtb. -The problem is ldrsb addressing mode [r, r] means the zero offset requires an -extra move. e.g. 
ldr_ext.ll test3: - movs r1, #0 - ldrsb r0, [r0, r1] -=> - ldrb r0, [r0, #0] - sxtb r0, r0 diff --git a/test/CodeGen/Thumb/ldr_ext.ll b/test/CodeGen/Thumb/ldr_ext.ll index 4b2a7b2..f8b9d15 100644 --- a/test/CodeGen/Thumb/ldr_ext.ll +++ b/test/CodeGen/Thumb/ldr_ext.ll @@ -1,27 +1,34 @@ -; RUN: llvm-as < %s | llc -march=thumb | grep ldrb | count 1 -; RUN: llvm-as < %s | llc -march=thumb | grep ldrh | count 1 -; RUN: llvm-as < %s | llc -march=thumb | grep ldrsb | count 1 -; RUN: llvm-as < %s | llc -march=thumb | grep ldrsh | count 1 +; RUN: llvm-as < %s | llc -march=thumb | FileCheck %s define i32 @test1(i8* %v.pntr.s0.u1) { +; CHECK: test1: +; CHECK: ldrb %tmp.u = load i8* %v.pntr.s0.u1 %tmp1.s = zext i8 %tmp.u to i32 ret i32 %tmp1.s } define i32 @test2(i16* %v.pntr.s0.u1) { +; CHECK: test2: +; CHECK: ldrh %tmp.u = load i16* %v.pntr.s0.u1 %tmp1.s = zext i16 %tmp.u to i32 ret i32 %tmp1.s } define i32 @test3(i8* %v.pntr.s1.u0) { - %tmp.s = load i8* %v.pntr.s1.u0 +; CHECK: test3: +; CHECK: ldrb +; CHECK: sxtb + %tmp.s = load i8* %v.pntr.s1.u0 %tmp1.s = sext i8 %tmp.s to i32 ret i32 %tmp1.s } define i32 @test4() { +; CHECK: test4: +; CHECK: movs +; CHECK: ldrsh %tmp.s = load i16* null %tmp1.s = sext i16 %tmp.s to i32 ret i32 %tmp1.s |