diff options
author | Evan Cheng <evan.cheng@apple.com> | 2010-11-17 20:13:28 +0000 |
---|---|---|
committer | Evan Cheng <evan.cheng@apple.com> | 2010-11-17 20:13:28 +0000 |
commit | c4af4638dfdab0dc3b6257276cfad2ee45053060 (patch) | |
tree | c09b56536b88953a85defe07d095b13cfa625b03 /test | |
parent | f2dc4aa562e2478a73fe5aeeeec16b1e496a0642 (diff) | |
download | external_llvm-c4af4638dfdab0dc3b6257276cfad2ee45053060.zip external_llvm-c4af4638dfdab0dc3b6257276cfad2ee45053060.tar.gz external_llvm-c4af4638dfdab0dc3b6257276cfad2ee45053060.tar.bz2 |
Remove ARM isel hacks that fold large immediates into a pair of add, sub, and,
and xor. The 32-bit move immediates can be hoisted out of loops by machine
LICM but the isel hacks were preventing them.
Instead, let peephole optimization pass recognize registers that are defined by
immediates and the ARM target hook will fold the immediates in.
Other changes include 1) do not fold and / xor into cmp to isel TST / TEQ
instructions if there are multiple uses. This happens when the 'and' is live
out, machine sink would have sinked the computation and that ends up pessimizing
code. The peephole pass would recognize situations where the 'and' can be
toggled to define CPSR and eliminate the comparison anyway.
2) Move peephole pass to after machine LICM, sink, and CSE to avoid blocking
important optimizations.
rdar://8663787, rdar://8241368
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@119548 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- | test/CodeGen/ARM/arm-and-tst-peephole.ll | 5 | ||||
-rw-r--r-- | test/CodeGen/ARM/select_xform.ll | 3 | ||||
-rw-r--r-- | test/CodeGen/Thumb2/machine-licm.ll | 41 | ||||
-rw-r--r-- | test/CodeGen/Thumb2/thumb2-mov.ll | 6 | ||||
-rw-r--r-- | test/CodeGen/Thumb2/thumb2-select_xform.ll | 4 |
5 files changed, 49 insertions, 10 deletions
diff --git a/test/CodeGen/ARM/arm-and-tst-peephole.ll b/test/CodeGen/ARM/arm-and-tst-peephole.ll index 461150f..50c638b 100644 --- a/test/CodeGen/ARM/arm-and-tst-peephole.ll +++ b/test/CodeGen/ARM/arm-and-tst-peephole.ll @@ -27,9 +27,8 @@ tailrecurse: ; preds = %sw.bb, %entry ; ARM-NEXT: beq ; THUMB: movs r5, #3 -; THUMB-NEXT: mov r6, r4 -; THUMB-NEXT: ands r6, r5 -; THUMB-NEXT: tst r4, r5 +; THUMB-NEXT: ands r5, r4 +; THUMB-NEXT: cmp r5, #0 ; THUMB-NEXT: beq ; T2: ands r12, r12, #3 diff --git a/test/CodeGen/ARM/select_xform.ll b/test/CodeGen/ARM/select_xform.ll index 21bc5fa..5dabfc3 100644 --- a/test/CodeGen/ARM/select_xform.ll +++ b/test/CodeGen/ARM/select_xform.ll @@ -8,7 +8,8 @@ define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind { ; ARM: movgt r0, r1 ; T2: t1: -; T2: sub.w r0, r1, #-2147483648 +; T2: mvn r0, #-2147483648 +; T2: add r0, r1 ; T2: movgt r0, r1 %tmp1 = icmp sgt i32 %c, 10 %tmp2 = select i1 %tmp1, i32 0, i32 2147483647 diff --git a/test/CodeGen/Thumb2/machine-licm.ll b/test/CodeGen/Thumb2/machine-licm.ll index 14d04a4..37a15ff 100644 --- a/test/CodeGen/Thumb2/machine-licm.ll +++ b/test/CodeGen/Thumb2/machine-licm.ll @@ -77,10 +77,49 @@ bb2: } ; CHECK-NOT: LCPI1_0: -; CHECK: .subsections_via_symbols declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone + +; rdar://8241368 +; isel should not fold immediate into eor's which would have prevented LICM. +define zeroext i16 @t3(i8 zeroext %data, i16 zeroext %crc) nounwind readnone { +; CHECK: t3: +bb.nph: +; CHECK: bb.nph +; CHECK: movw {{(r[0-9])|(lr)}}, #32768 +; CHECK: movs {{(r[0-9])|(lr)}}, #8 +; CHECK: movw [[REGISTER:(r[0-9])|(lr)]], #16386 +; CHECK: movw {{(r[0-9])|(lr)}}, #65534 +; CHECK: movt {{(r[0-9])|(lr)}}, #65535 + br label %bb + +bb: ; preds = %bb, %bb.nph +; CHECK: bb +; CHECK: eor.w {{(r[0-9])|(lr)}}, {{(r[0-9])|(lr)}}, [[REGISTER]] +; CHECK: eor.w +; CHECK-NOT: eor +; CHECK: and + %data_addr.013 = phi i8 [ %data, %bb.nph ], [ %8, %bb ] ; <i8> [#uses=2] + %crc_addr.112 = phi i16 [ %crc, %bb.nph ], [ %crc_addr.2, %bb ] ; <i16> [#uses=3] + %i.011 = phi i8 [ 0, %bb.nph ], [ %7, %bb ] ; <i8> [#uses=1] + %0 = trunc i16 %crc_addr.112 to i8 ; <i8> [#uses=1] + %1 = xor i8 %data_addr.013, %0 ; <i8> [#uses=1] + %2 = and i8 %1, 1 ; <i8> [#uses=1] + %3 = icmp eq i8 %2, 0 ; <i1> [#uses=2] + %4 = xor i16 %crc_addr.112, 16386 ; <i16> [#uses=1] + %crc_addr.0 = select i1 %3, i16 %crc_addr.112, i16 %4 ; <i16> [#uses=1] + %5 = lshr i16 %crc_addr.0, 1 ; <i16> [#uses=2] + %6 = or i16 %5, -32768 ; <i16> [#uses=1] + %crc_addr.2 = select i1 %3, i16 %5, i16 %6 ; <i16> [#uses=2] + %7 = add i8 %i.011, 1 ; <i8> [#uses=2] + %8 = lshr i8 %data_addr.013, 1 ; <i8> [#uses=1] + %exitcond = icmp eq i8 %7, 8 ; <i1> [#uses=1] + br i1 %exitcond, label %bb8, label %bb + +bb8: ; preds = %bb + ret i16 %crc_addr.2 +} diff --git a/test/CodeGen/Thumb2/thumb2-mov.ll b/test/CodeGen/Thumb2/thumb2-mov.ll index 1dc3614..adb6dde 100644 --- a/test/CodeGen/Thumb2/thumb2-mov.ll +++ b/test/CodeGen/Thumb2/thumb2-mov.ll @@ -53,7 +53,7 @@ define i32 @t2_const_var2_2_ok_1(i32 %lhs) { define i32 @t2_const_var2_2_ok_2(i32 %lhs) { ;CHECK: t2_const_var2_2_ok_2: -;CHECK: add.w r0, r0, #-1426063360 +;CHECK: add.w r0, r0, #2868903936 ;CHECK: add.w r0, r0, #47616 %ret = add i32 %lhs, 2868951552 ; 0xab00ba00 ret i32 %ret @@ -61,7 +61,7 @@ define i32 @t2_const_var2_2_ok_2(i32 %lhs) { define i32 @t2_const_var2_2_ok_3(i32 %lhs) { ;CHECK: t2_const_var2_2_ok_3: -;CHECK: add.w r0, r0, #-1426019584 +;CHECK: add.w r0, r0, #2868947712 ;CHECK: adds r0, #16 %ret = add i32 %lhs, 2868947728 ; 0xab00ab10 ret i32 %ret @@ -69,7 +69,7 @@ define i32 @t2_const_var2_2_ok_3(i32 %lhs) { define i32 @t2_const_var2_2_ok_4(i32 %lhs) { ;CHECK: t2_const_var2_2_ok_4: -;CHECK: add.w r0, r0, #-1426019584 +;CHECK: add.w r0, r0, #2868947712 ;CHECK: add.w r0, r0, #1048592 %ret = add i32 %lhs, 2869996304 ; 0xab10ab10 ret i32 %ret diff --git a/test/CodeGen/Thumb2/thumb2-select_xform.ll b/test/CodeGen/Thumb2/thumb2-select_xform.ll index 56cb1f6..ceefabb 100644 --- a/test/CodeGen/Thumb2/thumb2-select_xform.ll +++ b/test/CodeGen/Thumb2/thumb2-select_xform.ll @@ -2,8 +2,8 @@ define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK: t1 -; CHECK: sub.w r0, r1, #-2147483648 -; CHECK: subs r0, #1 +; CHECK: mvn r0, #-2147483648 +; CHECK: add r0, r1 ; CHECK: cmp r2, #10 ; CHECK: it gt ; CHECK: movgt r0, r1 |