From 139e407d526193017d42473c8d4892933de78f14 Mon Sep 17 00:00:00 2001
From: Evan Cheng
Date: Thu, 20 Dec 2012 19:59:30 +0000
Subject: On some ARM cpus, flags setting movs with shifter operand, i.e. lsl,
 lsr, asr, are more expensive than the non-flag setting variant. Teach thumb2
 size reduction pass to avoid generating them unless we are optimizing for
 size.

rdar://12892707

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170728 91177308-0d34-0410-b5e6-96231b3b80d8
---
 test/CodeGen/Thumb2/thumb2-shifter.ll | 82 +++++++++++++++++++++++++++++------
 1 file changed, 69 insertions(+), 13 deletions(-)

(limited to 'test/CodeGen/Thumb2')

diff --git a/test/CodeGen/Thumb2/thumb2-shifter.ll b/test/CodeGen/Thumb2/thumb2-shifter.ll
index 98854a1..05dd90c 100644
--- a/test/CodeGen/Thumb2/thumb2-shifter.ll
+++ b/test/CodeGen/Thumb2/thumb2-shifter.ll
@@ -1,24 +1,27 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=cortex-a8 | FileCheck %s --check-prefix=A8
+; RUN: llc < %s -march=thumb -mcpu=swift | FileCheck %s --check-prefix=SWIFT
+
+; rdar://12892707
 
 define i32 @t2ADDrs_lsl(i32 %X, i32 %Y) {
-; CHECK: t2ADDrs_lsl
-; CHECK: add.w r0, r0, r1, lsl #16
+; A8: t2ADDrs_lsl
+; A8: add.w r0, r0, r1, lsl #16
   %A = shl i32 %Y, 16
   %B = add i32 %X, %A
   ret i32 %B
 }
 
 define i32 @t2ADDrs_lsr(i32 %X, i32 %Y) {
-; CHECK: t2ADDrs_lsr
-; CHECK: add.w r0, r0, r1, lsr #16
+; A8: t2ADDrs_lsr
+; A8: add.w r0, r0, r1, lsr #16
   %A = lshr i32 %Y, 16
   %B = add i32 %X, %A
   ret i32 %B
 }
 
 define i32 @t2ADDrs_asr(i32 %X, i32 %Y) {
-; CHECK: t2ADDrs_asr
-; CHECK: add.w r0, r0, r1, asr #16
+; A8: t2ADDrs_asr
+; A8: add.w r0, r0, r1, asr #16
   %A = ashr i32 %Y, 16
   %B = add i32 %X, %A
   ret i32 %B
@@ -26,8 +29,8 @@ define i32 @t2ADDrs_asr(i32 %X, i32 %Y) {
 
 ; i32 ror(n) = (x >> n) | (x << (32 - n))
 define i32 @t2ADDrs_ror(i32 %X, i32 %Y) {
-; CHECK: t2ADDrs_ror
-; CHECK: add.w r0, r0, r1, ror #16
+; A8: t2ADDrs_ror
+; A8: add.w r0, r0, r1, ror #16
   %A = lshr i32 %Y, 16
   %B = shl i32 %Y, 16
   %C = or i32 %B, %A
@@ -36,13 +39,66 @@ define i32 @t2ADDrs_ror(i32 %X, i32 %Y) {
 }
 
 define i32 @t2ADDrs_noRegShift(i32 %X, i32 %Y, i8 %sh) {
-; CHECK: t2ADDrs_noRegShift
-; CHECK: uxtb r2, r2
-; CHECK: lsls r1, r2
-; CHECK: add r0, r1
+; A8: t2ADDrs_noRegShift
+; A8: uxtb r2, r2
+; A8: lsls r1, r2
+; A8: add r0, r1
+
+; SWIFT: t2ADDrs_noRegShift
+; SWIFT-NOT: lsls
+; SWIFT: lsl.w
+  %shift.upgrd.1 = zext i8 %sh to i32
+  %A = shl i32 %Y, %shift.upgrd.1
+  %B = add i32 %X, %A
+  ret i32 %B
+}
+
+define i32 @t2ADDrs_noRegShift2(i32 %X, i32 %Y, i8 %sh) {
+; A8: t2ADDrs_noRegShift2
+; A8: uxtb r2, r2
+; A8: lsrs r1, r2
+; A8: add r0, r1
+
+; SWIFT: t2ADDrs_noRegShift2
+; SWIFT-NOT: lsrs
+; SWIFT: lsr.w
+  %shift.upgrd.1 = zext i8 %sh to i32
+  %A = lshr i32 %Y, %shift.upgrd.1
+  %B = add i32 %X, %A
+  ret i32 %B
+}
+
+define i32 @t2ADDrs_noRegShift3(i32 %X, i32 %Y, i8 %sh) {
+; A8: t2ADDrs_noRegShift3
+; A8: uxtb r2, r2
+; A8: asrs r1, r2
+; A8: add r0, r1
+
+; SWIFT: t2ADDrs_noRegShift3
+; SWIFT-NOT: asrs
+; SWIFT: asr.w
+  %shift.upgrd.1 = zext i8 %sh to i32
+  %A = ashr i32 %Y, %shift.upgrd.1
+  %B = add i32 %X, %A
+  ret i32 %B
+}
+
+define i32 @t2ADDrs_optsize(i32 %X, i32 %Y, i8 %sh) optsize {
+; SWIFT: t2ADDrs_optsize
+; SWIFT-NOT: lsl.w
+; SWIFT: lsls
   %shift.upgrd.1 = zext i8 %sh to i32
   %A = shl i32 %Y, %shift.upgrd.1
   %B = add i32 %X, %A
   ret i32 %B
 }
 
+define i32 @t2ADDrs_minsize(i32 %X, i32 %Y, i8 %sh) minsize {
+; SWIFT: t2ADDrs_minsize
+; SWIFT-NOT: lsr.w
+; SWIFT: lsrs
+  %shift.upgrd.1 = zext i8 %sh to i32
+  %A = lshr i32 %Y, %shift.upgrd.1
+  %B = add i32 %X, %A
+  ret i32 %B
+}
--
cgit v1.1