aboutsummaryrefslogtreecommitdiffstats
path: root/test/CodeGen/ARM
diff options
context:
space:
mode:
authorTim Northover <tnorthover@apple.com>2013-11-08 17:18:07 +0000
committerTim Northover <tnorthover@apple.com>2013-11-08 17:18:07 +0000
commit323ac85d6ad7ba5d9593d8e151d879bd91d82e08 (patch)
tree1c3b17bc84524be55da9f3ffaf112d3263a564f9 /test/CodeGen/ARM
parent2b01682aa7b9509e9fa1865ebed3d0a7928f5b7a (diff)
downloadexternal_llvm-323ac85d6ad7ba5d9593d8e151d879bd91d82e08.zip
external_llvm-323ac85d6ad7ba5d9593d8e151d879bd91d82e08.tar.gz
external_llvm-323ac85d6ad7ba5d9593d8e151d879bd91d82e08.tar.bz2
ARM: fold prologue/epilogue sp updates into push/pop for code size
ARM prologues usually look like: push {r7, lr} sub sp, sp, #4 If code size is extremely important, this can be optimised to the single instruction: push {r6, r7, lr} where we don't actually care about the contents of r6, but pushing it subtracts 4 from sp as a side effect. This should implement such a conversion, predicated on the "minsize" function attribute (-Oz) since I've yet to find any code it actually makes faster. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194264 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/ARM')
-rw-r--r--test/CodeGen/ARM/fold-stack-adjust.ll126
1 files changed, 126 insertions, 0 deletions
diff --git a/test/CodeGen/ARM/fold-stack-adjust.ll b/test/CodeGen/ARM/fold-stack-adjust.ll
new file mode 100644
index 0000000..c8c48fa
--- /dev/null
+++ b/test/CodeGen/ARM/fold-stack-adjust.ll
@@ -0,0 +1,126 @@
+; RUN: llc -mtriple=thumbv7-apple-darwin-eabi < %s | FileCheck %s
+; RUN: llc -mtriple=thumbv6m-apple-darwin-eabi -disable-fp-elim < %s | FileCheck %s --check-prefix=CHECK-T1
+; RUN: llc -mtriple=thumbv7-apple-darwin-ios -disable-fp-elim < %s | FileCheck %s --check-prefix=CHECK-IOS
+
+
+declare void @bar(i8*)
+
+%bigVec = type [2 x double]
+
+@var = global %bigVec zeroinitializer
+
+define void @check_simple() minsize {
+; CHECK-LABEL: check_simple:
+; CHECK: push.w {r7, r8, r9, r10, r11, lr}
+; CHECK-NOT: sub sp, sp,
+; ...
+; CHECK-NOT: add sp, sp,
+; CHECK: pop.w {r7, r8, r9, r10, r11, pc}
+
+; CHECK-T1-LABEL: check_simple:
+; CHECK-T1: push {r3, r4, r5, r6, r7, lr}
+; CHECK-T1: add r7, sp, #16
+; CHECK-T1-NOT: sub sp, sp,
+; ...
+; CHECK-T1-NOT: add sp, sp,
+; CHECK-T1: pop {r3, r4, r5, r6, r7, pc}
+
+ ; iOS always has a frame pointer and messing with the push affects
+ ; how it's set in the prologue. Make sure we get that right.
+; CHECK-IOS-LABEL: check_simple:
+; CHECK-IOS: push {r3, r4, r5, r6, r7, lr}
+; CHECK-NOT: sub sp,
+; CHECK-IOS: add r7, sp, #16
+; CHECK-NOT: sub sp,
+; ...
+; CHECK-NOT: add sp,
+; CHEC: pop {r3, r4, r5, r6, r7, pc}
+
+ %var = alloca i8, i32 16
+ call void @bar(i8* %var)
+ ret void
+}
+
+define void @check_simple_too_big() minsize {
+; CHECK-LABEL: check_simple_too_big:
+; CHECK: push.w {r11, lr}
+; CHECK: sub sp,
+; ...
+; CHECK: add sp,
+; CHECK: pop.w {r11, pc}
+ %var = alloca i8, i32 64
+ call void @bar(i8* %var)
+ ret void
+}
+
+define void @check_vfp_fold() minsize {
+; CHECK-LABEL: check_vfp_fold:
+; CHECK: push {r[[GLOBREG:[0-9]+]], lr}
+; CHECK: vpush {d6, d7, d8, d9}
+; CHECK-NOT: sub sp,
+; ...
+; CHECK: vldmia r[[GLOBREG]], {d8, d9}
+; ...
+; CHECK-NOT: add sp,
+; CHECK: vpop {d6, d7, d8, d9}
+; CHECKL pop {r[[GLOBREG]], pc}
+
+ ; iOS uses aligned NEON stores here, which is convenient since we
+ ; want to make sure that works too.
+; CHECK-IOS-LABEL: check_vfp_fold:
+; CHECK-IOS: push {r0, r1, r2, r3, r4, r7, lr}
+; CHECK-IOS: sub.w r4, sp, #16
+; CHECK-IOS: bic r4, r4, #15
+; CHECK-IOS: mov sp, r4
+; CHECK-IOS: vst1.64 {d8, d9}, [r4:128]
+; ...
+; CHECK-IOS: add r4, sp, #16
+; CHECK-IOS: vld1.64 {d8, d9}, [r4:128]
+; CHECK-IOS: mov sp, r4
+; CHECK-IOS: pop {r4, r7, pc}
+
+ %var = alloca i8, i32 16
+
+ %tmp = load %bigVec* @var
+ call void @bar(i8* %var)
+ store %bigVec %tmp, %bigVec* @var
+
+ ret void
+}
+
+; This function should use just enough space that the "add sp, sp, ..." could be
+; folded in except that doing so would clobber the value being returned.
+define i64 @check_no_return_clobber() minsize {
+; CHECK-LABEL: check_no_return_clobber:
+; CHECK: push.w {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NOT: sub sp,
+; ...
+; CHECK: add sp, #40
+; CHECK: pop.w {r11, pc}
+
+ ; Just to keep iOS FileCheck within previous function:
+; CHECK-IOS-LABEL: check_no_return_clobber:
+
+ %var = alloca i8, i32 40
+ call void @bar(i8* %var)
+ ret i64 0
+}
+
+define arm_aapcs_vfpcc double @check_vfp_no_return_clobber() minsize {
+; CHECK-LABEL: check_vfp_no_return_clobber:
+; CHECK: push {r[[GLOBREG:[0-9]+]], lr}
+; CHECK: vpush {d0, d1, d2, d3, d4, d5, d6, d7, d8, d9}
+; CHECK-NOT: sub sp,
+; ...
+; CHECK: add sp, #64
+; CHECK: vpop {d8, d9}
+; CHECK: pop {r[[GLOBREG]], pc}
+
+ %var = alloca i8, i32 64
+
+ %tmp = load %bigVec* @var
+ call void @bar(i8* %var)
+ store %bigVec %tmp, %bigVec* @var
+
+ ret double 1.0
+}