diff options
author | Tim Northover <tnorthover@apple.com> | 2013-11-08 17:18:07 +0000 |
---|---|---|
committer | Tim Northover <tnorthover@apple.com> | 2013-11-08 17:18:07 +0000 |
commit | 323ac85d6ad7ba5d9593d8e151d879bd91d82e08 (patch) | |
tree | 1c3b17bc84524be55da9f3ffaf112d3263a564f9 /test/CodeGen/ARM | |
parent | 2b01682aa7b9509e9fa1865ebed3d0a7928f5b7a (diff) | |
download | external_llvm-323ac85d6ad7ba5d9593d8e151d879bd91d82e08.zip external_llvm-323ac85d6ad7ba5d9593d8e151d879bd91d82e08.tar.gz external_llvm-323ac85d6ad7ba5d9593d8e151d879bd91d82e08.tar.bz2 |
ARM: fold prologue/epilogue sp updates into push/pop for code size
ARM prologues usually look like:
push {r7, lr}
sub sp, sp, #4
If code size is extremely important, this can be optimised to the single
instruction:
push {r6, r7, lr}
where we don't actually care about the contents of r6, but pushing it subtracts
4 from sp as a side effect.
This should implement such a conversion, predicated on the "minsize" function
attribute (-Oz) since I've yet to find any code it actually makes faster.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194264 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/ARM')
-rw-r--r-- | test/CodeGen/ARM/fold-stack-adjust.ll | 126 |
1 files changed, 126 insertions, 0 deletions
diff --git a/test/CodeGen/ARM/fold-stack-adjust.ll b/test/CodeGen/ARM/fold-stack-adjust.ll new file mode 100644 index 0000000..c8c48fa --- /dev/null +++ b/test/CodeGen/ARM/fold-stack-adjust.ll @@ -0,0 +1,126 @@ +; RUN: llc -mtriple=thumbv7-apple-darwin-eabi < %s | FileCheck %s +; RUN: llc -mtriple=thumbv6m-apple-darwin-eabi -disable-fp-elim < %s | FileCheck %s --check-prefix=CHECK-T1 +; RUN: llc -mtriple=thumbv7-apple-darwin-ios -disable-fp-elim < %s | FileCheck %s --check-prefix=CHECK-IOS + + +declare void @bar(i8*) + +%bigVec = type [2 x double] + +@var = global %bigVec zeroinitializer + +define void @check_simple() minsize { +; CHECK-LABEL: check_simple: +; CHECK: push.w {r7, r8, r9, r10, r11, lr} +; CHECK-NOT: sub sp, sp, +; ... +; CHECK-NOT: add sp, sp, +; CHECK: pop.w {r7, r8, r9, r10, r11, pc} + +; CHECK-T1-LABEL: check_simple: +; CHECK-T1: push {r3, r4, r5, r6, r7, lr} +; CHECK-T1: add r7, sp, #16 +; CHECK-T1-NOT: sub sp, sp, +; ... +; CHECK-T1-NOT: add sp, sp, +; CHECK-T1: pop {r3, r4, r5, r6, r7, pc} + + ; iOS always has a frame pointer and messing with the push affects + ; how it's set in the prologue. Make sure we get that right. +; CHECK-IOS-LABEL: check_simple: +; CHECK-IOS: push {r3, r4, r5, r6, r7, lr} +; CHECK-NOT: sub sp, +; CHECK-IOS: add r7, sp, #16 +; CHECK-NOT: sub sp, +; ... +; CHECK-NOT: add sp, +; CHECK-IOS: pop {r3, r4, r5, r6, r7, pc} + + %var = alloca i8, i32 16 + call void @bar(i8* %var) + ret void +} + +define void @check_simple_too_big() minsize { +; CHECK-LABEL: check_simple_too_big: +; CHECK: push.w {r11, lr} +; CHECK: sub sp, +; ... +; CHECK: add sp, +; CHECK: pop.w {r11, pc} + %var = alloca i8, i32 64 + call void @bar(i8* %var) + ret void +} + +define void @check_vfp_fold() minsize { +; CHECK-LABEL: check_vfp_fold: +; CHECK: push {r[[GLOBREG:[0-9]+]], lr} +; CHECK: vpush {d6, d7, d8, d9} +; CHECK-NOT: sub sp, +; ... +; CHECK: vldmia r[[GLOBREG]], {d8, d9} +; ... 
+; CHECK-NOT: add sp, +; CHECK: vpop {d6, d7, d8, d9} +; CHECK: pop {r[[GLOBREG]], pc} + + ; iOS uses aligned NEON stores here, which is convenient since we + ; want to make sure that works too. +; CHECK-IOS-LABEL: check_vfp_fold: +; CHECK-IOS: push {r0, r1, r2, r3, r4, r7, lr} +; CHECK-IOS: sub.w r4, sp, #16 +; CHECK-IOS: bic r4, r4, #15 +; CHECK-IOS: mov sp, r4 +; CHECK-IOS: vst1.64 {d8, d9}, [r4:128] +; ... +; CHECK-IOS: add r4, sp, #16 +; CHECK-IOS: vld1.64 {d8, d9}, [r4:128] +; CHECK-IOS: mov sp, r4 +; CHECK-IOS: pop {r4, r7, pc} + + %var = alloca i8, i32 16 + + %tmp = load %bigVec* @var + call void @bar(i8* %var) + store %bigVec %tmp, %bigVec* @var + + ret void +} + +; This function should use just enough space that the "add sp, sp, ..." could be +; folded in except that doing so would clobber the value being returned. +define i64 @check_no_return_clobber() minsize { +; CHECK-LABEL: check_no_return_clobber: +; CHECK: push.w {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NOT: sub sp, +; ... +; CHECK: add sp, #40 +; CHECK: pop.w {r11, pc} + + ; Just to keep iOS FileCheck within previous function: +; CHECK-IOS-LABEL: check_no_return_clobber: + + %var = alloca i8, i32 40 + call void @bar(i8* %var) + ret i64 0 +} + +define arm_aapcs_vfpcc double @check_vfp_no_return_clobber() minsize { +; CHECK-LABEL: check_vfp_no_return_clobber: +; CHECK: push {r[[GLOBREG:[0-9]+]], lr} +; CHECK: vpush {d0, d1, d2, d3, d4, d5, d6, d7, d8, d9} +; CHECK-NOT: sub sp, +; ... +; CHECK: add sp, #64 +; CHECK: vpop {d8, d9} +; CHECK: pop {r[[GLOBREG]], pc} + + %var = alloca i8, i32 64 + + %tmp = load %bigVec* @var + call void @bar(i8* %var) + store %bigVec %tmp, %bigVec* @var + + ret double 1.0 +} |