diff options
author | Chad Rosier <mcrosier@apple.com> | 2011-11-17 07:15:58 +0000 |
---|---|---|
committer | Chad Rosier <mcrosier@apple.com> | 2011-11-17 07:15:58 +0000 |
commit | 478b06c9801f0b75c5216ce0886ac3d630d4fc7b (patch) | |
tree | 54fa90701be13d4ab0be4cfb05352cfec4de49be /test | |
parent | ec43d1f553cb440df1b435d3798063d0cba6a117 (diff) | |
download | external_llvm-478b06c9801f0b75c5216ce0886ac3d630d4fc7b.zip external_llvm-478b06c9801f0b75c5216ce0886ac3d630d4fc7b.tar.gz external_llvm-478b06c9801f0b75c5216ce0886ac3d630d4fc7b.tar.bz2 |
When fast-isel selects a GEP, accumulate the offset rather than emitting a
series of ADDs. MaxOffs is used as a threshold to limit the size of the offset.
The tradeoffs are: (1) If we can't materialize the large constant, we'll cause
fast-isel to bail. (2) An offset that is too large can't be encoded directly in
the ADD, resulting in a MOV+ADD. This is generally not a bad thing, because
otherwise we would have had ADD+ADD, but on Thumb it turns into a
MOVS+MOVT+ADD. Working on a fix for that. (3) Conversely, with a threshold that
is too low, we'll miss opportunities to coalesce ADDs.
rdar://10412592
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144886 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- | test/CodeGen/ARM/fast-isel-GEP-coalesce.ll | 65 |
1 files changed, 65 insertions, 0 deletions
diff --git a/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll b/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll new file mode 100644 index 0000000..dbb634d --- /dev/null +++ b/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll @@ -0,0 +1,65 @@ +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB + +%struct.A = type { i32, [2 x [2 x i32]], i8, [3 x [3 x [3 x i32]]] } +%struct.B = type { i32, [2 x [2 x [2 x %struct.A]]] } + +@arr = common global [2 x [2 x [2 x [2 x [2 x i32]]]]] zeroinitializer, align 4 +@A = common global [3 x [3 x %struct.A]] zeroinitializer, align 4 +@B = common global [2 x [2 x [2 x %struct.B]]] zeroinitializer, align 4 + +define i32* @t1() nounwind { +entry: +; ARM: t1 +; THUMB: t1 + %addr = alloca i32*, align 4 + store i32* getelementptr inbounds ([2 x [2 x [2 x [2 x [2 x i32]]]]]* @arr, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1), i32** %addr, align 4 +; ARM: add r0, r0, #124 +; THUMB: adds r0, #124 + %0 = load i32** %addr, align 4 + ret i32* %0 +} + +define i32* @t2() nounwind { +entry: +; ARM: t2 +; THUMB: t2 + %addr = alloca i32*, align 4 + store i32* getelementptr inbounds ([3 x [3 x %struct.A]]* @A, i32 0, i32 2, i32 2, i32 3, i32 1, i32 2, i32 2), i32** %addr, align 4 +; ARM: movw r1, #1148 +; ARM: add r0, r0, r1 +; THUMB: addw r0, r0, #1148 + %0 = load i32** %addr, align 4 + ret i32* %0 +} + +define i32* @t3() nounwind { +entry: +; ARM: t3 +; THUMB: t3 + %addr = alloca i32*, align 4 + store i32* getelementptr inbounds ([3 x [3 x %struct.A]]* @A, i32 0, i32 0, i32 1, i32 1, i32 0, i32 1), i32** %addr, align 4 +; ARM: add r0, r0, #140 +; THUMB: adds r0, #140 + %0 = load i32** %addr, align 4 + ret i32* %0 +} + +define i32* @t4() nounwind { +entry: +; ARM: t4 +; THUMB: t4 + %addr = alloca i32*, align 4 + store i32* getelementptr inbounds ([2 x [2 x 
[2 x %struct.B]]]* @B, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 3, i32 1, i32 2, i32 1), i32** %addr, align 4 +; ARM-NOT: movw r{{[0-9]}}, #1060 +; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #4 +; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #132 +; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #24 +; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #36 +; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #24 +; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #4 +; ARM: movw r{{[0-9]}}, #1284 +; THUMB: addw r{{[0-9]}}, r{{[0-9]}}, #1284 + %0 = load i32** %addr, align 4 + ret i32* %0 +} |