diff options
author | Bill Schmidt <wschmidt@linux.vnet.ibm.com> | 2013-02-21 00:38:25 +0000 |
---|---|---|
committer | Bill Schmidt <wschmidt@linux.vnet.ibm.com> | 2013-02-21 00:38:25 +0000 |
commit | 421021157eda12453b4fea7ea853d8c472bd8532 (patch) | |
tree | 2b12da45666d6870868c6138293967c0c7b3d769 /test | |
parent | 5c43245bf459c77077b607e1b55e6928cfbe464e (diff) | |
download | external_llvm-421021157eda12453b4fea7ea853d8c472bd8532.zip external_llvm-421021157eda12453b4fea7ea853d8c472bd8532.tar.gz external_llvm-421021157eda12453b4fea7ea853d8c472bd8532.tar.bz2 |
PPCDAGToDAGISel::PostprocessISelDAG()
This patch implements the PPCDAGToDAGISel::PostprocessISelDAG virtual
method to perform post-selection peephole optimizations on the DAG
representation.
One optimization is implemented here: a fold that cleans up complex
addressing expressions for thread-local storage and the medium code
model. It will also be useful for large code model sequences when
those are added later. I originally thought about doing this on the
MI representation prior to register assignment, but it's difficult to
do effective global dead code elimination at that point. DCE is
trivial on the DAG representation.
A typical example of a candidate code sequence in assembly:
addis 3, 2, globalvar@toc@ha
addi 3, 3, globalvar@toc@l
lwz 5, 0(3)
When the final instruction is a load or store with an immediate offset
of zero, the offset from the add-immediate can replace the zero,
provided the relocation information is carried along:
addis 3, 2, globalvar@toc@ha
lwz 5, globalvar@toc@l(3)
Since the addi can in general have multiple uses, the instruction must
be deleted only when its last use is removed.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175697 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- | test/CodeGen/PowerPC/mcm-10.ll | 25 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/mcm-11.ll | 27 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/mcm-12.ll | 18 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/mcm-obj-2.ll | 77 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/tls-2.ll | 15 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/tls-ld-2.ll | 24 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/tls.ll | 2 |
7 files changed, 187 insertions, 1 deletions
diff --git a/test/CodeGen/PowerPC/mcm-10.ll b/test/CodeGen/PowerPC/mcm-10.ll new file mode 100644 index 0000000..4bec3e1 --- /dev/null +++ b/test/CodeGen/PowerPC/mcm-10.ll @@ -0,0 +1,25 @@ +; RUN: llc -mcpu=pwr7 -O1 -code-model=medium <%s | FileCheck %s + +; Test peephole optimization for medium code model (32-bit TOC offsets) +; for loading and storing a static variable scoped to a function. + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@test_fn_static.si = internal global i32 0, align 4 + +define signext i32 @test_fn_static() nounwind { +entry: + %0 = load i32* @test_fn_static.si, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* @test_fn_static.si, align 4 + ret i32 %0 +} + +; CHECK: test_fn_static: +; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha +; CHECK: lwz {{[0-9]+}}, [[VAR]]@toc@l([[REG1]]) +; CHECK: stw {{[0-9]+}}, [[VAR]]@toc@l([[REG1]]) +; CHECK: .type [[VAR]],@object +; CHECK: .local [[VAR]] +; CHECK: .comm [[VAR]],4,4 diff --git a/test/CodeGen/PowerPC/mcm-11.ll b/test/CodeGen/PowerPC/mcm-11.ll new file mode 100644 index 0000000..f2bc4c9 --- /dev/null +++ b/test/CodeGen/PowerPC/mcm-11.ll @@ -0,0 +1,27 @@ +; RUN: llc -mcpu=pwr7 -O1 -code-model=medium <%s | FileCheck %s + +; Test peephole optimization for medium code model (32-bit TOC offsets) +; for loading and storing a file-scope static variable. 
+ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@gi = global i32 5, align 4 + +define signext i32 @test_file_static() nounwind { +entry: + %0 = load i32* @gi, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* @gi, align 4 + ret i32 %0 +} + +; CHECK: test_file_static: +; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha +; CHECK: lwz {{[0-9]+}}, [[VAR]]@toc@l([[REG1]]) +; CHECK: stw {{[0-9]+}}, [[VAR]]@toc@l([[REG1]]) +; CHECK: .type [[VAR]],@object +; CHECK: .data +; CHECK: .globl [[VAR]] +; CHECK: [[VAR]]: +; CHECK: .long 5 diff --git a/test/CodeGen/PowerPC/mcm-12.ll b/test/CodeGen/PowerPC/mcm-12.ll new file mode 100644 index 0000000..911305d --- /dev/null +++ b/test/CodeGen/PowerPC/mcm-12.ll @@ -0,0 +1,18 @@ +; RUN: llc -mcpu=pwr7 -O1 -code-model=medium <%s | FileCheck %s + +; Test peephole optimization for medium code model (32-bit TOC offsets) +; for loading a value from the constant pool (TOC-relative). + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define double @test_double_const() nounwind { +entry: + ret double 0x3F4FD4920B498CF0 +} + +; CHECK: [[VAR:[a-z0-9A-Z_.]+]]: +; CHECK: .quad 4562098671269285104 +; CHECK: test_double_const: +; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha +; CHECK: lfd {{[0-9]+}}, [[VAR]]@toc@l([[REG1]]) diff --git a/test/CodeGen/PowerPC/mcm-obj-2.ll b/test/CodeGen/PowerPC/mcm-obj-2.ll new file mode 100644 index 0000000..2dd1718 --- /dev/null +++ b/test/CodeGen/PowerPC/mcm-obj-2.ll @@ -0,0 +1,77 @@ +; RUN: llc -O1 -mcpu=pwr7 -code-model=medium -filetype=obj %s -o - | \ +; RUN: elf-dump --dump-section-data | FileCheck %s + +; FIXME: When asm-parse is available, could make this an assembly test. 
+ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@test_fn_static.si = internal global i32 0, align 4 + +define signext i32 @test_fn_static() nounwind { +entry: + %0 = load i32* @test_fn_static.si, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* @test_fn_static.si, align 4 + ret i32 %0 +} + +; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for +; accessing function-scoped variable si. +; +; CHECK: Relocation 0 +; CHECK-NEXT: 'r_offset' +; CHECK-NEXT: 'r_sym', 0x[[SYM2:[0-9]+]] +; CHECK-NEXT: 'r_type', 0x00000032 +; CHECK: Relocation 1 +; CHECK-NEXT: 'r_offset' +; CHECK-NEXT: 'r_sym', 0x[[SYM2]] +; CHECK-NEXT: 'r_type', 0x00000030 +; CHECK: Relocation 2 +; CHECK-NEXT: 'r_offset' +; CHECK-NEXT: 'r_sym', 0x[[SYM2]] +; CHECK-NEXT: 'r_type', 0x00000030 + +@gi = global i32 5, align 4 + +define signext i32 @test_file_static() nounwind { +entry: + %0 = load i32* @gi, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* @gi, align 4 + ret i32 %0 +} + +; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for +; accessing file-scope variable gi. +; +; CHECK: Relocation 3 +; CHECK-NEXT: 'r_offset' +; CHECK-NEXT: 'r_sym', 0x[[SYM3:[0-9]+]] +; CHECK-NEXT: 'r_type', 0x00000032 +; CHECK: Relocation 4 +; CHECK-NEXT: 'r_offset' +; CHECK-NEXT: 'r_sym', 0x[[SYM3]] +; CHECK-NEXT: 'r_type', 0x00000030 +; CHECK: Relocation 5 +; CHECK-NEXT: 'r_offset' +; CHECK-NEXT: 'r_sym', 0x[[SYM3]] +; CHECK-NEXT: 'r_type', 0x00000030 + +define double @test_double_const() nounwind { +entry: + ret double 0x3F4FD4920B498CF0 +} + +; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for +; accessing a constant. 
+; +; CHECK: Relocation 6 +; CHECK-NEXT: 'r_offset' +; CHECK-NEXT: 'r_sym', 0x[[SYM4:[0-9]+]] +; CHECK-NEXT: 'r_type', 0x00000032 +; CHECK: Relocation 7 +; CHECK-NEXT: 'r_offset' +; CHECK-NEXT: 'r_sym', 0x[[SYM4]] +; CHECK-NEXT: 'r_type', 0x00000030 + diff --git a/test/CodeGen/PowerPC/tls-2.ll b/test/CodeGen/PowerPC/tls-2.ll new file mode 100644 index 0000000..20d8fe4 --- /dev/null +++ b/test/CodeGen/PowerPC/tls-2.ll @@ -0,0 +1,15 @@ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-freebsd10.0" +; RUN: llc -O1 < %s -march=ppc64 | FileCheck %s + +@a = thread_local global i32 0, align 4 + +;CHECK: localexec: +define i32 @localexec() nounwind { +entry: +;CHECK: addis [[REG1:[0-9]+]], 13, a@tprel@ha +;CHECK-NEXT: li [[REG2:[0-9]+]], 42 +;CHECK-NEXT: stw [[REG2]], a@tprel@l([[REG1]]) + store i32 42, i32* @a, align 4 + ret i32 0 +} diff --git a/test/CodeGen/PowerPC/tls-ld-2.ll b/test/CodeGen/PowerPC/tls-ld-2.ll new file mode 100644 index 0000000..4954afe --- /dev/null +++ b/test/CodeGen/PowerPC/tls-ld-2.ll @@ -0,0 +1,24 @@ +; RUN: llc -mcpu=pwr7 -O1 -relocation-model=pic < %s | FileCheck %s + +; Test peephole optimization for thread-local storage using the +; local dynamic model. 
+ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@a = hidden thread_local global i32 0, align 4 + +define signext i32 @main() nounwind { +entry: + %retval = alloca i32, align 4 + store i32 0, i32* %retval + %0 = load i32* @a, align 4 + ret i32 %0 +} + +; CHECK: addis [[REG:[0-9]+]], 2, a@got@tlsld@ha +; CHECK-NEXT: addi 3, [[REG]], a@got@tlsld@l +; CHECK-NEXT: bl __tls_get_addr(a@tlsld) +; CHECK-NEXT: nop +; CHECK-NEXT: addis [[REG2:[0-9]+]], 3, a@dtprel@ha +; CHECK-NEXT: lwa {{[0-9]+}}, a@dtprel@l([[REG2]]) diff --git a/test/CodeGen/PowerPC/tls.ll b/test/CodeGen/PowerPC/tls.ll index 713893b..9021f03 100644 --- a/test/CodeGen/PowerPC/tls.ll +++ b/test/CodeGen/PowerPC/tls.ll @@ -1,6 +1,6 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" target triple = "powerpc64-unknown-freebsd10.0" -; RUN: llc < %s -march=ppc64 | FileCheck %s +; RUN: llc -O0 < %s -march=ppc64 | FileCheck %s @a = thread_local global i32 0, align 4 |