diff options
author | Richard Sandiford <rsandifo@linux.vnet.ibm.com> | 2013-08-05 11:23:46 +0000 |
---|---|---|
committer | Richard Sandiford <rsandifo@linux.vnet.ibm.com> | 2013-08-05 11:23:46 +0000 |
commit | 93795574785de252703591e7fcc8f052c762f25e (patch) | |
tree | de693d743c5334444b688797de354cdc279bbdbe /test | |
parent | f8e16c6f5a3a0d2cc6f7ae6dae0a8f55a89cfb2f (diff) | |
download | external_llvm-93795574785de252703591e7fcc8f052c762f25e.zip external_llvm-93795574785de252703591e7fcc8f052c762f25e.tar.gz external_llvm-93795574785de252703591e7fcc8f052c762f25e.tar.bz2 |
[SystemZ] Use BRCT and BRCTG to eliminate add-&-compare sequences
This patch just uses a peephole test for "add; compare; branch" sequences
within a single block. The IR optimizers already convert loops to
decrement-and-branch-on-nonzero form in some cases, so even this
simplistic test triggers many times during a clang bootstrap and
projects/test-suite run. There still seem to be cases, though, where we
need to prefer branches on nonzero more strongly. For example, I saw a
case where a loop that started out with a check for 0 ended up with a
check for -1. I'll try to look at that sometime.
I ended up adding the Reference class because MachineInstr::readsRegister()
doesn't check for subregisters (by design, as far as I could tell).
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187723 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- | test/CodeGen/SystemZ/Large/branch-range-07.py | 68 | ||||
-rw-r--r-- | test/CodeGen/SystemZ/Large/branch-range-08.py | 69 | ||||
-rw-r--r-- | test/CodeGen/SystemZ/loop-01.ll | 101 |
3 files changed, 237 insertions, 1 deletion
diff --git a/test/CodeGen/SystemZ/Large/branch-range-07.py b/test/CodeGen/SystemZ/Large/branch-range-07.py new file mode 100644 index 0000000..90c4420 --- /dev/null +++ b/test/CodeGen/SystemZ/Large/branch-range-07.py @@ -0,0 +1,68 @@ +# Test 32-bit BRANCH RELATIVE ON COUNT in cases where some branches are out +# of range. +# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s + +# Construct: +# +# loopN: +# load of countN +# ... +# loop0: +# 0xffd8 bytes, from MVIY instructions +# conditional branch to main +# after0: +# ... +# decrement of countN +# conditional branch to loopN +# afterN: +# +# Each load occupies 4 bytes. Each decrement and branch occupies 4 +# bytes if BRCT can be used, otherwise it occupies 10 bytes (AHI + BRCL). +# This means that loop 6 contains 5 * 4 + 0xffd8 + 5 * 4 == 0x10000 bytes +# and is therefore (just) in range. Loop 7 is out of range. +# +# CHECK: brct {{%r[0-9]+}} +# CHECK: brct {{%r[0-9]+}} +# CHECK: brct {{%r[0-9]+}} +# CHECK: brct {{%r[0-9]+}} +# CHECK: brct {{%r[0-9]+}} +# CHECK: brct {{%r[0-9]+}} +# CHECK: ahi {{%r[0-9]+}}, -1 +# CHECK: jglh +# CHECK: ahi {{%r[0-9]+}}, -1 +# CHECK: jglh + +branch_blocks = 8 +main_size = 0xffd8 + +print 'define void @f1(i8 *%base, i32 *%counts) {' +print 'entry:' + +for i in xrange(branch_blocks - 1, -1, -1): + print ' %%countptr%d = getelementptr i32 *%%counts, i64 %d' % (i, i) + print ' %%initcount%d = load i32 *%%countptr%d' % (i, i) + print ' br label %%loop%d' % i + + print 'loop%d:' % i + block1 = 'entry' if i == branch_blocks - 1 else 'loop%d' % (i + 1) + block2 = 'loop0' if i == 0 else 'after%d' % (i - 1) + print (' %%count%d = phi i32 [ %%initcount%d, %%%s ],' + ' [ %%nextcount%d, %%%s ]' % (i, i, block1, i, block2)) + +a, b = 1, 1 +for i in xrange(0, main_size, 6): + a, b = b, a + b + offset = 4096 + b % 500000 + value = a % 256 + print ' %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset) + print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i) + +for i in 
xrange(branch_blocks): + print ' %%nextcount%d = add i32 %%count%d, -1' % (i, i) + print ' %%test%d = icmp ne i32 %%nextcount%d, 0' % (i, i) + print ' br i1 %%test%d, label %%loop%d, label %%after%d' % (i, i, i) + print '' + print 'after%d:' % i + +print ' ret void' +print '}' diff --git a/test/CodeGen/SystemZ/Large/branch-range-08.py b/test/CodeGen/SystemZ/Large/branch-range-08.py new file mode 100644 index 0000000..ac1b137 --- /dev/null +++ b/test/CodeGen/SystemZ/Large/branch-range-08.py @@ -0,0 +1,69 @@ +# Test 64-bit BRANCH RELATIVE ON COUNT in cases where some branches are out +# of range. +# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s + +# Construct: +# +# loopN: +# load of countN +# ... +# loop0: +# 0xffd8 bytes, from MVIY instructions +# conditional branch to main +# after0: +# ... +# decrement of countN +# conditional branch to loopN +# afterN: +# +# Each load occupies 6 bytes. Each decrement and branch occupies 4 +# bytes if BRCTG can be used, otherwise it occupies 10 bytes (AGHI + BRCL). +# This means that loop 5 contains 4 * 6 + 0xffd8 + 4 * 4 == 0x10000 bytes +# and is therefore (just) in range. Loop 6 is out of range. 
+# +# CHECK: brctg {{%r[0-9]+}} +# CHECK: brctg {{%r[0-9]+}} +# CHECK: brctg {{%r[0-9]+}} +# CHECK: brctg {{%r[0-9]+}} +# CHECK: brctg {{%r[0-9]+}} +# CHECK: aghi {{%r[0-9]+}}, -1 +# CHECK: jglh +# CHECK: aghi {{%r[0-9]+}}, -1 +# CHECK: jglh +# CHECK: aghi {{%r[0-9]+}}, -1 +# CHECK: jglh + +branch_blocks = 8 +main_size = 0xffd8 + +print 'define void @f1(i8 *%base, i64 *%counts) {' +print 'entry:' + +for i in xrange(branch_blocks - 1, -1, -1): + print ' %%countptr%d = getelementptr i64 *%%counts, i64 %d' % (i, i) + print ' %%initcount%d = load i64 *%%countptr%d' % (i, i) + print ' br label %%loop%d' % i + + print 'loop%d:' % i + block1 = 'entry' if i == branch_blocks - 1 else 'loop%d' % (i + 1) + block2 = 'loop0' if i == 0 else 'after%d' % (i - 1) + print (' %%count%d = phi i64 [ %%initcount%d, %%%s ],' + ' [ %%nextcount%d, %%%s ]' % (i, i, block1, i, block2)) + +a, b = 1, 1 +for i in xrange(0, main_size, 6): + a, b = b, a + b + offset = 4096 + b % 500000 + value = a % 256 + print ' %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset) + print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i) + +for i in xrange(branch_blocks): + print ' %%nextcount%d = add i64 %%count%d, -1' % (i, i) + print ' %%test%d = icmp ne i64 %%nextcount%d, 0' % (i, i) + print ' br i1 %%test%d, label %%loop%d, label %%after%d' % (i, i, i) + print '' + print 'after%d:' % i + +print ' ret void' +print '}' diff --git a/test/CodeGen/SystemZ/loop-01.ll b/test/CodeGen/SystemZ/loop-01.ll index 025a34e..5800801 100644 --- a/test/CodeGen/SystemZ/loop-01.ll +++ b/test/CodeGen/SystemZ/loop-01.ll @@ -5,7 +5,7 @@ ; Test that strength reduction is applied to addresses with a scale factor, ; but that indexed addressing can still be used. define void @f1(i32 *%dest, i32 %a) { -; CHECK-LABEL: f1 +; CHECK-LABEL: f1: ; CHECK-NOT: sllg ; CHECK: st %r3, 0({{%r[1-5],%r[1-5]}}) ; CHECK: br %r14 @@ -23,3 +23,102 @@ loop: exit: ret void } + +; Test a loop that should be converted into dbr form and then use BRCT. 
+define void @f2(i32 *%src, i32 *%dest) { +; CHECK-LABEL: f2: +; CHECK: lhi [[REG:%r[0-5]]], 100 +; CHECK: [[LABEL:\.[^:]*]]:{{.*}} %loop +; CHECK: brct [[REG]], [[LABEL]] +; CHECK: br %r14 +entry: + br label %loop + +loop: + %count = phi i32 [ 0, %entry ], [ %next, %loop.next ] + %next = add i32 %count, 1 + %val = load volatile i32 *%src + %cmp = icmp eq i32 %val, 0 + br i1 %cmp, label %loop.next, label %loop.store + +loop.store: + %add = add i32 %val, 1 + store volatile i32 %add, i32 *%dest + br label %loop.next + +loop.next: + %cont = icmp ne i32 %next, 100 + br i1 %cont, label %loop, label %exit + +exit: + ret void +} + +; Like f2, but for BRCTG. +define void @f3(i64 *%src, i64 *%dest) { +; CHECK-LABEL: f3: +; CHECK: lghi [[REG:%r[0-5]]], 100 +; CHECK: [[LABEL:\.[^:]*]]:{{.*}} %loop +; CHECK: brctg [[REG]], [[LABEL]] +; CHECK: br %r14 +entry: + br label %loop + +loop: + %count = phi i64 [ 0, %entry ], [ %next, %loop.next ] + %next = add i64 %count, 1 + %val = load volatile i64 *%src + %cmp = icmp eq i64 %val, 0 + br i1 %cmp, label %loop.next, label %loop.store + +loop.store: + %add = add i64 %val, 1 + store volatile i64 %add, i64 *%dest + br label %loop.next + +loop.next: + %cont = icmp ne i64 %next, 100 + br i1 %cont, label %loop, label %exit + +exit: + ret void +} + +; Test a loop with a 64-bit decremented counter in which the 32-bit +; low part of the counter is used after the decrement. This is an example +; of a subregister use being the only thing that blocks a conversion to BRCTG. 
+define void @f4(i32 *%src, i32 *%dest, i64 *%dest2, i64 %count) { +; CHECK-LABEL: f4: +; CHECK: aghi [[REG:%r[0-5]]], -1 +; CHECK: lr [[REG2:%r[0-5]]], [[REG]] +; CHECK: stg [[REG2]], +; CHECK: jne {{\..*}} +; CHECK: br %r14 +entry: + br label %loop + +loop: + %left = phi i64 [ %count, %entry ], [ %next, %loop.next ] + store volatile i64 %left, i64 *%dest2 + %val = load volatile i32 *%src + %cmp = icmp eq i32 %val, 0 + br i1 %cmp, label %loop.next, label %loop.store + +loop.store: + %add = add i32 %val, 1 + store volatile i32 %add, i32 *%dest + br label %loop.next + +loop.next: + %next = add i64 %left, -1 + %ext = zext i32 %val to i64 + %shl = shl i64 %ext, 32 + %and = and i64 %next, 4294967295 + %or = or i64 %shl, %and + store volatile i64 %or, i64 *%dest2 + %cont = icmp ne i64 %next, 0 + br i1 %cont, label %loop, label %exit + +exit: + ret void +} |