diff options
296 files changed, 31125 insertions, 0 deletions
diff --git a/test/CodeGen/SystemZ/addr-01.ll b/test/CodeGen/SystemZ/addr-01.ll new file mode 100644 index 0000000..c125ffa --- /dev/null +++ b/test/CodeGen/SystemZ/addr-01.ll @@ -0,0 +1,107 @@ +; Test selection of addresses with indices in cases where the address +; is used once. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; A simple index address. +define void @f1(i64 %addr, i64 %index) { +; CHECK: f1: +; CHECK: lb %r0, 0(%r3,%r2) +; CHECK: br %r14 + %add = add i64 %addr, %index + %ptr = inttoptr i64 %add to i8 * + %a = load volatile i8 *%ptr + ret void +} + +; An address with an index and a displacement (order 1). +define void @f2(i64 %addr, i64 %index) { +; CHECK: f2: +; CHECK: lb %r0, 100(%r3,%r2) +; CHECK: br %r14 + %add1 = add i64 %addr, %index + %add2 = add i64 %add1, 100 + %ptr = inttoptr i64 %add2 to i8 * + %a = load volatile i8 *%ptr + ret void +} + +; An address with an index and a displacement (order 2). +define void @f3(i64 %addr, i64 %index) { +; CHECK: f3: +; CHECK: lb %r0, 100(%r3,%r2) +; CHECK: br %r14 + %add1 = add i64 %addr, 100 + %add2 = add i64 %add1, %index + %ptr = inttoptr i64 %add2 to i8 * + %a = load volatile i8 *%ptr + ret void +} + +; An address with an index and a subtracted displacement (order 1). +define void @f4(i64 %addr, i64 %index) { +; CHECK: f4: +; CHECK: lb %r0, -100(%r3,%r2) +; CHECK: br %r14 + %add1 = add i64 %addr, %index + %add2 = sub i64 %add1, 100 + %ptr = inttoptr i64 %add2 to i8 * + %a = load volatile i8 *%ptr + ret void +} + +; An address with an index and a subtracted displacement (order 2). +define void @f5(i64 %addr, i64 %index) { +; CHECK: f5: +; CHECK: lb %r0, -100(%r3,%r2) +; CHECK: br %r14 + %add1 = sub i64 %addr, 100 + %add2 = add i64 %add1, %index + %ptr = inttoptr i64 %add2 to i8 * + %a = load volatile i8 *%ptr + ret void +} + +; An address with an index and a displacement added using OR. 
+define void @f6(i64 %addr, i64 %index) { +; CHECK: f6: +; CHECK: nill %r2, 65528 +; CHECK: lb %r0, 6(%r3,%r2) +; CHECK: br %r14 + %aligned = and i64 %addr, -8 + %or = or i64 %aligned, 6 + %add = add i64 %or, %index + %ptr = inttoptr i64 %add to i8 * + %a = load volatile i8 *%ptr + ret void +} + +; Like f6, but without the masking. This OR doesn't count as a displacement. +define void @f7(i64 %addr, i64 %index) { +; CHECK: f7: +; CHECK: oill %r2, 6 +; CHECK: lb %r0, 0(%r3,%r2) +; CHECK: br %r14 + %or = or i64 %addr, 6 + %add = add i64 %or, %index + %ptr = inttoptr i64 %add to i8 * + %a = load volatile i8 *%ptr + ret void +} + +; Like f6, but with the OR applied after the index. We don't know anything +; about the alignment of %add here. +define void @f8(i64 %addr, i64 %index) { +; CHECK: f8: +; CHECK: nill %r2, 65528 +; CHECK: agr %r2, %r3 +; CHECK: oill %r2, 6 +; CHECK: lb %r0, 0(%r2) +; CHECK: br %r14 + %aligned = and i64 %addr, -8 + %add = add i64 %aligned, %index + %or = or i64 %add, 6 + %ptr = inttoptr i64 %or to i8 * + %a = load volatile i8 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/addr-02.ll b/test/CodeGen/SystemZ/addr-02.ll new file mode 100644 index 0000000..6772c1d --- /dev/null +++ b/test/CodeGen/SystemZ/addr-02.ll @@ -0,0 +1,116 @@ +; addr-01.ll in which the address is also used in a non-address context. +; The assumption here is that we should match complex addresses where +; possible, but this might well need to change in future. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; A simple index address. +define void @f1(i64 %addr, i64 %index, i8 **%dst) { +; CHECK: f1: +; CHECK: lb %r0, 0(%r3,%r2) +; CHECK: br %r14 + %add = add i64 %addr, %index + %ptr = inttoptr i64 %add to i8 * + %a = load volatile i8 *%ptr + store volatile i8 *%ptr, i8 **%dst + ret void +} + +; An address with an index and a displacement (order 1). 
+define void @f2(i64 %addr, i64 %index, i8 **%dst) { +; CHECK: f2: +; CHECK: lb %r0, 100(%r3,%r2) +; CHECK: br %r14 + %add1 = add i64 %addr, %index + %add2 = add i64 %add1, 100 + %ptr = inttoptr i64 %add2 to i8 * + %a = load volatile i8 *%ptr + store volatile i8 *%ptr, i8 **%dst + ret void +} + +; An address with an index and a displacement (order 2). +define void @f3(i64 %addr, i64 %index, i8 **%dst) { +; CHECK: f3: +; CHECK: lb %r0, 100(%r3,%r2) +; CHECK: br %r14 + %add1 = add i64 %addr, 100 + %add2 = add i64 %add1, %index + %ptr = inttoptr i64 %add2 to i8 * + %a = load volatile i8 *%ptr + store volatile i8 *%ptr, i8 **%dst + ret void +} + +; An address with an index and a subtracted displacement (order 1). +define void @f4(i64 %addr, i64 %index, i8 **%dst) { +; CHECK: f4: +; CHECK: lb %r0, -100(%r3,%r2) +; CHECK: br %r14 + %add1 = add i64 %addr, %index + %add2 = sub i64 %add1, 100 + %ptr = inttoptr i64 %add2 to i8 * + %a = load volatile i8 *%ptr + store volatile i8 *%ptr, i8 **%dst + ret void +} + +; An address with an index and a subtracted displacement (order 2). +define void @f5(i64 %addr, i64 %index, i8 **%dst) { +; CHECK: f5: +; CHECK: lb %r0, -100(%r3,%r2) +; CHECK: br %r14 + %add1 = sub i64 %addr, 100 + %add2 = add i64 %add1, %index + %ptr = inttoptr i64 %add2 to i8 * + %a = load volatile i8 *%ptr + store volatile i8 *%ptr, i8 **%dst + ret void +} + +; An address with an index and a displacement added using OR. +define void @f6(i64 %addr, i64 %index, i8 **%dst) { +; CHECK: f6: +; CHECK: nill %r2, 65528 +; CHECK: lb %r0, 6(%r3,%r2) +; CHECK: br %r14 + %aligned = and i64 %addr, -8 + %or = or i64 %aligned, 6 + %add = add i64 %or, %index + %ptr = inttoptr i64 %add to i8 * + %a = load volatile i8 *%ptr + store volatile i8 *%ptr, i8 **%dst + ret void +} + +; Like f6, but without the masking. This OR doesn't count as a displacement. 
+define void @f7(i64 %addr, i64 %index, i8 **%dst) { +; CHECK: f7: +; CHECK: oill %r2, 6 +; CHECK: lb %r0, 0(%r3,%r2) +; CHECK: br %r14 + %or = or i64 %addr, 6 + %add = add i64 %or, %index + %ptr = inttoptr i64 %add to i8 * + %a = load volatile i8 *%ptr + store volatile i8 *%ptr, i8 **%dst + ret void +} + +; Like f6, but with the OR applied after the index. We don't know anything +; about the alignment of %add here. +define void @f8(i64 %addr, i64 %index, i8 **%dst) { +; CHECK: f8: +; CHECK: nill %r2, 65528 +; CHECK: agr %r2, %r3 +; CHECK: oill %r2, 6 +; CHECK: lb %r0, 0(%r2) +; CHECK: br %r14 + %aligned = and i64 %addr, -8 + %add = add i64 %aligned, %index + %or = or i64 %add, 6 + %ptr = inttoptr i64 %or to i8 * + %a = load volatile i8 *%ptr + store volatile i8 *%ptr, i8 **%dst + ret void +} diff --git a/test/CodeGen/SystemZ/addr-03.ll b/test/CodeGen/SystemZ/addr-03.ll new file mode 100644 index 0000000..dbdb9f1 --- /dev/null +++ b/test/CodeGen/SystemZ/addr-03.ll @@ -0,0 +1,48 @@ +; Test constant addresses, unlikely as they are. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +define void @f1() { +; CHECK: f1: +; CHECK: lb %r0, 0 +; CHECK: br %r14 + %ptr = inttoptr i64 0 to i8 * + %val = load volatile i8 *%ptr + ret void +} + +define void @f2() { +; CHECK: f2: +; CHECK: lb %r0, -524288 +; CHECK: br %r14 + %ptr = inttoptr i64 -524288 to i8 * + %val = load volatile i8 *%ptr + ret void +} + +define void @f3() { +; CHECK: f3: +; CHECK-NOT: lb %r0, -524289 +; CHECK: br %r14 + %ptr = inttoptr i64 -524289 to i8 * + %val = load volatile i8 *%ptr + ret void +} + +define void @f4() { +; CHECK: f4: +; CHECK: lb %r0, 524287 +; CHECK: br %r14 + %ptr = inttoptr i64 524287 to i8 * + %val = load volatile i8 *%ptr + ret void +} + +define void @f5() { +; CHECK: f5: +; CHECK-NOT: lb %r0, 524288 +; CHECK: br %r14 + %ptr = inttoptr i64 524288 to i8 * + %val = load volatile i8 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/alloca-01.ll b/test/CodeGen/SystemZ/alloca-01.ll new file mode 100644 index 0000000..1852c91 --- /dev/null +++ b/test/CodeGen/SystemZ/alloca-01.ll @@ -0,0 +1,81 @@ +; Test variable-sized allocas and addresses based on them in cases where +; stack arguments are needed. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK1 +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK2 +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-A +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-B +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-C +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-D +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-FP + +declare i64 @bar(i8 *%a, i8 *%b, i8 *%c, i8 *%d, i8 *%e, i64 %f, i64 %g) + +; Allocate %length bytes and take addresses based on the result. +; There are two stack arguments, so an offset of 160 + 2 * 8 == 176 +; is added to the copy of %r15. 
+define i64 @f1(i64 %length, i64 %index) { +; The full allocation sequence is: +; +; la %r0, 7(%r2) 1 +; nill %r0, 0xfff8 1 +; lgr %r1, %r15 2 +; sgr %r1, %r0 1 2 +; lgr %r15, %r1 2 +; +; The third instruction does not depend on the first two, so check for +; two fully-ordered sequences. +; +; FIXME: a better sequence would be: +; +; lgr %r1, %r15 +; sgr %r1, %r2 +; nill %r1, 0xfff8 +; lgr %r15, %r1 +; +; CHECK1: f1: +; CHECK1: la %r0, 7(%r2) +; CHECK1: nill %r0, 65528 +; CHECK1: sgr %r1, %r0 +; CHECK1: lgr %r15, %r1 +; +; CHECK2: f1: +; CHECK2: lgr %r1, %r15 +; CHECK2: sgr %r1, %r0 +; CHECK2: lgr %r15, %r1 +; +; CHECK-A: f1: +; CHECK-A: lgr %r15, %r1 +; CHECK-A: la %r2, 176(%r1) +; +; CHECK-B: f1: +; CHECK-B: lgr %r15, %r1 +; CHECK-B: la %r3, 177(%r1) +; +; CHECK-C: f1: +; CHECK-C: lgr %r15, %r1 +; CHECK-C: la %r4, 4095({{%r3,%r1|%r1,%r3}}) +; +; CHECK-D: f1: +; CHECK-D: lgr %r15, %r1 +; CHECK-D: lay %r5, 4096({{%r3,%r1|%r1,%r3}}) +; +; CHECK-E: f1: +; CHECK-E: lgr %r15, %r1 +; CHECK-E: lay %r6, 4271({{%r3,%r1|%r1,%r3}}) +; +; CHECK-FP: f1: +; CHECK-FP: lgr %r11, %r15 +; CHECK-FP: lmg %r6, %r15, 224(%r11) + %a = alloca i8, i64 %length + %b = getelementptr i8 *%a, i64 1 + %cindex = add i64 %index, 3919 + %c = getelementptr i8 *%a, i64 %cindex + %dindex = add i64 %index, 3920 + %d = getelementptr i8 *%a, i64 %dindex + %eindex = add i64 %index, 4095 + %e = getelementptr i8 *%a, i64 %eindex + %count = call i64 @bar(i8 *%a, i8 *%b, i8 *%c, i8 *%d, i8 *%e, i64 0, i64 0) + %res = add i64 %count, 1 + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/alloca-02.ll b/test/CodeGen/SystemZ/alloca-02.ll new file mode 100644 index 0000000..fbb095f --- /dev/null +++ b/test/CodeGen/SystemZ/alloca-02.ll @@ -0,0 +1,49 @@ +; Make sure that the alloca offset isn't lost when the alloca result is +; used directly in a load or store. There must always be an LA or LAY. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-A +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-B +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-C +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-D + +declare i64 @bar(i8 *%a) + +define i64 @f1(i64 %length, i64 %index) { +; CHECK-A: f1: +; CHECK-A: lgr %r15, [[ADDR:%r[1-5]]] +; CHECK-A: la %r2, 160([[ADDR]]) +; CHECK-A: mvi 0(%r2), 0 +; +; CHECK-B: f1: +; CHECK-B: lgr %r15, [[ADDR:%r[1-5]]] +; CHECK-B: la %r2, 160([[ADDR]]) +; CHECK-B: mvi 4095(%r2), 1 +; +; CHECK-C: f1: +; CHECK-C: lgr %r15, [[ADDR:%r[1-5]]] +; CHECK-C: la [[TMP:%r[1-5]]], 160(%r3,[[ADDR]]) +; CHECK-C: mvi 0([[TMP]]), 2 +; +; CHECK-D: f1: +; CHECK-D: lgr %r15, [[ADDR:%r[1-5]]] +; CHECK-D: la [[TMP:%r[1-5]]], 160(%r3,[[ADDR]]) +; CHECK-D: mvi 4095([[TMP]]), 3 +; +; CHECK-E: f1: +; CHECK-E: lgr %r15, [[ADDR:%r[1-5]]] +; CHECK-E: la [[TMP:%r[1-5]]], 160(%r3,[[ADDR]]) +; CHECK-E: mviy 4096([[TMP]]), 4 + %a = alloca i8, i64 %length + store i8 0, i8 *%a + %b = getelementptr i8 *%a, i64 4095 + store i8 1, i8 *%b + %c = getelementptr i8 *%a, i64 %index + store i8 2, i8 *%c + %d = getelementptr i8 *%c, i64 4095 + store i8 3, i8 *%d + %e = getelementptr i8 *%d, i64 1 + store i8 4, i8 *%e + %count = call i64 @bar(i8 *%a) + %res = add i64 %count, 1 + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/and-01.ll b/test/CodeGen/SystemZ/and-01.ll new file mode 100644 index 0000000..8dd106b --- /dev/null +++ b/test/CodeGen/SystemZ/and-01.ll @@ -0,0 +1,129 @@ +; Test 32-bit ANDs in which the second operand is variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check NR. +define i32 @f1(i32 %a, i32 %b) { +; CHECK: f1: +; CHECK: nr %r2, %r3 +; CHECK: br %r14 + %and = and i32 %a, %b + ret i32 %and +} + +; Check the low end of the N range. 
+define i32 @f2(i32 %a, i32 *%src) { +; CHECK: f2: +; CHECK: n %r2, 0(%r3) +; CHECK: br %r14 + %b = load i32 *%src + %and = and i32 %a, %b + ret i32 %and +} + +; Check the high end of the aligned N range. +define i32 @f3(i32 %a, i32 *%src) { +; CHECK: f3: +; CHECK: n %r2, 4092(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 1023 + %b = load i32 *%ptr + %and = and i32 %a, %b + ret i32 %and +} + +; Check the next word up, which should use NY instead of N. +define i32 @f4(i32 %a, i32 *%src) { +; CHECK: f4: +; CHECK: ny %r2, 4096(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 1024 + %b = load i32 *%ptr + %and = and i32 %a, %b + ret i32 %and +} + +; Check the high end of the aligned NY range. +define i32 @f5(i32 %a, i32 *%src) { +; CHECK: f5: +; CHECK: ny %r2, 524284(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131071 + %b = load i32 *%ptr + %and = and i32 %a, %b + ret i32 %and +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i32 @f6(i32 %a, i32 *%src) { +; CHECK: f6: +; CHECK: agfi %r3, 524288 +; CHECK: n %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131072 + %b = load i32 *%ptr + %and = and i32 %a, %b + ret i32 %and +} + +; Check the high end of the negative aligned NY range. +define i32 @f7(i32 %a, i32 *%src) { +; CHECK: f7: +; CHECK: ny %r2, -4(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -1 + %b = load i32 *%ptr + %and = and i32 %a, %b + ret i32 %and +} + +; Check the low end of the NY range. +define i32 @f8(i32 %a, i32 *%src) { +; CHECK: f8: +; CHECK: ny %r2, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131072 + %b = load i32 *%ptr + %and = and i32 %a, %b + ret i32 %and +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define i32 @f9(i32 %a, i32 *%src) { +; CHECK: f9: +; CHECK: agfi %r3, -524292 +; CHECK: n %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131073 + %b = load i32 *%ptr + %and = and i32 %a, %b + ret i32 %and +} + +; Check that N allows an index. +define i32 @f10(i32 %a, i64 %src, i64 %index) { +; CHECK: f10: +; CHECK: n %r2, 4092({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4092 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32 *%ptr + %and = and i32 %a, %b + ret i32 %and +} + +; Check that NY allows an index. +define i32 @f11(i32 %a, i64 %src, i64 %index) { +; CHECK: f11: +; CHECK: ny %r2, 4096({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32 *%ptr + %and = and i32 %a, %b + ret i32 %and +} diff --git a/test/CodeGen/SystemZ/and-02.ll b/test/CodeGen/SystemZ/and-02.ll new file mode 100644 index 0000000..a0fff81 --- /dev/null +++ b/test/CodeGen/SystemZ/and-02.ll @@ -0,0 +1,93 @@ +; Test 32-bit ANDs in which the second operand is constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the lowest useful NILF value. +define i32 @f1(i32 %a) { +; CHECK: f1: +; CHECK: nilf %r2, 1 +; CHECK: br %r14 + %and = and i32 %a, 1 + ret i32 %and +} + +; Check the highest 16-bit constant that must be handled by NILF. +define i32 @f2(i32 %a) { +; CHECK: f2: +; CHECK: nilf %r2, 65534 +; CHECK: br %r14 + %and = and i32 %a, 65534 + ret i32 %and +} + +; ANDs of 0xffff are zero extensions from i16. +define i32 @f3(i32 %a) { +; CHECK: f3: +; CHECK: llhr %r2, %r2 +; CHECK: br %r14 + %and = and i32 %a, 65535 + ret i32 %and +} + +; Check the next value up, which must again use NILF. +define i32 @f4(i32 %a) { +; CHECK: f4: +; CHECK: nilf %r2, 65536 +; CHECK: br %r14 + %and = and i32 %a, 65536 + ret i32 %and +} + +; Check the lowest useful NILH value. (LLHR is used instead of NILH of 0.) 
+define i32 @f5(i32 %a) { +; CHECK: f5: +; CHECK: nilh %r2, 1 +; CHECK: br %r14 + %and = and i32 %a, 131071 + ret i32 %and +} + +; Check the highest useful NILF value. +define i32 @f6(i32 %a) { +; CHECK: f6: +; CHECK: nilf %r2, 4294901758 +; CHECK: br %r14 + %and = and i32 %a, -65538 + ret i32 %and +} + +; Check the highest useful NILH value, which is one up from the above. +define i32 @f7(i32 %a) { +; CHECK: f7: +; CHECK: nilh %r2, 65534 +; CHECK: br %r14 + %and = and i32 %a, -65537 + ret i32 %and +} + +; Check the low end of the NILL range, which is one up again. +define i32 @f8(i32 %a) { +; CHECK: f8: +; CHECK: nill %r2, 0 +; CHECK: br %r14 + %and = and i32 %a, -65536 + ret i32 %and +} + +; Check the next value up. +define i32 @f9(i32 %a) { +; CHECK: f9: +; CHECK: nill %r2, 1 +; CHECK: br %r14 + %and = and i32 %a, -65535 + ret i32 %and +} + +; Check the highest useful NILL value. +define i32 @f10(i32 %a) { +; CHECK: f10: +; CHECK: nill %r2, 65534 +; CHECK: br %r14 + %and = and i32 %a, -2 + ret i32 %and +} diff --git a/test/CodeGen/SystemZ/and-03.ll b/test/CodeGen/SystemZ/and-03.ll new file mode 100644 index 0000000..3fe8d3c --- /dev/null +++ b/test/CodeGen/SystemZ/and-03.ll @@ -0,0 +1,94 @@ +; Test 64-bit ANDs in which the second operand is variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check NGR. +define i64 @f1(i64 %a, i64 %b) { +; CHECK: f1: +; CHECK: ngr %r2, %r3 +; CHECK: br %r14 + %and = and i64 %a, %b + ret i64 %and +} + +; Check NG with no displacement. +define i64 @f2(i64 %a, i64 *%src) { +; CHECK: f2: +; CHECK: ng %r2, 0(%r3) +; CHECK: br %r14 + %b = load i64 *%src + %and = and i64 %a, %b + ret i64 %and +} + +; Check the high end of the aligned NG range. 
+define i64 @f3(i64 %a, i64 *%src) { +; CHECK: f3: +; CHECK: ng %r2, 524280(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65535 + %b = load i64 *%ptr + %and = and i64 %a, %b + ret i64 %and +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f4(i64 %a, i64 *%src) { +; CHECK: f4: +; CHECK: agfi %r3, 524288 +; CHECK: ng %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65536 + %b = load i64 *%ptr + %and = and i64 %a, %b + ret i64 %and +} + +; Check the high end of the negative aligned NG range. +define i64 @f5(i64 %a, i64 *%src) { +; CHECK: f5: +; CHECK: ng %r2, -8(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -1 + %b = load i64 *%ptr + %and = and i64 %a, %b + ret i64 %and +} + +; Check the low end of the NG range. +define i64 @f6(i64 %a, i64 *%src) { +; CHECK: f6: +; CHECK: ng %r2, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65536 + %b = load i64 *%ptr + %and = and i64 %a, %b + ret i64 %and +} + +; Check the next doubleword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f7(i64 %a, i64 *%src) { +; CHECK: f7: +; CHECK: agfi %r3, -524296 +; CHECK: ng %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65537 + %b = load i64 *%ptr + %and = and i64 %a, %b + ret i64 %and +} + +; Check that NG allows an index. +define i64 @f8(i64 %a, i64 %src, i64 %index) { +; CHECK: f8: +; CHECK: ng %r2, 524280({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524280 + %ptr = inttoptr i64 %add2 to i64 * + %b = load i64 *%ptr + %and = and i64 %a, %b + ret i64 %and +} diff --git a/test/CodeGen/SystemZ/and-04.ll b/test/CodeGen/SystemZ/and-04.ll new file mode 100644 index 0000000..62def60 --- /dev/null +++ b/test/CodeGen/SystemZ/and-04.ll @@ -0,0 +1,180 @@ +; Test 64-bit ANDs in which the second operand is constant. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; There is no 64-bit AND instruction for a mask of 1. +; FIXME: we ought to be able to require "ngr %r2, %r0", but at the moment, +; two-address optimisations force "ngr %r0, %r2; lgr %r2, %r0" instead. +define i64 @f1(i64 %a) { +; CHECK: f1: +; CHECK: lghi %r0, 1 +; CHECK: ngr +; CHECK: br %r14 + %and = and i64 %a, 1 + ret i64 %and +} + +; Likewise 0xfffe. +define i64 @f2(i64 %a) { +; CHECK: f2: +; CHECK: llill %r0, 65534 +; CHECK: ngr +; CHECK: br %r14 + %and = and i64 %a, 65534 + ret i64 %and +} + +; ...but 0xffff is a 16-bit zero extension. +define i64 @f3(i64 %a) { +; CHECK: f3: +; CHECK: llghr %r2, %r2 +; CHECK: br %r14 + %and = and i64 %a, 65535 + ret i64 %and +} + +; Check the next value up, which again has no dedicated instruction. +define i64 @f4(i64 %a) { +; CHECK: f4: +; CHECK: llilh %r0, 1 +; CHECK: ngr +; CHECK: br %r14 + %and = and i64 %a, 65536 + ret i64 %and +} + +; Check 0xfffffffe. +define i64 @f5(i64 %a) { +; CHECK: f5: +; CHECK: lilf %r0, 4294967294 +; CHECK: ngr +; CHECK: br %r14 + %and = and i64 %a, 4294967294 + ret i64 %and +} + +; Check the next value up, which is a 32-bit zero extension. +define i64 @f6(i64 %a) { +; CHECK: f6: +; CHECK: llgfr %r2, %r2 +; CHECK: br %r14 + %and = and i64 %a, 4294967295 + ret i64 %and +} + +; Check the lowest useful NIHF value (0x00000001_ffffffff). +define i64 @f7(i64 %a) { +; CHECK: f7: +; CHECK: nihf %r2, 1 +; CHECK: br %r14 + %and = and i64 %a, 8589934591 + ret i64 %and +} + +; Check the low end of the NIHH range (0x0000ffff_ffffffff). +define i64 @f8(i64 %a) { +; CHECK: f8: +; CHECK: nihh %r2, 0 +; CHECK: br %r14 + %and = and i64 %a, 281474976710655 + ret i64 %and +} + +; Check the highest useful NIHH value (0xfffeffff_ffffffff). +define i64 @f9(i64 %a) { +; CHECK: f9: +; CHECK: nihh %r2, 65534 +; CHECK: br %r14 + %and = and i64 %a, -281474976710657 + ret i64 %and +} + +; Check the highest useful NIHF value (0xfffefffe_ffffffff). 
+define i64 @f10(i64 %a) { +; CHECK: f10: +; CHECK: nihf %r2, 4294901758 +; CHECK: br %r14 + %and = and i64 %a, -281479271677953 + ret i64 %and +} + +; Check the low end of the NIHL range (0xffff0000_ffffffff). +define i64 @f11(i64 %a) { +; CHECK: f11: +; CHECK: nihl %r2, 0 +; CHECK: br %r14 + %and = and i64 %a, -281470681743361 + ret i64 %and +} + +; Check the highest useful NIHL value (0xfffffffe_ffffffff). +define i64 @f12(i64 %a) { +; CHECK: f12: +; CHECK: nihl %r2, 65534 +; CHECK: br %r14 + %and = and i64 %a, -4294967297 + ret i64 %and +} + +; Check the low end of the NILF range (0xffffffff_00000000). +define i64 @f13(i64 %a) { +; CHECK: f13: +; CHECK: nilf %r2, 0 +; CHECK: br %r14 + %and = and i64 %a, -4294967296 + ret i64 %and +} + +; Check the low end of the NILH range (0xffffffff_0000ffff). +define i64 @f14(i64 %a) { +; CHECK: f14: +; CHECK: nilh %r2, 0 +; CHECK: br %r14 + %and = and i64 %a, -4294901761 + ret i64 %and +} + +; Check the next value up, which must use NILF. +define i64 @f15(i64 %a) { +; CHECK: f15: +; CHECK: nilf %r2, 65536 +; CHECK: br %r14 + %and = and i64 %a, -4294901760 + ret i64 %and +} + +; Check the maximum useful NILF value (0xffffffff_fffefffe). +define i64 @f16(i64 %a) { +; CHECK: f16: +; CHECK: nilf %r2, 4294901758 +; CHECK: br %r14 + %and = and i64 %a, -65538 + ret i64 %and +} + +; Check the highest useful NILH value, which is one greater than the above. +define i64 @f17(i64 %a) { +; CHECK: f17: +; CHECK: nilh %r2, 65534 +; CHECK: br %r14 + %and = and i64 %a, -65537 + ret i64 %and +} + +; Check the low end of the NILL range, which is one greater again. +define i64 @f18(i64 %a) { +; CHECK: f18: +; CHECK: nill %r2, 0 +; CHECK: br %r14 + %and = and i64 %a, -65536 + ret i64 %and +} + +; Check the highest useful NILL value. 
+define i64 @f19(i64 %a) { +; CHECK: f19: +; CHECK: nill %r2, 65534 +; CHECK: br %r14 + %and = and i64 %a, -2 + ret i64 %and +} diff --git a/test/CodeGen/SystemZ/and-05.ll b/test/CodeGen/SystemZ/and-05.ll new file mode 100644 index 0000000..4573911 --- /dev/null +++ b/test/CodeGen/SystemZ/and-05.ll @@ -0,0 +1,165 @@ +; Test ANDs of a constant into a byte of memory. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the lowest useful constant, expressed as a signed integer. +define void @f1(i8 *%ptr) { +; CHECK: f1: +; CHECK: ni 0(%r2), 1 +; CHECK: br %r14 + %val = load i8 *%ptr + %and = and i8 %val, -255 + store i8 %and, i8 *%ptr + ret void +} + +; Check the highest useful constant, expressed as a signed integer. +define void @f2(i8 *%ptr) { +; CHECK: f2: +; CHECK: ni 0(%r2), 254 +; CHECK: br %r14 + %val = load i8 *%ptr + %and = and i8 %val, -2 + store i8 %and, i8 *%ptr + ret void +} + +; Check the lowest useful constant, expressed as an unsigned integer. +define void @f3(i8 *%ptr) { +; CHECK: f3: +; CHECK: ni 0(%r2), 1 +; CHECK: br %r14 + %val = load i8 *%ptr + %and = and i8 %val, 1 + store i8 %and, i8 *%ptr + ret void +} + +; Check the highest useful constant, expressed as an unsigned integer. +define void @f4(i8 *%ptr) { +; CHECK: f4: +; CHECK: ni 0(%r2), 254 +; CHECK: br %r14 + %val = load i8 *%ptr + %and = and i8 %val, 254 + store i8 %and, i8 *%ptr + ret void +} + +; Check the high end of the NI range. +define void @f5(i8 *%src) { +; CHECK: f5: +; CHECK: ni 4095(%r2), 127 +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 4095 + %val = load i8 *%ptr + %and = and i8 %val, 127 + store i8 %and, i8 *%ptr + ret void +} + +; Check the next byte up, which should use NIY instead of NI. +define void @f6(i8 *%src) { +; CHECK: f6: +; CHECK: niy 4096(%r2), 127 +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 4096 + %val = load i8 *%ptr + %and = and i8 %val, 127 + store i8 %and, i8 *%ptr + ret void +} + +; Check the high end of the NIY range. 
+define void @f7(i8 *%src) { +; CHECK: f7: +; CHECK: niy 524287(%r2), 127 +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 524287 + %val = load i8 *%ptr + %and = and i8 %val, 127 + store i8 %and, i8 *%ptr + ret void +} + +; Check the next byte up, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f8(i8 *%src) { +; CHECK: f8: +; CHECK: agfi %r2, 524288 +; CHECK: ni 0(%r2), 127 +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 524288 + %val = load i8 *%ptr + %and = and i8 %val, 127 + store i8 %and, i8 *%ptr + ret void +} + +; Check the high end of the negative NIY range. +define void @f9(i8 *%src) { +; CHECK: f9: +; CHECK: niy -1(%r2), 127 +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 -1 + %val = load i8 *%ptr + %and = and i8 %val, 127 + store i8 %and, i8 *%ptr + ret void +} + +; Check the low end of the NIY range. +define void @f10(i8 *%src) { +; CHECK: f10: +; CHECK: niy -524288(%r2), 127 +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 -524288 + %val = load i8 *%ptr + %and = and i8 %val, 127 + store i8 %and, i8 *%ptr + ret void +} + +; Check the next byte down, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define void @f11(i8 *%src) { +; CHECK: f11: +; CHECK: agfi %r2, -524289 +; CHECK: ni 0(%r2), 127 +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 -524289 + %val = load i8 *%ptr + %and = and i8 %val, 127 + store i8 %and, i8 *%ptr + ret void +} + +; Check that NI does not allow an index +define void @f12(i64 %src, i64 %index) { +; CHECK: f12: +; CHECK: agr %r2, %r3 +; CHECK: ni 4095(%r2), 127 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4095 + %ptr = inttoptr i64 %add2 to i8 * + %val = load i8 *%ptr + %and = and i8 %val, 127 + store i8 %and, i8 *%ptr + ret void +} + +; Check that NIY does not allow an index +define void @f13(i64 %src, i64 %index) { +; CHECK: f13: +; CHECK: agr %r2, %r3 +; CHECK: niy 4096(%r2), 127 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i8 * + %val = load i8 *%ptr + %and = and i8 %val, 127 + store i8 %and, i8 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/and-06.ll b/test/CodeGen/SystemZ/and-06.ll new file mode 100644 index 0000000..bbb5e7b --- /dev/null +++ b/test/CodeGen/SystemZ/and-06.ll @@ -0,0 +1,108 @@ +; Test that we can use NI for byte operations that are expressed as i32 +; or i64 operations. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Zero extension to 32 bits, negative constant. +define void @f1(i8 *%ptr) { +; CHECK: f1: +; CHECK: ni 0(%r2), 254 +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i32 + %and = and i32 %ext, -2 + %trunc = trunc i32 %and to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; Zero extension to 64 bits, negative constant. +define void @f2(i8 *%ptr) { +; CHECK: f2: +; CHECK: ni 0(%r2), 254 +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i64 + %and = and i64 %ext, -2 + %trunc = trunc i64 %and to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; Zero extension to 32 bits, positive constant. 
+define void @f3(i8 *%ptr) { +; CHECK: f3: +; CHECK: ni 0(%r2), 254 +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i32 + %and = and i32 %ext, 254 + %trunc = trunc i32 %and to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; Zero extension to 64 bits, positive constant. +define void @f4(i8 *%ptr) { +; CHECK: f4: +; CHECK: ni 0(%r2), 254 +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i64 + %and = and i64 %ext, 254 + %trunc = trunc i64 %and to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; Sign extension to 32 bits, negative constant. +define void @f5(i8 *%ptr) { +; CHECK: f5: +; CHECK: ni 0(%r2), 254 +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i32 + %and = and i32 %ext, -2 + %trunc = trunc i32 %and to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; Sign extension to 64 bits, negative constant. +define void @f6(i8 *%ptr) { +; CHECK: f6: +; CHECK: ni 0(%r2), 254 +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i64 + %and = and i64 %ext, -2 + %trunc = trunc i64 %and to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; Sign extension to 32 bits, positive constant. +define void @f7(i8 *%ptr) { +; CHECK: f7: +; CHECK: ni 0(%r2), 254 +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i32 + %and = and i32 %ext, 254 + %trunc = trunc i32 %and to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; Sign extension to 64 bits, positive constant. +define void @f8(i8 *%ptr) { +; CHECK: f8: +; CHECK: ni 0(%r2), 254 +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i64 + %and = and i64 %ext, 254 + %trunc = trunc i64 %and to i8 + store i8 %trunc, i8 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/args-01.ll b/test/CodeGen/SystemZ/args-01.ll new file mode 100644 index 0000000..a6b80c5 --- /dev/null +++ b/test/CodeGen/SystemZ/args-01.ll @@ -0,0 +1,74 @@ +; Test the handling of GPR, FPR and stack arguments when no extension +; type is given. 
This type of argument is used for passing structures, etc. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-INT +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-FLOAT +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-DOUBLE +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-FP128-1 +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-FP128-2 +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-STACK + +declare void @bar(i8, i16, i32, i64, float, double, fp128, i64, + float, double, i8, i16, i32, i64, float, double, fp128) + +; There are two indirect fp128 slots, one at offset 224 (the first available +; byte after the outgoing arguments) and one immediately after it at 240. +; These slots should be set up outside the glued call sequence, so would +; normally use %f0/%f2 as the first available 128-bit pair. This choice +; is hard-coded in the FP128 tests. +; +; The order of the CHECK-INT loads doesn't matter. The same goes for the +; CHECK-FP128-* stores and the CHECK-STACK stores. It would be OK to reorder +; them in response to future code changes. 
+define void @foo() { +; CHECK-INT: foo: +; CHECK-INT: lhi %r2, 1 +; CHECK-INT: lhi %r3, 2 +; CHECK-INT: lhi %r4, 3 +; CHECK-INT: lghi %r5, 4 +; CHECK-INT: la %r6, {{224|240}}(%r15) +; CHECK-INT: brasl %r14, bar@PLT +; +; CHECK-FLOAT: foo: +; CHECK-FLOAT: lzer %f0 +; CHECK-FLOAT: lcebr %f4, %f0 +; CHECK-FLOAT: brasl %r14, bar@PLT +; +; CHECK-DOUBLE: foo: +; CHECK-DOUBLE: lzdr %f2 +; CHECK-DOUBLE: lcdbr %f6, %f2 +; CHECK-DOUBLE: brasl %r14, bar@PLT +; +; CHECK-FP128-1: foo: +; CHECK-FP128-1: aghi %r15, -256 +; CHECK-FP128-1: lzxr %f0 +; CHECK-FP128-1: std %f0, 224(%r15) +; CHECK-FP128-1: std %f2, 232(%r15) +; CHECK-FP128-1: brasl %r14, bar@PLT +; +; CHECK-FP128-2: foo: +; CHECK-FP128-2: aghi %r15, -256 +; CHECK-FP128-2: lzxr %f0 +; CHECK-FP128-2: std %f0, 240(%r15) +; CHECK-FP128-2: std %f2, 248(%r15) +; CHECK-FP128-2: brasl %r14, bar@PLT +; +; CHECK-STACK: foo: +; CHECK-STACK: aghi %r15, -256 +; CHECK-STACK: la [[REGISTER:%r[0-5]+]], {{224|240}}(%r15) +; CHECK-STACK: stg [[REGISTER]], 216(%r15) +; CHECK-STACK: mvghi 208(%r15), 0 +; CHECK-STACK: mvhi 204(%r15), 0 +; CHECK-STACK: mvghi 192(%r15), 9 +; CHECK-STACK: mvhi 188(%r15), 8 +; CHECK-STACK: mvhi 180(%r15), 7 +; CHECK-STACK: mvhi 172(%r15), 6 +; CHECK-STACK: mvghi 160(%r15), 5 +; CHECK-STACK: brasl %r14, bar@PLT + + call void @bar (i8 1, i16 2, i32 3, i64 4, float 0.0, double 0.0, + fp128 0xL00000000000000000000000000000000, i64 5, + float -0.0, double -0.0, i8 6, i16 7, i32 8, i64 9, float 0.0, + double 0.0, fp128 0xL00000000000000000000000000000000) + ret void +} diff --git a/test/CodeGen/SystemZ/args-02.ll b/test/CodeGen/SystemZ/args-02.ll new file mode 100644 index 0000000..9ea111c --- /dev/null +++ b/test/CodeGen/SystemZ/args-02.ll @@ -0,0 +1,76 @@ +; Test the handling of GPR, FPR and stack arguments when integers are +; sign-extended. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-INT +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-FLOAT +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-DOUBLE +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-FP128-1 +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-FP128-2 +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-STACK + +declare void @bar(i8 signext, i16 signext, i32 signext, i64, float, double, + fp128, i64, float, double, i8 signext, i16 signext, + i32 signext, i64, float, double, fp128) + +; There are two indirect fp128 slots, one at offset 224 (the first available +; byte after the outgoing arguments) and one immediately after it at 240. +; These slots should be set up outside the glued call sequence, so would +; normally use %f0/%f2 as the first available 128-bit pair. This choice +; is hard-coded in the FP128 tests. +; +; The order of the CHECK-INT loads doesn't matter. The same goes for the +; CHECK-FP128-* stores and the CHECK-STACK stores. It would be OK to reorder +; them in response to future code changes. 
+define void @foo() { +; CHECK-INT: foo: +; CHECK-INT: lghi %r2, -1 +; CHECK-INT: lghi %r3, -2 +; CHECK-INT: lghi %r4, -3 +; CHECK-INT: lghi %r5, -4 +; CHECK-INT: la %r6, {{224|240}}(%r15) +; CHECK-INT: brasl %r14, bar@PLT +; +; CHECK-FLOAT: foo: +; CHECK-FLOAT: lzer %f0 +; CHECK-FLOAT: lcebr %f4, %f0 +; CHECK-FLOAT: brasl %r14, bar@PLT +; +; CHECK-DOUBLE: foo: +; CHECK-DOUBLE: lzdr %f2 +; CHECK-DOUBLE: lcdbr %f6, %f2 +; CHECK-DOUBLE: brasl %r14, bar@PLT +; +; CHECK-FP128-1: foo: +; CHECK-FP128-1: aghi %r15, -256 +; CHECK-FP128-1: lzxr %f0 +; CHECK-FP128-1: std %f0, 224(%r15) +; CHECK-FP128-1: std %f2, 232(%r15) +; CHECK-FP128-1: brasl %r14, bar@PLT +; +; CHECK-FP128-2: foo: +; CHECK-FP128-2: aghi %r15, -256 +; CHECK-FP128-2: lzxr %f0 +; CHECK-FP128-2: std %f0, 240(%r15) +; CHECK-FP128-2: std %f2, 248(%r15) +; CHECK-FP128-2: brasl %r14, bar@PLT +; +; CHECK-STACK: foo: +; CHECK-STACK: aghi %r15, -256 +; CHECK-STACK: la [[REGISTER:%r[0-5]+]], {{224|240}}(%r15) +; CHECK-STACK: stg [[REGISTER]], 216(%r15) +; CHECK-STACK: mvghi 208(%r15), 0 +; CHECK-STACK: mvhi 204(%r15), 0 +; CHECK-STACK: mvghi 192(%r15), -9 +; CHECK-STACK: mvghi 184(%r15), -8 +; CHECK-STACK: mvghi 176(%r15), -7 +; CHECK-STACK: mvghi 168(%r15), -6 +; CHECK-STACK: mvghi 160(%r15), -5 +; CHECK-STACK: brasl %r14, bar@PLT + + call void @bar (i8 -1, i16 -2, i32 -3, i64 -4, float 0.0, double 0.0, + fp128 0xL00000000000000000000000000000000, i64 -5, + float -0.0, double -0.0, i8 -6, i16 -7, i32 -8, i64 -9, + float 0.0, double 0.0, + fp128 0xL00000000000000000000000000000000) + ret void +} diff --git a/test/CodeGen/SystemZ/args-03.ll b/test/CodeGen/SystemZ/args-03.ll new file mode 100644 index 0000000..f954d58 --- /dev/null +++ b/test/CodeGen/SystemZ/args-03.ll @@ -0,0 +1,78 @@ +; Test the handling of GPR, FPR and stack arguments when integers are +; zero-extended. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-INT +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-FLOAT +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-DOUBLE +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-FP128-1 +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-FP128-2 +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-STACK + +declare void @bar(i8 zeroext, i16 zeroext, i32 zeroext, i64, float, double, + fp128, i64, float, double, i8 zeroext, i16 zeroext, + i32 zeroext, i64, float, double, fp128) + +; There are two indirect fp128 slots, one at offset 224 (the first available +; byte after the outgoing arguments) and one immediately after it at 240. +; These slots should be set up outside the glued call sequence, so would +; normally use %f0/%f2 as the first available 128-bit pair. This choice +; is hard-coded in the FP128 tests. +; +; The order of the CHECK-INT loads doesn't matter. The same goes for the +; CHECK-FP128-* stores and the CHECK-STACK stores. It would be OK to reorder +; them in response to future code changes. 
+define void @foo() { +; CHECK-INT: foo: +; CHECK-INT: lghi %r2, 255 +; CHECK-INT: llill %r3, 65534 +; CHECK-INT: llilf %r4, 4294967293 +; CHECK-INT: lghi %r5, -4 +; CHECK-INT: la %r6, {{224|240}}(%r15) +; CHECK-INT: brasl %r14, bar@PLT +; +; CHECK-FLOAT: foo: +; CHECK-FLOAT: lzer %f0 +; CHECK-FLOAT: lcebr %f4, %f0 +; CHECK-FLOAT: brasl %r14, bar@PLT +; +; CHECK-DOUBLE: foo: +; CHECK-DOUBLE: lzdr %f2 +; CHECK-DOUBLE: lcdbr %f6, %f2 +; CHECK-DOUBLE: brasl %r14, bar@PLT +; +; CHECK-FP128-1: foo: +; CHECK-FP128-1: aghi %r15, -256 +; CHECK-FP128-1: lzxr %f0 +; CHECK-FP128-1: std %f0, 224(%r15) +; CHECK-FP128-1: std %f2, 232(%r15) +; CHECK-FP128-1: brasl %r14, bar@PLT +; +; CHECK-FP128-2: foo: +; CHECK-FP128-2: aghi %r15, -256 +; CHECK-FP128-2: lzxr %f0 +; CHECK-FP128-2: std %f0, 240(%r15) +; CHECK-FP128-2: std %f2, 248(%r15) +; CHECK-FP128-2: brasl %r14, bar@PLT +; +; CHECK-STACK: foo: +; CHECK-STACK: aghi %r15, -256 +; CHECK-STACK: la [[REGISTER:%r[0-5]+]], {{224|240}}(%r15) +; CHECK-STACK: stg [[REGISTER]], 216(%r15) +; CHECK-STACK: llilf [[AT184:%r[0-5]+]], 4294967288 +; CHECK-STACK: stg [[AT184]], 184(%r15) +; CHECK-STACK: llill [[AT176:%r[0-5]+]], 65529 +; CHECK-STACK: stg [[AT176]], 176(%r15) +; CHECK-STACK: mvghi 208(%r15), 0 +; CHECK-STACK: mvhi 204(%r15), 0 +; CHECK-STACK: mvghi 192(%r15), -9 +; CHECK-STACK: mvghi 168(%r15), 250 +; CHECK-STACK: mvghi 160(%r15), -5 +; CHECK-STACK: brasl %r14, bar@PLT + + call void @bar (i8 -1, i16 -2, i32 -3, i64 -4, float 0.0, double 0.0, + fp128 0xL00000000000000000000000000000000, i64 -5, + float -0.0, double -0.0, i8 -6, i16 -7, i32 -8, i64 -9, + float 0.0, double 0.0, + fp128 0xL00000000000000000000000000000000) + ret void +} diff --git a/test/CodeGen/SystemZ/args-04.ll b/test/CodeGen/SystemZ/args-04.ll new file mode 100644 index 0000000..8340494 --- /dev/null +++ b/test/CodeGen/SystemZ/args-04.ll @@ -0,0 +1,126 @@ +; Test incoming GPR, FPR and stack arguments when no extension type is given. 
+; This type of argument is used for passing structures, etc. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Do some arithmetic so that we can see the register being used. +define i8 @f1(i8 %r2) { +; CHECK: f1: +; CHECK: ahi %r2, 1 +; CHECK: br %r14 + %y = add i8 %r2, 1 + ret i8 %y +} + +define i16 @f2(i8 %r2, i16 %r3) { +; CHECK: f2: +; CHECK: {{lr|lgr}} %r2, %r3 +; CHECK: br %r14 + ret i16 %r3 +} + +define i32 @f3(i8 %r2, i16 %r3, i32 %r4) { +; CHECK: f3: +; CHECK: {{lr|lgr}} %r2, %r4 +; CHECK: br %r14 + ret i32 %r4 +} + +define i64 @f4(i8 %r2, i16 %r3, i32 %r4, i64 %r5) { +; CHECK: f4: +; CHECK: {{lr|lgr}} %r2, %r5 +; CHECK: br %r14 + ret i64 %r5 +} + +; Do some arithmetic so that we can see the register being used. +define float @f5(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0) { +; CHECK: f5: +; CHECK: aebr %f0, %f0 +; CHECK: br %r14 + %y = fadd float %f0, %f0 + ret float %y +} + +define double @f6(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2) { +; CHECK: f6: +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + ret double %f2 +} + +; fp128s are passed indirectly. Do some arithmetic so that the value +; must be interpreted as a float, rather than as a block of memory to +; be copied. 
+define void @f7(fp128 *%r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2, + fp128 %r6) { +; CHECK: f7: +; CHECK: ld %f0, 0(%r6) +; CHECK: ld %f2, 8(%r6) +; CHECK: axbr %f0, %f0 +; CHECK: std %f0, 0(%r2) +; CHECK: std %f2, 8(%r2) +; CHECK: br %r14 + %y = fadd fp128 %r6, %r6 + store fp128 %y, fp128 *%r2 + ret void +} + +define i64 @f8(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2, + fp128 %r6, i64 %s1) { +; CHECK: f8: +; CHECK: lg %r2, 160(%r15) +; CHECK: br %r14 + ret i64 %s1 +} + +define float @f9(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2, + fp128 %r6, i64 %s1, float %f4) { +; CHECK: f9: +; CHECK: ler %f0, %f4 +; CHECK: br %r14 + ret float %f4 +} + +define double @f10(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2, + fp128 %r6, i64 %s1, float %f4, double %f6) { +; CHECK: f10: +; CHECK: ldr %f0, %f6 +; CHECK: br %r14 + ret double %f6 +} + +define i64 @f11(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2, + fp128 %r6, i64 %s1, float %f4, double %f6, i64 %s2) { +; CHECK: f11: +; CHECK: lg %r2, 168(%r15) +; CHECK: br %r14 + ret i64 %s2 +} + +; Floats are passed right-justified. +define float @f12(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2, + fp128 %r6, i64 %s1, float %f4, double %f6, i64 %s2, + float %s3) { +; CHECK: f12: +; CHECK: le %f0, 180(%r15) +; CHECK: br %r14 + ret float %s3 +} + +; Test a case where the fp128 address is passed on the stack. 
+define void @f13(fp128 *%r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2, + fp128 %r6, i64 %s1, float %f4, double %f6, i64 %s2, + float %s3, fp128 %s4) { +; CHECK: f13: +; CHECK: lg [[REGISTER:%r[1-5]+]], 184(%r15) +; CHECK: ld %f0, 0([[REGISTER]]) +; CHECK: ld %f2, 8([[REGISTER]]) +; CHECK: axbr %f0, %f0 +; CHECK: std %f0, 0(%r2) +; CHECK: std %f2, 8(%r2) +; CHECK: br %r14 + %y = fadd fp128 %s4, %s4 + store fp128 %y, fp128 *%r2 + ret void +} diff --git a/test/CodeGen/SystemZ/args-05.ll b/test/CodeGen/SystemZ/args-05.ll new file mode 100644 index 0000000..9fa193a --- /dev/null +++ b/test/CodeGen/SystemZ/args-05.ll @@ -0,0 +1,47 @@ +; Test that we take advantage of signext and zeroext annotations. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Zero extension of something that is already zero-extended. +define void @f1(i32 zeroext %r2, i64 *%r3) { +; CHECK: f1: +; CHECK-NOT: %r2 +; CHECK: stg %r2, 0(%r3) +; CHECK: br %r14 + %conv = zext i32 %r2 to i64 + store i64 %conv, i64* %r3 + ret void +} + +; Sign extension of something that is already sign-extended. +define void @f2(i32 signext %r2, i64 *%r3) { +; CHECK: f2: +; CHECK-NOT: %r2 +; CHECK: stg %r2, 0(%r3) +; CHECK: br %r14 + %conv = sext i32 %r2 to i64 + store i64 %conv, i64* %r3 + ret void +} + +; Sign extension of something that is already zero-extended. +define void @f3(i32 zeroext %r2, i64 *%r3) { +; CHECK: f3: +; CHECK: lgfr [[REGISTER:%r[0-5]+]], %r2 +; CHECK: stg [[REGISTER]], 0(%r3) +; CHECK: br %r14 + %conv = sext i32 %r2 to i64 + store i64 %conv, i64* %r3 + ret void +} + +; Zero extension of something that is already sign-extended. 
+define void @f4(i32 signext %r2, i64 *%r3) { +; CHECK: f4: +; CHECK: llgfr [[REGISTER:%r[0-5]+]], %r2 +; CHECK: stg [[REGISTER]], 0(%r3) +; CHECK: br %r14 + %conv = zext i32 %r2 to i64 + store i64 %conv, i64* %r3 + ret void +} diff --git a/test/CodeGen/SystemZ/args-06.ll b/test/CodeGen/SystemZ/args-06.ll new file mode 100644 index 0000000..b2f8bee --- /dev/null +++ b/test/CodeGen/SystemZ/args-06.ll @@ -0,0 +1,76 @@ +; Test the padding of unextended integer stack parameters. These are used +; to pass structures. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +define i8 @f1(i8 %a, i8 %b, i8 %c, i8 %d, i8 %e, i8 %f, i8 %g) { +; CHECK: f1: +; CHECK: ar %r2, %r3 +; CHECK: ar %r2, %r4 +; CHECK: ar %r2, %r5 +; CHECK: ar %r2, %r6 +; CHECK: lb {{%r[0-5]}}, 167(%r15) +; CHECK: lb {{%r[0-5]}}, 175(%r15) +; CHECK: br %r14 + %addb = add i8 %a, %b + %addc = add i8 %addb, %c + %addd = add i8 %addc, %d + %adde = add i8 %addd, %e + %addf = add i8 %adde, %f + %addg = add i8 %addf, %g + ret i8 %addg +} + +define i16 @f2(i16 %a, i16 %b, i16 %c, i16 %d, i16 %e, i16 %f, i16 %g) { +; CHECK: f2: +; CHECK: ar %r2, %r3 +; CHECK: ar %r2, %r4 +; CHECK: ar %r2, %r5 +; CHECK: ar %r2, %r6 +; CHECK: lh {{%r[0-5]}}, 166(%r15) +; CHECK: lh {{%r[0-5]}}, 174(%r15) +; CHECK: br %r14 + %addb = add i16 %a, %b + %addc = add i16 %addb, %c + %addd = add i16 %addc, %d + %adde = add i16 %addd, %e + %addf = add i16 %adde, %f + %addg = add i16 %addf, %g + ret i16 %addg +} + +define i32 @f3(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g) { +; CHECK: f3: +; CHECK: ar %r2, %r3 +; CHECK: ar %r2, %r4 +; CHECK: ar %r2, %r5 +; CHECK: ar %r2, %r6 +; CHECK: a %r2, 164(%r15) +; CHECK: a %r2, 172(%r15) +; CHECK: br %r14 + %addb = add i32 %a, %b + %addc = add i32 %addb, %c + %addd = add i32 %addc, %d + %adde = add i32 %addd, %e + %addf = add i32 %adde, %f + %addg = add i32 %addf, %g + ret i32 %addg +} + +define i64 @f4(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g) { +; CHECK: f4: +; 
CHECK: agr %r2, %r3 +; CHECK: agr %r2, %r4 +; CHECK: agr %r2, %r5 +; CHECK: agr %r2, %r6 +; CHECK: ag %r2, 160(%r15) +; CHECK: ag %r2, 168(%r15) +; CHECK: br %r14 + %addb = add i64 %a, %b + %addc = add i64 %addb, %c + %addd = add i64 %addc, %d + %adde = add i64 %addd, %e + %addf = add i64 %adde, %f + %addg = add i64 %addf, %g + ret i64 %addg +} diff --git a/test/CodeGen/SystemZ/asm-01.ll b/test/CodeGen/SystemZ/asm-01.ll new file mode 100644 index 0000000..016d04c --- /dev/null +++ b/test/CodeGen/SystemZ/asm-01.ll @@ -0,0 +1,61 @@ +; Test the "Q" asm constraint, which accepts addresses that have a base +; and a 12-bit displacement. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the lowest range. +define void @f1(i64 %base) { +; CHECK: f1: +; CHECK: blah 0(%r2) +; CHECK: br %r14 + %addr = inttoptr i64 %base to i64 * + call void asm "blah $0", "=*Q" (i64 *%addr) + ret void +} + +; Check the next lowest byte. +define void @f2(i64 %base) { +; CHECK: f2: +; CHECK: aghi %r2, -1 +; CHECK: blah 0(%r2) +; CHECK: br %r14 + %add = add i64 %base, -1 + %addr = inttoptr i64 %add to i64 * + call void asm "blah $0", "=*Q" (i64 *%addr) + ret void +} + +; Check the highest range. +define void @f3(i64 %base) { +; CHECK: f3: +; CHECK: blah 4095(%r2) +; CHECK: br %r14 + %add = add i64 %base, 4095 + %addr = inttoptr i64 %add to i64 * + call void asm "blah $0", "=*Q" (i64 *%addr) + ret void +} + +; Check the next highest byte. 
+define void @f4(i64 %base) { +; CHECK: f4: +; CHECK: aghi %r2, 4096 +; CHECK: blah 0(%r2) +; CHECK: br %r14 + %add = add i64 %base, 4096 + %addr = inttoptr i64 %add to i64 * + call void asm "blah $0", "=*Q" (i64 *%addr) + ret void +} + +; Check that indices aren't allowed +define void @f5(i64 %base, i64 %index) { +; CHECK: f5: +; CHECK: agr %r2, %r3 +; CHECK: blah 0(%r2) +; CHECK: br %r14 + %add = add i64 %base, %index + %addr = inttoptr i64 %add to i64 * + call void asm "blah $0", "=*Q" (i64 *%addr) + ret void +} diff --git a/test/CodeGen/SystemZ/asm-02.ll b/test/CodeGen/SystemZ/asm-02.ll new file mode 100644 index 0000000..12d8bec --- /dev/null +++ b/test/CodeGen/SystemZ/asm-02.ll @@ -0,0 +1,52 @@ +; Test the "R" asm constraint, which accepts addresses that have a base, +; an index and a 12-bit displacement. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the lowest range. +define void @f1(i64 %base) { +; CHECK: f1: +; CHECK: blah 0(%r2) +; CHECK: br %r14 + %addr = inttoptr i64 %base to i64 * + call void asm "blah $0", "=*R" (i64 *%addr) + ret void +} + +; Check the next lowest byte. +define void @f2(i64 %base) { +; CHECK: f2: +; CHECK: aghi %r2, -1 +; CHECK: blah 0(%r2) +; CHECK: br %r14 + %add = add i64 %base, -1 + %addr = inttoptr i64 %add to i64 * + call void asm "blah $0", "=*R" (i64 *%addr) + ret void +} + +; Check the highest range. +define void @f3(i64 %base) { +; CHECK: f3: +; CHECK: blah 4095(%r2) +; CHECK: br %r14 + %add = add i64 %base, 4095 + %addr = inttoptr i64 %add to i64 * + call void asm "blah $0", "=*R" (i64 *%addr) + ret void +} + +; Check the next highest byte. 
+define void @f4(i64 %base) { +; CHECK: f4: +; CHECK: aghi %r2, 4096 +; CHECK: blah 0(%r2) +; CHECK: br %r14 + %add = add i64 %base, 4096 + %addr = inttoptr i64 %add to i64 * + call void asm "blah $0", "=*R" (i64 *%addr) + ret void +} + +; FIXME: at the moment the precise constraint is not passed down to +; target code, so we must conservatively treat "R" as "Q". diff --git a/test/CodeGen/SystemZ/asm-03.ll b/test/CodeGen/SystemZ/asm-03.ll new file mode 100644 index 0000000..a6f3f2a --- /dev/null +++ b/test/CodeGen/SystemZ/asm-03.ll @@ -0,0 +1,16 @@ +; Test the "S" asm constraint, which accepts addresses that have a base +; and a 20-bit displacement. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +define void @f1(i64 %base) { +; CHECK: f1: +; CHECK: blah 0(%r2) +; CHECK: br %r14 + %addr = inttoptr i64 %base to i64 * + call void asm "blah $0", "=*S" (i64 *%addr) + ret void +} + +; FIXME: at the moment the precise constraint is not passed down to +; target code, so we must conservatively treat "S" as "Q". diff --git a/test/CodeGen/SystemZ/asm-04.ll b/test/CodeGen/SystemZ/asm-04.ll new file mode 100644 index 0000000..0560949 --- /dev/null +++ b/test/CodeGen/SystemZ/asm-04.ll @@ -0,0 +1,16 @@ +; Test the "T" asm constraint, which accepts addresses that have a base, +; an index and a 20-bit displacement. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +define void @f1(i64 %base) { +; CHECK: f1: +; CHECK: blah 0(%r2) +; CHECK: br %r14 + %addr = inttoptr i64 %base to i64 * + call void asm "blah $0", "=*T" (i64 *%addr) + ret void +} + +; FIXME: at the moment the precise constraint is not passed down to +; target code, so we must conservatively treat "T" as "Q". diff --git a/test/CodeGen/SystemZ/asm-05.ll b/test/CodeGen/SystemZ/asm-05.ll new file mode 100644 index 0000000..dae90b0 --- /dev/null +++ b/test/CodeGen/SystemZ/asm-05.ll @@ -0,0 +1,15 @@ +; Test the "m" asm constraint, which is equivalent to "T". 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +define void @f1(i64 %base) { +; CHECK: f1: +; CHECK: blah 0(%r2) +; CHECK: br %r14 + %addr = inttoptr i64 %base to i64 * + call void asm "blah $0", "=*m" (i64 *%addr) + ret void +} + +; FIXME: at the moment the precise constraint is not passed down to +; target code, so we must conservatively treat "m" as "Q". diff --git a/test/CodeGen/SystemZ/asm-06.ll b/test/CodeGen/SystemZ/asm-06.ll new file mode 100644 index 0000000..c0e24a3 --- /dev/null +++ b/test/CodeGen/SystemZ/asm-06.ll @@ -0,0 +1,39 @@ +; Test the GPR constraint "a", which forbids %r0. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +define i64 @f1() { +; CHECK: f1: +; CHECK: lhi %r1, 1 +; CHECK: blah %r2 %r1 +; CHECK: br %r14 + %val = call i64 asm "blah $0 $1", "=r,a" (i8 1) + ret i64 %val +} + +define i64 @f2() { +; CHECK: f2: +; CHECK: lhi %r1, 2 +; CHECK: blah %r2 %r1 +; CHECK: br %r14 + %val = call i64 asm "blah $0 $1", "=r,a" (i16 2) + ret i64 %val +} + +define i64 @f3() { +; CHECK: f3: +; CHECK: lhi %r1, 3 +; CHECK: blah %r2 %r1 +; CHECK: br %r14 + %val = call i64 asm "blah $0 $1", "=r,a" (i32 3) + ret i64 %val +} + +define i64 @f4() { +; CHECK: f4: +; CHECK: lghi %r1, 4 +; CHECK: blah %r2 %r1 +; CHECK: br %r14 + %val = call i64 asm "blah $0 $1", "=r,a" (i64 4) + ret i64 %val +} diff --git a/test/CodeGen/SystemZ/asm-07.ll b/test/CodeGen/SystemZ/asm-07.ll new file mode 100644 index 0000000..e07286d --- /dev/null +++ b/test/CodeGen/SystemZ/asm-07.ll @@ -0,0 +1,39 @@ +; Test the GPR constraint "r". 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +define i64 @f1() { +; CHECK: f1: +; CHECK: lhi %r0, 1 +; CHECK: blah %r2 %r0 +; CHECK: br %r14 + %val = call i64 asm "blah $0 $1", "=r,r" (i8 1) + ret i64 %val +} + +define i64 @f2() { +; CHECK: f2: +; CHECK: lhi %r0, 2 +; CHECK: blah %r2 %r0 +; CHECK: br %r14 + %val = call i64 asm "blah $0 $1", "=r,r" (i16 2) + ret i64 %val +} + +define i64 @f3() { +; CHECK: f3: +; CHECK: lhi %r0, 3 +; CHECK: blah %r2 %r0 +; CHECK: br %r14 + %val = call i64 asm "blah $0 $1", "=r,r" (i32 3) + ret i64 %val +} + +define i64 @f4() { +; CHECK: f4: +; CHECK: lghi %r0, 4 +; CHECK: blah %r2 %r0 +; CHECK: br %r14 + %val = call i64 asm "blah $0 $1", "=r,r" (i64 4) + ret i64 %val +} diff --git a/test/CodeGen/SystemZ/asm-08.ll b/test/CodeGen/SystemZ/asm-08.ll new file mode 100644 index 0000000..15abc4d --- /dev/null +++ b/test/CodeGen/SystemZ/asm-08.ll @@ -0,0 +1,39 @@ +; Test the GPR constraint "d", which is equivalent to "r". +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +define i64 @f1() { +; CHECK: f1: +; CHECK: lhi %r0, 1 +; CHECK: blah %r2 %r0 +; CHECK: br %r14 + %val = call i64 asm "blah $0 $1", "=d,d" (i8 1) + ret i64 %val +} + +define i64 @f2() { +; CHECK: f2: +; CHECK: lhi %r0, 2 +; CHECK: blah %r2 %r0 +; CHECK: br %r14 + %val = call i64 asm "blah $0 $1", "=d,d" (i16 2) + ret i64 %val +} + +define i64 @f3() { +; CHECK: f3: +; CHECK: lhi %r0, 3 +; CHECK: blah %r2 %r0 +; CHECK: br %r14 + %val = call i64 asm "blah $0 $1", "=d,d" (i32 3) + ret i64 %val +} + +define i64 @f4() { +; CHECK: f4: +; CHECK: lghi %r0, 4 +; CHECK: blah %r2 %r0 +; CHECK: br %r14 + %val = call i64 asm "blah $0 $1", "=d,d" (i64 4) + ret i64 %val +} diff --git a/test/CodeGen/SystemZ/asm-09.ll b/test/CodeGen/SystemZ/asm-09.ll new file mode 100644 index 0000000..1541170 --- /dev/null +++ b/test/CodeGen/SystemZ/asm-09.ll @@ -0,0 +1,83 @@ +; Test matching operands with the GPR constraint "r". 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +define void @f1(i32 *%dst) { +; CHECK: f1: +; CHECK: lhi %r0, 100 +; CHECK: blah %r0 +; CHECK: st %r0, 0(%r2) +; CHECK: br %r14 + %val = call i32 asm "blah $0", "=r,0" (i8 100) + store i32 %val, i32 *%dst + ret void +} + +define void @f2(i32 *%dst) { +; CHECK: f2: +; CHECK: lhi %r0, 101 +; CHECK: blah %r0 +; CHECK: st %r0, 0(%r2) +; CHECK: br %r14 + %val = call i32 asm "blah $0", "=r,0" (i16 101) + store i32 %val, i32 *%dst + ret void +} + +define void @f3(i32 *%dst) { +; CHECK: f3: +; CHECK: lhi %r0, 102 +; CHECK: blah %r0 +; CHECK: st %r0, 0(%r2) +; CHECK: br %r14 + %val = call i32 asm "blah $0", "=r,0" (i32 102) + store i32 %val, i32 *%dst + ret void +} + +; FIXME: this uses "lhi %r0, 103", but should use "lghi %r0, 103". +define void @f4(i32 *%dst) { +; CHECK: f4: +; CHECK: blah %r0 +; CHECK: st %r0, 0(%r2) +; CHECK: br %r14 + %val = call i32 asm "blah $0", "=r,0" (i64 103) + store i32 %val, i32 *%dst + ret void +} + +define i64 @f5() { +; CHECK: f5: +; CHECK: lghi %r2, 104 +; CHECK: blah %r2 +; CHECK: br %r14 + %val = call i64 asm "blah $0", "=r,0" (i8 104) + ret i64 %val +} + +define i64 @f6() { +; CHECK: f6: +; CHECK: lghi %r2, 105 +; CHECK: blah %r2 +; CHECK: br %r14 + %val = call i64 asm "blah $0", "=r,0" (i16 105) + ret i64 %val +} + +define i64 @f7() { +; CHECK: f7: +; CHECK: lghi %r2, 106 +; CHECK: blah %r2 +; CHECK: br %r14 + %val = call i64 asm "blah $0", "=r,0" (i32 106) + ret i64 %val +} + +define i64 @f8() { +; CHECK: f8: +; CHECK: lghi %r2, 107 +; CHECK: blah %r2 +; CHECK: br %r14 + %val = call i64 asm "blah $0", "=r,0" (i64 107) + ret i64 %val +} diff --git a/test/CodeGen/SystemZ/asm-10.ll b/test/CodeGen/SystemZ/asm-10.ll new file mode 100644 index 0000000..676c202 --- /dev/null +++ b/test/CodeGen/SystemZ/asm-10.ll @@ -0,0 +1,30 @@ +; Test the FPR constraint "f". 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +define float @f1() { +; CHECK: f1: +; CHECK: lzer %f1 +; CHECK: blah %f0 %f1 +; CHECK: br %r14 + %val = call float asm "blah $0 $1", "=&f,f" (float 0.0) + ret float %val +} + +define double @f2() { +; CHECK: f2: +; CHECK: lzdr %f1 +; CHECK: blah %f0 %f1 +; CHECK: br %r14 + %val = call double asm "blah $0 $1", "=&f,f" (double 0.0) + ret double %val +} + +define double @f3() { +; CHECK: f3: +; CHECK: lzxr %f1 +; CHECK: blah %f0 %f1 +; CHECK: br %r14 + %val = call double asm "blah $0 $1", "=&f,f" (fp128 0xL00000000000000000000000000000000) + ret double %val +} diff --git a/test/CodeGen/SystemZ/asm-11.ll b/test/CodeGen/SystemZ/asm-11.ll new file mode 100644 index 0000000..9bd8d7c --- /dev/null +++ b/test/CodeGen/SystemZ/asm-11.ll @@ -0,0 +1,41 @@ +; Test the "I" constraint (8-bit unsigned constants). +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test 1 below the first valid value. +define i32 @f1() { +; CHECK: f1: +; CHECK: lhi [[REG:%r[0-5]]], -1 +; CHECK: blah %r2 [[REG]] +; CHECK: br %r14 + %val = call i32 asm "blah $0 $1", "=&r,rI" (i32 -1) + ret i32 %val +} + +; Test the first valid value. +define i32 @f2() { +; CHECK: f2: +; CHECK: blah %r2 0 +; CHECK: br %r14 + %val = call i32 asm "blah $0 $1", "=&r,rI" (i32 0) + ret i32 %val +} + +; Test the last valid value. +define i32 @f3() { +; CHECK: f3: +; CHECK: blah %r2 255 +; CHECK: br %r14 + %val = call i32 asm "blah $0 $1", "=&r,rI" (i32 255) + ret i32 %val +} + +; Test 1 above the last valid value. +define i32 @f4() { +; CHECK: f4: +; CHECK: lhi [[REG:%r[0-5]]], 256 +; CHECK: blah %r2 [[REG]] +; CHECK: br %r14 + %val = call i32 asm "blah $0 $1", "=&r,rI" (i32 256) + ret i32 %val +} diff --git a/test/CodeGen/SystemZ/asm-12.ll b/test/CodeGen/SystemZ/asm-12.ll new file mode 100644 index 0000000..dd920f1 --- /dev/null +++ b/test/CodeGen/SystemZ/asm-12.ll @@ -0,0 +1,41 @@ +; Test the "J" constraint (12-bit unsigned constants). 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test 1 below the first valid value. +define i32 @f1() { +; CHECK: f1: +; CHECK: lhi [[REG:%r[0-5]]], -1 +; CHECK: blah %r2 [[REG]] +; CHECK: br %r14 + %val = call i32 asm "blah $0 $1", "=&r,rJ" (i32 -1) + ret i32 %val +} + +; Test the first valid value. +define i32 @f2() { +; CHECK: f2: +; CHECK: blah %r2 0 +; CHECK: br %r14 + %val = call i32 asm "blah $0 $1", "=&r,rJ" (i32 0) + ret i32 %val +} + +; Test the last valid value. +define i32 @f3() { +; CHECK: f3: +; CHECK: blah %r2 4095 +; CHECK: br %r14 + %val = call i32 asm "blah $0 $1", "=&r,rJ" (i32 4095) + ret i32 %val +} + +; Test 1 above the last valid value. +define i32 @f4() { +; CHECK: f4: +; CHECK: lhi [[REG:%r[0-5]]], 4096 +; CHECK: blah %r2 [[REG]] +; CHECK: br %r14 + %val = call i32 asm "blah $0 $1", "=&r,rJ" (i32 4096) + ret i32 %val +} diff --git a/test/CodeGen/SystemZ/asm-13.ll b/test/CodeGen/SystemZ/asm-13.ll new file mode 100644 index 0000000..af3fdb3 --- /dev/null +++ b/test/CodeGen/SystemZ/asm-13.ll @@ -0,0 +1,41 @@ +; Test the "K" constraint (16-bit signed constants). +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test 1 below the first valid value. +define i32 @f1() { +; CHECK: f1: +; CHECK: iilf [[REG:%r[0-5]]], 4294934527 +; CHECK: blah %r2 [[REG]] +; CHECK: br %r14 + %val = call i32 asm "blah $0 $1", "=&r,rK" (i32 -32769) + ret i32 %val +} + +; Test the first valid value. +define i32 @f2() { +; CHECK: f2: +; CHECK: blah %r2 -32768 +; CHECK: br %r14 + %val = call i32 asm "blah $0 $1", "=&r,rK" (i32 -32768) + ret i32 %val +} + +; Test the last valid value. +define i32 @f3() { +; CHECK: f3: +; CHECK: blah %r2 32767 +; CHECK: br %r14 + %val = call i32 asm "blah $0 $1", "=&r,rK" (i32 32767) + ret i32 %val +} + +; Test 1 above the last valid value. 
+define i32 @f4() { +; CHECK: f4: +; CHECK: llill [[REG:%r[0-5]]], 32768 +; CHECK: blah %r2 [[REG]] +; CHECK: br %r14 + %val = call i32 asm "blah $0 $1", "=&r,rK" (i32 32768) + ret i32 %val +} diff --git a/test/CodeGen/SystemZ/asm-14.ll b/test/CodeGen/SystemZ/asm-14.ll new file mode 100644 index 0000000..b6b28d6 --- /dev/null +++ b/test/CodeGen/SystemZ/asm-14.ll @@ -0,0 +1,41 @@ +; Test the "L" constraint (20-bit signed constants). +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test 1 below the first valid value. +define i32 @f1() { +; CHECK: f1: +; CHECK: iilf [[REG:%r[0-5]]], 4294443007 +; CHECK: blah %r2 [[REG]] +; CHECK: br %r14 + %val = call i32 asm "blah $0 $1", "=&r,rL" (i32 -524289) + ret i32 %val +} + +; Test the first valid value. +define i32 @f2() { +; CHECK: f2: +; CHECK: blah %r2 -524288 +; CHECK: br %r14 + %val = call i32 asm "blah $0 $1", "=&r,rL" (i32 -524288) + ret i32 %val +} + +; Test the last valid value. +define i32 @f3() { +; CHECK: f3: +; CHECK: blah %r2 524287 +; CHECK: br %r14 + %val = call i32 asm "blah $0 $1", "=&r,rL" (i32 524287) + ret i32 %val +} + +; Test 1 above the last valid value. +define i32 @f4() { +; CHECK: f4: +; CHECK: llilh [[REG:%r[0-5]]], 8 +; CHECK: blah %r2 [[REG]] +; CHECK: br %r14 + %val = call i32 asm "blah $0 $1", "=&r,rL" (i32 524288) + ret i32 %val +} diff --git a/test/CodeGen/SystemZ/asm-15.ll b/test/CodeGen/SystemZ/asm-15.ll new file mode 100644 index 0000000..4d0e2b4 --- /dev/null +++ b/test/CodeGen/SystemZ/asm-15.ll @@ -0,0 +1,32 @@ +; Test the "M" constraint (0x7fffffff) +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test 1 below the valid value. +define i32 @f1() { +; CHECK: f1: +; CHECK: iilf [[REG:%r[0-5]]], 2147483646 +; CHECK: blah %r2 [[REG]] +; CHECK: br %r14 + %val = call i32 asm "blah $0 $1", "=&r,rM" (i32 2147483646) + ret i32 %val +} + +; Test the first valid value. 
+define i32 @f2() { +; CHECK: f2: +; CHECK: blah %r2 2147483647 +; CHECK: br %r14 + %val = call i32 asm "blah $0 $1", "=&r,rM" (i32 2147483647) + ret i32 %val +} + +; Test 1 above the valid value. +define i32 @f3() { +; CHECK: f3: +; CHECK: llilh [[REG:%r[0-5]]], 32768 +; CHECK: blah %r2 [[REG]] +; CHECK: br %r14 + %val = call i32 asm "blah $0 $1", "=&r,rM" (i32 2147483648) + ret i32 %val +} diff --git a/test/CodeGen/SystemZ/asm-16.ll b/test/CodeGen/SystemZ/asm-16.ll new file mode 100644 index 0000000..4d0e2b4 --- /dev/null +++ b/test/CodeGen/SystemZ/asm-16.ll @@ -0,0 +1,32 @@ +; Test the "M" constraint (0x7fffffff) +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test 1 below the valid value. +define i32 @f1() { +; CHECK: f1: +; CHECK: iilf [[REG:%r[0-5]]], 2147483646 +; CHECK: blah %r2 [[REG]] +; CHECK: br %r14 + %val = call i32 asm "blah $0 $1", "=&r,rM" (i32 2147483646) + ret i32 %val +} + +; Test the first valid value. +define i32 @f2() { +; CHECK: f2: +; CHECK: blah %r2 2147483647 +; CHECK: br %r14 + %val = call i32 asm "blah $0 $1", "=&r,rM" (i32 2147483647) + ret i32 %val +} + +; Test 1 above the valid value. +define i32 @f3() { +; CHECK: f3: +; CHECK: llilh [[REG:%r[0-5]]], 32768 +; CHECK: blah %r2 [[REG]] +; CHECK: br %r14 + %val = call i32 asm "blah $0 $1", "=&r,rM" (i32 2147483648) + ret i32 %val +} diff --git a/test/CodeGen/SystemZ/atomic-load-01.ll b/test/CodeGen/SystemZ/atomic-load-01.ll new file mode 100644 index 0000000..3e86bcf --- /dev/null +++ b/test/CodeGen/SystemZ/atomic-load-01.ll @@ -0,0 +1,13 @@ +; Test 8-bit atomic loads. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; This is just a placeholder to make sure that loads are handled. +; The CS-based sequence is probably far too conservative. 
+define i8 @f1(i8 *%src) { +; CHECK: f1: +; CHECK: cs +; CHECK: br %r14 + %val = load atomic i8 *%src seq_cst, align 1 + ret i8 %val +} diff --git a/test/CodeGen/SystemZ/atomic-load-02.ll b/test/CodeGen/SystemZ/atomic-load-02.ll new file mode 100644 index 0000000..d6168ce --- /dev/null +++ b/test/CodeGen/SystemZ/atomic-load-02.ll @@ -0,0 +1,13 @@ +; Test 16-bit atomic loads. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; This is just a placeholder to make sure that loads are handled. +; The CS-based sequence is probably far too conservative. +define i16 @f1(i16 *%src) { +; CHECK: f1: +; CHECK: cs +; CHECK: br %r14 + %val = load atomic i16 *%src seq_cst, align 2 + ret i16 %val +} diff --git a/test/CodeGen/SystemZ/atomic-load-03.ll b/test/CodeGen/SystemZ/atomic-load-03.ll new file mode 100644 index 0000000..fcf0cf3 --- /dev/null +++ b/test/CodeGen/SystemZ/atomic-load-03.ll @@ -0,0 +1,14 @@ +; Test 32-bit atomic loads. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; This is just a placeholder to make sure that loads are handled. +; Using CS is probably too conservative. +define i32 @f1(i32 %dummy, i32 *%src) { +; CHECK: f1: +; CHECK: lhi %r2, 0 +; CHECK: cs %r2, %r2, 0(%r3) +; CHECK: br %r14 + %val = load atomic i32 *%src seq_cst, align 4 + ret i32 %val +} diff --git a/test/CodeGen/SystemZ/atomic-load-04.ll b/test/CodeGen/SystemZ/atomic-load-04.ll new file mode 100644 index 0000000..9593d35 --- /dev/null +++ b/test/CodeGen/SystemZ/atomic-load-04.ll @@ -0,0 +1,14 @@ +; Test 64-bit atomic loads. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; This is just a placeholder to make sure that loads are handled. +; Using CSG is probably too conservative. 
+define i64 @f1(i64 %dummy, i64 *%src) { +; CHECK: f1: +; CHECK: lghi %r2, 0 +; CHECK: csg %r2, %r2, 0(%r3) +; CHECK: br %r14 + %val = load atomic i64 *%src seq_cst, align 8 + ret i64 %val +} diff --git a/test/CodeGen/SystemZ/atomic-store-01.ll b/test/CodeGen/SystemZ/atomic-store-01.ll new file mode 100644 index 0000000..b316e5c --- /dev/null +++ b/test/CodeGen/SystemZ/atomic-store-01.ll @@ -0,0 +1,13 @@ +; Test 8-bit atomic stores. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; This is just a placeholder to make sure that stores are handled. +; The CS-based sequence is probably far too conservative. +define void @f1(i8 %val, i8 *%src) { +; CHECK: f1: +; CHECK: cs +; CHECK: br %r14 + store atomic i8 %val, i8 *%src seq_cst, align 1 + ret void +} diff --git a/test/CodeGen/SystemZ/atomic-store-02.ll b/test/CodeGen/SystemZ/atomic-store-02.ll new file mode 100644 index 0000000..c761714 --- /dev/null +++ b/test/CodeGen/SystemZ/atomic-store-02.ll @@ -0,0 +1,13 @@ +; Test 16-bit atomic stores. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; This is just a placeholder to make sure that stores are handled. +; The CS-based sequence is probably far too conservative. +define void @f1(i16 %val, i16 *%src) { +; CHECK: f1: +; CHECK: cs +; CHECK: br %r14 + store atomic i16 %val, i16 *%src seq_cst, align 2 + ret void +} diff --git a/test/CodeGen/SystemZ/atomic-store-03.ll b/test/CodeGen/SystemZ/atomic-store-03.ll new file mode 100644 index 0000000..6e29963 --- /dev/null +++ b/test/CodeGen/SystemZ/atomic-store-03.ll @@ -0,0 +1,16 @@ +; Test 32-bit atomic stores. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; This is just a placeholder to make sure that stores are handled. +; Using CS is probably too conservative. 
+define void @f1(i32 %val, i32 *%src) { +; CHECK: f1: +; CHECK: l %r0, 0(%r3) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: cs %r0, %r2, 0(%r3) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: br %r14 + store atomic i32 %val, i32 *%src seq_cst, align 4 + ret void +} diff --git a/test/CodeGen/SystemZ/atomic-store-04.ll b/test/CodeGen/SystemZ/atomic-store-04.ll new file mode 100644 index 0000000..7a611c8 --- /dev/null +++ b/test/CodeGen/SystemZ/atomic-store-04.ll @@ -0,0 +1,16 @@ +; Test 64-bit atomic stores. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; This is just a placeholder to make sure that stores are handled. +; Using CSG is probably too conservative. +define void @f1(i64 %val, i64 *%src) { +; CHECK: f1: +; CHECK: lg %r0, 0(%r3) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: csg %r0, %r2, 0(%r3) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: br %r14 + store atomic i64 %val, i64 *%src seq_cst, align 8 + ret void +} diff --git a/test/CodeGen/SystemZ/atomicrmw-add-01.ll b/test/CodeGen/SystemZ/atomicrmw-add-01.ll new file mode 100644 index 0000000..2a84857 --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-add-01.ll @@ -0,0 +1,132 @@ +; Test 8-bit atomic additions. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT1 +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT2 + +; Check addition of a variable. +; - CHECK is for the main loop. +; - CHECK-SHIFT1 makes sure that the negated shift count used by the second +; RLL is set up correctly. The negation is independent of the NILL and L +; tested in CHECK. +; - CHECK-SHIFT2 makes sure that %b is shifted into the high part of the word +; before being used. This shift is independent of the other loop prologue +; instructions. 
+define i8 @f1(i8 *%src, i8 %b) { +; CHECK: f1: +; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK: nill %r2, 65532 +; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) +; CHECK: ar [[ROT]], %r3 +; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) +; CHECK: cs [[OLD]], [[NEW]], 0(%r2) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: rll %r2, [[OLD]], 8([[SHIFT]]) +; CHECK: br %r14 +; +; CHECK-SHIFT1: f1: +; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]]) +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: br %r14 +; +; CHECK-SHIFT2: f1: +; CHECK-SHIFT2: sll %r3, 24 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: ar {{%r[0-9]+}}, %r3 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: br %r14 + %res = atomicrmw add i8 *%src, i8 %b seq_cst + ret i8 %res +} + +; Check the minimum signed value. We add 0x80000000 to the rotated word. +define i8 @f2(i8 *%src) { +; CHECK: f2: +; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK: nill %r2, 65532 +; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) +; CHECK: afi [[ROT]], -2147483648 +; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]]) +; CHECK: cs [[OLD]], [[NEW]], 0(%r2) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: rll %r2, [[OLD]], 8([[SHIFT]]) +; CHECK: br %r14 +; +; CHECK-SHIFT1: f2: +; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]]) +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: br %r14 +; +; CHECK-SHIFT2: f2: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw add i8 *%src, i8 -128 seq_cst + ret i8 %res +} + +; Check addition of -1. We add 0xff000000 to the rotated word. 
+define i8 @f3(i8 *%src) { +; CHECK: f3: +; CHECK: afi [[ROT]], -16777216 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f3: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f3: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw add i8 *%src, i8 -1 seq_cst + ret i8 %res +} + +; Check addition of 1. We add 0x01000000 to the rotated word. +define i8 @f4(i8 *%src) { +; CHECK: f4: +; CHECK: afi [[ROT]], 16777216 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f4: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f4: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw add i8 *%src, i8 1 seq_cst + ret i8 %res +} + +; Check the maximum signed value. We add 0x7f000000 to the rotated word. +define i8 @f5(i8 *%src) { +; CHECK: f5: +; CHECK: afi [[ROT]], 2130706432 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f5: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f5: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw add i8 *%src, i8 127 seq_cst + ret i8 %res +} + +; Check addition of a large unsigned value. We add 0xfe000000 to the +; rotated word, expressed as a negative AFI operand. +define i8 @f6(i8 *%src) { +; CHECK: f6: +; CHECK: afi [[ROT]], -33554432 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f6: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f6: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw add i8 *%src, i8 254 seq_cst + ret i8 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-add-02.ll b/test/CodeGen/SystemZ/atomicrmw-add-02.ll new file mode 100644 index 0000000..3dd482d --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-add-02.ll @@ -0,0 +1,132 @@ +; Test 16-bit atomic additions. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT1 +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT2 + +; Check addition of a variable. +; - CHECK is for the main loop. +; - CHECK-SHIFT1 makes sure that the negated shift count used by the second +; RLL is set up correctly. 
The negation is independent of the NILL and L +; tested in CHECK. +; - CHECK-SHIFT2 makes sure that %b is shifted into the high part of the word +; before being used. This shift is independent of the other loop prologue +; instructions. +define i16 @f1(i16 *%src, i16 %b) { +; CHECK: f1: +; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK: nill %r2, 65532 +; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) +; CHECK: ar [[ROT]], %r3 +; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) +; CHECK: cs [[OLD]], [[NEW]], 0(%r2) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: rll %r2, [[OLD]], 16([[SHIFT]]) +; CHECK: br %r14 +; +; CHECK-SHIFT1: f1: +; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]]) +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: br %r14 +; +; CHECK-SHIFT2: f1: +; CHECK-SHIFT2: sll %r3, 16 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: ar {{%r[0-9]+}}, %r3 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: br %r14 + %res = atomicrmw add i16 *%src, i16 %b seq_cst + ret i16 %res +} + +; Check the minimum signed value. We add 0x80000000 to the rotated word. 
+define i16 @f2(i16 *%src) { +; CHECK: f2: +; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK: nill %r2, 65532 +; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) +; CHECK: afi [[ROT]], -2147483648 +; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]]) +; CHECK: cs [[OLD]], [[NEW]], 0(%r2) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: rll %r2, [[OLD]], 16([[SHIFT]]) +; CHECK: br %r14 +; +; CHECK-SHIFT1: f2: +; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]]) +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: br %r14 +; +; CHECK-SHIFT2: f2: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw add i16 *%src, i16 -32768 seq_cst + ret i16 %res +} + +; Check addition of -1. We add 0xffff0000 to the rotated word. +define i16 @f3(i16 *%src) { +; CHECK: f3: +; CHECK: afi [[ROT]], -65536 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f3: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f3: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw add i16 *%src, i16 -1 seq_cst + ret i16 %res +} + +; Check addition of 1. We add 0x00010000 to the rotated word. +define i16 @f4(i16 *%src) { +; CHECK: f4: +; CHECK: afi [[ROT]], 65536 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f4: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f4: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw add i16 *%src, i16 1 seq_cst + ret i16 %res +} + +; Check the maximum signed value. We add 0x7fff0000 to the rotated word. +define i16 @f5(i16 *%src) { +; CHECK: f5: +; CHECK: afi [[ROT]], 2147418112 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f5: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f5: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw add i16 *%src, i16 32767 seq_cst + ret i16 %res +} + +; Check addition of a large unsigned value. We add 0xfffe0000 to the +; rotated word, expressed as a negative AFI operand. 
+define i16 @f6(i16 *%src) { +; CHECK: f6: +; CHECK: afi [[ROT]], -131072 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f6: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f6: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw add i16 *%src, i16 65534 seq_cst + ret i16 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-add-03.ll b/test/CodeGen/SystemZ/atomicrmw-add-03.ll new file mode 100644 index 0000000..01eb8e0 --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-add-03.ll @@ -0,0 +1,94 @@ +; Test 32-bit atomic additions. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check addition of a variable. +define i32 @f1(i32 %dummy, i32 *%src, i32 %b) { +; CHECK: f1: +; CHECK: l %r2, 0(%r3) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: lr %r0, %r2 +; CHECK: ar %r0, %r4 +; CHECK: cs %r2, %r0, 0(%r3) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: br %r14 + %res = atomicrmw add i32 *%src, i32 %b seq_cst + ret i32 %res +} + +; Check addition of 1, which can use AHI. +define i32 @f2(i32 %dummy, i32 *%src) { +; CHECK: f2: +; CHECK: l %r2, 0(%r3) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: lr %r0, %r2 +; CHECK: ahi %r0, 1 +; CHECK: cs %r2, %r0, 0(%r3) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: br %r14 + %res = atomicrmw add i32 *%src, i32 1 seq_cst + ret i32 %res +} + +; Check the high end of the AHI range. +define i32 @f3(i32 %dummy, i32 *%src) { +; CHECK: f3: +; CHECK: ahi %r0, 32767 +; CHECK: br %r14 + %res = atomicrmw add i32 *%src, i32 32767 seq_cst + ret i32 %res +} + +; Check the next value up, which must use AFI. +define i32 @f4(i32 %dummy, i32 *%src) { +; CHECK: f4: +; CHECK: afi %r0, 32768 +; CHECK: br %r14 + %res = atomicrmw add i32 *%src, i32 32768 seq_cst + ret i32 %res +} + +; Check the high end of the AFI range. +define i32 @f5(i32 %dummy, i32 *%src) { +; CHECK: f5: +; CHECK: afi %r0, 2147483647 +; CHECK: br %r14 + %res = atomicrmw add i32 *%src, i32 2147483647 seq_cst + ret i32 %res +} + +; Check the next value up, which gets treated as a negative operand. 
+define i32 @f6(i32 %dummy, i32 *%src) { +; CHECK: f6: +; CHECK: afi %r0, -2147483648 +; CHECK: br %r14 + %res = atomicrmw add i32 *%src, i32 2147483648 seq_cst + ret i32 %res +} + +; Check addition of -1, which can use AHI. +define i32 @f7(i32 %dummy, i32 *%src) { +; CHECK: f7: +; CHECK: ahi %r0, -1 +; CHECK: br %r14 + %res = atomicrmw add i32 *%src, i32 -1 seq_cst + ret i32 %res +} + +; Check the low end of the AHI range. +define i32 @f8(i32 %dummy, i32 *%src) { +; CHECK: f8: +; CHECK: ahi %r0, -32768 +; CHECK: br %r14 + %res = atomicrmw add i32 *%src, i32 -32768 seq_cst + ret i32 %res +} + +; Check the next value down, which must use AFI instead. +define i32 @f9(i32 %dummy, i32 *%src) { +; CHECK: f9: +; CHECK: afi %r0, -32769 +; CHECK: br %r14 + %res = atomicrmw add i32 *%src, i32 -32769 seq_cst + ret i32 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-add-04.ll b/test/CodeGen/SystemZ/atomicrmw-add-04.ll new file mode 100644 index 0000000..6b1d20b --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-add-04.ll @@ -0,0 +1,112 @@ +; Test 64-bit atomic additions. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check addition of a variable. +define i64 @f1(i64 %dummy, i64 *%src, i64 %b) { +; CHECK: f1: +; CHECK: lg %r2, 0(%r3) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: lgr %r0, %r2 +; CHECK: agr %r0, %r4 +; CHECK: csg %r2, %r0, 0(%r3) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: br %r14 + %res = atomicrmw add i64 *%src, i64 %b seq_cst + ret i64 %res +} + +; Check addition of 1, which can use AGHI. +define i64 @f2(i64 %dummy, i64 *%src) { +; CHECK: f2: +; CHECK: lg %r2, 0(%r3) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: lgr %r0, %r2 +; CHECK: aghi %r0, 1 +; CHECK: csg %r2, %r0, 0(%r3) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: br %r14 + %res = atomicrmw add i64 *%src, i64 1 seq_cst + ret i64 %res +} + +; Check the high end of the AGHI range. 
+define i64 @f3(i64 %dummy, i64 *%src) { +; CHECK: f3: +; CHECK: aghi %r0, 32767 +; CHECK: br %r14 + %res = atomicrmw add i64 *%src, i64 32767 seq_cst + ret i64 %res +} + +; Check the next value up, which must use AGFI. +define i64 @f4(i64 %dummy, i64 *%src) { +; CHECK: f4: +; CHECK: agfi %r0, 32768 +; CHECK: br %r14 + %res = atomicrmw add i64 *%src, i64 32768 seq_cst + ret i64 %res +} + +; Check the high end of the AGFI range. +define i64 @f5(i64 %dummy, i64 *%src) { +; CHECK: f5: +; CHECK: agfi %r0, 2147483647 +; CHECK: br %r14 + %res = atomicrmw add i64 *%src, i64 2147483647 seq_cst + ret i64 %res +} + +; Check the next value up, which must use a register addition. +define i64 @f6(i64 %dummy, i64 *%src) { +; CHECK: f6: +; CHECK: agr +; CHECK: br %r14 + %res = atomicrmw add i64 *%src, i64 2147483648 seq_cst + ret i64 %res +} + +; Check addition of -1, which can use AGHI. +define i64 @f7(i64 %dummy, i64 *%src) { +; CHECK: f7: +; CHECK: aghi %r0, -1 +; CHECK: br %r14 + %res = atomicrmw add i64 *%src, i64 -1 seq_cst + ret i64 %res +} + +; Check the low end of the AGHI range. +define i64 @f8(i64 %dummy, i64 *%src) { +; CHECK: f8: +; CHECK: aghi %r0, -32768 +; CHECK: br %r14 + %res = atomicrmw add i64 *%src, i64 -32768 seq_cst + ret i64 %res +} + +; Check the next value down, which must use AGFI instead. +define i64 @f9(i64 %dummy, i64 *%src) { +; CHECK: f9: +; CHECK: agfi %r0, -32769 +; CHECK: br %r14 + %res = atomicrmw add i64 *%src, i64 -32769 seq_cst + ret i64 %res +} + +; Check the low end of the AGFI range. +define i64 @f10(i64 %dummy, i64 *%src) { +; CHECK: f10: +; CHECK: agfi %r0, -2147483648 +; CHECK: br %r14 + %res = atomicrmw add i64 *%src, i64 -2147483648 seq_cst + ret i64 %res +} + +; Check the next value down, which must use a register addition. 
+define i64 @f11(i64 %dummy, i64 *%src) { +; CHECK: f11: +; CHECK: agr +; CHECK: br %r14 + %res = atomicrmw add i64 *%src, i64 -2147483649 seq_cst + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-and-01.ll b/test/CodeGen/SystemZ/atomicrmw-and-01.ll new file mode 100644 index 0000000..ebbce8e --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-and-01.ll @@ -0,0 +1,133 @@ +; Test 8-bit atomic ANDs. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT1 +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT2 + +; Check AND of a variable. +; - CHECK is for the main loop. +; - CHECK-SHIFT1 makes sure that the negated shift count used by the second +; RLL is set up correctly. The negation is independent of the NILL and L +; tested in CHECK. +; - CHECK-SHIFT2 makes sure that %b is shifted into the high part of the word +; before being used, and that the low bits are set to 1. This sequence is +; independent of the other loop prologue instructions. 
+define i8 @f1(i8 *%src, i8 %b) { +; CHECK: f1: +; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK: nill %r2, 65532 +; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) +; CHECK: nr [[ROT]], %r3 +; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) +; CHECK: cs [[OLD]], [[NEW]], 0(%r2) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: rll %r2, [[OLD]], 8([[SHIFT]]) +; CHECK: br %r14 +; +; CHECK-SHIFT1: f1: +; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]]) +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: br %r14 +; +; CHECK-SHIFT2: f1: +; CHECK-SHIFT2: sll %r3, 24 +; CHECK-SHIFT2: oilf %r3, 16777215 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: nr {{%r[0-9]+}}, %r3 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: br %r14 + %res = atomicrmw and i8 *%src, i8 %b seq_cst + ret i8 %res +} + +; Check the minimum signed value. We AND the rotated word with 0x80ffffff. +define i8 @f2(i8 *%src) { +; CHECK: f2: +; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK: nill %r2, 65532 +; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) +; CHECK: nilh [[ROT]], 33023 +; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]]) +; CHECK: cs [[OLD]], [[NEW]], 0(%r2) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: rll %r2, [[OLD]], 8([[SHIFT]]) +; CHECK: br %r14 +; +; CHECK-SHIFT1: f2: +; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]]) +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: br %r14 +; +; CHECK-SHIFT2: f2: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw and i8 *%src, i8 -128 seq_cst + ret i8 %res +} + +; Check ANDs of -2 (-1 isn't useful). We AND the rotated word with 0xfeffffff. 
+define i8 @f3(i8 *%src) { +; CHECK: f3: +; CHECK: nilh [[ROT]], 65279 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f3: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f3: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw and i8 *%src, i8 -2 seq_cst + ret i8 %res +} + +; Check ANDs of 1. We AND the rotated word with 0x01ffffff. +define i8 @f4(i8 *%src) { +; CHECK: f4: +; CHECK: nilh [[ROT]], 511 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f4: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f4: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw and i8 *%src, i8 1 seq_cst + ret i8 %res +} + +; Check the maximum signed value. We AND the rotated word with 0x7fffffff. +define i8 @f5(i8 *%src) { +; CHECK: f5: +; CHECK: nilh [[ROT]], 32767 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f5: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f5: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw and i8 *%src, i8 127 seq_cst + ret i8 %res +} + +; Check ANDs of a large unsigned value. We AND the rotated word with +; 0xfdffffff. +define i8 @f6(i8 *%src) { +; CHECK: f6: +; CHECK: nilh [[ROT]], 65023 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f6: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f6: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw and i8 *%src, i8 253 seq_cst + ret i8 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-and-02.ll b/test/CodeGen/SystemZ/atomicrmw-and-02.ll new file mode 100644 index 0000000..b63ca4a --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-and-02.ll @@ -0,0 +1,133 @@ +; Test 16-bit atomic ANDs. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT1 +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT2 + +; Check AND of a variable. +; - CHECK is for the main loop. +; - CHECK-SHIFT1 makes sure that the negated shift count used by the second +; RLL is set up correctly. The negation is independent of the NILL and L +; tested in CHECK. 
+; - CHECK-SHIFT2 makes sure that %b is shifted into the high part of the word +; before being used, and that the low bits are set to 1. This sequence is +; independent of the other loop prologue instructions. +define i16 @f1(i16 *%src, i16 %b) { +; CHECK: f1: +; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK: nill %r2, 65532 +; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) +; CHECK: nr [[ROT]], %r3 +; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) +; CHECK: cs [[OLD]], [[NEW]], 0(%r2) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: rll %r2, [[OLD]], 16([[SHIFT]]) +; CHECK: br %r14 +; +; CHECK-SHIFT1: f1: +; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]]) +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: br %r14 +; +; CHECK-SHIFT2: f1: +; CHECK-SHIFT2: sll %r3, 16 +; CHECK-SHIFT2: oill %r3, 65535 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: nr {{%r[0-9]+}}, %r3 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: br %r14 + %res = atomicrmw and i16 *%src, i16 %b seq_cst + ret i16 %res +} + +; Check the minimum signed value. We AND the rotated word with 0x8000ffff. 
+define i16 @f2(i16 *%src) { +; CHECK: f2: +; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK: nill %r2, 65532 +; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) +; CHECK: nilh [[ROT]], 32768 +; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]]) +; CHECK: cs [[OLD]], [[NEW]], 0(%r2) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: rll %r2, [[OLD]], 16([[SHIFT]]) +; CHECK: br %r14 +; +; CHECK-SHIFT1: f2: +; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]]) +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: br %r14 +; +; CHECK-SHIFT2: f2: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw and i16 *%src, i16 -32768 seq_cst + ret i16 %res +} + +; Check ANDs of -2 (-1 isn't useful). We AND the rotated word with 0xfffeffff. +define i16 @f3(i16 *%src) { +; CHECK: f3: +; CHECK: nilh [[ROT]], 65534 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f3: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f3: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw and i16 *%src, i16 -2 seq_cst + ret i16 %res +} + +; Check ANDs of 1. We AND the rotated word with 0x0001ffff. +define i16 @f4(i16 *%src) { +; CHECK: f4: +; CHECK: nilh [[ROT]], 1 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f4: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f4: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw and i16 *%src, i16 1 seq_cst + ret i16 %res +} + +; Check the maximum signed value. We AND the rotated word with 0x7fffffff. +define i16 @f5(i16 *%src) { +; CHECK: f5: +; CHECK: nilh [[ROT]], 32767 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f5: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f5: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw and i16 *%src, i16 32767 seq_cst + ret i16 %res +} + +; Check ANDs of a large unsigned value. We AND the rotated word with +; 0xfffdffff. 
+define i16 @f6(i16 *%src) { +; CHECK: f6: +; CHECK: nilh [[ROT]], 65533 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f6: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f6: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw and i16 *%src, i16 65533 seq_cst + ret i16 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-and-03.ll b/test/CodeGen/SystemZ/atomicrmw-and-03.ll new file mode 100644 index 0000000..ec69edc --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-and-03.ll @@ -0,0 +1,85 @@ +; Test 32-bit atomic ANDs. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check ANDs of a variable. +define i32 @f1(i32 %dummy, i32 *%src, i32 %b) { +; CHECK: f1: +; CHECK: l %r2, 0(%r3) +; CHECK: [[LABEL:\.[^ ]*]]: +; CHECK: lr %r0, %r2 +; CHECK: nr %r0, %r4 +; CHECK: cs %r2, %r0, 0(%r3) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: br %r14 + %res = atomicrmw and i32 *%src, i32 %b seq_cst + ret i32 %res +} + +; Check ANDs of 1. +define i32 @f2(i32 %dummy, i32 *%src) { +; CHECK: f2: +; CHECK: l %r2, 0(%r3) +; CHECK: [[LABEL:\.[^ ]*]]: +; CHECK: lr %r0, %r2 +; CHECK: nilf %r0, 1 +; CHECK: cs %r2, %r0, 0(%r3) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: br %r14 + %res = atomicrmw and i32 *%src, i32 1 seq_cst + ret i32 %res +} + +; Check ANDs of the low end of the NILH range. +define i32 @f3(i32 %dummy, i32 *%src) { +; CHECK: f3: +; CHECK: nilh %r0, 0 +; CHECK: br %r14 + %res = atomicrmw and i32 *%src, i32 65535 seq_cst + ret i32 %res +} + +; Check the next value up, which must use NILF. +define i32 @f4(i32 %dummy, i32 *%src) { +; CHECK: f4: +; CHECK: nilf %r0, 65536 +; CHECK: br %r14 + %res = atomicrmw and i32 *%src, i32 65536 seq_cst + ret i32 %res +} + +; Check the largest useful NILL value. +define i32 @f5(i32 %dummy, i32 *%src) { +; CHECK: f5: +; CHECK: nill %r0, 65534 +; CHECK: br %r14 + %res = atomicrmw and i32 *%src, i32 -2 seq_cst + ret i32 %res +} + +; Check the low end of the NILL range. 
+define i32 @f6(i32 %dummy, i32 *%src) { +; CHECK: f6: +; CHECK: nill %r0, 0 +; CHECK: br %r14 + %res = atomicrmw and i32 *%src, i32 -65536 seq_cst + ret i32 %res +} + +; Check the largest useful NILH value, which is one less than the above. +define i32 @f7(i32 %dummy, i32 *%src) { +; CHECK: f7: +; CHECK: nilh %r0, 65534 +; CHECK: br %r14 + %res = atomicrmw and i32 *%src, i32 -65537 seq_cst + ret i32 %res +} + +; Check the highest useful NILF value, which is one less than the above. +define i32 @f8(i32 %dummy, i32 *%src) { +; CHECK: f8: +; CHECK: nilf %r0, 4294901758 +; CHECK: br %r14 + %res = atomicrmw and i32 *%src, i32 -65538 seq_cst + ret i32 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-and-04.ll b/test/CodeGen/SystemZ/atomicrmw-and-04.ll new file mode 100644 index 0000000..71f29ba --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-and-04.ll @@ -0,0 +1,157 @@ +; Test 64-bit atomic ANDs. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check ANDs of a variable. +define i64 @f1(i64 %dummy, i64 *%src, i64 %b) { +; CHECK: f1: +; CHECK: lg %r2, 0(%r3) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: lgr %r0, %r2 +; CHECK: ngr %r0, %r4 +; CHECK: csg %r2, %r0, 0(%r3) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: br %r14 + %res = atomicrmw and i64 *%src, i64 %b seq_cst + ret i64 %res +} + +; Check ANDs of 1, which must be done using a register. +define i64 @f2(i64 %dummy, i64 *%src) { +; CHECK: f2: +; CHECK: ngr +; CHECK: br %r14 + %res = atomicrmw and i64 *%src, i64 1 seq_cst + ret i64 %res +} + +; Check the low end of the NIHF range. +define i64 @f3(i64 %dummy, i64 *%src) { +; CHECK: f3: +; CHECK: lg %r2, 0(%r3) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: lgr %r0, %r2 +; CHECK: nihf %r0, 0 +; CHECK: csg %r2, %r0, 0(%r3) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: br %r14 + %res = atomicrmw and i64 *%src, i64 4294967295 seq_cst + ret i64 %res +} + +; Check the next value up, which must use a register. 
+define i64 @f4(i64 %dummy, i64 *%src) { +; CHECK: f4: +; CHECK: ngr +; CHECK: br %r14 + %res = atomicrmw and i64 *%src, i64 4294967296 seq_cst + ret i64 %res +} + +; Check the low end of the NIHH range. +define i64 @f5(i64 %dummy, i64 *%src) { +; CHECK: f5: +; CHECK: nihh %r0, 0 +; CHECK: br %r14 + %res = atomicrmw and i64 *%src, i64 281474976710655 seq_cst + ret i64 %res +} + +; Check the next value up, which must use a register. +define i64 @f6(i64 %dummy, i64 *%src) { +; CHECK: f6: +; CHECK: ngr +; CHECK: br %r14 + %res = atomicrmw and i64 *%src, i64 281474976710656 seq_cst + ret i64 %res +} + +; Check the highest useful NILL value. +define i64 @f7(i64 %dummy, i64 *%src) { +; CHECK: f7: +; CHECK: nill %r0, 65534 +; CHECK: br %r14 + %res = atomicrmw and i64 *%src, i64 -2 seq_cst + ret i64 %res +} + +; Check the low end of the NILL range. +define i64 @f8(i64 %dummy, i64 *%src) { +; CHECK: f8: +; CHECK: nill %r0, 0 +; CHECK: br %r14 + %res = atomicrmw and i64 *%src, i64 -65536 seq_cst + ret i64 %res +} + +; Check the highest useful NILH value, which is one less than the above. +define i64 @f9(i64 %dummy, i64 *%src) { +; CHECK: f9: +; CHECK: nilh %r0, 65534 +; CHECK: br %r14 + %res = atomicrmw and i64 *%src, i64 -65537 seq_cst + ret i64 %res +} + +; Check the highest useful NILF value, which is one less than the above. +define i64 @f10(i64 %dummy, i64 *%src) { +; CHECK: f10: +; CHECK: nilf %r0, 4294901758 +; CHECK: br %r14 + %res = atomicrmw and i64 *%src, i64 -65538 seq_cst + ret i64 %res +} + +; Check the low end of the NILH range. +define i64 @f11(i64 %dummy, i64 *%src) { +; CHECK: f11: +; CHECK: nilh %r0, 0 +; CHECK: br %r14 + %res = atomicrmw and i64 *%src, i64 -4294901761 seq_cst + ret i64 %res +} + +; Check the low end of the NILF range. 
+define i64 @f12(i64 %dummy, i64 *%src) { +; CHECK: f12: +; CHECK: nilf %r0, 0 +; CHECK: br %r14 + %res = atomicrmw and i64 *%src, i64 -4294967296 seq_cst + ret i64 %res +} + +; Check the highest useful NIHL value, which is one less than the above. +define i64 @f13(i64 %dummy, i64 *%src) { +; CHECK: f13: +; CHECK: nihl %r0, 65534 +; CHECK: br %r14 + %res = atomicrmw and i64 *%src, i64 -4294967297 seq_cst + ret i64 %res +} + +; Check the low end of the NIHL range. +define i64 @f14(i64 %dummy, i64 *%src) { +; CHECK: f14: +; CHECK: nihl %r0, 0 +; CHECK: br %r14 + %res = atomicrmw and i64 *%src, i64 -281470681743361 seq_cst + ret i64 %res +} + +; Check the highest useful NIHH value, which is 1<<32 less than the above. +define i64 @f15(i64 %dummy, i64 *%src) { +; CHECK: f15: +; CHECK: nihh %r0, 65534 +; CHECK: br %r14 + %res = atomicrmw and i64 *%src, i64 -281474976710657 seq_cst + ret i64 %res +} + +; Check the highest useful NIHF value, which is 1<<32 less than the above. +define i64 @f16(i64 %dummy, i64 *%src) { +; CHECK: f16: +; CHECK: nihf %r0, 4294901758 +; CHECK: br %r14 + %res = atomicrmw and i64 *%src, i64 -281479271677953 seq_cst + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-minmax-01.ll b/test/CodeGen/SystemZ/atomicrmw-minmax-01.ll new file mode 100644 index 0000000..c6ec77e --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-minmax-01.ll @@ -0,0 +1,228 @@ +; Test 8-bit atomic min/max operations. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT1 +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT2 + +; Check signed minimum. +; - CHECK is for the main loop. +; - CHECK-SHIFT1 makes sure that the negated shift count used by the second +; RLL is set up correctly. The negation is independent of the NILL and L +; tested in CHECK. 
+; - CHECK-SHIFT2 makes sure that %b is shifted into the high part of the word +; before being used, and that the low bits are set to 1. This sequence is +; independent of the other loop prologue instructions. +define i8 @f1(i8 *%src, i8 %b) { +; CHECK: f1: +; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK: nill %r2, 65532 +; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) +; CHECK: [[LOOP:\.[^:]*]]: +; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) +; CHECK: cr [[ROT]], %r3 +; CHECK: j{{g?}}le [[KEEP:\..*]] +; CHECK: risbg [[ROT]], %r3, 32, 39, 0 +; CHECK: [[KEEP]]: +; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) +; CHECK: cs [[OLD]], [[NEW]], 0(%r2) +; CHECK: j{{g?}}lh [[LOOP]] +; CHECK: rll %r2, [[OLD]], 8([[SHIFT]]) +; CHECK: br %r14 +; +; CHECK-SHIFT1: f1: +; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]]) +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: br %r14 +; +; CHECK-SHIFT2: f1: +; CHECK-SHIFT2: sll %r3, 24 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: cr {{%r[0-9]+}}, %r3 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: br %r14 + %res = atomicrmw min i8 *%src, i8 %b seq_cst + ret i8 %res +} + +; Check signed maximum. 
+define i8 @f2(i8 *%src, i8 %b) { +; CHECK: f2: +; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK: nill %r2, 65532 +; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) +; CHECK: [[LOOP:\.[^:]*]]: +; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) +; CHECK: cr [[ROT]], %r3 +; CHECK: j{{g?}}he [[KEEP:\..*]] +; CHECK: risbg [[ROT]], %r3, 32, 39, 0 +; CHECK: [[KEEP]]: +; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) +; CHECK: cs [[OLD]], [[NEW]], 0(%r2) +; CHECK: j{{g?}}lh [[LOOP]] +; CHECK: rll %r2, [[OLD]], 8([[SHIFT]]) +; CHECK: br %r14 +; +; CHECK-SHIFT1: f2: +; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]]) +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: br %r14 +; +; CHECK-SHIFT2: f2: +; CHECK-SHIFT2: sll %r3, 24 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: cr {{%r[0-9]+}}, %r3 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: br %r14 + %res = atomicrmw max i8 *%src, i8 %b seq_cst + ret i8 %res +} + +; Check unsigned minimum. 
+define i8 @f3(i8 *%src, i8 %b) { +; CHECK: f3: +; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK: nill %r2, 65532 +; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) +; CHECK: [[LOOP:\.[^:]*]]: +; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) +; CHECK: clr [[ROT]], %r3 +; CHECK: j{{g?}}le [[KEEP:\..*]] +; CHECK: risbg [[ROT]], %r3, 32, 39, 0 +; CHECK: [[KEEP]]: +; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) +; CHECK: cs [[OLD]], [[NEW]], 0(%r2) +; CHECK: j{{g?}}lh [[LOOP]] +; CHECK: rll %r2, [[OLD]], 8([[SHIFT]]) +; CHECK: br %r14 +; +; CHECK-SHIFT1: f3: +; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]]) +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: br %r14 +; +; CHECK-SHIFT2: f3: +; CHECK-SHIFT2: sll %r3, 24 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: clr {{%r[0-9]+}}, %r3 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: br %r14 + %res = atomicrmw umin i8 *%src, i8 %b seq_cst + ret i8 %res +} + +; Check unsigned maximum. 
+define i8 @f4(i8 *%src, i8 %b) { +; CHECK: f4: +; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK: nill %r2, 65532 +; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) +; CHECK: [[LOOP:\.[^:]*]]: +; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) +; CHECK: clr [[ROT]], %r3 +; CHECK: j{{g?}}he [[KEEP:\..*]] +; CHECK: risbg [[ROT]], %r3, 32, 39, 0 +; CHECK: [[KEEP]]: +; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) +; CHECK: cs [[OLD]], [[NEW]], 0(%r2) +; CHECK: j{{g?}}lh [[LOOP]] +; CHECK: rll %r2, [[OLD]], 8([[SHIFT]]) +; CHECK: br %r14 +; +; CHECK-SHIFT1: f4: +; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]]) +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: br %r14 +; +; CHECK-SHIFT2: f4: +; CHECK-SHIFT2: sll %r3, 24 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: clr {{%r[0-9]+}}, %r3 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: br %r14 + %res = atomicrmw umax i8 *%src, i8 %b seq_cst + ret i8 %res +} + +; Check the lowest useful signed minimum value. We need to load 0x81000000 +; into the source register. +define i8 @f5(i8 *%src) { +; CHECK: f5: +; CHECK: llilh [[SRC2:%r[0-9]+]], 33024 +; CHECK: cr [[ROT:%r[0-9]+]], [[SRC2]] +; CHECK: risbg [[ROT]], [[SRC2]], 32, 39, 0 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f5: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f5: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw min i8 *%src, i8 -127 seq_cst + ret i8 %res +} + +; Check the highest useful signed maximum value. We need to load 0x7e000000 +; into the source register. 
+define i8 @f6(i8 *%src) { +; CHECK: f6: +; CHECK: llilh [[SRC2:%r[0-9]+]], 32256 +; CHECK: cr [[ROT:%r[0-9]+]], [[SRC2]] +; CHECK: risbg [[ROT]], [[SRC2]], 32, 39, 0 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f6: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f6: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw max i8 *%src, i8 126 seq_cst + ret i8 %res +} + +; Check the lowest useful unsigned minimum value. We need to load 0x01000000 +; into the source register. +define i8 @f7(i8 *%src) { +; CHECK: f7: +; CHECK: llilh [[SRC2:%r[0-9]+]], 256 +; CHECK: clr [[ROT:%r[0-9]+]], [[SRC2]] +; CHECK: risbg [[ROT]], [[SRC2]], 32, 39, 0 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f7: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f7: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw umin i8 *%src, i8 1 seq_cst + ret i8 %res +} + +; Check the highest useful unsigned maximum value. We need to load 0xfe000000 +; into the source register. +define i8 @f8(i8 *%src) { +; CHECK: f8: +; CHECK: llilh [[SRC2:%r[0-9]+]], 65024 +; CHECK: clr [[ROT:%r[0-9]+]], [[SRC2]] +; CHECK: risbg [[ROT]], [[SRC2]], 32, 39, 0 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f8: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f8: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw umax i8 *%src, i8 254 seq_cst + ret i8 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-minmax-02.ll b/test/CodeGen/SystemZ/atomicrmw-minmax-02.ll new file mode 100644 index 0000000..9612e99 --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-minmax-02.ll @@ -0,0 +1,228 @@ +; Test 16-bit atomic min/max operations. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT1 +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT2 + +; Check signed minimum. +; - CHECK is for the main loop. +; - CHECK-SHIFT1 makes sure that the negated shift count used by the second +; RLL is set up correctly.
The negation is independent of the NILL and L +; tested in CHECK. +; - CHECK-SHIFT2 makes sure that %b is shifted into the high part of the word +; before being used, and that the low bits are set to 1. This sequence is +; independent of the other loop prologue instructions. +define i16 @f1(i16 *%src, i16 %b) { +; CHECK: f1: +; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK: nill %r2, 65532 +; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) +; CHECK: [[LOOP:\.[^:]*]]: +; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) +; CHECK: cr [[ROT]], %r3 +; CHECK: j{{g?}}le [[KEEP:\..*]] +; CHECK: risbg [[ROT]], %r3, 32, 47, 0 +; CHECK: [[KEEP]]: +; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) +; CHECK: cs [[OLD]], [[NEW]], 0(%r2) +; CHECK: j{{g?}}lh [[LOOP]] +; CHECK: rll %r2, [[OLD]], 16([[SHIFT]]) +; CHECK: br %r14 +; +; CHECK-SHIFT1: f1: +; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]]) +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: br %r14 +; +; CHECK-SHIFT2: f1: +; CHECK-SHIFT2: sll %r3, 16 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: cr {{%r[0-9]+}}, %r3 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: br %r14 + %res = atomicrmw min i16 *%src, i16 %b seq_cst + ret i16 %res +} + +; Check signed maximum. 
+define i16 @f2(i16 *%src, i16 %b) { +; CHECK: f2: +; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK: nill %r2, 65532 +; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) +; CHECK: [[LOOP:\.[^:]*]]: +; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) +; CHECK: cr [[ROT]], %r3 +; CHECK: j{{g?}}he [[KEEP:\..*]] +; CHECK: risbg [[ROT]], %r3, 32, 47, 0 +; CHECK: [[KEEP]]: +; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) +; CHECK: cs [[OLD]], [[NEW]], 0(%r2) +; CHECK: j{{g?}}lh [[LOOP]] +; CHECK: rll %r2, [[OLD]], 16([[SHIFT]]) +; CHECK: br %r14 +; +; CHECK-SHIFT1: f2: +; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]]) +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: br %r14 +; +; CHECK-SHIFT2: f2: +; CHECK-SHIFT2: sll %r3, 16 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: cr {{%r[0-9]+}}, %r3 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: br %r14 + %res = atomicrmw max i16 *%src, i16 %b seq_cst + ret i16 %res +} + +; Check unsigned minimum. 
+define i16 @f3(i16 *%src, i16 %b) { +; CHECK: f3: +; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK: nill %r2, 65532 +; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) +; CHECK: [[LOOP:\.[^:]*]]: +; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) +; CHECK: clr [[ROT]], %r3 +; CHECK: j{{g?}}le [[KEEP:\..*]] +; CHECK: risbg [[ROT]], %r3, 32, 47, 0 +; CHECK: [[KEEP]]: +; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) +; CHECK: cs [[OLD]], [[NEW]], 0(%r2) +; CHECK: j{{g?}}lh [[LOOP]] +; CHECK: rll %r2, [[OLD]], 16([[SHIFT]]) +; CHECK: br %r14 +; +; CHECK-SHIFT1: f3: +; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]]) +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: br %r14 +; +; CHECK-SHIFT2: f3: +; CHECK-SHIFT2: sll %r3, 16 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: clr {{%r[0-9]+}}, %r3 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: br %r14 + %res = atomicrmw umin i16 *%src, i16 %b seq_cst + ret i16 %res +} + +; Check unsigned maximum. 
+define i16 @f4(i16 *%src, i16 %b) { +; CHECK: f4: +; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK: nill %r2, 65532 +; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) +; CHECK: [[LOOP:\.[^:]*]]: +; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) +; CHECK: clr [[ROT]], %r3 +; CHECK: j{{g?}}he [[KEEP:\..*]] +; CHECK: risbg [[ROT]], %r3, 32, 47, 0 +; CHECK: [[KEEP]]: +; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) +; CHECK: cs [[OLD]], [[NEW]], 0(%r2) +; CHECK: j{{g?}}lh [[LOOP]] +; CHECK: rll %r2, [[OLD]], 16([[SHIFT]]) +; CHECK: br %r14 +; +; CHECK-SHIFT1: f4: +; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]]) +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: br %r14 +; +; CHECK-SHIFT2: f4: +; CHECK-SHIFT2: sll %r3, 16 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: clr {{%r[0-9]+}}, %r3 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: br %r14 + %res = atomicrmw umax i16 *%src, i16 %b seq_cst + ret i16 %res +} + +; Check the lowest useful signed minimum value. We need to load 0x80010000 +; into the source register. +define i16 @f5(i16 *%src) { +; CHECK: f5: +; CHECK: llilh [[SRC2:%r[0-9]+]], 32769 +; CHECK: cr [[ROT:%r[0-9]+]], [[SRC2]] +; CHECK: risbg [[ROT]], [[SRC2]], 32, 47, 0 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f5: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f5: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw min i16 *%src, i16 -32767 seq_cst + ret i16 %res +} + +; Check the highest useful signed maximum value. We need to load 0x7ffe0000 +; into the source register. 
+define i16 @f6(i16 *%src) { +; CHECK: f6: +; CHECK: llilh [[SRC2:%r[0-9]+]], 32766 +; CHECK: cr [[ROT:%r[0-9]+]], [[SRC2]] +; CHECK: risbg [[ROT]], [[SRC2]], 32, 47, 0 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f6: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f6: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw max i16 *%src, i16 32766 seq_cst + ret i16 %res +} + +; Check the lowest useful unsigned minimum value. We need to load 0x00010000 +; into the source register. +define i16 @f7(i16 *%src) { +; CHECK: f7: +; CHECK: llilh [[SRC2:%r[0-9]+]], 1 +; CHECK: clr [[ROT:%r[0-9]+]], [[SRC2]] +; CHECK: risbg [[ROT]], [[SRC2]], 32, 47, 0 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f7: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f7: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw umin i16 *%src, i16 1 seq_cst + ret i16 %res +} + +; Check the highest useful unsigned maximum value. We need to load 0xfffe0000 +; into the source register. +define i16 @f8(i16 *%src) { +; CHECK: f8: +; CHECK: llilh [[SRC2:%r[0-9]+]], 65534 +; CHECK: clr [[ROT:%r[0-9]+]], [[SRC2]] +; CHECK: risbg [[ROT]], [[SRC2]], 32, 47, 0 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f8: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f8: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw umax i16 *%src, i16 65534 seq_cst + ret i16 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-minmax-03.ll b/test/CodeGen/SystemZ/atomicrmw-minmax-03.ll new file mode 100644 index 0000000..b5809bd --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-minmax-03.ll @@ -0,0 +1,176 @@ +; Test 32-bit atomic minimum and maximum. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check signed minimum.
+define i32 @f1(i32 %dummy, i32 *%src, i32 %b) { +; CHECK: f1: +; CHECK: l %r2, 0(%r3) +; CHECK: [[LOOP:\.[^:]*]]: +; CHECK: cr %r2, %r4 +; CHECK: lr [[NEW:%r[0-9]+]], %r2 +; CHECK: j{{g?}}le [[KEEP:\..*]] +; CHECK: lr [[NEW]], %r4 +; CHECK: cs %r2, [[NEW]], 0(%r3) +; CHECK: j{{g?}}lh [[LOOP]] +; CHECK: br %r14 + %res = atomicrmw min i32 *%src, i32 %b seq_cst + ret i32 %res +} + +; Check signed maximum. +define i32 @f2(i32 %dummy, i32 *%src, i32 %b) { +; CHECK: f2: +; CHECK: l %r2, 0(%r3) +; CHECK: [[LOOP:\.[^:]*]]: +; CHECK: cr %r2, %r4 +; CHECK: lr [[NEW:%r[0-9]+]], %r2 +; CHECK: j{{g?}}he [[KEEP:\..*]] +; CHECK: lr [[NEW]], %r4 +; CHECK: cs %r2, [[NEW]], 0(%r3) +; CHECK: j{{g?}}lh [[LOOP]] +; CHECK: br %r14 + %res = atomicrmw max i32 *%src, i32 %b seq_cst + ret i32 %res +} + +; Check unsigned minimum. +define i32 @f3(i32 %dummy, i32 *%src, i32 %b) { +; CHECK: f3: +; CHECK: l %r2, 0(%r3) +; CHECK: [[LOOP:\.[^:]*]]: +; CHECK: clr %r2, %r4 +; CHECK: lr [[NEW:%r[0-9]+]], %r2 +; CHECK: j{{g?}}le [[KEEP:\..*]] +; CHECK: lr [[NEW]], %r4 +; CHECK: cs %r2, [[NEW]], 0(%r3) +; CHECK: j{{g?}}lh [[LOOP]] +; CHECK: br %r14 + %res = atomicrmw umin i32 *%src, i32 %b seq_cst + ret i32 %res +} + +; Check unsigned maximum. +define i32 @f4(i32 %dummy, i32 *%src, i32 %b) { +; CHECK: f4: +; CHECK: l %r2, 0(%r3) +; CHECK: [[LOOP:\.[^:]*]]: +; CHECK: clr %r2, %r4 +; CHECK: lr [[NEW:%r[0-9]+]], %r2 +; CHECK: j{{g?}}he [[KEEP:\..*]] +; CHECK: lr [[NEW]], %r4 +; CHECK: cs %r2, [[NEW]], 0(%r3) +; CHECK: j{{g?}}lh [[LOOP]] +; CHECK: br %r14 + %res = atomicrmw umax i32 *%src, i32 %b seq_cst + ret i32 %res +} + +; Check the high end of the aligned CS range. +define i32 @f5(i32 %dummy, i32 *%src, i32 %b) { +; CHECK: f5: +; CHECK: l %r2, 4092(%r3) +; CHECK: cs %r2, {{%r[0-9]+}}, 4092(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 1023 + %res = atomicrmw min i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the next word up, which requires CSY. 
+define i32 @f6(i32 %dummy, i32 *%src, i32 %b) { +; CHECK: f6: +; CHECK: ly %r2, 4096(%r3) +; CHECK: csy %r2, {{%r[0-9]+}}, 4096(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 1024 + %res = atomicrmw min i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the high end of the aligned CSY range. +define i32 @f7(i32 %dummy, i32 *%src, i32 %b) { +; CHECK: f7: +; CHECK: ly %r2, 524284(%r3) +; CHECK: csy %r2, {{%r[0-9]+}}, 524284(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131071 + %res = atomicrmw min i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the next word up, which needs separate address logic. +define i32 @f8(i32 %dummy, i32 *%src, i32 %b) { +; CHECK: f8: +; CHECK: agfi %r3, 524288 +; CHECK: l %r2, 0(%r3) +; CHECK: cs %r2, {{%r[0-9]+}}, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131072 + %res = atomicrmw min i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the high end of the negative aligned CSY range. +define i32 @f9(i32 %dummy, i32 *%src, i32 %b) { +; CHECK: f9: +; CHECK: ly %r2, -4(%r3) +; CHECK: csy %r2, {{%r[0-9]+}}, -4(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -1 + %res = atomicrmw min i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the low end of the CSY range. +define i32 @f10(i32 %dummy, i32 *%src, i32 %b) { +; CHECK: f10: +; CHECK: ly %r2, -524288(%r3) +; CHECK: csy %r2, {{%r[0-9]+}}, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131072 + %res = atomicrmw min i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the next word down, which needs separate address logic. +define i32 @f11(i32 %dummy, i32 *%src, i32 %b) { +; CHECK: f11: +; CHECK: agfi %r3, -524292 +; CHECK: l %r2, 0(%r3) +; CHECK: cs %r2, {{%r[0-9]+}}, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131073 + %res = atomicrmw min i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check that indexed addresses are not allowed. 
+define i32 @f12(i32 %dummy, i64 %base, i64 %index, i32 %b) { +; CHECK: f12: +; CHECK: agr %r3, %r4 +; CHECK: l %r2, 0(%r3) +; CHECK: cs %r2, {{%r[0-9]+}}, 0(%r3) +; CHECK: br %r14 + %add = add i64 %base, %index + %ptr = inttoptr i64 %add to i32 * + %res = atomicrmw min i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check that constants are forced into a register. +define i32 @f13(i32 %dummy, i32 *%ptr) { +; CHECK: f13: +; CHECK: lhi [[LIMIT:%r[0-9]+]], 42 +; CHECK: l %r2, 0(%r3) +; CHECK: [[LOOP:\.[^:]*]]: +; CHECK: cr %r2, [[LIMIT]] +; CHECK: lr [[NEW:%r[0-9]+]], %r2 +; CHECK: j{{g?}}le [[KEEP:\..*]] +; CHECK: lr [[NEW]], [[LIMIT]] +; CHECK: cs %r2, [[NEW]], 0(%r3) +; CHECK: j{{g?}}lh [[LOOP]] +; CHECK: br %r14 + %res = atomicrmw min i32 *%ptr, i32 42 seq_cst + ret i32 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-minmax-04.ll b/test/CodeGen/SystemZ/atomicrmw-minmax-04.ll new file mode 100644 index 0000000..6897854 --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-minmax-04.ll @@ -0,0 +1,143 @@ +; Test 64-bit atomic minimum and maximum. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check signed minimum. +define i64 @f1(i64 %dummy, i64 *%src, i64 %b) { +; CHECK: f1: +; CHECK: lg %r2, 0(%r3) +; CHECK: [[LOOP:\.[^:]*]]: +; CHECK: cgr %r2, %r4 +; CHECK: lgr [[NEW:%r[0-9]+]], %r2 +; CHECK: j{{g?}}le [[KEEP:\..*]] +; CHECK: lgr [[NEW]], %r4 +; CHECK: csg %r2, [[NEW]], 0(%r3) +; CHECK: j{{g?}}lh [[LOOP]] +; CHECK: br %r14 + %res = atomicrmw min i64 *%src, i64 %b seq_cst + ret i64 %res +} + +; Check signed maximum. +define i64 @f2(i64 %dummy, i64 *%src, i64 %b) { +; CHECK: f2: +; CHECK: lg %r2, 0(%r3) +; CHECK: [[LOOP:\.[^:]*]]: +; CHECK: cgr %r2, %r4 +; CHECK: lgr [[NEW:%r[0-9]+]], %r2 +; CHECK: j{{g?}}he [[KEEP:\..*]] +; CHECK: lgr [[NEW]], %r4 +; CHECK: csg %r2, [[NEW]], 0(%r3) +; CHECK: j{{g?}}lh [[LOOP]] +; CHECK: br %r14 + %res = atomicrmw max i64 *%src, i64 %b seq_cst + ret i64 %res +} + +; Check unsigned minimum.
+define i64 @f3(i64 %dummy, i64 *%src, i64 %b) { +; CHECK: f3: +; CHECK: lg %r2, 0(%r3) +; CHECK: [[LOOP:\.[^:]*]]: +; CHECK: clgr %r2, %r4 +; CHECK: lgr [[NEW:%r[0-9]+]], %r2 +; CHECK: j{{g?}}le [[KEEP:\..*]] +; CHECK: lgr [[NEW]], %r4 +; CHECK: csg %r2, [[NEW]], 0(%r3) +; CHECK: j{{g?}}lh [[LOOP]] +; CHECK: br %r14 + %res = atomicrmw umin i64 *%src, i64 %b seq_cst + ret i64 %res +} + +; Check unsigned maximum. +define i64 @f4(i64 %dummy, i64 *%src, i64 %b) { +; CHECK: f4: +; CHECK: lg %r2, 0(%r3) +; CHECK: [[LOOP:\.[^:]*]]: +; CHECK: clgr %r2, %r4 +; CHECK: lgr [[NEW:%r[0-9]+]], %r2 +; CHECK: j{{g?}}he [[KEEP:\..*]] +; CHECK: lgr [[NEW]], %r4 +; CHECK: csg %r2, [[NEW]], 0(%r3) +; CHECK: j{{g?}}lh [[LOOP]] +; CHECK: br %r14 + %res = atomicrmw umax i64 *%src, i64 %b seq_cst + ret i64 %res +} + +; Check the high end of the aligned CSG range. +define i64 @f5(i64 %dummy, i64 *%src, i64 %b) { +; CHECK: f5: +; CHECK: lg %r2, 524280(%r3) +; CHECK: csg %r2, {{%r[0-9]+}}, 524280(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65535 + %res = atomicrmw min i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check the next doubleword up, which requires separate address logic. +define i64 @f6(i64 %dummy, i64 *%src, i64 %b) { +; CHECK: f6: +; CHECK: agfi %r3, 524288 +; CHECK: lg %r2, 0(%r3) +; CHECK: csg %r2, {{%r[0-9]+}}, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65536 + %res = atomicrmw min i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check the low end of the CSG range. +define i64 @f7(i64 %dummy, i64 *%src, i64 %b) { +; CHECK: f7: +; CHECK: lg %r2, -524288(%r3) +; CHECK: csg %r2, {{%r[0-9]+}}, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65536 + %res = atomicrmw min i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check the next doubleword down, which requires separate address logic. 
+define i64 @f8(i64 %dummy, i64 *%src, i64 %b) { +; CHECK: f8: +; CHECK: agfi %r3, -524296 +; CHECK: lg %r2, 0(%r3) +; CHECK: csg %r2, {{%r[0-9]+}}, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65537 + %res = atomicrmw min i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check that indexed addresses are not allowed. +define i64 @f9(i64 %dummy, i64 %base, i64 %index, i64 %b) { +; CHECK: f9: +; CHECK: agr %r3, %r4 +; CHECK: lg %r2, 0(%r3) +; CHECK: csg %r2, {{%r[0-9]+}}, 0(%r3) +; CHECK: br %r14 + %add = add i64 %base, %index + %ptr = inttoptr i64 %add to i64 * + %res = atomicrmw min i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check that constants are forced into a register. +define i64 @f10(i64 %dummy, i64 *%ptr) { +; CHECK: f10: +; CHECK: lghi [[LIMIT:%r[0-9]+]], 42 +; CHECK: lg %r2, 0(%r3) +; CHECK: [[LOOP:\.[^:]*]]: +; CHECK: cgr %r2, [[LIMIT]] +; CHECK: lgr [[NEW:%r[0-9]+]], %r2 +; CHECK: j{{g?}}le [[KEEP:\..*]] +; CHECK: lgr [[NEW]], [[LIMIT]] +; CHECK: csg %r2, [[NEW]], 0(%r3) +; CHECK: j{{g?}}lh [[LOOP]] +; CHECK: br %r14 + %res = atomicrmw min i64 *%ptr, i64 42 seq_cst + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-nand-01.ll b/test/CodeGen/SystemZ/atomicrmw-nand-01.ll new file mode 100644 index 0000000..1ede3b4 --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-nand-01.ll @@ -0,0 +1,139 @@ +; Test 8-bit atomic NANDs. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT1 +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT2 + +; Check NAND of a variable. +; - CHECK is for the main loop. +; - CHECK-SHIFT1 makes sure that the negated shift count used by the second +; RLL is set up correctly. The negation is independent of the NILL and L +; tested in CHECK. 
+; - CHECK-SHIFT2 makes sure that %b is shifted into the high part of the word +; before being used, and that the low bits are set to 1. This sequence is +; independent of the other loop prologue instructions. +define i8 @f1(i8 *%src, i8 %b) { +; CHECK: f1: +; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK: nill %r2, 65532 +; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) +; CHECK: nr [[ROT]], %r3 +; CHECK: xilf [[ROT]], 4278190080 +; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) +; CHECK: cs [[OLD]], [[NEW]], 0(%r2) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: rll %r2, [[OLD]], 8([[SHIFT]]) +; CHECK: br %r14 +; +; CHECK-SHIFT1: f1: +; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]]) +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: br %r14 +; +; CHECK-SHIFT2: f1: +; CHECK-SHIFT2: sll %r3, 24 +; CHECK-SHIFT2: oilf %r3, 16777215 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: nr {{%r[0-9]+}}, %r3 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: br %r14 + %res = atomicrmw nand i8 *%src, i8 %b seq_cst + ret i8 %res +} + +; Check the minimum signed value. We AND the rotated word with 0x80ffffff. 
+define i8 @f2(i8 *%src) { +; CHECK: f2: +; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK: nill %r2, 65532 +; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) +; CHECK: nilh [[ROT]], 33023 +; CHECK: xilf [[ROT]], 4278190080 +; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]]) +; CHECK: cs [[OLD]], [[NEW]], 0(%r2) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: rll %r2, [[OLD]], 8([[SHIFT]]) +; CHECK: br %r14 +; +; CHECK-SHIFT1: f2: +; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]]) +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: br %r14 +; +; CHECK-SHIFT2: f2: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw nand i8 *%src, i8 -128 seq_cst + ret i8 %res +} + +; Check NANDs of -2 (-1 isn't useful). We AND the rotated word with 0xfeffffff. +define i8 @f3(i8 *%src) { +; CHECK: f3: +; CHECK: nilh [[ROT]], 65279 +; CHECK: xilf [[ROT]], 4278190080 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f3: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f3: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw nand i8 *%src, i8 -2 seq_cst + ret i8 %res +} + +; Check NANDs of 1. We AND the rotated word with 0x01ffffff. +define i8 @f4(i8 *%src) { +; CHECK: f4: +; CHECK: nilh [[ROT]], 511 +; CHECK: xilf [[ROT]], 4278190080 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f4: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f4: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw nand i8 *%src, i8 1 seq_cst + ret i8 %res +} + +; Check the maximum signed value. We AND the rotated word with 0x7fffffff. +define i8 @f5(i8 *%src) { +; CHECK: f5: +; CHECK: nilh [[ROT]], 32767 +; CHECK: xilf [[ROT]], 4278190080 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f5: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f5: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw nand i8 *%src, i8 127 seq_cst + ret i8 %res +} + +; Check NANDs of a large unsigned value. 
We AND the rotated word with +; 0xfdffffff. +define i8 @f6(i8 *%src) { +; CHECK: f6: +; CHECK: nilh [[ROT]], 65023 +; CHECK: xilf [[ROT]], 4278190080 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f6: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f6: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw nand i8 *%src, i8 253 seq_cst + ret i8 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-nand-02.ll b/test/CodeGen/SystemZ/atomicrmw-nand-02.ll new file mode 100644 index 0000000..d5cf864 --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-nand-02.ll @@ -0,0 +1,139 @@ +; Test 16-bit atomic NANDs. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT1 +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT2 + +; Check NAND of a variable. +; - CHECK is for the main loop. +; - CHECK-SHIFT1 makes sure that the negated shift count used by the second +; RLL is set up correctly. The negation is independent of the NILL and L +; tested in CHECK. +; - CHECK-SHIFT2 makes sure that %b is shifted into the high part of the word +; before being used, and that the low bits are set to 1. This sequence is +; independent of the other loop prologue instructions. 
+define i16 @f1(i16 *%src, i16 %b) { +; CHECK: f1: +; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK: nill %r2, 65532 +; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) +; CHECK: nr [[ROT]], %r3 +; CHECK: xilf [[ROT]], 4294901760 +; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) +; CHECK: cs [[OLD]], [[NEW]], 0(%r2) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: rll %r2, [[OLD]], 16([[SHIFT]]) +; CHECK: br %r14 +; +; CHECK-SHIFT1: f1: +; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]]) +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: br %r14 +; +; CHECK-SHIFT2: f1: +; CHECK-SHIFT2: sll %r3, 16 +; CHECK-SHIFT2: oill %r3, 65535 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: nr {{%r[0-9]+}}, %r3 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: br %r14 + %res = atomicrmw nand i16 *%src, i16 %b seq_cst + ret i16 %res +} + +; Check the minimum signed value. We AND the rotated word with 0x8000ffff. 
+define i16 @f2(i16 *%src) { +; CHECK: f2: +; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK: nill %r2, 65532 +; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) +; CHECK: nilh [[ROT]], 32768 +; CHECK: xilf [[ROT]], 4294901760 +; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]]) +; CHECK: cs [[OLD]], [[NEW]], 0(%r2) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: rll %r2, [[OLD]], 16([[SHIFT]]) +; CHECK: br %r14 +; +; CHECK-SHIFT1: f2: +; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]]) +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: br %r14 +; +; CHECK-SHIFT2: f2: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw nand i16 *%src, i16 -32768 seq_cst + ret i16 %res +} + +; Check NANDs of -2 (-1 isn't useful). We AND the rotated word with 0xfffeffff. +define i16 @f3(i16 *%src) { +; CHECK: f3: +; CHECK: nilh [[ROT]], 65534 +; CHECK: xilf [[ROT]], 4294901760 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f3: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f3: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw nand i16 *%src, i16 -2 seq_cst + ret i16 %res +} + +; Check NANDs of 1. We AND the rotated word with 0x0001ffff. +define i16 @f4(i16 *%src) { +; CHECK: f4: +; CHECK: nilh [[ROT]], 1 +; CHECK: xilf [[ROT]], 4294901760 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f4: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f4: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw nand i16 *%src, i16 1 seq_cst + ret i16 %res +} + +; Check the maximum signed value. We AND the rotated word with 0x7fffffff.
+define i16 @f5(i16 *%src) { +; CHECK: f5: +; CHECK: nilh [[ROT]], 32767 +; CHECK: xilf [[ROT]], 4294901760 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f5: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f5: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw nand i16 *%src, i16 32767 seq_cst + ret i16 %res +} + +; Check NANDs of a large unsigned value. We AND the rotated word with +; 0xfffdffff. +define i16 @f6(i16 *%src) { +; CHECK: f6: +; CHECK: nilh [[ROT]], 65533 +; CHECK: xilf [[ROT]], 4294901760 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f6: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f6: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw nand i16 *%src, i16 65533 seq_cst + ret i16 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-nand-03.ll b/test/CodeGen/SystemZ/atomicrmw-nand-03.ll new file mode 100644 index 0000000..cc2a086 --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-nand-03.ll @@ -0,0 +1,93 @@ +; Test 32-bit atomic NANDs. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check NANDs of a variable. +define i32 @f1(i32 %dummy, i32 *%src, i32 %b) { +; CHECK: f1: +; CHECK: l %r2, 0(%r3) +; CHECK: [[LABEL:\.[^ ]*]]: +; CHECK: lr %r0, %r2 +; CHECK: nr %r0, %r4 +; CHECK: xilf %r0, 4294967295 +; CHECK: cs %r2, %r0, 0(%r3) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: br %r14 + %res = atomicrmw nand i32 *%src, i32 %b seq_cst + ret i32 %res +} + +; Check NANDs of 1. +define i32 @f2(i32 %dummy, i32 *%src) { +; CHECK: f2: +; CHECK: l %r2, 0(%r3) +; CHECK: [[LABEL:\.[^ ]*]]: +; CHECK: lr %r0, %r2 +; CHECK: nilf %r0, 1 +; CHECK: xilf %r0, 4294967295 +; CHECK: cs %r2, %r0, 0(%r3) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: br %r14 + %res = atomicrmw nand i32 *%src, i32 1 seq_cst + ret i32 %res +} + +; Check NANDs of the low end of the NILH range. 
+define i32 @f3(i32 %dummy, i32 *%src) { +; CHECK: f3: +; CHECK: nilh %r0, 0 +; CHECK: xilf %r0, 4294967295 +; CHECK: br %r14 + %res = atomicrmw nand i32 *%src, i32 65535 seq_cst + ret i32 %res +} + +; Check the next value up, which must use NILF. +define i32 @f4(i32 %dummy, i32 *%src) { +; CHECK: f4: +; CHECK: nilf %r0, 65536 +; CHECK: xilf %r0, 4294967295 +; CHECK: br %r14 + %res = atomicrmw nand i32 *%src, i32 65536 seq_cst + ret i32 %res +} + +; Check the largest useful NILL value. +define i32 @f5(i32 %dummy, i32 *%src) { +; CHECK: f5: +; CHECK: nill %r0, 65534 +; CHECK: xilf %r0, 4294967295 +; CHECK: br %r14 + %res = atomicrmw nand i32 *%src, i32 -2 seq_cst + ret i32 %res +} + +; Check the low end of the NILL range. +define i32 @f6(i32 %dummy, i32 *%src) { +; CHECK: f6: +; CHECK: nill %r0, 0 +; CHECK: xilf %r0, 4294967295 +; CHECK: br %r14 + %res = atomicrmw nand i32 *%src, i32 -65536 seq_cst + ret i32 %res +} + +; Check the largest useful NILH value, which is one less than the above. +define i32 @f7(i32 %dummy, i32 *%src) { +; CHECK: f7: +; CHECK: nilh %r0, 65534 +; CHECK: xilf %r0, 4294967295 +; CHECK: br %r14 + %res = atomicrmw nand i32 *%src, i32 -65537 seq_cst + ret i32 %res +} + +; Check the highest useful NILF value, which is one less than the above. +define i32 @f8(i32 %dummy, i32 *%src) { +; CHECK: f8: +; CHECK: nilf %r0, 4294901758 +; CHECK: xilf %r0, 4294967295 +; CHECK: br %r14 + %res = atomicrmw nand i32 *%src, i32 -65538 seq_cst + ret i32 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-nand-04.ll b/test/CodeGen/SystemZ/atomicrmw-nand-04.ll new file mode 100644 index 0000000..0c857d9 --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-nand-04.ll @@ -0,0 +1,183 @@ +; Test 64-bit atomic NANDs. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check NANDs of a variable. 
+define i64 @f1(i64 %dummy, i64 *%src, i64 %b) { +; CHECK: f1: +; CHECK: lg %r2, 0(%r3) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: lgr %r0, %r2 +; CHECK: ngr %r0, %r4 +; CHECK: lcgr %r0, %r0 +; CHECK: aghi %r0, -1 +; CHECK: csg %r2, %r0, 0(%r3) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: br %r14 + %res = atomicrmw nand i64 *%src, i64 %b seq_cst + ret i64 %res +} + +; Check NANDs of 1, which must be done using a register. +define i64 @f2(i64 %dummy, i64 *%src) { +; CHECK: f2: +; CHECK: ngr +; CHECK: br %r14 + %res = atomicrmw nand i64 *%src, i64 1 seq_cst + ret i64 %res +} + +; Check the low end of the NIHF range. +define i64 @f3(i64 %dummy, i64 *%src) { +; CHECK: f3: +; CHECK: lg %r2, 0(%r3) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: lgr %r0, %r2 +; CHECK: nihf %r0, 0 +; CHECK: lcgr %r0, %r0 +; CHECK: aghi %r0, -1 +; CHECK: csg %r2, %r0, 0(%r3) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: br %r14 + %res = atomicrmw nand i64 *%src, i64 4294967295 seq_cst + ret i64 %res +} + +; Check the next value up, which must use a register. +define i64 @f4(i64 %dummy, i64 *%src) { +; CHECK: f4: +; CHECK: ngr +; CHECK: br %r14 + %res = atomicrmw nand i64 *%src, i64 4294967296 seq_cst + ret i64 %res +} + +; Check the low end of the NIHH range. +define i64 @f5(i64 %dummy, i64 *%src) { +; CHECK: f5: +; CHECK: nihh %r0, 0 +; CHECK: lcgr %r0, %r0 +; CHECK: aghi %r0, -1 +; CHECK: br %r14 + %res = atomicrmw nand i64 *%src, i64 281474976710655 seq_cst + ret i64 %res +} + +; Check the next value up, which must use a register. +define i64 @f6(i64 %dummy, i64 *%src) { +; CHECK: f6: +; CHECK: ngr +; CHECK: br %r14 + %res = atomicrmw nand i64 *%src, i64 281474976710656 seq_cst + ret i64 %res +} + +; Check the highest useful NILL value. +define i64 @f7(i64 %dummy, i64 *%src) { +; CHECK: f7: +; CHECK: nill %r0, 65534 +; CHECK: lcgr %r0, %r0 +; CHECK: aghi %r0, -1 +; CHECK: br %r14 + %res = atomicrmw nand i64 *%src, i64 -2 seq_cst + ret i64 %res +} + +; Check the low end of the NILL range. 
+define i64 @f8(i64 %dummy, i64 *%src) { +; CHECK: f8: +; CHECK: nill %r0, 0 +; CHECK: lcgr %r0, %r0 +; CHECK: aghi %r0, -1 +; CHECK: br %r14 + %res = atomicrmw nand i64 *%src, i64 -65536 seq_cst + ret i64 %res +} + +; Check the highest useful NILH value, which is one less than the above. +define i64 @f9(i64 %dummy, i64 *%src) { +; CHECK: f9: +; CHECK: nilh %r0, 65534 +; CHECK: lcgr %r0, %r0 +; CHECK: aghi %r0, -1 +; CHECK: br %r14 + %res = atomicrmw nand i64 *%src, i64 -65537 seq_cst + ret i64 %res +} + +; Check the highest useful NILF value, which is one less than the above. +define i64 @f10(i64 %dummy, i64 *%src) { +; CHECK: f10: +; CHECK: nilf %r0, 4294901758 +; CHECK: lcgr %r0, %r0 +; CHECK: aghi %r0, -1 +; CHECK: br %r14 + %res = atomicrmw nand i64 *%src, i64 -65538 seq_cst + ret i64 %res +} + +; Check the low end of the NILH range. +define i64 @f11(i64 %dummy, i64 *%src) { +; CHECK: f11: +; CHECK: nilh %r0, 0 +; CHECK: lcgr %r0, %r0 +; CHECK: aghi %r0, -1 +; CHECK: br %r14 + %res = atomicrmw nand i64 *%src, i64 -4294901761 seq_cst + ret i64 %res +} + +; Check the low end of the NILF range. +define i64 @f12(i64 %dummy, i64 *%src) { +; CHECK: f12: +; CHECK: nilf %r0, 0 +; CHECK: lcgr %r0, %r0 +; CHECK: aghi %r0, -1 +; CHECK: br %r14 + %res = atomicrmw nand i64 *%src, i64 -4294967296 seq_cst + ret i64 %res +} + +; Check the highest useful NIHL value, which is one less than the above. +define i64 @f13(i64 %dummy, i64 *%src) { +; CHECK: f13: +; CHECK: nihl %r0, 65534 +; CHECK: lcgr %r0, %r0 +; CHECK: aghi %r0, -1 +; CHECK: br %r14 + %res = atomicrmw nand i64 *%src, i64 -4294967297 seq_cst + ret i64 %res +} + +; Check the low end of the NIHL range. +define i64 @f14(i64 %dummy, i64 *%src) { +; CHECK: f14: +; CHECK: nihl %r0, 0 +; CHECK: lcgr %r0, %r0 +; CHECK: aghi %r0, -1 +; CHECK: br %r14 + %res = atomicrmw nand i64 *%src, i64 -281470681743361 seq_cst + ret i64 %res +} + +; Check the highest useful NIHH value, which is 1<<32 less than the above. 
+define i64 @f15(i64 %dummy, i64 *%src) { +; CHECK: f15: +; CHECK: nihh %r0, 65534 +; CHECK: lcgr %r0, %r0 +; CHECK: aghi %r0, -1 +; CHECK: br %r14 + %res = atomicrmw nand i64 *%src, i64 -281474976710657 seq_cst + ret i64 %res +} + +; Check the highest useful NIHF value, which is 1<<32 less than the above. +define i64 @f16(i64 %dummy, i64 *%src) { +; CHECK: f16: +; CHECK: nihf %r0, 4294901758 +; CHECK: lcgr %r0, %r0 +; CHECK: aghi %r0, -1 +; CHECK: br %r14 + %res = atomicrmw nand i64 *%src, i64 -281479271677953 seq_cst + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-or-01.ll b/test/CodeGen/SystemZ/atomicrmw-or-01.ll new file mode 100644 index 0000000..31303b7 --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-or-01.ll @@ -0,0 +1,132 @@ +; Test 8-bit atomic ORs. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT1 +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT2 + +; Check OR of a variable. +; - CHECK is for the main loop. +; - CHECK-SHIFT1 makes sure that the negated shift count used by the second +; RLL is set up correctly. The negation is independent of the NILL and L +; tested in CHECK. +; - CHECK-SHIFT2 makes sure that %b is shifted into the high part of the word +; before being used. This shift is independent of the other loop prologue +; instructions. 
+define i8 @f1(i8 *%src, i8 %b) { +; CHECK: f1: +; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK: nill %r2, 65532 +; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) +; CHECK: or [[ROT]], %r3 +; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) +; CHECK: cs [[OLD]], [[NEW]], 0(%r2) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: rll %r2, [[OLD]], 8([[SHIFT]]) +; CHECK: br %r14 +; +; CHECK-SHIFT1: f1: +; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]]) +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: br %r14 +; +; CHECK-SHIFT2: f1: +; CHECK-SHIFT2: sll %r3, 24 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: or {{%r[0-9]+}}, %r3 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: br %r14 + %res = atomicrmw or i8 *%src, i8 %b seq_cst + ret i8 %res +} + +; Check the minimum signed value. We OR the rotated word with 0x80000000. +define i8 @f2(i8 *%src) { +; CHECK: f2: +; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK: nill %r2, 65532 +; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) +; CHECK: oilh [[ROT]], 32768 +; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]]) +; CHECK: cs [[OLD]], [[NEW]], 0(%r2) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: rll %r2, [[OLD]], 8([[SHIFT]]) +; CHECK: br %r14 +; +; CHECK-SHIFT1: f2: +; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]]) +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: br %r14 +; +; CHECK-SHIFT2: f2: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw or i8 *%src, i8 -128 seq_cst + ret i8 %res +} + +; Check ORs of -2 (-1 isn't useful). We OR the rotated word with 0xfe000000. 
+define i8 @f3(i8 *%src) { +; CHECK: f3: +; CHECK: oilh [[ROT]], 65024 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f3: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f3: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw or i8 *%src, i8 -2 seq_cst + ret i8 %res +} + +; Check ORs of 1. We OR the rotated word with 0x01000000. +define i8 @f4(i8 *%src) { +; CHECK: f4: +; CHECK: oilh [[ROT]], 256 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f4: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f4: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw or i8 *%src, i8 1 seq_cst + ret i8 %res +} + +; Check the maximum signed value. We OR the rotated word with 0x7f000000. +define i8 @f5(i8 *%src) { +; CHECK: f5: +; CHECK: oilh [[ROT]], 32512 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f5: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f5: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw or i8 *%src, i8 127 seq_cst + ret i8 %res +} + +; Check ORs of a large unsigned value. We OR the rotated word with +; 0xfd000000. +define i8 @f6(i8 *%src) { +; CHECK: f6: +; CHECK: oilh [[ROT]], 64768 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f6: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f6: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw or i8 *%src, i8 253 seq_cst + ret i8 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-or-02.ll b/test/CodeGen/SystemZ/atomicrmw-or-02.ll new file mode 100644 index 0000000..9880d0b --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-or-02.ll @@ -0,0 +1,132 @@ +; Test 16-bit atomic ORs. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT1 +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT2 + +; Check OR of a variable. +; - CHECK is for the main loop. +; - CHECK-SHIFT1 makes sure that the negated shift count used by the second +; RLL is set up correctly. The negation is independent of the NILL and L +; tested in CHECK. 
+; - CHECK-SHIFT2 makes sure that %b is shifted into the high part of the word +; before being used. This shift is independent of the other loop prologue +; instructions. +define i16 @f1(i16 *%src, i16 %b) { +; CHECK: f1: +; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK: nill %r2, 65532 +; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) +; CHECK: or [[ROT]], %r3 +; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) +; CHECK: cs [[OLD]], [[NEW]], 0(%r2) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: rll %r2, [[OLD]], 16([[SHIFT]]) +; CHECK: br %r14 +; +; CHECK-SHIFT1: f1: +; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]]) +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: br %r14 +; +; CHECK-SHIFT2: f1: +; CHECK-SHIFT2: sll %r3, 16 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: or {{%r[0-9]+}}, %r3 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: br %r14 + %res = atomicrmw or i16 *%src, i16 %b seq_cst + ret i16 %res +} + +; Check the minimum signed value. We OR the rotated word with 0x80000000. 
+define i16 @f2(i16 *%src) { +; CHECK: f2: +; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK: nill %r2, 65532 +; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) +; CHECK: oilh [[ROT]], 32768 +; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]]) +; CHECK: cs [[OLD]], [[NEW]], 0(%r2) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: rll %r2, [[OLD]], 16([[SHIFT]]) +; CHECK: br %r14 +; +; CHECK-SHIFT1: f2: +; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]]) +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: br %r14 +; +; CHECK-SHIFT2: f2: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw or i16 *%src, i16 -32768 seq_cst + ret i16 %res +} + +; Check ORs of -2 (-1 isn't useful). We OR the rotated word with 0xfffe0000. +define i16 @f3(i16 *%src) { +; CHECK: f3: +; CHECK: oilh [[ROT]], 65534 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f3: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f3: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw or i16 *%src, i16 -2 seq_cst + ret i16 %res +} + +; Check ORs of 1. We OR the rotated word with 0x00010000. +define i16 @f4(i16 *%src) { +; CHECK: f4: +; CHECK: oilh [[ROT]], 1 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f4: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f4: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw or i16 *%src, i16 1 seq_cst + ret i16 %res +} + +; Check the maximum signed value. We OR the rotated word with 0x7fff0000. +define i16 @f5(i16 *%src) { +; CHECK: f5: +; CHECK: oilh [[ROT]], 32767 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f5: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f5: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw or i16 *%src, i16 32767 seq_cst + ret i16 %res +} + +; Check ORs of a large unsigned value. We OR the rotated word with +; 0xfffd0000. 
+define i16 @f6(i16 *%src) { +; CHECK: f6: +; CHECK: oilh [[ROT]], 65533 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f6: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f6: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw or i16 *%src, i16 65533 seq_cst + ret i16 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-or-03.ll b/test/CodeGen/SystemZ/atomicrmw-or-03.ll new file mode 100644 index 0000000..33fd21b --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-or-03.ll @@ -0,0 +1,85 @@ +; Test 32-bit atomic ORs. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check ORs of a variable. +define i32 @f1(i32 %dummy, i32 *%src, i32 %b) { +; CHECK: f1: +; CHECK: l %r2, 0(%r3) +; CHECK: [[LABEL:\.[^ ]*]]: +; CHECK: lr %r0, %r2 +; CHECK: or %r0, %r4 +; CHECK: cs %r2, %r0, 0(%r3) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: br %r14 + %res = atomicrmw or i32 *%src, i32 %b seq_cst + ret i32 %res +} + +; Check the lowest useful OILL value. +define i32 @f2(i32 %dummy, i32 *%src) { +; CHECK: f2: +; CHECK: l %r2, 0(%r3) +; CHECK: [[LABEL:\.[^ ]*]]: +; CHECK: lr %r0, %r2 +; CHECK: oill %r0, 1 +; CHECK: cs %r2, %r0, 0(%r3) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: br %r14 + %res = atomicrmw or i32 *%src, i32 1 seq_cst + ret i32 %res +} + +; Check the high end of the OILL range. +define i32 @f3(i32 %dummy, i32 *%src) { +; CHECK: f3: +; CHECK: oill %r0, 65535 +; CHECK: br %r14 + %res = atomicrmw or i32 *%src, i32 65535 seq_cst + ret i32 %res +} + +; Check the lowest useful OILH value, which is the next value up. +define i32 @f4(i32 %dummy, i32 *%src) { +; CHECK: f4: +; CHECK: oilh %r0, 1 +; CHECK: br %r14 + %res = atomicrmw or i32 *%src, i32 65536 seq_cst + ret i32 %res +} + +; Check the lowest useful OILF value, which is the next value up. +define i32 @f5(i32 %dummy, i32 *%src) { +; CHECK: f5: +; CHECK: oilf %r0, 65537 +; CHECK: br %r14 + %res = atomicrmw or i32 *%src, i32 65537 seq_cst + ret i32 %res +} + +; Check the high end of the OILH range. 
+define i32 @f6(i32 %dummy, i32 *%src) { +; CHECK: f6: +; CHECK: oilh %r0, 65535 +; CHECK: br %r14 + %res = atomicrmw or i32 *%src, i32 -65536 seq_cst + ret i32 %res +} + +; Check the next value up, which must use OILF. +define i32 @f7(i32 %dummy, i32 *%src) { +; CHECK: f7: +; CHECK: oilf %r0, 4294901761 +; CHECK: br %r14 + %res = atomicrmw or i32 *%src, i32 -65535 seq_cst + ret i32 %res +} + +; Check the largest useful OILF value. +define i32 @f8(i32 %dummy, i32 *%src) { +; CHECK: f8: +; CHECK: oilf %r0, 4294967294 +; CHECK: br %r14 + %res = atomicrmw or i32 *%src, i32 -2 seq_cst + ret i32 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-or-04.ll b/test/CodeGen/SystemZ/atomicrmw-or-04.ll new file mode 100644 index 0000000..a74f6f9 --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-or-04.ll @@ -0,0 +1,158 @@ +; Test 64-bit atomic ORs. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check ORs of a variable. +define i64 @f1(i64 %dummy, i64 *%src, i64 %b) { +; CHECK: f1: +; CHECK: lg %r2, 0(%r3) +; CHECK: [[LABEL:\.[^ ]*]]: +; CHECK: lgr %r0, %r2 +; CHECK: ogr %r0, %r4 +; CHECK: csg %r2, %r0, 0(%r3) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: br %r14 + %res = atomicrmw or i64 *%src, i64 %b seq_cst + ret i64 %res +} + +; Check the lowest useful OILL value. +define i64 @f2(i64 %dummy, i64 *%src) { +; CHECK: f2: +; CHECK: lg %r2, 0(%r3) +; CHECK: [[LABEL:\.[^ ]*]]: +; CHECK: lgr %r0, %r2 +; CHECK: oill %r0, 1 +; CHECK: csg %r2, %r0, 0(%r3) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: br %r14 + %res = atomicrmw or i64 *%src, i64 1 seq_cst + ret i64 %res +} + +; Check the high end of the OILL range. +define i64 @f3(i64 %dummy, i64 *%src) { +; CHECK: f3: +; CHECK: oill %r0, 65535 +; CHECK: br %r14 + %res = atomicrmw or i64 *%src, i64 65535 seq_cst + ret i64 %res +} + +; Check the lowest useful OILH value, which is the next value up. 
+define i64 @f4(i64 %dummy, i64 *%src) { +; CHECK: f4: +; CHECK: oilh %r0, 1 +; CHECK: br %r14 + %res = atomicrmw or i64 *%src, i64 65536 seq_cst + ret i64 %res +} + +; Check the lowest useful OILF value, which is the next value up again. +define i64 @f5(i64 %dummy, i64 *%src) { +; CHECK: f5: +; CHECK: oilf %r0, 65537 +; CHECK: br %r14 + %res = atomicrmw or i64 *%src, i64 65537 seq_cst + ret i64 %res +} + +; Check the high end of the OILH range. +define i64 @f6(i64 %dummy, i64 *%src) { +; CHECK: f6: +; CHECK: oilh %r0, 65535 +; CHECK: br %r14 + %res = atomicrmw or i64 *%src, i64 4294901760 seq_cst + ret i64 %res +} + +; Check the next value up, which must use OILF. +define i64 @f7(i64 %dummy, i64 *%src) { +; CHECK: f7: +; CHECK: oilf %r0, 4294901761 +; CHECK: br %r14 + %res = atomicrmw or i64 *%src, i64 4294901761 seq_cst + ret i64 %res +} + +; Check the high end of the OILF range. +define i64 @f8(i64 %dummy, i64 *%src) { +; CHECK: f8: +; CHECK: oilf %r0, 4294967295 +; CHECK: br %r14 + %res = atomicrmw or i64 *%src, i64 4294967295 seq_cst + ret i64 %res +} + +; Check the lowest useful OIHL value, which is one greater than above. +define i64 @f9(i64 %dummy, i64 *%src) { +; CHECK: f9: +; CHECK: oihl %r0, 1 +; CHECK: br %r14 + %res = atomicrmw or i64 *%src, i64 4294967296 seq_cst + ret i64 %res +} + +; Check the next value up, which must use a register. (We could use +; combinations of OIH* and OIL* instead, but that isn't implemented.) +define i64 @f10(i64 %dummy, i64 *%src) { +; CHECK: f10: +; CHECK: ogr +; CHECK: br %r14 + %res = atomicrmw or i64 *%src, i64 4294967297 seq_cst + ret i64 %res +} + +; Check the high end of the OIHL range. +define i64 @f11(i64 %dummy, i64 *%src) { +; CHECK: f11: +; CHECK: oihl %r0, 65535 +; CHECK: br %r14 + %res = atomicrmw or i64 *%src, i64 281470681743360 seq_cst + ret i64 %res +} + +; Check the lowest useful OIHH value, which is 1<<32 greater than above. 
+define i64 @f12(i64 %dummy, i64 *%src) { +; CHECK: f12: +; CHECK: oihh %r0, 1 +; CHECK: br %r14 + %res = atomicrmw or i64 *%src, i64 281474976710656 seq_cst + ret i64 %res +} + +; Check the lowest useful OIHF value, which is 1<<32 greater again. +define i64 @f13(i64 %dummy, i64 *%src) { +; CHECK: f13: +; CHECK: oihf %r0, 65537 +; CHECK: br %r14 + %res = atomicrmw or i64 *%src, i64 281479271677952 seq_cst + ret i64 %res +} + +; Check the high end of the OIHH range. +define i64 @f14(i64 %dummy, i64 *%src) { +; CHECK: f14: +; CHECK: oihh %r0, 65535 +; CHECK: br %r14 + %res = atomicrmw or i64 *%src, i64 18446462598732840960 seq_cst + ret i64 %res +} + +; Check the next value up, which must use a register. +define i64 @f15(i64 %dummy, i64 *%src) { +; CHECK: f15: +; CHECK: ogr +; CHECK: br %r14 + %res = atomicrmw or i64 *%src, i64 18446462598732840961 seq_cst + ret i64 %res +} + +; Check the high end of the OIHF range. +define i64 @f16(i64 %dummy, i64 *%src) { +; CHECK: f16: +; CHECK: oihf %r0, 4294967295 +; CHECK: br %r14 + %res = atomicrmw or i64 *%src, i64 -4294967296 seq_cst + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-sub-01.ll b/test/CodeGen/SystemZ/atomicrmw-sub-01.ll new file mode 100644 index 0000000..d073dc5 --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-sub-01.ll @@ -0,0 +1,132 @@ +; Test 8-bit atomic subtractions. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT1 +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT2 + +; Check subtraction of a variable. +; - CHECK is for the main loop. +; - CHECK-SHIFT1 makes sure that the negated shift count used by the second +; RLL is set up correctly. The negation is independent of the NILL and L +; tested in CHECK. +; - CHECK-SHIFT2 makes sure that %b is shifted into the high part of the word +; before being used. 
This shift is independent of the other loop prologue +; instructions. +define i8 @f1(i8 *%src, i8 %b) { +; CHECK: f1: +; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK: nill %r2, 65532 +; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) +; CHECK: sr [[ROT]], %r3 +; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) +; CHECK: cs [[OLD]], [[NEW]], 0(%r2) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: rll %r2, [[OLD]], 8([[SHIFT]]) +; CHECK: br %r14 +; +; CHECK-SHIFT1: f1: +; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]]) +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: br %r14 +; +; CHECK-SHIFT2: f1: +; CHECK-SHIFT2: sll %r3, 24 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: sr {{%r[0-9]+}}, %r3 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: br %r14 + %res = atomicrmw sub i8 *%src, i8 %b seq_cst + ret i8 %res +} + +; Check the minimum signed value. We add 0x80000000 to the rotated word. +define i8 @f2(i8 *%src) { +; CHECK: f2: +; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK: nill %r2, 65532 +; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) +; CHECK: afi [[ROT]], -2147483648 +; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]]) +; CHECK: cs [[OLD]], [[NEW]], 0(%r2) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: rll %r2, [[OLD]], 8([[SHIFT]]) +; CHECK: br %r14 +; +; CHECK-SHIFT1: f2: +; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]]) +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: br %r14 +; +; CHECK-SHIFT2: f2: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw sub i8 *%src, i8 -128 seq_cst + ret i8 %res +} + +; Check subtraction of -1. We add 0x01000000 to the rotated word. 
+define i8 @f3(i8 *%src) { +; CHECK: f3: +; CHECK: afi [[ROT]], 16777216 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f3: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f3: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw sub i8 *%src, i8 -1 seq_cst + ret i8 %res +} + +; Check subtraction of 1. We add 0xff000000 to the rotated word. +define i8 @f4(i8 *%src) { +; CHECK: f4: +; CHECK: afi [[ROT]], -16777216 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f4: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f4: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw sub i8 *%src, i8 1 seq_cst + ret i8 %res +} + +; Check the maximum signed value. We add 0x81000000 to the rotated word. +define i8 @f5(i8 *%src) { +; CHECK: f5: +; CHECK: afi [[ROT]], -2130706432 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f5: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f5: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw sub i8 *%src, i8 127 seq_cst + ret i8 %res +} + +; Check subtraction of a large unsigned value. We add 0x02000000 to the +; rotated word. +define i8 @f6(i8 *%src) { +; CHECK: f6: +; CHECK: afi [[ROT]], 33554432 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f6: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f6: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw sub i8 *%src, i8 254 seq_cst + ret i8 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-sub-02.ll b/test/CodeGen/SystemZ/atomicrmw-sub-02.ll new file mode 100644 index 0000000..449d92f --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-sub-02.ll @@ -0,0 +1,132 @@ +; Test 16-bit atomic subtractions. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT1 +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT2 + +; Check subtraction of a variable. +; - CHECK is for the main loop. +; - CHECK-SHIFT1 makes sure that the negated shift count used by the second +; RLL is set up correctly. The negation is independent of the NILL and L +; tested in CHECK. 
+; - CHECK-SHIFT2 makes sure that %b is shifted into the high part of the word +; before being used. This shift is independent of the other loop prologue +; instructions. +define i16 @f1(i16 *%src, i16 %b) { +; CHECK: f1: +; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK: nill %r2, 65532 +; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) +; CHECK: sr [[ROT]], %r3 +; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) +; CHECK: cs [[OLD]], [[NEW]], 0(%r2) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: rll %r2, [[OLD]], 16([[SHIFT]]) +; CHECK: br %r14 +; +; CHECK-SHIFT1: f1: +; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]]) +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: br %r14 +; +; CHECK-SHIFT2: f1: +; CHECK-SHIFT2: sll %r3, 16 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: sr {{%r[0-9]+}}, %r3 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: br %r14 + %res = atomicrmw sub i16 *%src, i16 %b seq_cst + ret i16 %res +} + +; Check the minimum signed value. We add 0x80000000 to the rotated word. 
+define i16 @f2(i16 *%src) { +; CHECK: f2: +; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK: nill %r2, 65532 +; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) +; CHECK: afi [[ROT]], -2147483648 +; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]]) +; CHECK: cs [[OLD]], [[NEW]], 0(%r2) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: rll %r2, [[OLD]], 16([[SHIFT]]) +; CHECK: br %r14 +; +; CHECK-SHIFT1: f2: +; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]]) +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: br %r14 +; +; CHECK-SHIFT2: f2: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw sub i16 *%src, i16 -32768 seq_cst + ret i16 %res +} + +; Check subtraction of -1. We add 0x00010000 to the rotated word. +define i16 @f3(i16 *%src) { +; CHECK: f3: +; CHECK: afi [[ROT]], 65536 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f3: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f3: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw sub i16 *%src, i16 -1 seq_cst + ret i16 %res +} + +; Check subtraction of 1. We add 0xffff0000 to the rotated word. +define i16 @f4(i16 *%src) { +; CHECK: f4: +; CHECK: afi [[ROT]], -65536 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f4: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f4: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw sub i16 *%src, i16 1 seq_cst + ret i16 %res +} + +; Check the maximum signed value. We add 0x80010000 to the rotated word. +define i16 @f5(i16 *%src) { +; CHECK: f5: +; CHECK: afi [[ROT]], -2147418112 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f5: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f5: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw sub i16 *%src, i16 32767 seq_cst + ret i16 %res +} + +; Check subtraction of a large unsigned value. We add 0x00020000 to the +; rotated word. 
+define i16 @f6(i16 *%src) { +; CHECK: f6: +; CHECK: afi [[ROT]], 131072 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f6: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f6: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw sub i16 *%src, i16 65534 seq_cst + ret i16 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-sub-03.ll b/test/CodeGen/SystemZ/atomicrmw-sub-03.ll new file mode 100644 index 0000000..da07fb5 --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-sub-03.ll @@ -0,0 +1,94 @@ +; Test 32-bit atomic subtractions. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check subtraction of a variable. +define i32 @f1(i32 %dummy, i32 *%src, i32 %b) { +; CHECK: f1: +; CHECK: l %r2, 0(%r3) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: lr %r0, %r2 +; CHECK: sr %r0, %r4 +; CHECK: cs %r2, %r0, 0(%r3) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: br %r14 + %res = atomicrmw sub i32 *%src, i32 %b seq_cst + ret i32 %res +} + +; Check subtraction of 1, which can use AHI. +define i32 @f2(i32 %dummy, i32 *%src) { +; CHECK: f2: +; CHECK: l %r2, 0(%r3) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: lr %r0, %r2 +; CHECK: ahi %r0, -1 +; CHECK: cs %r2, %r0, 0(%r3) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: br %r14 + %res = atomicrmw sub i32 *%src, i32 1 seq_cst + ret i32 %res +} + +; Check the low end of the AHI range. +define i32 @f3(i32 %dummy, i32 *%src) { +; CHECK: f3: +; CHECK: ahi %r0, -32768 +; CHECK: br %r14 + %res = atomicrmw sub i32 *%src, i32 32768 seq_cst + ret i32 %res +} + +; Check the next value down, which must use AFI. +define i32 @f4(i32 %dummy, i32 *%src) { +; CHECK: f4: +; CHECK: afi %r0, -32769 +; CHECK: br %r14 + %res = atomicrmw sub i32 *%src, i32 32769 seq_cst + ret i32 %res +} + +; Check the low end of the AFI range. +define i32 @f5(i32 %dummy, i32 *%src) { +; CHECK: f5: +; CHECK: afi %r0, -2147483648 +; CHECK: br %r14 + %res = atomicrmw sub i32 *%src, i32 2147483648 seq_cst + ret i32 %res +} + +; Check the next value up, which gets treated as a positive operand. 
+define i32 @f6(i32 %dummy, i32 *%src) { +; CHECK: f6: +; CHECK: afi %r0, 2147483647 +; CHECK: br %r14 + %res = atomicrmw sub i32 *%src, i32 2147483649 seq_cst + ret i32 %res +} + +; Check subtraction of -1, which can use AHI. +define i32 @f7(i32 %dummy, i32 *%src) { +; CHECK: f7: +; CHECK: ahi %r0, 1 +; CHECK: br %r14 + %res = atomicrmw sub i32 *%src, i32 -1 seq_cst + ret i32 %res +} + +; Check the high end of the AHI range. +define i32 @f8(i32 %dummy, i32 *%src) { +; CHECK: f8: +; CHECK: ahi %r0, 32767 +; CHECK: br %r14 + %res = atomicrmw sub i32 *%src, i32 -32767 seq_cst + ret i32 %res +} + +; Check the next value down, which must use AFI instead. +define i32 @f9(i32 %dummy, i32 *%src) { +; CHECK: f9: +; CHECK: afi %r0, 32768 +; CHECK: br %r14 + %res = atomicrmw sub i32 *%src, i32 -32768 seq_cst + ret i32 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-sub-04.ll b/test/CodeGen/SystemZ/atomicrmw-sub-04.ll new file mode 100644 index 0000000..26f75af --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-sub-04.ll @@ -0,0 +1,112 @@ +; Test 64-bit atomic subtractions. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check subtraction of a variable. +define i64 @f1(i64 %dummy, i64 *%src, i64 %b) { +; CHECK: f1: +; CHECK: lg %r2, 0(%r3) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: lgr %r0, %r2 +; CHECK: sgr %r0, %r4 +; CHECK: csg %r2, %r0, 0(%r3) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: br %r14 + %res = atomicrmw sub i64 *%src, i64 %b seq_cst + ret i64 %res +} + +; Check subtraction of 1, which can use AGHI. +define i64 @f2(i64 %dummy, i64 *%src) { +; CHECK: f2: +; CHECK: lg %r2, 0(%r3) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: lgr %r0, %r2 +; CHECK: aghi %r0, -1 +; CHECK: csg %r2, %r0, 0(%r3) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: br %r14 + %res = atomicrmw sub i64 *%src, i64 1 seq_cst + ret i64 %res +} + +; Check the low end of the AGHI range. 
+define i64 @f3(i64 %dummy, i64 *%src) { +; CHECK: f3: +; CHECK: aghi %r0, -32768 +; CHECK: br %r14 + %res = atomicrmw sub i64 *%src, i64 32768 seq_cst + ret i64 %res +} + +; Check the next value up, which must use AGFI. +define i64 @f4(i64 %dummy, i64 *%src) { +; CHECK: f4: +; CHECK: agfi %r0, -32769 +; CHECK: br %r14 + %res = atomicrmw sub i64 *%src, i64 32769 seq_cst + ret i64 %res +} + +; Check the low end of the AGFI range. +define i64 @f5(i64 %dummy, i64 *%src) { +; CHECK: f5: +; CHECK: agfi %r0, -2147483648 +; CHECK: br %r14 + %res = atomicrmw sub i64 *%src, i64 2147483648 seq_cst + ret i64 %res +} + +; Check the next value up, which must use a register operation. +define i64 @f6(i64 %dummy, i64 *%src) { +; CHECK: f6: +; CHECK: sgr +; CHECK: br %r14 + %res = atomicrmw sub i64 *%src, i64 2147483649 seq_cst + ret i64 %res +} + +; Check subtraction of -1, which can use AGHI. +define i64 @f7(i64 %dummy, i64 *%src) { +; CHECK: f7: +; CHECK: aghi %r0, 1 +; CHECK: br %r14 + %res = atomicrmw sub i64 *%src, i64 -1 seq_cst + ret i64 %res +} + +; Check the high end of the AGHI range. +define i64 @f8(i64 %dummy, i64 *%src) { +; CHECK: f8: +; CHECK: aghi %r0, 32767 +; CHECK: br %r14 + %res = atomicrmw sub i64 *%src, i64 -32767 seq_cst + ret i64 %res +} + +; Check the next value down, which must use AGFI instead. +define i64 @f9(i64 %dummy, i64 *%src) { +; CHECK: f9: +; CHECK: agfi %r0, 32768 +; CHECK: br %r14 + %res = atomicrmw sub i64 *%src, i64 -32768 seq_cst + ret i64 %res +} + +; Check the high end of the AGFI range. +define i64 @f10(i64 %dummy, i64 *%src) { +; CHECK: f10: +; CHECK: agfi %r0, 2147483647 +; CHECK: br %r14 + %res = atomicrmw sub i64 *%src, i64 -2147483647 seq_cst + ret i64 %res +} + +; Check the next value down, which must use a register operation. 
+define i64 @f11(i64 %dummy, i64 *%src) { +; CHECK: f11: +; CHECK: sgr +; CHECK: br %r14 + %res = atomicrmw sub i64 *%src, i64 -2147483648 seq_cst + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-xchg-01.ll b/test/CodeGen/SystemZ/atomicrmw-xchg-01.ll new file mode 100644 index 0000000..e33597b --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-xchg-01.ll @@ -0,0 +1,55 @@ +; Test 8-bit atomic exchange. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT + +; Check exchange with a variable. +; - CHECK is for the main loop. +; - CHECK-SHIFT makes sure that the negated shift count used by the second +; RLL is set up correctly. The negation is independent of the NILL and L +; tested in CHECK. CHECK-SHIFT also checks that %r3 is not modified before +; being used in the RISBG (in contrast to things like atomic addition, +; which shift %r3 left so that %b is at the high end of the word). +define i8 @f1(i8 *%src, i8 %b) { +; CHECK: f1: +; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK: nill %r2, 65532 +; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) +; CHECK: risbg [[ROT]], %r3, 32, 39, 24 +; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) +; CHECK: cs [[OLD]], [[NEW]], 0(%r2) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: rll %r2, [[OLD]], 8([[SHIFT]]) +; CHECK: br %r14 +; +; CHECK-SHIFT: f1: +; CHECK-SHIFT-NOT: %r3 +; CHECK-SHIFT: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK-SHIFT-NOT: %r3 +; CHECK-SHIFT: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] +; CHECK-SHIFT-NOT: %r3 +; CHECK-SHIFT: rll +; CHECK-SHIFT-NOT: %r3 +; CHECK-SHIFT: risbg {{%r[0-9]+}}, %r3, 32, 39, 24 +; CHECK-SHIFT: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]]) +; CHECK-SHIFT: rll +; CHECK-SHIFT: br %r14 + %res = atomicrmw xchg i8 *%src, i8 %b seq_cst + ret i8 %res +} + +; Check exchange with a constant. 
We should force the constant into +; a register and use the sequence above. +define i8 @f2(i8 *%src) { +; CHECK: f2: +; CHECK: lhi [[VALUE:%r[0-9]+]], 88 +; CHECK: risbg {{%r[0-9]+}}, [[VALUE]], 32, 39, 24 +; CHECK: br %r14 +; +; CHECK-SHIFT: f2: +; CHECK-SHIFT: br %r14 + %res = atomicrmw xchg i8 *%src, i8 88 seq_cst + ret i8 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-xchg-02.ll b/test/CodeGen/SystemZ/atomicrmw-xchg-02.ll new file mode 100644 index 0000000..31f8026 --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-xchg-02.ll @@ -0,0 +1,55 @@ +; Test 16-bit atomic exchange. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT + +; Check exchange with a variable. +; - CHECK is for the main loop. +; - CHECK-SHIFT makes sure that the negated shift count used by the second +; RLL is set up correctly. The negation is independent of the NILL and L +; tested in CHECK. CHECK-SHIFT also checks that %r3 is not modified before +; being used in the RISBG (in contrast to things like atomic addition, +; which shift %r3 left so that %b is at the high end of the word). 
+define i16 @f1(i16 *%src, i16 %b) { +; CHECK: f1: +; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK: nill %r2, 65532 +; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) +; CHECK: risbg [[ROT]], %r3, 32, 47, 16 +; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) +; CHECK: cs [[OLD]], [[NEW]], 0(%r2) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: rll %r2, [[OLD]], 16([[SHIFT]]) +; CHECK: br %r14 +; +; CHECK-SHIFT: f1: +; CHECK-SHIFT-NOT: %r3 +; CHECK-SHIFT: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK-SHIFT-NOT: %r3 +; CHECK-SHIFT: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] +; CHECK-SHIFT-NOT: %r3 +; CHECK-SHIFT: rll +; CHECK-SHIFT-NOT: %r3 +; CHECK-SHIFT: risbg {{%r[0-9]+}}, %r3, 32, 47, 16 +; CHECK-SHIFT: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]]) +; CHECK-SHIFT: rll +; CHECK-SHIFT: br %r14 + %res = atomicrmw xchg i16 *%src, i16 %b seq_cst + ret i16 %res +} + +; Check exchange with a constant. We should force the constant into +; a register and use the sequence above. +define i16 @f2(i16 *%src) { +; CHECK: f2: +; CHECK: lhi [[VALUE:%r[0-9]+]], -25536 +; CHECK: risbg {{%r[0-9]+}}, [[VALUE]], 32, 47, 16 +; CHECK: br %r14 +; +; CHECK-SHIFT: f2: +; CHECK-SHIFT: br %r14 + %res = atomicrmw xchg i16 *%src, i16 40000 seq_cst + ret i16 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-xchg-03.ll b/test/CodeGen/SystemZ/atomicrmw-xchg-03.ll new file mode 100644 index 0000000..37581ab --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-xchg-03.ll @@ -0,0 +1,122 @@ +; Test 32-bit atomic exchange. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check register exchange. +define i32 @f1(i32 %dummy, i32 *%src, i32 %b) { +; CHECK: f1: +; CHECK: l %r2, 0(%r3) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: cs %r2, %r4, 0(%r3) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: br %r14 + %res = atomicrmw xchg i32 *%src, i32 %b seq_cst + ret i32 %res +} + +; Check the high end of the aligned CS range. 
+define i32 @f2(i32 %dummy, i32 *%src, i32 %b) { +; CHECK: f2: +; CHECK: l %r2, 4092(%r3) +; CHECK: cs %r2, {{%r[0-9]+}}, 4092(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 1023 + %res = atomicrmw xchg i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the next word up, which requires CSY. +define i32 @f3(i32 %dummy, i32 *%src, i32 %b) { +; CHECK: f3: +; CHECK: ly %r2, 4096(%r3) +; CHECK: csy %r2, {{%r[0-9]+}}, 4096(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 1024 + %res = atomicrmw xchg i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the high end of the aligned CSY range. +define i32 @f4(i32 %dummy, i32 *%src, i32 %b) { +; CHECK: f4: +; CHECK: ly %r2, 524284(%r3) +; CHECK: csy %r2, {{%r[0-9]+}}, 524284(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131071 + %res = atomicrmw xchg i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the next word up, which needs separate address logic. +define i32 @f5(i32 %dummy, i32 *%src, i32 %b) { +; CHECK: f5: +; CHECK: agfi %r3, 524288 +; CHECK: l %r2, 0(%r3) +; CHECK: cs %r2, {{%r[0-9]+}}, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131072 + %res = atomicrmw xchg i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the high end of the negative aligned CSY range. +define i32 @f6(i32 %dummy, i32 *%src, i32 %b) { +; CHECK: f6: +; CHECK: ly %r2, -4(%r3) +; CHECK: csy %r2, {{%r[0-9]+}}, -4(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -1 + %res = atomicrmw xchg i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the low end of the CSY range. +define i32 @f7(i32 %dummy, i32 *%src, i32 %b) { +; CHECK: f7: +; CHECK: ly %r2, -524288(%r3) +; CHECK: csy %r2, {{%r[0-9]+}}, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131072 + %res = atomicrmw xchg i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the next word down, which needs separate address logic. 
+define i32 @f8(i32 %dummy, i32 *%src, i32 %b) { +; CHECK: f8: +; CHECK: agfi %r3, -524292 +; CHECK: l %r2, 0(%r3) +; CHECK: cs %r2, {{%r[0-9]+}}, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131073 + %res = atomicrmw xchg i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check that indexed addresses are not allowed. +define i32 @f9(i32 %dummy, i64 %base, i64 %index, i32 %b) { +; CHECK: f9: +; CHECK: agr %r3, %r4 +; CHECK: l %r2, 0(%r3) +; CHECK: cs %r2, {{%r[0-9]+}}, 0(%r3) +; CHECK: br %r14 + %add = add i64 %base, %index + %ptr = inttoptr i64 %add to i32 * + %res = atomicrmw xchg i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check exchange of a constant. We should force it into a register and +; use the sequence above. +define i32 @f10(i32 %dummy, i32 *%src) { +; CHECK: f10: +; CHECK: llill [[VALUE:%r[0-9]+]], 40000 +; CHECK: l %r2, 0(%r3) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: cs %r2, [[VALUE]], 0(%r3) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: br %r14 + %res = atomicrmw xchg i32 *%src, i32 40000 seq_cst + ret i32 %res +} + diff --git a/test/CodeGen/SystemZ/atomicrmw-xchg-04.ll b/test/CodeGen/SystemZ/atomicrmw-xchg-04.ll new file mode 100644 index 0000000..a68295e --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-xchg-04.ll @@ -0,0 +1,88 @@ +; Test 64-bit atomic exchange. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check register exchange. +define i64 @f1(i64 %dummy, i64 *%src, i64 %b) { +; CHECK: f1: +; CHECK: lg %r2, 0(%r3) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: csg %r2, %r4, 0(%r3) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: br %r14 + %res = atomicrmw xchg i64 *%src, i64 %b seq_cst + ret i64 %res +} + +; Check the high end of the aligned CSG range. 
+define i64 @f2(i64 %dummy, i64 *%src, i64 %b) { +; CHECK: f2: +; CHECK: lg %r2, 524280(%r3) +; CHECK: csg %r2, {{%r[0-9]+}}, 524280(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65535 + %res = atomicrmw xchg i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check the next doubleword up, which requires separate address logic. +define i64 @f3(i64 %dummy, i64 *%src, i64 %b) { +; CHECK: f3: +; CHECK: agfi %r3, 524288 +; CHECK: lg %r2, 0(%r3) +; CHECK: csg %r2, {{%r[0-9]+}}, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65536 + %res = atomicrmw xchg i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check the low end of the CSG range. +define i64 @f4(i64 %dummy, i64 *%src, i64 %b) { +; CHECK: f4: +; CHECK: lg %r2, -524288(%r3) +; CHECK: csg %r2, {{%r[0-9]+}}, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65536 + %res = atomicrmw xchg i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check the next doubleword down, which requires separate address logic. +define i64 @f5(i64 %dummy, i64 *%src, i64 %b) { +; CHECK: f5: +; CHECK: agfi %r3, -524296 +; CHECK: lg %r2, 0(%r3) +; CHECK: csg %r2, {{%r[0-9]+}}, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65537 + %res = atomicrmw xchg i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check that indexed addresses are not allowed. +define i64 @f6(i64 %dummy, i64 %base, i64 %index, i64 %b) { +; CHECK: f6: +; CHECK: agr %r3, %r4 +; CHECK: lg %r2, 0(%r3) +; CHECK: csg %r2, {{%r[0-9]+}}, 0(%r3) +; CHECK: br %r14 + %add = add i64 %base, %index + %ptr = inttoptr i64 %add to i64 * + %res = atomicrmw xchg i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check exchange of a constant. We should force it into a register and +; use the sequence above. 
+define i64 @f7(i64 %dummy, i64 *%ptr) { +; CHECK: f7: +; CHECK: llilf [[VALUE:%r[0-9]+]], 3000000000 +; CHECK: lg %r2, 0(%r3) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: csg %r2, [[VALUE]], 0(%r3) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: br %r14 + %res = atomicrmw xchg i64 *%ptr, i64 3000000000 seq_cst + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-xor-01.ll b/test/CodeGen/SystemZ/atomicrmw-xor-01.ll new file mode 100644 index 0000000..13cdf02 --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-xor-01.ll @@ -0,0 +1,132 @@ +; Test 8-bit atomic XORs. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT1 +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT2 + +; Check XOR of a variable. +; - CHECK is for the main loop. +; - CHECK-SHIFT1 makes sure that the negated shift count used by the second +; RLL is set up correctly. The negation is independent of the NILL and L +; tested in CHECK. +; - CHECK-SHIFT2 makes sure that %b is shifted into the high part of the word +; before being used. This shift is independent of the other loop prologue +; instructions. 
+define i8 @f1(i8 *%src, i8 %b) { +; CHECK: f1: +; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK: nill %r2, 65532 +; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) +; CHECK: xr [[ROT]], %r3 +; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) +; CHECK: cs [[OLD]], [[NEW]], 0(%r2) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: rll %r2, [[OLD]], 8([[SHIFT]]) +; CHECK: br %r14 +; +; CHECK-SHIFT1: f1: +; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]]) +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: br %r14 +; +; CHECK-SHIFT2: f1: +; CHECK-SHIFT2: sll %r3, 24 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: xr {{%r[0-9]+}}, %r3 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: br %r14 + %res = atomicrmw xor i8 *%src, i8 %b seq_cst + ret i8 %res +} + +; Check the minimum signed value. We XOR the rotated word with 0x80000000. +define i8 @f2(i8 *%src) { +; CHECK: f2: +; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK: nill %r2, 65532 +; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) +; CHECK: xilf [[ROT]], 2147483648 +; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]]) +; CHECK: cs [[OLD]], [[NEW]], 0(%r2) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: rll %r2, [[OLD]], 8([[SHIFT]]) +; CHECK: br %r14 +; +; CHECK-SHIFT1: f2: +; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]]) +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: br %r14 +; +; CHECK-SHIFT2: f2: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw xor i8 *%src, i8 -128 seq_cst + ret i8 %res +} + +; Check XORs of -1. We XOR the rotated word with 0xff000000. 
+define i8 @f3(i8 *%src) { +; CHECK: f3: +; CHECK: xilf [[ROT]], 4278190080 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f3: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f3: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw xor i8 *%src, i8 -1 seq_cst + ret i8 %res +} + +; Check XORs of 1. We XOR the rotated word with 0x01000000. +define i8 @f4(i8 *%src) { +; CHECK: f4: +; CHECK: xilf [[ROT]], 16777216 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f4: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f4: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw xor i8 *%src, i8 1 seq_cst + ret i8 %res +} + +; Check the maximum signed value. We XOR the rotated word with 0x7f000000. +define i8 @f5(i8 *%src) { +; CHECK: f5: +; CHECK: xilf [[ROT]], 2130706432 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f5: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f5: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw xor i8 *%src, i8 127 seq_cst + ret i8 %res +} + +; Check XORs of a large unsigned value. We XOR the rotated word with +; 0xfd000000. +define i8 @f6(i8 *%src) { +; CHECK: f6: +; CHECK: xilf [[ROT]], 4244635648 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f6: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f6: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw xor i8 *%src, i8 253 seq_cst + ret i8 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-xor-02.ll b/test/CodeGen/SystemZ/atomicrmw-xor-02.ll new file mode 100644 index 0000000..4faa64f --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-xor-02.ll @@ -0,0 +1,132 @@ +; Test 16-bit atomic XORs. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT1 +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT2 + +; Check XOR of a variable. +; - CHECK is for the main loop. +; - CHECK-SHIFT1 makes sure that the negated shift count used by the second +; RLL is set up correctly. The negation is independent of the NILL and L +; tested in CHECK. 
+; - CHECK-SHIFT2 makes sure that %b is shifted into the high part of the word +; before being used. This shift is independent of the other loop prologue +; instructions. +define i16 @f1(i16 *%src, i16 %b) { +; CHECK: f1: +; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK: nill %r2, 65532 +; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) +; CHECK: xr [[ROT]], %r3 +; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) +; CHECK: cs [[OLD]], [[NEW]], 0(%r2) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: rll %r2, [[OLD]], 16([[SHIFT]]) +; CHECK: br %r14 +; +; CHECK-SHIFT1: f1: +; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]]) +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: br %r14 +; +; CHECK-SHIFT2: f1: +; CHECK-SHIFT2: sll %r3, 16 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: xr {{%r[0-9]+}}, %r3 +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: rll +; CHECK-SHIFT2: br %r14 + %res = atomicrmw xor i16 *%src, i16 %b seq_cst + ret i16 %res +} + +; Check the minimum signed value. We XOR the rotated word with 0x80000000. 
+define i16 @f2(i16 *%src) { +; CHECK: f2: +; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK: nill %r2, 65532 +; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) +; CHECK: xilf [[ROT]], 2147483648 +; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]]) +; CHECK: cs [[OLD]], [[NEW]], 0(%r2) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: rll %r2, [[OLD]], 16([[SHIFT]]) +; CHECK: br %r14 +; +; CHECK-SHIFT1: f2: +; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3 +; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]]) +; CHECK-SHIFT1: rll +; CHECK-SHIFT1: br %r14 +; +; CHECK-SHIFT2: f2: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw xor i16 *%src, i16 -32768 seq_cst + ret i16 %res +} + +; Check XORs of -1. We XOR the rotated word with 0xffff0000. +define i16 @f3(i16 *%src) { +; CHECK: f3: +; CHECK: xilf [[ROT]], 4294901760 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f3: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f3: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw xor i16 *%src, i16 -1 seq_cst + ret i16 %res +} + +; Check XORs of 1. We XOR the rotated word with 0x00010000. +define i16 @f4(i16 *%src) { +; CHECK: f4: +; CHECK: xilf [[ROT]], 65536 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f4: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f4: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw xor i16 *%src, i16 1 seq_cst + ret i16 %res +} + +; Check the maximum signed value. We XOR the rotated word with 0x7fff0000. +define i16 @f5(i16 *%src) { +; CHECK: f5: +; CHECK: xilf [[ROT]], 2147418112 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f5: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f5: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw xor i16 *%src, i16 32767 seq_cst + ret i16 %res +} + +; Check XORs of a large unsigned value. We XOR the rotated word with +; 0xfffd0000. 
+define i16 @f6(i16 *%src) { +; CHECK: f6: +; CHECK: xilf [[ROT]], 4294770688 +; CHECK: br %r14 +; +; CHECK-SHIFT1: f6: +; CHECK-SHIFT1: br %r14 +; CHECK-SHIFT2: f6: +; CHECK-SHIFT2: br %r14 + %res = atomicrmw xor i16 *%src, i16 65533 seq_cst + ret i16 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-xor-03.ll b/test/CodeGen/SystemZ/atomicrmw-xor-03.ll new file mode 100644 index 0000000..23884f8 --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-xor-03.ll @@ -0,0 +1,49 @@ +; Test 32-bit atomic XORs. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check XORs of a variable. +define i32 @f1(i32 %dummy, i32 *%src, i32 %b) { +; CHECK: f1: +; CHECK: l %r2, 0(%r3) +; CHECK: [[LABEL:\.[^ ]*]]: +; CHECK: lr %r0, %r2 +; CHECK: xr %r0, %r4 +; CHECK: cs %r2, %r0, 0(%r3) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: br %r14 + %res = atomicrmw xor i32 *%src, i32 %b seq_cst + ret i32 %res +} + +; Check the lowest useful constant. +define i32 @f2(i32 %dummy, i32 *%src) { +; CHECK: f2: +; CHECK: l %r2, 0(%r3) +; CHECK: [[LABEL:\.[^ ]*]]: +; CHECK: lr %r0, %r2 +; CHECK: xilf %r0, 1 +; CHECK: cs %r2, %r0, 0(%r3) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: br %r14 + %res = atomicrmw xor i32 *%src, i32 1 seq_cst + ret i32 %res +} + +; Check an arbitrary constant. +define i32 @f3(i32 %dummy, i32 *%src) { +; CHECK: f3: +; CHECK: xilf %r0, 3000000000 +; CHECK: br %r14 + %res = atomicrmw xor i32 *%src, i32 3000000000 seq_cst + ret i32 %res +} + +; Check bitwise negation. +define i32 @f4(i32 %dummy, i32 *%src) { +; CHECK: f4: +; CHECK: xilf %r0, 4294967295 +; CHECK: br %r14 + %res = atomicrmw xor i32 *%src, i32 -1 seq_cst + ret i32 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-xor-04.ll b/test/CodeGen/SystemZ/atomicrmw-xor-04.ll new file mode 100644 index 0000000..21130fb --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-xor-04.ll @@ -0,0 +1,77 @@ +; Test 64-bit atomic XORs. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check XORs of a variable. 
+define i64 @f1(i64 %dummy, i64 *%src, i64 %b) { +; CHECK: f1: +; CHECK: lg %r2, 0(%r3) +; CHECK: [[LABEL:\.[^ ]*]]: +; CHECK: lgr %r0, %r2 +; CHECK: xgr %r0, %r4 +; CHECK: csg %r2, %r0, 0(%r3) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: br %r14 + %res = atomicrmw xor i64 *%src, i64 %b seq_cst + ret i64 %res +} + +; Check the lowest useful XILF value. +define i64 @f2(i64 %dummy, i64 *%src) { +; CHECK: f2: +; CHECK: lg %r2, 0(%r3) +; CHECK: [[LABEL:\.[^ ]*]]: +; CHECK: lgr %r0, %r2 +; CHECK: xilf %r0, 1 +; CHECK: csg %r2, %r0, 0(%r3) +; CHECK: j{{g?}}lh [[LABEL]] +; CHECK: br %r14 + %res = atomicrmw xor i64 *%src, i64 1 seq_cst + ret i64 %res +} + +; Check the high end of the XILF range. +define i64 @f3(i64 %dummy, i64 *%src) { +; CHECK: f3: +; CHECK: xilf %r0, 4294967295 +; CHECK: br %r14 + %res = atomicrmw xor i64 *%src, i64 4294967295 seq_cst + ret i64 %res +} + +; Check the lowest useful XIHF value, which is one greater than above. +define i64 @f4(i64 %dummy, i64 *%src) { +; CHECK: f4: +; CHECK: xihf %r0, 1 +; CHECK: br %r14 + %res = atomicrmw xor i64 *%src, i64 4294967296 seq_cst + ret i64 %res +} + +; Check the next value up, which must use a register. (We could use +; combinations of XIH* and XIL* instead, but that isn't implemented.) +define i64 @f5(i64 %dummy, i64 *%src) { +; CHECK: f5: +; CHECK: xgr +; CHECK: br %r14 + %res = atomicrmw xor i64 *%src, i64 4294967297 seq_cst + ret i64 %res +} + +; Check the high end of the XIHF range. +define i64 @f6(i64 %dummy, i64 *%src) { +; CHECK: f6: +; CHECK: xihf %r0, 4294967295 +; CHECK: br %r14 + %res = atomicrmw xor i64 *%src, i64 -4294967296 seq_cst + ret i64 %res +} + +; Check the next value up, which must use a register. 
+define i64 @f7(i64 %dummy, i64 *%src) { +; CHECK: f7: +; CHECK: xgr +; CHECK: br %r14 + %res = atomicrmw xor i64 *%src, i64 -4294967295 seq_cst + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/branch-01.ll b/test/CodeGen/SystemZ/branch-01.ll new file mode 100644 index 0000000..8ff91ac --- /dev/null +++ b/test/CodeGen/SystemZ/branch-01.ll @@ -0,0 +1,14 @@ +; Test a simple unconditional jump. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +define void @f1(i8 *%dest) { +; CHECK: f1: +; CHECK: .L[[LABEL:.*]]: +; CHECK: mvi 0(%r2), 1 +; CHECK: j{{g?}} .L[[LABEL]] + br label %loop +loop: + store volatile i8 1, i8 *%dest + br label %loop +} diff --git a/test/CodeGen/SystemZ/branch-02.ll b/test/CodeGen/SystemZ/branch-02.ll new file mode 100644 index 0000000..cde9b56 --- /dev/null +++ b/test/CodeGen/SystemZ/branch-02.ll @@ -0,0 +1,94 @@ +; Test all condition-code masks that are relevant for signed integer +; comparisons. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +define void @f1(i32 *%src, i32 %target) { +; CHECK: f1: +; CHECK: .cfi_startproc +; CHECK: .L[[LABEL:.*]]: +; CHECK: c %r3, 0(%r2) +; CHECK-NEXT: j{{g?}}e .L[[LABEL]] + br label %loop +loop: + %val = load volatile i32 *%src + %cond = icmp eq i32 %target, %val + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +define void @f2(i32 *%src, i32 %target) { +; CHECK: f2: +; CHECK: .cfi_startproc +; CHECK: .L[[LABEL:.*]]: +; CHECK: c %r3, 0(%r2) +; CHECK-NEXT: j{{g?}}lh .L[[LABEL]] + br label %loop +loop: + %val = load volatile i32 *%src + %cond = icmp ne i32 %target, %val + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +define void @f3(i32 *%src, i32 %target) { +; CHECK: f3: +; CHECK: .cfi_startproc +; CHECK: .L[[LABEL:.*]]: +; CHECK: c %r3, 0(%r2) +; CHECK-NEXT: j{{g?}}le .L[[LABEL]] + br label %loop +loop: + %val = load volatile i32 *%src + %cond = icmp sle i32 %target, %val + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +define void 
@f4(i32 *%src, i32 %target) { +; CHECK: f4: +; CHECK: .cfi_startproc +; CHECK: .L[[LABEL:.*]]: +; CHECK: c %r3, 0(%r2) +; CHECK-NEXT: j{{g?}}l .L[[LABEL]] + br label %loop +loop: + %val = load volatile i32 *%src + %cond = icmp slt i32 %target, %val + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +define void @f5(i32 *%src, i32 %target) { +; CHECK: f5: +; CHECK: .cfi_startproc +; CHECK: .L[[LABEL:.*]]: +; CHECK: c %r3, 0(%r2) +; CHECK-NEXT: j{{g?}}h .L[[LABEL]] + br label %loop +loop: + %val = load volatile i32 *%src + %cond = icmp sgt i32 %target, %val + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +define void @f6(i32 *%src, i32 %target) { +; CHECK: f6: +; CHECK: .cfi_startproc +; CHECK: .L[[LABEL:.*]]: +; CHECK: c %r3, 0(%r2) +; CHECK-NEXT: j{{g?}}he .L[[LABEL]] + br label %loop +loop: + %val = load volatile i32 *%src + %cond = icmp sge i32 %target, %val + br i1 %cond, label %loop, label %exit +exit: + ret void +} diff --git a/test/CodeGen/SystemZ/branch-03.ll b/test/CodeGen/SystemZ/branch-03.ll new file mode 100644 index 0000000..1e447d0 --- /dev/null +++ b/test/CodeGen/SystemZ/branch-03.ll @@ -0,0 +1,63 @@ +; Test all condition-code masks that are relevant for unsigned integer +; comparisons. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +define void @f1(i32 *%src, i32 %target) { +; CHECK: f1: +; CHECK: .cfi_startproc +; CHECK: .L[[LABEL:.*]]: +; CHECK: cl %r3, 0(%r2) +; CHECK-NEXT: j{{g?}}le .L[[LABEL]] + br label %loop +loop: + %val = load volatile i32 *%src + %cond = icmp ule i32 %target, %val + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +define void @f2(i32 *%src, i32 %target) { +; CHECK: f2: +; CHECK: .cfi_startproc +; CHECK: .L[[LABEL:.*]]: +; CHECK: cl %r3, 0(%r2) +; CHECK-NEXT: j{{g?}}l .L[[LABEL]] + br label %loop +loop: + %val = load volatile i32 *%src + %cond = icmp ult i32 %target, %val + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +define void @f3(i32 *%src, i32 %target) { +; CHECK: f3: +; CHECK: .cfi_startproc +; CHECK: .L[[LABEL:.*]]: +; CHECK: cl %r3, 0(%r2) +; CHECK-NEXT: j{{g?}}h .L[[LABEL]] + br label %loop +loop: + %val = load volatile i32 *%src + %cond = icmp ugt i32 %target, %val + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +define void @f4(i32 *%src, i32 %target) { +; CHECK: f4: +; CHECK: .cfi_startproc +; CHECK: .L[[LABEL:.*]]: +; CHECK: cl %r3, 0(%r2) +; CHECK-NEXT: j{{g?}}he .L[[LABEL]] + br label %loop +loop: + %val = load volatile i32 *%src + %cond = icmp uge i32 %target, %val + br i1 %cond, label %loop, label %exit +exit: + ret void +} diff --git a/test/CodeGen/SystemZ/branch-04.ll b/test/CodeGen/SystemZ/branch-04.ll new file mode 100644 index 0000000..3d41750 --- /dev/null +++ b/test/CodeGen/SystemZ/branch-04.ll @@ -0,0 +1,218 @@ +; Test all condition-code masks that are relevant for floating-point +; comparisons. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +define void @f1(float *%src, float %target) { +; CHECK: f1: +; CHECK: .cfi_startproc +; CHECK: .L[[LABEL:.*]]: +; CHECK: ceb %f0, 0(%r2) +; CHECK-NEXT: j{{g?}}e .L[[LABEL]] + br label %loop +loop: + %val = load volatile float *%src + %cond = fcmp oeq float %target, %val + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +define void @f2(float *%src, float %target) { +; CHECK: f2: +; CHECK: .cfi_startproc +; CHECK: .L[[LABEL:.*]]: +; CHECK: ceb %f0, 0(%r2) +; CHECK-NEXT: j{{g?}}lh .L[[LABEL]] + br label %loop +loop: + %val = load volatile float *%src + %cond = fcmp one float %target, %val + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +define void @f3(float *%src, float %target) { +; CHECK: f3: +; CHECK: .cfi_startproc +; CHECK: .L[[LABEL:.*]]: +; CHECK: ceb %f0, 0(%r2) +; CHECK-NEXT: j{{g?}}le .L[[LABEL]] + br label %loop +loop: + %val = load volatile float *%src + %cond = fcmp ole float %target, %val + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +define void @f4(float *%src, float %target) { +; CHECK: f4: +; CHECK: .cfi_startproc +; CHECK: .L[[LABEL:.*]]: +; CHECK: ceb %f0, 0(%r2) +; CHECK-NEXT: j{{g?}}l .L[[LABEL]] + br label %loop +loop: + %val = load volatile float *%src + %cond = fcmp olt float %target, %val + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +define void @f5(float *%src, float %target) { +; CHECK: f5: +; CHECK: .cfi_startproc +; CHECK: .L[[LABEL:.*]]: +; CHECK: ceb %f0, 0(%r2) +; CHECK-NEXT: j{{g?}}h .L[[LABEL]] + br label %loop +loop: + %val = load volatile float *%src + %cond = fcmp ogt float %target, %val + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +define void @f6(float *%src, float %target) { +; CHECK: f6: +; CHECK: .cfi_startproc +; CHECK: .L[[LABEL:.*]]: +; CHECK: ceb %f0, 0(%r2) +; CHECK-NEXT: j{{g?}}he .L[[LABEL]] + br label %loop +loop: + %val = load volatile float *%src + %cond = fcmp oge float 
%target, %val + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +define void @f7(float *%src, float %target) { +; CHECK: f7: +; CHECK: .cfi_startproc +; CHECK: .L[[LABEL:.*]]: +; CHECK: ceb %f0, 0(%r2) +; CHECK-NEXT: j{{g?}}nlh .L[[LABEL]] + br label %loop +loop: + %val = load volatile float *%src + %cond = fcmp ueq float %target, %val + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +define void @f8(float *%src, float %target) { +; CHECK: f8: +; CHECK: .cfi_startproc +; CHECK: .L[[LABEL:.*]]: +; CHECK: ceb %f0, 0(%r2) +; CHECK-NEXT: j{{g?}}ne .L[[LABEL]] + br label %loop +loop: + %val = load volatile float *%src + %cond = fcmp une float %target, %val + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +define void @f9(float *%src, float %target) { +; CHECK: f9: +; CHECK: .cfi_startproc +; CHECK: .L[[LABEL:.*]]: +; CHECK: ceb %f0, 0(%r2) +; CHECK-NEXT: j{{g?}}nh .L[[LABEL]] + br label %loop +loop: + %val = load volatile float *%src + %cond = fcmp ule float %target, %val + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +define void @f10(float *%src, float %target) { +; CHECK: f10: +; CHECK: .cfi_startproc +; CHECK: .L[[LABEL:.*]]: +; CHECK: ceb %f0, 0(%r2) +; CHECK-NEXT: j{{g?}}nhe .L[[LABEL]] + br label %loop +loop: + %val = load volatile float *%src + %cond = fcmp ult float %target, %val + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +define void @f11(float *%src, float %target) { +; CHECK: f11: +; CHECK: .cfi_startproc +; CHECK: .L[[LABEL:.*]]: +; CHECK: ceb %f0, 0(%r2) +; CHECK-NEXT: j{{g?}}nle .L[[LABEL]] + br label %loop +loop: + %val = load volatile float *%src + %cond = fcmp ugt float %target, %val + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +define void @f12(float *%src, float %target) { +; CHECK: f12: +; CHECK: .cfi_startproc +; CHECK: .L[[LABEL:.*]]: +; CHECK: ceb %f0, 0(%r2) +; CHECK-NEXT: j{{g?}}nl .L[[LABEL]] + br label %loop +loop: + %val = load volatile float *%src 
+ %cond = fcmp uge float %target, %val + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +; "jno" == "jump if no overflow", which corresponds to "jump if ordered" +; rather than "jump if not ordered" after a floating-point comparison. +define void @f13(float *%src, float %target) { +; CHECK: f13: +; CHECK: .cfi_startproc +; CHECK: .L[[LABEL:.*]]: +; CHECK: ceb %f0, 0(%r2) +; CHECK-NEXT: j{{g?}}no .L[[LABEL]] + br label %loop +loop: + %val = load volatile float *%src + %cond = fcmp ord float %target, %val + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +; "jo" == "jump if overflow", which corresponds to "jump if not ordered" +; rather than "jump if ordered" after a floating-point comparison. +define void @f14(float *%src, float %target) { +; CHECK: f14: +; CHECK: .cfi_startproc +; CHECK: .L[[LABEL:.*]]: +; CHECK: ceb %f0, 0(%r2) +; CHECK-NEXT: j{{g?}}o .L[[LABEL]] + br label %loop +loop: + %val = load volatile float *%src + %cond = fcmp uno float %target, %val + br i1 %cond, label %loop, label %exit +exit: + ret void +} diff --git a/test/CodeGen/SystemZ/branch-05.ll b/test/CodeGen/SystemZ/branch-05.ll new file mode 100644 index 0000000..d149e0b --- /dev/null +++ b/test/CodeGen/SystemZ/branch-05.ll @@ -0,0 +1,58 @@ +; Test indirect jumps. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +define i32 @f1(i32 %x, i32 %y, i32 %op) { +; CHECK: f1: +; CHECK: ahi %r4, -1 +; CHECK: clfi %r4, 5 +; CHECK-NEXT: j{{g?}}g +; CHECK: llgfr [[OP64:%r[0-5]]], %r4 +; CHECK: sllg [[INDEX:%r[1-5]]], [[OP64]], 3 +; CHECK: larl [[BASE:%r[1-5]]] +; CHECK: lg [[TARGET:%r[1-5]]], 0([[BASE]],[[INDEX]]) +; CHECK: br [[TARGET]] +entry: + switch i32 %op, label %exit [ + i32 1, label %b.add + i32 2, label %b.sub + i32 3, label %b.and + i32 4, label %b.or + i32 5, label %b.xor + i32 6, label %b.mul + ] + +b.add: + %add = add i32 %x, %y + br label %exit + +b.sub: + %sub = sub i32 %x, %y + br label %exit + +b.and: + %and = and i32 %x, %y + br label %exit + +b.or: + %or = or i32 %x, %y + br label %exit + +b.xor: + %xor = xor i32 %x, %y + br label %exit + +b.mul: + %mul = mul i32 %x, %y + br label %exit + +exit: + %res = phi i32 [ %x, %entry ], + [ %add, %b.add ], + [ %sub, %b.sub ], + [ %and, %b.and ], + [ %or, %b.or ], + [ %xor, %b.xor ], + [ %mul, %b.mul ] + ret i32 %res +} diff --git a/test/CodeGen/SystemZ/bswap-01.ll b/test/CodeGen/SystemZ/bswap-01.ll new file mode 100644 index 0000000..952903d --- /dev/null +++ b/test/CodeGen/SystemZ/bswap-01.ll @@ -0,0 +1,24 @@ +; Test byteswaps between registers. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i32 @llvm.bswap.i32(i32 %a) +declare i64 @llvm.bswap.i64(i64 %a) + +; Check 32-bit register-to-register byteswaps. +define i32 @f1(i32 %a) { +; CHECK: f1: +; CHECK: lrvr [[REGISTER:%r[0-5]]], %r2 +; CHECK: br %r14 + %swapped = call i32 @llvm.bswap.i32(i32 %a) + ret i32 %swapped +} + +; Check 64-bit register-to-register byteswaps.
+define i64 @f2(i64 %a) { +; CHECK: f2: +; CHECK: lrvgr %r2, %r2 +; CHECK: br %r14 + %swapped = call i64 @llvm.bswap.i64(i64 %a) + ret i64 %swapped +} diff --git a/test/CodeGen/SystemZ/bswap-02.ll b/test/CodeGen/SystemZ/bswap-02.ll new file mode 100644 index 0000000..e9b7eb5 --- /dev/null +++ b/test/CodeGen/SystemZ/bswap-02.ll @@ -0,0 +1,87 @@ +; Test 32-bit byteswaps from memory to registers. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i32 @llvm.bswap.i32(i32 %a) + +; Check LRV with no displacement. +define i32 @f1(i32 *%src) { +; CHECK: f1: +; CHECK: lrv %r2, 0(%r2) +; CHECK: br %r14 + %a = load i32 *%src + %swapped = call i32 @llvm.bswap.i32(i32 %a) + ret i32 %swapped +} + +; Check the high end of the aligned LRV range. +define i32 @f2(i32 *%src) { +; CHECK: f2: +; CHECK: lrv %r2, 524284(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131071 + %a = load i32 *%ptr + %swapped = call i32 @llvm.bswap.i32(i32 %a) + ret i32 %swapped +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i32 @f3(i32 *%src) { +; CHECK: f3: +; CHECK: agfi %r2, 524288 +; CHECK: lrv %r2, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131072 + %a = load i32 *%ptr + %swapped = call i32 @llvm.bswap.i32(i32 %a) + ret i32 %swapped +} + +; Check the high end of the negative aligned LRV range. +define i32 @f4(i32 *%src) { +; CHECK: f4: +; CHECK: lrv %r2, -4(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -1 + %a = load i32 *%ptr + %swapped = call i32 @llvm.bswap.i32(i32 %a) + ret i32 %swapped +} + +; Check the low end of the LRV range. +define i32 @f5(i32 *%src) { +; CHECK: f5: +; CHECK: lrv %r2, -524288(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131072 + %a = load i32 *%ptr + %swapped = call i32 @llvm.bswap.i32(i32 %a) + ret i32 %swapped +} + +; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK. +define i32 @f6(i32 *%src) { +; CHECK: f6: +; CHECK: agfi %r2, -524292 +; CHECK: lrv %r2, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131073 + %a = load i32 *%ptr + %swapped = call i32 @llvm.bswap.i32(i32 %a) + ret i32 %swapped +} + +; Check that LRV allows an index. +define i32 @f7(i64 %src, i64 %index) { +; CHECK: f7: +; CHECK: lrv %r2, 524287({{%r3,%r2|%r2,%r3}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524287 + %ptr = inttoptr i64 %add2 to i32 * + %a = load i32 *%ptr + %swapped = call i32 @llvm.bswap.i32(i32 %a) + ret i32 %swapped +} diff --git a/test/CodeGen/SystemZ/bswap-03.ll b/test/CodeGen/SystemZ/bswap-03.ll new file mode 100644 index 0000000..2e6bcdc --- /dev/null +++ b/test/CodeGen/SystemZ/bswap-03.ll @@ -0,0 +1,87 @@ +; Test 64-bit byteswaps from memory to registers. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @llvm.bswap.i64(i64 %a) + +; Check LRVG with no displacement. +define i64 @f1(i64 *%src) { +; CHECK: f1: +; CHECK: lrvg %r2, 0(%r2) +; CHECK: br %r14 + %a = load i64 *%src + %swapped = call i64 @llvm.bswap.i64(i64 %a) + ret i64 %swapped +} + +; Check the high end of the aligned LRVG range. +define i64 @f2(i64 *%src) { +; CHECK: f2: +; CHECK: lrvg %r2, 524280(%r2) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65535 + %a = load i64 *%ptr + %swapped = call i64 @llvm.bswap.i64(i64 %a) + ret i64 %swapped +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f3(i64 *%src) { +; CHECK: f3: +; CHECK: agfi %r2, 524288 +; CHECK: lrvg %r2, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65536 + %a = load i64 *%ptr + %swapped = call i64 @llvm.bswap.i64(i64 %a) + ret i64 %swapped +} + +; Check the high end of the negative aligned LRVG range. 
+define i64 @f4(i64 *%src) { +; CHECK: f4: +; CHECK: lrvg %r2, -8(%r2) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -1 + %a = load i64 *%ptr + %swapped = call i64 @llvm.bswap.i64(i64 %a) + ret i64 %swapped +} + +; Check the low end of the LRVG range. +define i64 @f5(i64 *%src) { +; CHECK: f5: +; CHECK: lrvg %r2, -524288(%r2) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65536 + %a = load i64 *%ptr + %swapped = call i64 @llvm.bswap.i64(i64 %a) + ret i64 %swapped +} + +; Check the next doubleword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f6(i64 *%src) { +; CHECK: f6: +; CHECK: agfi %r2, -524296 +; CHECK: lrvg %r2, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65537 + %a = load i64 *%ptr + %swapped = call i64 @llvm.bswap.i64(i64 %a) + ret i64 %swapped +} + +; Check that LRVG allows an index. +define i64 @f7(i64 %src, i64 %index) { +; CHECK: f7: +; CHECK: lrvg %r2, 524287({{%r3,%r2|%r2,%r3}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524287 + %ptr = inttoptr i64 %add2 to i64 * + %a = load i64 *%ptr + %swapped = call i64 @llvm.bswap.i64(i64 %a) + ret i64 %swapped +} diff --git a/test/CodeGen/SystemZ/bswap-04.ll b/test/CodeGen/SystemZ/bswap-04.ll new file mode 100644 index 0000000..192327b --- /dev/null +++ b/test/CodeGen/SystemZ/bswap-04.ll @@ -0,0 +1,87 @@ +; Test 32-bit byteswaps from registers to memory. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i32 @llvm.bswap.i32(i32 %a) + +; Check STRV with no displacement. +define void @f1(i32 *%src, i32 %a) { +; CHECK: f1: +; CHECK: strv %r3, 0(%r2) +; CHECK: br %r14 + %swapped = call i32 @llvm.bswap.i32(i32 %a) + store i32 %swapped, i32 *%src + ret void +} + +; Check the high end of the aligned STRV range. 
+define void @f2(i32 *%src, i32 %a) { +; CHECK: f2: +; CHECK: strv %r3, 524284(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131071 + %swapped = call i32 @llvm.bswap.i32(i32 %a) + store i32 %swapped, i32 *%ptr + ret void +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f3(i32 *%src, i32 %a) { +; CHECK: f3: +; CHECK: agfi %r2, 524288 +; CHECK: strv %r3, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131072 + %swapped = call i32 @llvm.bswap.i32(i32 %a) + store i32 %swapped, i32 *%ptr + ret void +} + +; Check the high end of the negative aligned STRV range. +define void @f4(i32 *%src, i32 %a) { +; CHECK: f4: +; CHECK: strv %r3, -4(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -1 + %swapped = call i32 @llvm.bswap.i32(i32 %a) + store i32 %swapped, i32 *%ptr + ret void +} + +; Check the low end of the STRV range. +define void @f5(i32 *%src, i32 %a) { +; CHECK: f5: +; CHECK: strv %r3, -524288(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131072 + %swapped = call i32 @llvm.bswap.i32(i32 %a) + store i32 %swapped, i32 *%ptr + ret void +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f6(i32 *%src, i32 %a) { +; CHECK: f6: +; CHECK: agfi %r2, -524292 +; CHECK: strv %r3, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131073 + %swapped = call i32 @llvm.bswap.i32(i32 %a) + store i32 %swapped, i32 *%ptr + ret void +} + +; Check that STRV allows an index. 
+define void @f7(i64 %src, i64 %index, i32 %a) { +; CHECK: f7: +; CHECK: strv %r4, 524287({{%r3,%r2|%r2,%r3}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524287 + %ptr = inttoptr i64 %add2 to i32 * + %swapped = call i32 @llvm.bswap.i32(i32 %a) + store i32 %swapped, i32 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/bswap-05.ll b/test/CodeGen/SystemZ/bswap-05.ll new file mode 100644 index 0000000..e58cb80 --- /dev/null +++ b/test/CodeGen/SystemZ/bswap-05.ll @@ -0,0 +1,87 @@ +; Test 64-bit byteswaps from registers to memory. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @llvm.bswap.i64(i64 %a) + +; Check STRVG with no displacement. +define void @f1(i64 *%src, i64 %a) { +; CHECK: f1: +; CHECK: strvg %r3, 0(%r2) +; CHECK: br %r14 + %swapped = call i64 @llvm.bswap.i64(i64 %a) + store i64 %swapped, i64 *%src + ret void +} + +; Check the high end of the aligned STRVG range. +define void @f2(i64 *%src, i64 %a) { +; CHECK: f2: +; CHECK: strvg %r3, 524280(%r2) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65535 + %swapped = call i64 @llvm.bswap.i64(i64 %a) + store i64 %swapped, i64 *%ptr + ret void +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f3(i64 *%src, i64 %a) { +; CHECK: f3: +; CHECK: agfi %r2, 524288 +; CHECK: strvg %r3, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65536 + %swapped = call i64 @llvm.bswap.i64(i64 %a) + store i64 %swapped, i64 *%ptr + ret void +} + +; Check the high end of the negative aligned STRVG range. +define void @f4(i64 *%src, i64 %a) { +; CHECK: f4: +; CHECK: strvg %r3, -8(%r2) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -1 + %swapped = call i64 @llvm.bswap.i64(i64 %a) + store i64 %swapped, i64 *%ptr + ret void +} + +; Check the low end of the STRVG range. 
+define void @f5(i64 *%src, i64 %a) { +; CHECK: f5: +; CHECK: strvg %r3, -524288(%r2) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65536 + %swapped = call i64 @llvm.bswap.i64(i64 %a) + store i64 %swapped, i64 *%ptr + ret void +} + +; Check the next doubleword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f6(i64 *%src, i64 %a) { +; CHECK: f6: +; CHECK: agfi %r2, -524296 +; CHECK: strvg %r3, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65537 + %swapped = call i64 @llvm.bswap.i64(i64 %a) + store i64 %swapped, i64 *%ptr + ret void +} + +; Check that STRVG allows an index. +define void @f7(i64 %src, i64 %index, i64 %a) { +; CHECK: f7: +; CHECK: strvg %r4, 524287({{%r3,%r2|%r2,%r3}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524287 + %ptr = inttoptr i64 %add2 to i64 * + %swapped = call i64 @llvm.bswap.i64(i64 %a) + store i64 %swapped, i64 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/call-01.ll b/test/CodeGen/SystemZ/call-01.ll new file mode 100644 index 0000000..1b9172b --- /dev/null +++ b/test/CodeGen/SystemZ/call-01.ll @@ -0,0 +1,18 @@ +; Test direct calls. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @bar() + +; We must allocate 160 bytes for the callee and save and restore %r14. +define i64 @f1() { +; CHECK: f1: +; CHECK: stmg %r14, %r15, 112(%r15) +; CHECK: aghi %r15, -160 +; CHECK: brasl %r14, bar@PLT +; CHECK: lmg %r14, %r15, 272(%r15) +; CHECK: br %r14 + %ret = call i64 @bar() + %inc = add i64 %ret, 1 + ret i64 %inc +} diff --git a/test/CodeGen/SystemZ/call-02.ll b/test/CodeGen/SystemZ/call-02.ll new file mode 100644 index 0000000..07dd67b --- /dev/null +++ b/test/CodeGen/SystemZ/call-02.ll @@ -0,0 +1,16 @@ +; Test indirect calls. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; We must allocate 160 bytes for the callee and save and restore %r14. 
+define i64 @f1(i64() *%bar) { +; CHECK: f1: +; CHECK: stmg %r14, %r15, 112(%r15) +; CHECK: aghi %r15, -160 +; CHECK: basr %r14, %r2 +; CHECK: lmg %r14, %r15, 272(%r15) +; CHECK: br %r14 + %ret = call i64 %bar() + %inc = add i64 %ret, 1 + ret i64 %inc +} diff --git a/test/CodeGen/SystemZ/cmpxchg-01.ll b/test/CodeGen/SystemZ/cmpxchg-01.ll new file mode 100644 index 0000000..477bcb0 --- /dev/null +++ b/test/CodeGen/SystemZ/cmpxchg-01.ll @@ -0,0 +1,56 @@ +; Test 8-bit compare and swap. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-MAIN +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT + +; Check compare and swap with a variable. +; - CHECK-MAIN is for the main loop. +; - CHECK-SHIFT makes sure that the negated shift count used by the second +; RLL is set up correctly. The negation is independent of the NILL and L +; tested in CHECK-MAIN. CHECK-SHIFT also checks that %r3 is not modified before +; being used in the RISBG (in contrast to things like atomic addition, +; which shift %r3 left so that %b is at the high end of the word).
+define i8 @f1(i8 %dummy, i8 *%src, i8 %cmp, i8 %swap) { +; CHECK-MAIN: f1: +; CHECK-MAIN: sllg [[SHIFT:%r[1-9]+]], %r3, 3 +; CHECK-MAIN: nill %r3, 65532 +; CHECK-MAIN: l [[OLD:%r[0-9]+]], 0(%r3) +; CHECK-MAIN: [[LOOP:\.[^ ]*]]: +; CHECK-MAIN: rll %r2, [[OLD]], 8([[SHIFT]]) +; CHECK-MAIN: risbg %r4, %r2, 32, 55, 0 +; CHECK-MAIN: cr %r2, %r4 +; CHECK-MAIN: j{{g?}}lh [[EXIT:\.[^ ]*]] +; CHECK-MAIN: risbg %r5, %r2, 32, 55, 0 +; CHECK-MAIN: rll [[NEW:%r[0-9]+]], %r5, -8({{%r[1-9]+}}) +; CHECK-MAIN: cs [[OLD]], [[NEW]], 0(%r3) +; CHECK-MAIN: j{{g?}}lh [[LOOP]] +; CHECK-MAIN: [[EXIT]]: +; CHECK-MAIN-NOT: %r2 +; CHECK-MAIN: br %r14 +; +; CHECK-SHIFT: f1: +; CHECK-SHIFT: sllg [[SHIFT:%r[1-9]+]], %r3, 3 +; CHECK-SHIFT: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] +; CHECK-SHIFT: rll +; CHECK-SHIFT: rll {{%r[0-9]+}}, %r5, -8([[NEGSHIFT]]) + %res = cmpxchg i8 *%src, i8 %cmp, i8 %swap seq_cst + ret i8 %res +} + +; Check compare and swap with constants. We should force the constants into +; registers and use the sequence above. +define i8 @f2(i8 *%src) { +; CHECK: f2: +; CHECK: lhi [[CMP:%r[0-9]+]], 42 +; CHECK: risbg [[CMP]], {{%r[0-9]+}}, 32, 55, 0 +; CHECK: risbg +; CHECK: br %r14 +; +; CHECK-SHIFT: f2: +; CHECK-SHIFT: lhi [[SWAP:%r[0-9]+]], 88 +; CHECK-SHIFT: risbg +; CHECK-SHIFT: risbg [[SWAP]], {{%r[0-9]+}}, 32, 55, 0 +; CHECK-SHIFT: br %r14 + %res = cmpxchg i8 *%src, i8 42, i8 88 seq_cst + ret i8 %res +} diff --git a/test/CodeGen/SystemZ/cmpxchg-02.ll b/test/CodeGen/SystemZ/cmpxchg-02.ll new file mode 100644 index 0000000..cc34523 --- /dev/null +++ b/test/CodeGen/SystemZ/cmpxchg-02.ll @@ -0,0 +1,56 @@ +; Test 16-bit compare and swap. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-MAIN +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT + +; Check compare and swap with a variable. +; - CHECK-MAIN is for the main loop. +; - CHECK-SHIFT makes sure that the negated shift count used by the second +; RLL is set up correctly.
The negation is independent of the NILL and L +; tested in CHECK-MAIN. CHECK-SHIFT also checks that %r3 is not modified before +; being used in the RISBG (in contrast to things like atomic addition, +; which shift %r3 left so that %b is at the high end of the word). +define i16 @f1(i16 %dummy, i16 *%src, i16 %cmp, i16 %swap) { +; CHECK-MAIN: f1: +; CHECK-MAIN: sllg [[SHIFT:%r[1-9]+]], %r3, 3 +; CHECK-MAIN: nill %r3, 65532 +; CHECK-MAIN: l [[OLD:%r[0-9]+]], 0(%r3) +; CHECK-MAIN: [[LOOP:\.[^ ]*]]: +; CHECK-MAIN: rll %r2, [[OLD]], 16([[SHIFT]]) +; CHECK-MAIN: risbg %r4, %r2, 32, 47, 0 +; CHECK-MAIN: cr %r2, %r4 +; CHECK-MAIN: j{{g?}}lh [[EXIT:\.[^ ]*]] +; CHECK-MAIN: risbg %r5, %r2, 32, 47, 0 +; CHECK-MAIN: rll [[NEW:%r[0-9]+]], %r5, -16({{%r[1-9]+}}) +; CHECK-MAIN: cs [[OLD]], [[NEW]], 0(%r3) +; CHECK-MAIN: j{{g?}}lh [[LOOP]] +; CHECK-MAIN: [[EXIT]]: +; CHECK-MAIN-NOT: %r2 +; CHECK-MAIN: br %r14 +; +; CHECK-SHIFT: f1: +; CHECK-SHIFT: sllg [[SHIFT:%r[1-9]+]], %r3, 3 +; CHECK-SHIFT: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]] +; CHECK-SHIFT: rll +; CHECK-SHIFT: rll {{%r[0-9]+}}, %r5, -16([[NEGSHIFT]]) + %res = cmpxchg i16 *%src, i16 %cmp, i16 %swap seq_cst + ret i16 %res +} + +; Check compare and swap with constants. We should force the constants into +; registers and use the sequence above. +define i16 @f2(i16 *%src) { +; CHECK: f2: +; CHECK: lhi [[CMP:%r[0-9]+]], 42 +; CHECK: risbg [[CMP]], {{%r[0-9]+}}, 32, 47, 0 +; CHECK: risbg +; CHECK: br %r14 +; +; CHECK-SHIFT: f2: +; CHECK-SHIFT: lhi [[SWAP:%r[0-9]+]], 88 +; CHECK-SHIFT: risbg +; CHECK-SHIFT: risbg [[SWAP]], {{%r[0-9]+}}, 32, 47, 0 +; CHECK-SHIFT: br %r14 + %res = cmpxchg i16 *%src, i16 42, i16 88 seq_cst + ret i16 %res +} diff --git a/test/CodeGen/SystemZ/cmpxchg-03.ll b/test/CodeGen/SystemZ/cmpxchg-03.ll new file mode 100644 index 0000000..45e224e --- /dev/null +++ b/test/CodeGen/SystemZ/cmpxchg-03.ll @@ -0,0 +1,131 @@ +; Test 32-bit compare and swap.
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the low end of the CS range. +define i32 @f1(i32 %cmp, i32 %swap, i32 *%src) { +; CHECK: f1: +; CHECK: cs %r2, %r3, 0(%r4) +; CHECK: br %r14 + %val = cmpxchg i32 *%src, i32 %cmp, i32 %swap seq_cst + ret i32 %val +} + +; Check the high end of the aligned CS range. +define i32 @f2(i32 %cmp, i32 %swap, i32 *%src) { +; CHECK: f2: +; CHECK: cs %r2, %r3, 4092(%r4) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 1023 + %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst + ret i32 %val +} + +; Check the next word up, which should use CSY instead of CS. +define i32 @f3(i32 %cmp, i32 %swap, i32 *%src) { +; CHECK: f3: +; CHECK: csy %r2, %r3, 4096(%r4) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 1024 + %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst + ret i32 %val +} + +; Check the high end of the aligned CSY range. +define i32 @f4(i32 %cmp, i32 %swap, i32 *%src) { +; CHECK: f4: +; CHECK: csy %r2, %r3, 524284(%r4) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131071 + %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst + ret i32 %val +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i32 @f5(i32 %cmp, i32 %swap, i32 *%src) { +; CHECK: f5: +; CHECK: agfi %r4, 524288 +; CHECK: cs %r2, %r3, 0(%r4) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131072 + %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst + ret i32 %val +} + +; Check the high end of the negative aligned CSY range. +define i32 @f6(i32 %cmp, i32 %swap, i32 *%src) { +; CHECK: f6: +; CHECK: csy %r2, %r3, -4(%r4) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -1 + %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst + ret i32 %val +} + +; Check the low end of the CSY range. 
+define i32 @f7(i32 %cmp, i32 %swap, i32 *%src) { +; CHECK: f7: +; CHECK: csy %r2, %r3, -524288(%r4) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131072 + %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst + ret i32 %val +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define i32 @f8(i32 %cmp, i32 %swap, i32 *%src) { +; CHECK: f8: +; CHECK: agfi %r4, -524292 +; CHECK: cs %r2, %r3, 0(%r4) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131073 + %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst + ret i32 %val +} + +; Check that CS does not allow an index. +define i32 @f9(i32 %cmp, i32 %swap, i64 %src, i64 %index) { +; CHECK: f9: +; CHECK: agr %r4, %r5 +; CHECK: cs %r2, %r3, 0(%r4) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %ptr = inttoptr i64 %add1 to i32 * + %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst + ret i32 %val +} + +; Check that CSY does not allow an index. +define i32 @f10(i32 %cmp, i32 %swap, i64 %src, i64 %index) { +; CHECK: f10: +; CHECK: agr %r4, %r5 +; CHECK: csy %r2, %r3, 4096(%r4) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i32 * + %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst + ret i32 %val +} + +; Check that a constant %cmp value is loaded into a register first. +define i32 @f11(i32 %dummy, i32 %swap, i32 *%ptr) { +; CHECK: f11: +; CHECK: lhi %r2, 1001 +; CHECK: cs %r2, %r3, 0(%r4) +; CHECK: br %r14 + %val = cmpxchg i32 *%ptr, i32 1001, i32 %swap seq_cst + ret i32 %val +} + +; Check that a constant %swap value is loaded into a register first. 
+define i32 @f12(i32 %cmp, i32 *%ptr) { +; CHECK: f12: +; CHECK: lhi [[SWAP:%r[0-9]+]], 1002 +; CHECK: cs %r2, [[SWAP]], 0(%r3) +; CHECK: br %r14 + %val = cmpxchg i32 *%ptr, i32 %cmp, i32 1002 seq_cst + ret i32 %val +} diff --git a/test/CodeGen/SystemZ/cmpxchg-04.ll b/test/CodeGen/SystemZ/cmpxchg-04.ll new file mode 100644 index 0000000..f8969ee --- /dev/null +++ b/test/CodeGen/SystemZ/cmpxchg-04.ll @@ -0,0 +1,98 @@ +; Test 64-bit compare and swap. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check CSG without a displacement. +define i64 @f1(i64 %cmp, i64 %swap, i64 *%src) { +; CHECK: f1: +; CHECK: csg %r2, %r3, 0(%r4) +; CHECK: br %r14 + %val = cmpxchg i64 *%src, i64 %cmp, i64 %swap seq_cst + ret i64 %val +} + +; Check the high end of the aligned CSG range. +define i64 @f2(i64 %cmp, i64 %swap, i64 *%src) { +; CHECK: f2: +; CHECK: csg %r2, %r3, 524280(%r4) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65535 + %val = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst + ret i64 %val +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f3(i64 %cmp, i64 %swap, i64 *%src) { +; CHECK: f3: +; CHECK: agfi %r4, 524288 +; CHECK: csg %r2, %r3, 0(%r4) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65536 + %val = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst + ret i64 %val +} + +; Check the high end of the negative aligned CSG range. +define i64 @f4(i64 %cmp, i64 %swap, i64 *%src) { +; CHECK: f4: +; CHECK: csg %r2, %r3, -8(%r4) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -1 + %val = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst + ret i64 %val +} + +; Check the low end of the CSG range. 
+define i64 @f5(i64 %cmp, i64 %swap, i64 *%src) { +; CHECK: f5: +; CHECK: csg %r2, %r3, -524288(%r4) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65536 + %val = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst + ret i64 %val +} + +; Check the next doubleword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f6(i64 %cmp, i64 %swap, i64 *%src) { +; CHECK: f6: +; CHECK: agfi %r4, -524296 +; CHECK: csg %r2, %r3, 0(%r4) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65537 + %val = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst + ret i64 %val +} + +; Check that CSG does not allow an index. +define i64 @f7(i64 %cmp, i64 %swap, i64 %src, i64 %index) { +; CHECK: f7: +; CHECK: agr %r4, %r5 +; CHECK: csg %r2, %r3, 0(%r4) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %ptr = inttoptr i64 %add1 to i64 * + %val = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst + ret i64 %val +} + +; Check that a constant %cmp value is loaded into a register first. +define i64 @f8(i64 %dummy, i64 %swap, i64 *%ptr) { +; CHECK: f8: +; CHECK: lghi %r2, 1001 +; CHECK: csg %r2, %r3, 0(%r4) +; CHECK: br %r14 + %val = cmpxchg i64 *%ptr, i64 1001, i64 %swap seq_cst + ret i64 %val +} + +; Check that a constant %swap value is loaded into a register first. +define i64 @f9(i64 %cmp, i64 *%ptr) { +; CHECK: f9: +; CHECK: lghi [[SWAP:%r[0-9]+]], 1002 +; CHECK: csg %r2, [[SWAP]], 0(%r3) +; CHECK: br %r14 + %val = cmpxchg i64 *%ptr, i64 %cmp, i64 1002 seq_cst + ret i64 %val +} diff --git a/test/CodeGen/SystemZ/fp-abs-01.ll b/test/CodeGen/SystemZ/fp-abs-01.ll new file mode 100644 index 0000000..81b3fb2 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-abs-01.ll @@ -0,0 +1,40 @@ +; Test floating-point absolute. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test f32. 
+declare float @llvm.fabs.f32(float %f) +define float @f1(float %f) { +; CHECK: f1: +; CHECK: lpebr %f0, %f0 +; CHECK: br %r14 + %res = call float @llvm.fabs.f32(float %f) + ret float %res +} + +; Test f64. +declare double @llvm.fabs.f64(double %f) +define double @f2(double %f) { +; CHECK: f2: +; CHECK: lpdbr %f0, %f0 +; CHECK: br %r14 + %res = call double @llvm.fabs.f64(double %f) + ret double %res +} + +; Test f128. With the loads and stores, a pure absolute would probably +; be better implemented using an NI on the upper byte. Do some extra +; processing so that using FPRs is unequivocally better. +declare fp128 @llvm.fabs.f128(fp128 %f) +define void @f3(fp128 *%ptr, fp128 *%ptr2) { +; CHECK: f3: +; CHECK: lpxbr +; CHECK: dxbr +; CHECK: br %r14 + %orig = load fp128 *%ptr + %abs = call fp128 @llvm.fabs.f128(fp128 %orig) + %op2 = load fp128 *%ptr2 + %res = fdiv fp128 %abs, %op2 + store fp128 %res, fp128 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/fp-abs-02.ll b/test/CodeGen/SystemZ/fp-abs-02.ll new file mode 100644 index 0000000..513d49c --- /dev/null +++ b/test/CodeGen/SystemZ/fp-abs-02.ll @@ -0,0 +1,43 @@ +; Test negated floating-point absolute. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test f32. +declare float @llvm.fabs.f32(float %f) +define float @f1(float %f) { +; CHECK: f1: +; CHECK: lnebr %f0, %f0 +; CHECK: br %r14 + %abs = call float @llvm.fabs.f32(float %f) + %res = fsub float -0.0, %abs + ret float %res +} + +; Test f64. +declare double @llvm.fabs.f64(double %f) +define double @f2(double %f) { +; CHECK: f2: +; CHECK: lndbr %f0, %f0 +; CHECK: br %r14 + %abs = call double @llvm.fabs.f64(double %f) + %res = fsub double -0.0, %abs + ret double %res +} + +; Test f128. With the loads and stores, a pure negative-absolute would +; probably be better implemented using an OI on the upper byte. Do some +; extra processing so that using FPRs is unequivocally better. 
+declare fp128 @llvm.fabs.f128(fp128 %f) +define void @f3(fp128 *%ptr, fp128 *%ptr2) { +; CHECK: f3: +; CHECK: lnxbr +; CHECK: dxbr +; CHECK: br %r14 + %orig = load fp128 *%ptr + %abs = call fp128 @llvm.fabs.f128(fp128 %orig) + %negabs = fsub fp128 0xL00000000000000008000000000000000, %abs + %op2 = load fp128 *%ptr2 + %res = fdiv fp128 %negabs, %op2 + store fp128 %res, fp128 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/fp-add-01.ll b/test/CodeGen/SystemZ/fp-add-01.ll new file mode 100644 index 0000000..7ce0777 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-add-01.ll @@ -0,0 +1,71 @@ +; Test 32-bit floating-point addition. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check register addition. +define float @f1(float %f1, float %f2) { +; CHECK: f1: +; CHECK: aebr %f0, %f2 +; CHECK: br %r14 + %res = fadd float %f1, %f2 + ret float %res +} + +; Check the low end of the AEB range. +define float @f2(float %f1, float *%ptr) { +; CHECK: f2: +; CHECK: aeb %f0, 0(%r2) +; CHECK: br %r14 + %f2 = load float *%ptr + %res = fadd float %f1, %f2 + ret float %res +} + +; Check the high end of the aligned AEB range. +define float @f3(float %f1, float *%base) { +; CHECK: f3: +; CHECK: aeb %f0, 4092(%r2) +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 1023 + %f2 = load float *%ptr + %res = fadd float %f1, %f2 + ret float %res +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define float @f4(float %f1, float *%base) { +; CHECK: f4: +; CHECK: aghi %r2, 4096 +; CHECK: aeb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 1024 + %f2 = load float *%ptr + %res = fadd float %f1, %f2 + ret float %res +} + +; Check negative displacements, which also need separate address logic. 
+define float @f5(float %f1, float *%base) { +; CHECK: f5: +; CHECK: aghi %r2, -4 +; CHECK: aeb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 -1 + %f2 = load float *%ptr + %res = fadd float %f1, %f2 + ret float %res +} + +; Check that AEB allows indices. +define float @f6(float %f1, float *%base, i64 %index) { +; CHECK: f6: +; CHECK: sllg %r1, %r3, 2 +; CHECK: aeb %f0, 400(%r1,%r2) +; CHECK: br %r14 + %ptr1 = getelementptr float *%base, i64 %index + %ptr2 = getelementptr float *%ptr1, i64 100 + %f2 = load float *%ptr2 + %res = fadd float %f1, %f2 + ret float %res +} diff --git a/test/CodeGen/SystemZ/fp-add-02.ll b/test/CodeGen/SystemZ/fp-add-02.ll new file mode 100644 index 0000000..08eb90e --- /dev/null +++ b/test/CodeGen/SystemZ/fp-add-02.ll @@ -0,0 +1,71 @@ +; Test 64-bit floating-point addition. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check register addition. +define double @f1(double %f1, double %f2) { +; CHECK: f1: +; CHECK: adbr %f0, %f2 +; CHECK: br %r14 + %res = fadd double %f1, %f2 + ret double %res +} + +; Check the low end of the ADB range. +define double @f2(double %f1, double *%ptr) { +; CHECK: f2: +; CHECK: adb %f0, 0(%r2) +; CHECK: br %r14 + %f2 = load double *%ptr + %res = fadd double %f1, %f2 + ret double %res +} + +; Check the high end of the aligned ADB range. +define double @f3(double %f1, double *%base) { +; CHECK: f3: +; CHECK: adb %f0, 4088(%r2) +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 511 + %f2 = load double *%ptr + %res = fadd double %f1, %f2 + ret double %res +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define double @f4(double %f1, double *%base) { +; CHECK: f4: +; CHECK: aghi %r2, 4096 +; CHECK: adb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 512 + %f2 = load double *%ptr + %res = fadd double %f1, %f2 + ret double %res +} + +; Check negative displacements, which also need separate address logic. +define double @f5(double %f1, double *%base) { +; CHECK: f5: +; CHECK: aghi %r2, -8 +; CHECK: adb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 -1 + %f2 = load double *%ptr + %res = fadd double %f1, %f2 + ret double %res +} + +; Check that ADB allows indices. +define double @f6(double %f1, double *%base, i64 %index) { +; CHECK: f6: +; CHECK: sllg %r1, %r3, 3 +; CHECK: adb %f0, 800(%r1,%r2) +; CHECK: br %r14 + %ptr1 = getelementptr double *%base, i64 %index + %ptr2 = getelementptr double *%ptr1, i64 100 + %f2 = load double *%ptr2 + %res = fadd double %f1, %f2 + ret double %res +} diff --git a/test/CodeGen/SystemZ/fp-add-03.ll b/test/CodeGen/SystemZ/fp-add-03.ll new file mode 100644 index 0000000..13ffb02 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-add-03.ll @@ -0,0 +1,20 @@ +; Test 128-bit floating-point addition. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; There is no memory form of 128-bit addition. +define void @f1(fp128 *%ptr, float %f2) { +; CHECK: f1: +; CHECK: lxebr %f0, %f0 +; CHECK: ld %f1, 0(%r2) +; CHECK: ld %f3, 8(%r2) +; CHECK: axbr %f1, %f0 +; CHECK: std %f1, 0(%r2) +; CHECK: std %f3, 8(%r2) +; CHECK: br %r14 + %f1 = load fp128 *%ptr + %f2x = fpext float %f2 to fp128 + %sum = fadd fp128 %f1, %f2x + store fp128 %sum, fp128 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/fp-cmp-01.ll b/test/CodeGen/SystemZ/fp-cmp-01.ll new file mode 100644 index 0000000..b80a715 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-cmp-01.ll @@ -0,0 +1,89 @@ +; Test 32-bit floating-point comparison. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check comparison with registers. 
+define i64 @f1(i64 %a, i64 %b, float %f1, float %f2) { +; CHECK: f1: +; CHECK: cebr %f0, %f2 +; CHECK-NEXT: j{{g?}}e +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %cond = fcmp oeq float %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check the low end of the CEB range. +define i64 @f2(i64 %a, i64 %b, float %f1, float *%ptr) { +; CHECK: f2: +; CHECK: ceb %f0, 0(%r4) +; CHECK-NEXT: j{{g?}}e +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %f2 = load float *%ptr + %cond = fcmp oeq float %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check the high end of the aligned CEB range. +define i64 @f3(i64 %a, i64 %b, float %f1, float *%base) { +; CHECK: f3: +; CHECK: ceb %f0, 4092(%r4) +; CHECK-NEXT: j{{g?}}e +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 1023 + %f2 = load float *%ptr + %cond = fcmp oeq float %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f4(i64 %a, i64 %b, float %f1, float *%base) { +; CHECK: f4: +; CHECK: aghi %r4, 4096 +; CHECK: ceb %f0, 0(%r4) +; CHECK-NEXT: j{{g?}}e +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 1024 + %f2 = load float *%ptr + %cond = fcmp oeq float %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check negative displacements, which also need separate address logic. +define i64 @f5(i64 %a, i64 %b, float %f1, float *%base) { +; CHECK: f5: +; CHECK: aghi %r4, -4 +; CHECK: ceb %f0, 0(%r4) +; CHECK-NEXT: j{{g?}}e +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 -1 + %f2 = load float *%ptr + %cond = fcmp oeq float %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check that CEB allows indices. 
+define i64 @f6(i64 %a, i64 %b, float %f1, float *%base, i64 %index) { +; CHECK: f6: +; CHECK: sllg %r1, %r5, 2 +; CHECK: ceb %f0, 400(%r1,%r4) +; CHECK-NEXT: j{{g?}}e +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %ptr1 = getelementptr float *%base, i64 %index + %ptr2 = getelementptr float *%ptr1, i64 100 + %f2 = load float *%ptr2 + %cond = fcmp oeq float %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/fp-cmp-02.ll b/test/CodeGen/SystemZ/fp-cmp-02.ll new file mode 100644 index 0000000..8227308 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-cmp-02.ll @@ -0,0 +1,89 @@ +; Test 64-bit floating-point comparison. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check comparison with registers. +define i64 @f1(i64 %a, i64 %b, double %f1, double %f2) { +; CHECK: f1: +; CHECK: cdbr %f0, %f2 +; CHECK-NEXT: j{{g?}}e +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %cond = fcmp oeq double %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check the low end of the CDB range. +define i64 @f2(i64 %a, i64 %b, double %f1, double *%ptr) { +; CHECK: f2: +; CHECK: cdb %f0, 0(%r4) +; CHECK-NEXT: j{{g?}}e +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %f2 = load double *%ptr + %cond = fcmp oeq double %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check the high end of the aligned CDB range. +define i64 @f3(i64 %a, i64 %b, double %f1, double *%base) { +; CHECK: f3: +; CHECK: cdb %f0, 4088(%r4) +; CHECK-NEXT: j{{g?}}e +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 511 + %f2 = load double *%ptr + %cond = fcmp oeq double %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define i64 @f4(i64 %a, i64 %b, double %f1, double *%base) { +; CHECK: f4: +; CHECK: aghi %r4, 4096 +; CHECK: cdb %f0, 0(%r4) +; CHECK-NEXT: j{{g?}}e +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 512 + %f2 = load double *%ptr + %cond = fcmp oeq double %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check negative displacements, which also need separate address logic. +define i64 @f5(i64 %a, i64 %b, double %f1, double *%base) { +; CHECK: f5: +; CHECK: aghi %r4, -8 +; CHECK: cdb %f0, 0(%r4) +; CHECK-NEXT: j{{g?}}e +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 -1 + %f2 = load double *%ptr + %cond = fcmp oeq double %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check that CDB allows indices. +define i64 @f6(i64 %a, i64 %b, double %f1, double *%base, i64 %index) { +; CHECK: f6: +; CHECK: sllg %r1, %r5, 3 +; CHECK: cdb %f0, 800(%r1,%r4) +; CHECK-NEXT: j{{g?}}e +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %ptr1 = getelementptr double *%base, i64 %index + %ptr2 = getelementptr double *%ptr1, i64 100 + %f2 = load double *%ptr2 + %cond = fcmp oeq double %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/fp-cmp-03.ll b/test/CodeGen/SystemZ/fp-cmp-03.ll new file mode 100644 index 0000000..fd12c93 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-cmp-03.ll @@ -0,0 +1,20 @@ +; Test 128-bit floating-point comparison. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; There is no memory form of 128-bit comparison. 
+define i64 @f1(i64 %a, i64 %b, fp128 *%ptr, float %f2) { +; CHECK: f1: +; CHECK: lxebr %f0, %f0 +; CHECK: ld %f1, 0(%r4) +; CHECK: ld %f3, 8(%r4) +; CHECK: cxbr %f1, %f0 +; CHECK-NEXT: j{{g?}}e +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %f2x = fpext float %f2 to fp128 + %f1 = load fp128 *%ptr + %cond = fcmp oeq fp128 %f1, %f2x + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/fp-const-01.ll b/test/CodeGen/SystemZ/fp-const-01.ll new file mode 100644 index 0000000..65209d6 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-const-01.ll @@ -0,0 +1,30 @@ +; Test loads of floating-point zero. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test f32. +define float @f1() { +; CHECK: f1: +; CHECK: lzer %f0 +; CHECK: br %r14 + ret float 0.0 +} + +; Test f64. +define double @f2() { +; CHECK: f2: +; CHECK: lzdr %f0 +; CHECK: br %r14 + ret double 0.0 +} + +; Test f128. +define void @f3(fp128 *%x) { +; CHECK: f3: +; CHECK: lzxr %f0 +; CHECK: std %f0, 0(%r2) +; CHECK: std %f2, 8(%r2) +; CHECK: br %r14 + store fp128 0xL00000000000000000000000000000000, fp128 *%x + ret void +} diff --git a/test/CodeGen/SystemZ/fp-const-02.ll b/test/CodeGen/SystemZ/fp-const-02.ll new file mode 100644 index 0000000..2dedf54 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-const-02.ll @@ -0,0 +1,31 @@ +; Test loads of negative floating-point zero. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test f32. +define float @f1() { +; CHECK: f1: +; CHECK: lzer [[REGISTER:%f[0-5]+]] +; CHECK: lcebr %f0, [[REGISTER]] +; CHECK: br %r14 + ret float -0.0 +} + +; Test f64. +define double @f2() { +; CHECK: f2: +; CHECK: lzdr [[REGISTER:%f[0-5]+]] +; CHECK: lcdbr %f0, [[REGISTER]] +; CHECK: br %r14 + ret double -0.0 +} + +; Test f128. 
+define void @f3(fp128 *%x) { +; CHECK: f3: +; CHECK: lzxr [[REGISTER:%f[0-5]+]] +; CHECK: lcxbr %f0, [[REGISTER]] +; CHECK: br %r14 + store fp128 0xL00000000000000008000000000000000, fp128 *%x + ret void +} diff --git a/test/CodeGen/SystemZ/fp-const-03.ll b/test/CodeGen/SystemZ/fp-const-03.ll new file mode 100644 index 0000000..4c287e4 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-const-03.ll @@ -0,0 +1,14 @@ +; Test loads of 32-bit floating-point constants. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CONST + +define float @f1() { +; CHECK: f1: +; CHECK: larl [[REGISTER:%r[1-5]]], {{.*}} +; CHECK: le %f0, 0([[REGISTER]]) +; CHECK: br %r14 +; +; CONST: .long 1065353217 + ret float 0x3ff0000020000000 +} diff --git a/test/CodeGen/SystemZ/fp-const-04.ll b/test/CodeGen/SystemZ/fp-const-04.ll new file mode 100644 index 0000000..847c380 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-const-04.ll @@ -0,0 +1,15 @@ +; Test loads of 64-bit floating-point constants that can be represented +; as 32-bit constants. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CONST + +define double @f1() { +; CHECK: f1: +; CHECK: larl [[REGISTER:%r[1-5]]], {{.*}} +; CHECK: ldeb %f0, 0([[REGISTER]]) +; CHECK: br %r14 +; +; CONST: .long 1065353217 + ret double 0x3ff0000020000000 +} diff --git a/test/CodeGen/SystemZ/fp-const-05.ll b/test/CodeGen/SystemZ/fp-const-05.ll new file mode 100644 index 0000000..48f84ce --- /dev/null +++ b/test/CodeGen/SystemZ/fp-const-05.ll @@ -0,0 +1,18 @@ +; Test loads of 128-bit floating-point constants that can be represented +; as 32-bit constants. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CONST + +define void @f1(fp128 *%x) { +; CHECK: f1: +; CHECK: larl [[REGISTER:%r[1-5]+]], {{.*}} +; CHECK: lxeb %f0, 0([[REGISTER]]) +; CHECK: std %f0, 0(%r2) +; CHECK: std %f2, 8(%r2) +; CHECK: br %r14 +; +; CONST: .long 1065353217 + store fp128 0xL00000000000000003fff000002000000, fp128 *%x + ret void +} diff --git a/test/CodeGen/SystemZ/fp-const-06.ll b/test/CodeGen/SystemZ/fp-const-06.ll new file mode 100644 index 0000000..1da3d5e --- /dev/null +++ b/test/CodeGen/SystemZ/fp-const-06.ll @@ -0,0 +1,14 @@ +; Test loads of 64-bit floating-point constants. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CONST + +define double @f1() { +; CHECK: f1: +; CHECK: larl [[REGISTER:%r[1-5]+]], {{.*}} +; CHECK: ld %f0, 0([[REGISTER]]) +; CHECK: br %r14 +; +; CONST: .quad 4607182419068452864 + ret double 0x3ff0000010000000 +} diff --git a/test/CodeGen/SystemZ/fp-const-07.ll b/test/CodeGen/SystemZ/fp-const-07.ll new file mode 100644 index 0000000..5a10845 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-const-07.ll @@ -0,0 +1,18 @@ +; Test loads of 128-bit floating-point constants that can be represented +; as 64-bit constants. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CONST + +define void @f1(fp128 *%x) { +; CHECK: f1: +; CHECK: larl [[REGISTER:%r[1-5]+]], {{.*}} +; CHECK: lxdb %f0, 0([[REGISTER]]) +; CHECK: std %f0, 0(%r2) +; CHECK: std %f2, 8(%r2) +; CHECK: br %r14 +; +; CONST: .quad 4607182419068452864 + store fp128 0xL00000000000000003fff000001000000, fp128 *%x + ret void +} diff --git a/test/CodeGen/SystemZ/fp-const-08.ll b/test/CodeGen/SystemZ/fp-const-08.ll new file mode 100644 index 0000000..6a8a1ab --- /dev/null +++ b/test/CodeGen/SystemZ/fp-const-08.ll @@ -0,0 +1,21 @@ +; Test loads of 128-bit floating-point constants. This value would actually +; fit within the x86 80-bit format, so the test makes sure we don't try to +; extend from an f80. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CONST + +define void @f1(fp128 *%x) { +; CHECK: f1: +; CHECK: larl [[REGISTER:%r[1-5]+]], {{.*}} +; CHECK: ld %f0, 0([[REGISTER]]) +; CHECK: ld %f2, 8([[REGISTER]]) +; CHECK: std %f0, 0(%r2) +; CHECK: std %f2, 8(%r2) +; CHECK: br %r14 +; +; CONST: .quad 4611404543450677248 +; CONST: .quad 576460752303423488 + store fp128 0xL08000000000000003fff000000000000, fp128 *%x + ret void +} diff --git a/test/CodeGen/SystemZ/fp-const-09.ll b/test/CodeGen/SystemZ/fp-const-09.ll new file mode 100644 index 0000000..435dcba --- /dev/null +++ b/test/CodeGen/SystemZ/fp-const-09.ll @@ -0,0 +1,20 @@ +; Test loads of 128-bit floating-point constants in which the low bit of +; the significand is set. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CONST + +define void @f1(fp128 *%x) { +; CHECK: f1: +; CHECK: larl [[REGISTER:%r[1-5]+]], {{.*}} +; CHECK: ld %f0, 0([[REGISTER]]) +; CHECK: ld %f2, 8([[REGISTER]]) +; CHECK: std %f0, 0(%r2) +; CHECK: std %f2, 8(%r2) +; CHECK: br %r14 +; +; CONST: .quad 4611404543450677248 +; CONST: .quad 1 + store fp128 0xL00000000000000013fff000000000000, fp128 *%x + ret void +} diff --git a/test/CodeGen/SystemZ/fp-conv-01.ll b/test/CodeGen/SystemZ/fp-conv-01.ll new file mode 100644 index 0000000..6c8ef48 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-conv-01.ll @@ -0,0 +1,61 @@ +; Test floating-point truncations. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test f64->f32. +define float @f1(double %d1, double %d2) { +; CHECK: f1: +; CHECK: ledbr %f0, %f2 +; CHECK: br %r14 + %res = fptrunc double %d2 to float + ret float %res +} + +; Test f128->f32. +define float @f2(fp128 *%ptr) { +; CHECK: f2: +; CHECK: lexbr %f0, %f0 +; CHECK: br %r14 + %val = load fp128 *%ptr + %res = fptrunc fp128 %val to float + ret float %res +} + +; Make sure that we don't use %f0 as the destination of LEXBR when %f2 +; is still live. +define void @f3(float *%dst, fp128 *%ptr, float %d1, float %d2) { +; CHECK: f3: +; CHECK: lexbr %f1, %f1 +; CHECK: aebr %f1, %f2 +; CHECK: ste %f1, 0(%r2) +; CHECK: br %r14 + %val = load fp128 *%ptr + %conv = fptrunc fp128 %val to float + %res = fadd float %conv, %d2 + store float %res, float *%dst + ret void +} + +; Test f128->f64. +define double @f4(fp128 *%ptr) { +; CHECK: f4: +; CHECK: ldxbr %f0, %f0 +; CHECK: br %r14 + %val = load fp128 *%ptr + %res = fptrunc fp128 %val to double + ret double %res +} + +; Like f3, but for f128->f64. 
+define void @f5(double *%dst, fp128 *%ptr, double %d1, double %d2) { +; CHECK: f5: +; CHECK: ldxbr %f1, %f1 +; CHECK: adbr %f1, %f2 +; CHECK: std %f1, 0(%r2) +; CHECK: br %r14 + %val = load fp128 *%ptr + %conv = fptrunc fp128 %val to double + %res = fadd double %conv, %d2 + store double %res, double *%dst + ret void +} diff --git a/test/CodeGen/SystemZ/fp-conv-02.ll b/test/CodeGen/SystemZ/fp-conv-02.ll new file mode 100644 index 0000000..f284e1d --- /dev/null +++ b/test/CodeGen/SystemZ/fp-conv-02.ll @@ -0,0 +1,71 @@ +; Test extensions of f32 to f64. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check register extension. +define double @f1(float %val) { +; CHECK: f1: +; CHECK: ldebr %f0, %f0 +; CHECK: br %r14 + %res = fpext float %val to double + ret double %res +} + +; Check the low end of the LDEB range. +define double @f2(float *%ptr) { +; CHECK: f2: +; CHECK: ldeb %f0, 0(%r2) +; CHECK: br %r14 + %val = load float *%ptr + %res = fpext float %val to double + ret double %res +} + +; Check the high end of the aligned LDEB range. +define double @f3(float *%base) { +; CHECK: f3: +; CHECK: ldeb %f0, 4092(%r2) +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 1023 + %val = load float *%ptr + %res = fpext float %val to double + ret double %res +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define double @f4(float *%base) { +; CHECK: f4: +; CHECK: aghi %r2, 4096 +; CHECK: ldeb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 1024 + %val = load float *%ptr + %res = fpext float %val to double + ret double %res +} + +; Check negative displacements, which also need separate address logic. 
+define double @f5(float *%base) { +; CHECK: f5: +; CHECK: aghi %r2, -4 +; CHECK: ldeb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 -1 + %val = load float *%ptr + %res = fpext float %val to double + ret double %res +} + +; Check that LDEB allows indices. +define double @f6(float *%base, i64 %index) { +; CHECK: f6: +; CHECK: sllg %r1, %r3, 2 +; CHECK: ldeb %f0, 400(%r1,%r2) +; CHECK: br %r14 + %ptr1 = getelementptr float *%base, i64 %index + %ptr2 = getelementptr float *%ptr1, i64 100 + %val = load float *%ptr2 + %res = fpext float %val to double + ret double %res +} diff --git a/test/CodeGen/SystemZ/fp-conv-03.ll b/test/CodeGen/SystemZ/fp-conv-03.ll new file mode 100644 index 0000000..703a141 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-conv-03.ll @@ -0,0 +1,89 @@ +; Test extensions of f32 to f128. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check register extension. +define void @f1(fp128 *%dst, float %val) { +; CHECK: f1: +; CHECK: lxebr %f0, %f0 +; CHECK: std %f0, 0(%r2) +; CHECK: std %f2, 8(%r2) +; CHECK: br %r14 + %res = fpext float %val to fp128 + store fp128 %res, fp128 *%dst + ret void +} + +; Check the low end of the LXEB range. +define void @f2(fp128 *%dst, float *%ptr) { +; CHECK: f2: +; CHECK: lxeb %f0, 0(%r3) +; CHECK: std %f0, 0(%r2) +; CHECK: std %f2, 8(%r2) +; CHECK: br %r14 + %val = load float *%ptr + %res = fpext float %val to fp128 + store fp128 %res, fp128 *%dst + ret void +} + +; Check the high end of the aligned LXEB range. +define void @f3(fp128 *%dst, float *%base) { +; CHECK: f3: +; CHECK: lxeb %f0, 4092(%r3) +; CHECK: std %f0, 0(%r2) +; CHECK: std %f2, 8(%r2) +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 1023 + %val = load float *%ptr + %res = fpext float %val to fp128 + store fp128 %res, fp128 *%dst + ret void +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define void @f4(fp128 *%dst, float *%base) { +; CHECK: f4: +; CHECK: aghi %r3, 4096 +; CHECK: lxeb %f0, 0(%r3) +; CHECK: std %f0, 0(%r2) +; CHECK: std %f2, 8(%r2) +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 1024 + %val = load float *%ptr + %res = fpext float %val to fp128 + store fp128 %res, fp128 *%dst + ret void +} + +; Check negative displacements, which also need separate address logic. +define void @f5(fp128 *%dst, float *%base) { +; CHECK: f5: +; CHECK: aghi %r3, -4 +; CHECK: lxeb %f0, 0(%r3) +; CHECK: std %f0, 0(%r2) +; CHECK: std %f2, 8(%r2) +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 -1 + %val = load float *%ptr + %res = fpext float %val to fp128 + store fp128 %res, fp128 *%dst + ret void +} + +; Check that LXEB allows indices. +define void @f6(fp128 *%dst, float *%base, i64 %index) { +; CHECK: f6: +; CHECK: sllg %r1, %r4, 2 +; CHECK: lxeb %f0, 400(%r1,%r3) +; CHECK: std %f0, 0(%r2) +; CHECK: std %f2, 8(%r2) +; CHECK: br %r14 + %ptr1 = getelementptr float *%base, i64 %index + %ptr2 = getelementptr float *%ptr1, i64 100 + %val = load float *%ptr2 + %res = fpext float %val to fp128 + store fp128 %res, fp128 *%dst + ret void +} diff --git a/test/CodeGen/SystemZ/fp-conv-04.ll b/test/CodeGen/SystemZ/fp-conv-04.ll new file mode 100644 index 0000000..b7b5166 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-conv-04.ll @@ -0,0 +1,89 @@ +; Test extensions of f64 to f128. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check register extension. +define void @f1(fp128 *%dst, double %val) { +; CHECK: f1: +; CHECK: lxdbr %f0, %f0 +; CHECK: std %f0, 0(%r2) +; CHECK: std %f2, 8(%r2) +; CHECK: br %r14 + %res = fpext double %val to fp128 + store fp128 %res, fp128 *%dst + ret void +} + +; Check the low end of the LXDB range. 
+define void @f2(fp128 *%dst, double *%ptr) { +; CHECK: f2: +; CHECK: lxdb %f0, 0(%r3) +; CHECK: std %f0, 0(%r2) +; CHECK: std %f2, 8(%r2) +; CHECK: br %r14 + %val = load double *%ptr + %res = fpext double %val to fp128 + store fp128 %res, fp128 *%dst + ret void +} + +; Check the high end of the aligned LXDB range. +define void @f3(fp128 *%dst, double *%base) { +; CHECK: f3: +; CHECK: lxdb %f0, 4088(%r3) +; CHECK: std %f0, 0(%r2) +; CHECK: std %f2, 8(%r2) +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 511 + %val = load double *%ptr + %res = fpext double %val to fp128 + store fp128 %res, fp128 *%dst + ret void +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f4(fp128 *%dst, double *%base) { +; CHECK: f4: +; CHECK: aghi %r3, 4096 +; CHECK: lxdb %f0, 0(%r3) +; CHECK: std %f0, 0(%r2) +; CHECK: std %f2, 8(%r2) +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 512 + %val = load double *%ptr + %res = fpext double %val to fp128 + store fp128 %res, fp128 *%dst + ret void +} + +; Check negative displacements, which also need separate address logic. +define void @f5(fp128 *%dst, double *%base) { +; CHECK: f5: +; CHECK: aghi %r3, -8 +; CHECK: lxdb %f0, 0(%r3) +; CHECK: std %f0, 0(%r2) +; CHECK: std %f2, 8(%r2) +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 -1 + %val = load double *%ptr + %res = fpext double %val to fp128 + store fp128 %res, fp128 *%dst + ret void +} + +; Check that LXDB allows indices. 
+define void @f6(fp128 *%dst, double *%base, i64 %index) { +; CHECK: f6: +; CHECK: sllg %r1, %r4, 3 +; CHECK: lxdb %f0, 800(%r1,%r3) +; CHECK: std %f0, 0(%r2) +; CHECK: std %f2, 8(%r2) +; CHECK: br %r14 + %ptr1 = getelementptr double *%base, i64 %index + %ptr2 = getelementptr double *%ptr1, i64 100 + %val = load double *%ptr2 + %res = fpext double %val to fp128 + store fp128 %res, fp128 *%dst + ret void +} diff --git a/test/CodeGen/SystemZ/fp-conv-05.ll b/test/CodeGen/SystemZ/fp-conv-05.ll new file mode 100644 index 0000000..2d88732 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-conv-05.ll @@ -0,0 +1,33 @@ +; Test conversions of signed i32s to floating-point values. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check i32->f32. +define float @f1(i32 %i) { +; CHECK: f1: +; CHECK: cefbr %f0, %r2 +; CHECK: br %r14 + %conv = sitofp i32 %i to float + ret float %conv +} + +; Check i32->f64. +define double @f2(i32 %i) { +; CHECK: f2: +; CHECK: cdfbr %f0, %r2 +; CHECK: br %r14 + %conv = sitofp i32 %i to double + ret double %conv +} + +; Check i32->f128. +define void @f3(i32 %i, fp128 *%dst) { +; CHECK: f3: +; CHECK: cxfbr %f0, %r2 +; CHECK: std %f0, 0(%r3) +; CHECK: std %f2, 8(%r3) +; CHECK: br %r14 + %conv = sitofp i32 %i to fp128 + store fp128 %conv, fp128 *%dst + ret void +} diff --git a/test/CodeGen/SystemZ/fp-conv-06.ll b/test/CodeGen/SystemZ/fp-conv-06.ll new file mode 100644 index 0000000..1b39b67 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-conv-06.ll @@ -0,0 +1,37 @@ +; Test conversions of unsigned i32s to floating-point values. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check i32->f32. There is no native instruction, so we must promote +; to i64 first. +define float @f1(i32 %i) { +; CHECK: f1: +; CHECK: llgfr [[REGISTER:%r[0-5]]], %r2 +; CHECK: cegbr %f0, [[REGISTER]] +; CHECK: br %r14 + %conv = uitofp i32 %i to float + ret float %conv +} + +; Check i32->f64. 
+define double @f2(i32 %i) { +; CHECK: f2: +; CHECK: llgfr [[REGISTER:%r[0-5]]], %r2 +; CHECK: cdgbr %f0, [[REGISTER]] +; CHECK: br %r14 + %conv = uitofp i32 %i to double + ret double %conv +} + +; Check i32->f128. +define void @f3(i32 %i, fp128 *%dst) { +; CHECK: f3: +; CHECK: llgfr [[REGISTER:%r[0-5]]], %r2 +; CHECK: cxgbr %f0, [[REGISTER]] +; CHECK: std %f0, 0(%r3) +; CHECK: std %f2, 8(%r3) +; CHECK: br %r14 + %conv = uitofp i32 %i to fp128 + store fp128 %conv, fp128 *%dst + ret void +} diff --git a/test/CodeGen/SystemZ/fp-conv-07.ll b/test/CodeGen/SystemZ/fp-conv-07.ll new file mode 100644 index 0000000..0ebbd37 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-conv-07.ll @@ -0,0 +1,33 @@ +; Test conversions of signed i64s to floating-point values. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test i64->f32. +define float @f1(i64 %i) { +; CHECK: f1: +; CHECK: cegbr %f0, %r2 +; CHECK: br %r14 + %conv = sitofp i64 %i to float + ret float %conv +} + +; Test i64->f64. +define double @f2(i64 %i) { +; CHECK: f2: +; CHECK: cdgbr %f0, %r2 +; CHECK: br %r14 + %conv = sitofp i64 %i to double + ret double %conv +} + +; Test i64->f128. +define void @f3(i64 %i, fp128 *%dst) { +; CHECK: f3: +; CHECK: cxgbr %f0, %r2 +; CHECK: std %f0, 0(%r3) +; CHECK: std %f2, 8(%r3) +; CHECK: br %r14 + %conv = sitofp i64 %i to fp128 + store fp128 %conv, fp128 *%dst + ret void +} diff --git a/test/CodeGen/SystemZ/fp-conv-08.ll b/test/CodeGen/SystemZ/fp-conv-08.ll new file mode 100644 index 0000000..20c4e30 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-conv-08.ll @@ -0,0 +1,35 @@ +; Test conversions of unsigned i64s to floating-point values. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test i64->f32. There's no native support for unsigned i64-to-fp conversions, +; but we should be able to implement them using signed i64-to-fp conversions. 
+define float @f1(i64 %i) { +; CHECK: f1: +; CHECK: cegbr +; CHECK: aebr +; CHECK: br %r14 + %conv = uitofp i64 %i to float + ret float %conv +} + +; Test i64->f64. +define double @f2(i64 %i) { +; CHECK: f2: +; CHECK: ldgr +; CHECK: adbr +; CHECK: br %r14 + %conv = uitofp i64 %i to double + ret double %conv +} + +; Test i64->f128. +define void @f3(i64 %i, fp128 *%dst) { +; CHECK: f3: +; CHECK: cxgbr +; CHECK: axbr +; CHECK: br %r14 + %conv = uitofp i64 %i to fp128 + store fp128 %conv, fp128 *%dst + ret void +} diff --git a/test/CodeGen/SystemZ/fp-conv-09.ll b/test/CodeGen/SystemZ/fp-conv-09.ll new file mode 100644 index 0000000..e3c0352 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-conv-09.ll @@ -0,0 +1,33 @@ +; Test conversion of floating-point values to signed i32s. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test f32->i32. +define i32 @f1(float %f) { +; CHECK: f1: +; CHECK: cfebr %r2, 5, %f0 +; CHECK: br %r14 + %conv = fptosi float %f to i32 + ret i32 %conv +} + +; Test f64->i32. +define i32 @f2(double %f) { +; CHECK: f2: +; CHECK: cfdbr %r2, 5, %f0 +; CHECK: br %r14 + %conv = fptosi double %f to i32 + ret i32 %conv +} + +; Test f128->i32. +define i32 @f3(fp128 *%src) { +; CHECK: f3: +; CHECK: ld %f0, 0(%r2) +; CHECK: ld %f2, 8(%r2) +; CHECK: cfxbr %r2, 5, %f0 +; CHECK: br %r14 + %f = load fp128 *%src + %conv = fptosi fp128 %f to i32 + ret i32 %conv +} diff --git a/test/CodeGen/SystemZ/fp-conv-10.ll b/test/CodeGen/SystemZ/fp-conv-10.ll new file mode 100644 index 0000000..bb8878b --- /dev/null +++ b/test/CodeGen/SystemZ/fp-conv-10.ll @@ -0,0 +1,45 @@ +; Test conversion of floating-point values to unsigned i32s. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; z10 doesn't have native support for unsigned fp-to-i32 conversions; +; they were added in z196 as the Convert to Logical family of instructions. 
+; Promoting to i64 doesn't generate an inexact condition for values that are +; outside the i32 range but in the i64 range, so use the default expansion. + +; Test f32->i32. +define i32 @f1(float %f) { +; CHECK: f1: +; CHECK: cebr +; CHECK: sebr +; CHECK: cfebr +; CHECK: xilf +; CHECK: br %r14 + %conv = fptoui float %f to i32 + ret i32 %conv +} + +; Test f64->i32. +define i32 @f2(double %f) { +; CHECK: f2: +; CHECK: cdbr +; CHECK: sdbr +; CHECK: cfdbr +; CHECK: xilf +; CHECK: br %r14 + %conv = fptoui double %f to i32 + ret i32 %conv +} + +; Test f128->i32. +define i32 @f3(fp128 *%src) { +; CHECK: f3: +; CHECK: cxbr +; CHECK: sxbr +; CHECK: cfxbr +; CHECK: xilf +; CHECK: br %r14 + %f = load fp128 *%src + %conv = fptoui fp128 %f to i32 + ret i32 %conv +} diff --git a/test/CodeGen/SystemZ/fp-conv-11.ll b/test/CodeGen/SystemZ/fp-conv-11.ll new file mode 100644 index 0000000..2a36cb9 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-conv-11.ll @@ -0,0 +1,33 @@ +; Test conversion of floating-point values to signed i64s. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test f32->i64. +define i64 @f1(float %f) { +; CHECK: f1: +; CHECK: cgebr %r2, 5, %f0 +; CHECK: br %r14 + %conv = fptosi float %f to i64 + ret i64 %conv +} + +; Test f64->i64. +define i64 @f2(double %f) { +; CHECK: f2: +; CHECK: cgdbr %r2, 5, %f0 +; CHECK: br %r14 + %conv = fptosi double %f to i64 + ret i64 %conv +} + +; Test f128->i64. +define i64 @f3(fp128 *%src) { +; CHECK: f3: +; CHECK: ld %f0, 0(%r2) +; CHECK: ld %f2, 8(%r2) +; CHECK: cgxbr %r2, 5, %f0 +; CHECK: br %r14 + %f = load fp128 *%src + %conv = fptosi fp128 %f to i64 + ret i64 %conv +} diff --git a/test/CodeGen/SystemZ/fp-conv-12.ll b/test/CodeGen/SystemZ/fp-conv-12.ll new file mode 100644 index 0000000..4445b14 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-conv-12.ll @@ -0,0 +1,44 @@ +; Test conversion of floating-point values to unsigned i64s. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; z10 doesn't have native support for unsigned fp-to-i64 conversions; +; they were added in z196 as the Convert to Logical family of instructions. +; Convert via signed i64s instead. + +; Test f32->i64. +define i64 @f1(float %f) { +; CHECK: f1: +; CHECK: cebr +; CHECK: sebr +; CHECK: cgebr +; CHECK: xihf +; CHECK: br %r14 + %conv = fptoui float %f to i64 + ret i64 %conv +} + +; Test f64->i64. +define i64 @f2(double %f) { +; CHECK: f2: +; CHECK: cdbr +; CHECK: sdbr +; CHECK: cgdbr +; CHECK: xihf +; CHECK: br %r14 + %conv = fptoui double %f to i64 + ret i64 %conv +} + +; Test f128->i64. +define i64 @f3(fp128 *%src) { +; CHECK: f3: +; CHECK: cxbr +; CHECK: sxbr +; CHECK: cgxbr +; CHECK: xihf +; CHECK: br %r14 + %f = load fp128 *%src + %conv = fptoui fp128 %f to i64 + ret i64 %conv +} diff --git a/test/CodeGen/SystemZ/fp-copysign-01.ll b/test/CodeGen/SystemZ/fp-copysign-01.ll new file mode 100644 index 0000000..458d475 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-copysign-01.ll @@ -0,0 +1,128 @@ +; Test copysign operations. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare float @copysignf(float, float) readnone +declare double @copysign(double, double) readnone +; FIXME: not really the correct prototype for SystemZ. +declare fp128 @copysignl(fp128, fp128) readnone + +; Test f32 copies in which the sign comes from an f32. +define float @f1(float %a, float %b) { +; CHECK: f1: +; CHECK-NOT: %f2 +; CHECK: cpsdr %f0, %f0, %f2 +; CHECK: br %r14 + %res = call float @copysignf(float %a, float %b) readnone + ret float %res +} + +; Test f32 copies in which the sign comes from an f64. +define float @f2(float %a, double %bd) { +; CHECK: f2: +; CHECK-NOT: %f2 +; CHECK: cpsdr %f0, %f0, %f2 +; CHECK: br %r14 + %b = fptrunc double %bd to float + %res = call float @copysignf(float %a, float %b) readnone + ret float %res +} + +; Test f32 copies in which the sign comes from an f128. 
+define float @f3(float %a, fp128 *%bptr) { +; CHECK: f3: +; CHECK: ld [[BHIGH:%f[0-7]]], 0(%r2) +; CHECK: ld [[BLOW:%f[0-7]]], 8(%r2) +; CHECK: cpsdr %f0, %f0, [[BHIGH]] +; CHECK: br %r14 + %bl = load volatile fp128 *%bptr + %b = fptrunc fp128 %bl to float + %res = call float @copysignf(float %a, float %b) readnone + ret float %res +} + +; Test f64 copies in which the sign comes from an f32. +define double @f4(double %a, float %bf) { +; CHECK: f4: +; CHECK-NOT: %f2 +; CHECK: cpsdr %f0, %f0, %f2 +; CHECK: br %r14 + %b = fpext float %bf to double + %res = call double @copysign(double %a, double %b) readnone + ret double %res +} + +; Test f64 copies in which the sign comes from an f64. +define double @f5(double %a, double %b) { +; CHECK: f5: +; CHECK-NOT: %f2 +; CHECK: cpsdr %f0, %f0, %f2 +; CHECK: br %r14 + %res = call double @copysign(double %a, double %b) readnone + ret double %res +} + +; Test f64 copies in which the sign comes from an f128. +define double @f6(double %a, fp128 *%bptr) { +; CHECK: f6: +; CHECK: ld [[BHIGH:%f[0-7]]], 0(%r2) +; CHECK: ld [[BLOW:%f[0-7]]], 8(%r2) +; CHECK: cpsdr %f0, %f0, [[BHIGH]] +; CHECK: br %r14 + %bl = load volatile fp128 *%bptr + %b = fptrunc fp128 %bl to double + %res = call double @copysign(double %a, double %b) readnone + ret double %res +} + +; Test f128 copies in which the sign comes from an f32. We shouldn't +; need any register shuffling here; %a should be tied to %c, with CPSDR +; just changing the high register. +define void @f7(fp128 *%cptr, fp128 *%aptr, float %bf) { +; CHECK: f7: +; CHECK: ld [[AHIGH:%f[0-7]]], 0(%r3) +; CHECK: ld [[ALOW:%f[0-7]]], 8(%r3) +; CHECK: cpsdr [[AHIGH]], [[AHIGH]], %f0 +; CHECK: std [[AHIGH]], 0(%r2) +; CHECK: std [[ALOW]], 8(%r2) +; CHECK: br %r14 + %a = load volatile fp128 *%aptr + %b = fpext float %bf to fp128 + %c = call fp128 @copysignl(fp128 %a, fp128 %b) readnone + store fp128 %c, fp128 *%cptr + ret void +} + +; As above, but the sign comes from an f64. 
+define void @f8(fp128 *%cptr, fp128 *%aptr, double %bd) { +; CHECK: f8: +; CHECK: ld [[AHIGH:%f[0-7]]], 0(%r3) +; CHECK: ld [[ALOW:%f[0-7]]], 8(%r3) +; CHECK: cpsdr [[AHIGH]], [[AHIGH]], %f0 +; CHECK: std [[AHIGH]], 0(%r2) +; CHECK: std [[ALOW]], 8(%r2) +; CHECK: br %r14 + %a = load volatile fp128 *%aptr + %b = fpext double %bd to fp128 + %c = call fp128 @copysignl(fp128 %a, fp128 %b) readnone + store fp128 %c, fp128 *%cptr + ret void +} + +; As above, but the sign comes from an f128. Don't require the low part +; of %b to be loaded, since it isn't used. +define void @f9(fp128 *%cptr, fp128 *%aptr, fp128 *%bptr) { +; CHECK: f9: +; CHECK: ld [[AHIGH:%f[0-7]]], 0(%r3) +; CHECK: ld [[ALOW:%f[0-7]]], 8(%r3) +; CHECK: ld [[BHIGH:%f[0-7]]], 0(%r4) +; CHECK: cpsdr [[AHIGH]], [[AHIGH]], [[BHIGH]] +; CHECK: std [[AHIGH]], 0(%r2) +; CHECK: std [[ALOW]], 8(%r2) +; CHECK: br %r14 + %a = load volatile fp128 *%aptr + %b = load volatile fp128 *%bptr + %c = call fp128 @copysignl(fp128 %a, fp128 %b) readnone + store fp128 %c, fp128 *%cptr + ret void +} diff --git a/test/CodeGen/SystemZ/fp-div-01.ll b/test/CodeGen/SystemZ/fp-div-01.ll new file mode 100644 index 0000000..080d45e --- /dev/null +++ b/test/CodeGen/SystemZ/fp-div-01.ll @@ -0,0 +1,71 @@ +; Test 32-bit floating-point division. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check register division. +define float @f1(float %f1, float %f2) { +; CHECK: f1: +; CHECK: debr %f0, %f2 +; CHECK: br %r14 + %res = fdiv float %f1, %f2 + ret float %res +} + +; Check the low end of the DEB range. +define float @f2(float %f1, float *%ptr) { +; CHECK: f2: +; CHECK: deb %f0, 0(%r2) +; CHECK: br %r14 + %f2 = load float *%ptr + %res = fdiv float %f1, %f2 + ret float %res +} + +; Check the high end of the aligned DEB range. 
+define float @f3(float %f1, float *%base) { +; CHECK: f3: +; CHECK: deb %f0, 4092(%r2) +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 1023 + %f2 = load float *%ptr + %res = fdiv float %f1, %f2 + ret float %res +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define float @f4(float %f1, float *%base) { +; CHECK: f4: +; CHECK: aghi %r2, 4096 +; CHECK: deb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 1024 + %f2 = load float *%ptr + %res = fdiv float %f1, %f2 + ret float %res +} + +; Check negative displacements, which also need separate address logic. +define float @f5(float %f1, float *%base) { +; CHECK: f5: +; CHECK: aghi %r2, -4 +; CHECK: deb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 -1 + %f2 = load float *%ptr + %res = fdiv float %f1, %f2 + ret float %res +} + +; Check that DEB allows indices. +define float @f6(float %f1, float *%base, i64 %index) { +; CHECK: f6: +; CHECK: sllg %r1, %r3, 2 +; CHECK: deb %f0, 400(%r1,%r2) +; CHECK: br %r14 + %ptr1 = getelementptr float *%base, i64 %index + %ptr2 = getelementptr float *%ptr1, i64 100 + %f2 = load float *%ptr2 + %res = fdiv float %f1, %f2 + ret float %res +} diff --git a/test/CodeGen/SystemZ/fp-div-02.ll b/test/CodeGen/SystemZ/fp-div-02.ll new file mode 100644 index 0000000..c5cae15 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-div-02.ll @@ -0,0 +1,71 @@ +; Test 64-bit floating-point division. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check register division. +define double @f1(double %f1, double %f2) { +; CHECK: f1: +; CHECK: ddbr %f0, %f2 +; CHECK: br %r14 + %res = fdiv double %f1, %f2 + ret double %res +} + +; Check the low end of the DDB range. 
+define double @f2(double %f1, double *%ptr) { +; CHECK: f2: +; CHECK: ddb %f0, 0(%r2) +; CHECK: br %r14 + %f2 = load double *%ptr + %res = fdiv double %f1, %f2 + ret double %res +} + +; Check the high end of the aligned DDB range. +define double @f3(double %f1, double *%base) { +; CHECK: f3: +; CHECK: ddb %f0, 4088(%r2) +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 511 + %f2 = load double *%ptr + %res = fdiv double %f1, %f2 + ret double %res +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define double @f4(double %f1, double *%base) { +; CHECK: f4: +; CHECK: aghi %r2, 4096 +; CHECK: ddb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 512 + %f2 = load double *%ptr + %res = fdiv double %f1, %f2 + ret double %res +} + +; Check negative displacements, which also need separate address logic. +define double @f5(double %f1, double *%base) { +; CHECK: f5: +; CHECK: aghi %r2, -8 +; CHECK: ddb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 -1 + %f2 = load double *%ptr + %res = fdiv double %f1, %f2 + ret double %res +} + +; Check that DDB allows indices. +define double @f6(double %f1, double *%base, i64 %index) { +; CHECK: f6: +; CHECK: sllg %r1, %r3, 3 +; CHECK: ddb %f0, 800(%r1,%r2) +; CHECK: br %r14 + %ptr1 = getelementptr double *%base, i64 %index + %ptr2 = getelementptr double *%ptr1, i64 100 + %f2 = load double *%ptr2 + %res = fdiv double %f1, %f2 + ret double %res +} diff --git a/test/CodeGen/SystemZ/fp-div-03.ll b/test/CodeGen/SystemZ/fp-div-03.ll new file mode 100644 index 0000000..18f2d74 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-div-03.ll @@ -0,0 +1,20 @@ +; Test 128-bit floating-point division. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; There is no memory form of 128-bit division. 
+define void @f1(fp128 *%ptr, float %f2) { +; CHECK: f1: +; CHECK: lxebr %f0, %f0 +; CHECK: ld %f1, 0(%r2) +; CHECK: ld %f3, 8(%r2) +; CHECK: dxbr %f1, %f0 +; CHECK: std %f1, 0(%r2) +; CHECK: std %f3, 8(%r2) +; CHECK: br %r14 + %f1 = load fp128 *%ptr + %f2x = fpext float %f2 to fp128 + %res = fdiv fp128 %f1, %f2x + store fp128 %res, fp128 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/fp-move-01.ll b/test/CodeGen/SystemZ/fp-move-01.ll new file mode 100644 index 0000000..73cd978 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-move-01.ll @@ -0,0 +1,30 @@ +; Test moves between FPRs. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test f32 moves. +define float @f1(float %a, float %b) { +; CHECK: f1: +; CHECK: ler %f0, %f2 + ret float %b +} + +; Test f64 moves. +define double @f2(double %a, double %b) { +; CHECK: f2: +; CHECK: ldr %f0, %f2 + ret double %b +} + +; Test f128 moves. Since f128s are passed by reference, we need to force +; a copy by other means. +define void @f3(fp128 *%x) { +; CHECK: f3: +; CHECK: lxr +; CHECK: axbr + %val = load volatile fp128 *%x + %sum = fadd fp128 %val, %val + store volatile fp128 %sum, fp128 *%x + store volatile fp128 %val, fp128 *%x + ret void +} diff --git a/test/CodeGen/SystemZ/fp-move-02.ll b/test/CodeGen/SystemZ/fp-move-02.ll new file mode 100644 index 0000000..9d87797 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-move-02.ll @@ -0,0 +1,103 @@ +; Test moves between FPRs and GPRs. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test 32-bit moves from GPRs to FPRs. The GPR must be moved into the high +; 32 bits of the FPR. +define float @f1(i32 %a) { +; CHECK: f1: +; CHECK: sllg [[REGISTER:%r[0-5]]], %r2, 32 +; CHECK: ldgr %f0, [[REGISTER]] + %res = bitcast i32 %a to float + ret float %res +} + +; Like f1, but create a situation where the shift can be folded with +; surrounding code. 
+define float @f2(i64 %big) { +; CHECK: f2: +; CHECK: sllg [[REGISTER:%r[0-5]]], %r2, 31 +; CHECK: ldgr %f0, [[REGISTER]] + %shift = lshr i64 %big, 1 + %a = trunc i64 %shift to i32 + %res = bitcast i32 %a to float + ret float %res +} + +; Another example of the same thing. +define float @f3(i64 %big) { +; CHECK: f3: +; CHECK: sllg [[REGISTER:%r[0-5]]], %r2, 2 +; CHECK: ldgr %f0, [[REGISTER]] + %shift = ashr i64 %big, 30 + %a = trunc i64 %shift to i32 + %res = bitcast i32 %a to float + ret float %res +} + +; Like f1, but the value to transfer is already in the high 32 bits. +define float @f4(i64 %big) { +; CHECK: f4: +; CHECK-NOT: %r2 +; CHECK: nilf %r2, 0 +; CHECK-NOT: %r2 +; CHECK: ldgr %f0, %r2 + %shift = ashr i64 %big, 32 + %a = trunc i64 %shift to i32 + %res = bitcast i32 %a to float + ret float %res +} + +; Test 64-bit moves from GPRs to FPRs. +define double @f5(i64 %a) { +; CHECK: f5: +; CHECK: ldgr %f0, %r2 + %res = bitcast i64 %a to double + ret double %res +} + +; Test 128-bit moves from GPRs to FPRs. i128 isn't a legitimate type, +; so this goes through memory. +define void @f6(fp128 *%a, i128 *%b) { +; CHECK: f6: +; CHECK: lg +; CHECK: lg +; CHECK: stg +; CHECK: stg + %val = load i128 *%b + %res = bitcast i128 %val to fp128 + store fp128 %res, fp128 *%a + ret void +} + +; Test 32-bit moves from FPRs to GPRs. The high 32 bits of the FPR should +; be moved into the low 32 bits of the GPR. +define i32 @f7(float %a) { +; CHECK: f7: +; CHECK: lgdr [[REGISTER:%r[0-5]]], %f0 +; CHECK: srlg %r2, [[REGISTER]], 32 + %res = bitcast float %a to i32 + ret i32 %res +} + +; Test 64-bit moves from FPRs to GPRs. +define i64 @f8(double %a) { +; CHECK: f8: +; CHECK: lgdr %r2, %f0 + %res = bitcast double %a to i64 + ret i64 %res +} + +; Test 128-bit moves from FPRs to GPRs, with the same restriction as f6. 
+define void @f9(fp128 *%a, i128 *%b) { +; CHECK: f9: +; CHECK: ld +; CHECK: ld +; CHECK: std +; CHECK: std + %val = load fp128 *%a + %res = bitcast fp128 %val to i128 + store i128 %res, i128 *%b + ret void +} + diff --git a/test/CodeGen/SystemZ/fp-move-03.ll b/test/CodeGen/SystemZ/fp-move-03.ll new file mode 100644 index 0000000..37dbdfa --- /dev/null +++ b/test/CodeGen/SystemZ/fp-move-03.ll @@ -0,0 +1,110 @@ +; Test 32-bit floating-point loads. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test the low end of the LE range. +define float @f1(float *%src) { +; CHECK: f1: +; CHECK: le %f0, 0(%r2) +; CHECK: br %r14 + %val = load float *%src + ret float %val +} + +; Test the high end of the aligned LE range. +define float @f2(float *%src) { +; CHECK: f2: +; CHECK: le %f0, 4092(%r2) +; CHECK: br %r14 + %ptr = getelementptr float *%src, i64 1023 + %val = load float *%ptr + ret float %val +} + +; Check the next word up, which should use LEY instead of LE. +define float @f3(float *%src) { +; CHECK: f3: +; CHECK: ley %f0, 4096(%r2) +; CHECK: br %r14 + %ptr = getelementptr float *%src, i64 1024 + %val = load float *%ptr + ret float %val +} + +; Check the high end of the aligned LEY range. +define float @f4(float *%src) { +; CHECK: f4: +; CHECK: ley %f0, 524284(%r2) +; CHECK: br %r14 + %ptr = getelementptr float *%src, i64 131071 + %val = load float *%ptr + ret float %val +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define float @f5(float *%src) { +; CHECK: f5: +; CHECK: agfi %r2, 524288 +; CHECK: le %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr float *%src, i64 131072 + %val = load float *%ptr + ret float %val +} + +; Check the high end of the negative aligned LEY range. 
+define float @f6(float *%src) { +; CHECK: f6: +; CHECK: ley %f0, -4(%r2) +; CHECK: br %r14 + %ptr = getelementptr float *%src, i64 -1 + %val = load float *%ptr + ret float %val +} + +; Check the low end of the LEY range. +define float @f7(float *%src) { +; CHECK: f7: +; CHECK: ley %f0, -524288(%r2) +; CHECK: br %r14 + %ptr = getelementptr float *%src, i64 -131072 + %val = load float *%ptr + ret float %val +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define float @f8(float *%src) { +; CHECK: f8: +; CHECK: agfi %r2, -524292 +; CHECK: le %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr float *%src, i64 -131073 + %val = load float *%ptr + ret float %val +} + +; Check that LE allows an index. +define float @f9(i64 %src, i64 %index) { +; CHECK: f9: +; CHECK: le %f0, 4092({{%r3,%r2|%r2,%r3}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4092 + %ptr = inttoptr i64 %add2 to float * + %val = load float *%ptr + ret float %val +} + +; Check that LEY allows an index. +define float @f10(i64 %src, i64 %index) { +; CHECK: f10: +; CHECK: ley %f0, 4096({{%r3,%r2|%r2,%r3}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to float * + %val = load float *%ptr + ret float %val +} diff --git a/test/CodeGen/SystemZ/fp-move-04.ll b/test/CodeGen/SystemZ/fp-move-04.ll new file mode 100644 index 0000000..72e90d1 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-move-04.ll @@ -0,0 +1,110 @@ +; Test 64-bit floating-point loads. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test the low end of the LD range. +define double @f1(double *%src) { +; CHECK: f1: +; CHECK: ld %f0, 0(%r2) +; CHECK: br %r14 + %val = load double *%src + ret double %val +} + +; Test the high end of the aligned LD range. 
+define double @f2(double *%src) { +; CHECK: f2: +; CHECK: ld %f0, 4088(%r2) +; CHECK: br %r14 + %ptr = getelementptr double *%src, i64 511 + %val = load double *%ptr + ret double %val +} + +; Check the next doubleword up, which should use LDY instead of LD. +define double @f3(double *%src) { +; CHECK: f3: +; CHECK: ldy %f0, 4096(%r2) +; CHECK: br %r14 + %ptr = getelementptr double *%src, i64 512 + %val = load double *%ptr + ret double %val +} + +; Check the high end of the aligned LDY range. +define double @f4(double *%src) { +; CHECK: f4: +; CHECK: ldy %f0, 524280(%r2) +; CHECK: br %r14 + %ptr = getelementptr double *%src, i64 65535 + %val = load double *%ptr + ret double %val +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define double @f5(double *%src) { +; CHECK: f5: +; CHECK: agfi %r2, 524288 +; CHECK: ld %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr double *%src, i64 65536 + %val = load double *%ptr + ret double %val +} + +; Check the high end of the negative aligned LDY range. +define double @f6(double *%src) { +; CHECK: f6: +; CHECK: ldy %f0, -8(%r2) +; CHECK: br %r14 + %ptr = getelementptr double *%src, i64 -1 + %val = load double *%ptr + ret double %val +} + +; Check the low end of the LDY range. +define double @f7(double *%src) { +; CHECK: f7: +; CHECK: ldy %f0, -524288(%r2) +; CHECK: br %r14 + %ptr = getelementptr double *%src, i64 -65536 + %val = load double *%ptr + ret double %val +} + +; Check the next doubleword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define double @f8(double *%src) { +; CHECK: f8: +; CHECK: agfi %r2, -524296 +; CHECK: ld %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr double *%src, i64 -65537 + %val = load double *%ptr + ret double %val +} + +; Check that LD allows an index. 
+define double @f9(i64 %src, i64 %index) { +; CHECK: f9: +; CHECK: ld %f0, 4095({{%r3,%r2|%r2,%r3}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4095 + %ptr = inttoptr i64 %add2 to double * + %val = load double *%ptr + ret double %val +} + +; Check that LDY allows an index. +define double @f10(i64 %src, i64 %index) { +; CHECK: f10: +; CHECK: ldy %f0, 4096({{%r3,%r2|%r2,%r3}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to double * + %val = load double *%ptr + ret double %val +} diff --git a/test/CodeGen/SystemZ/fp-move-05.ll b/test/CodeGen/SystemZ/fp-move-05.ll new file mode 100644 index 0000000..66ad048 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-move-05.ll @@ -0,0 +1,151 @@ +; Test 128-bit floating-point loads. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check loads with no offset. +define double @f1(i64 %src) { +; CHECK: f1: +; CHECK: ld %f0, 0(%r2) +; CHECK: ld %f2, 8(%r2) +; CHECK: br %r14 + %ptr = inttoptr i64 %src to fp128 * + %val = load fp128 *%ptr + %trunc = fptrunc fp128 %val to double + ret double %trunc +} + +; Check the highest aligned offset that allows LD for both halves. +define double @f2(i64 %src) { +; CHECK: f2: +; CHECK: ld %f0, 4080(%r2) +; CHECK: ld %f2, 4088(%r2) +; CHECK: br %r14 + %add = add i64 %src, 4080 + %ptr = inttoptr i64 %add to fp128 * + %val = load fp128 *%ptr + %trunc = fptrunc fp128 %val to double + ret double %trunc +} + +; Check the next doubleword up, which requires a mixture of LD and LDY. +define double @f3(i64 %src) { +; CHECK: f3: +; CHECK: ld %f0, 4088(%r2) +; CHECK: ldy %f2, 4096(%r2) +; CHECK: br %r14 + %add = add i64 %src, 4088 + %ptr = inttoptr i64 %add to fp128 * + %val = load fp128 *%ptr + %trunc = fptrunc fp128 %val to double + ret double %trunc +} + +; Check the next doubleword after that, which requires LDY for both halves. 
+define double @f4(i64 %src) { +; CHECK: f4: +; CHECK: ldy %f0, 4096(%r2) +; CHECK: ldy %f2, 4104(%r2) +; CHECK: br %r14 + %add = add i64 %src, 4096 + %ptr = inttoptr i64 %add to fp128 * + %val = load fp128 *%ptr + %trunc = fptrunc fp128 %val to double + ret double %trunc +} + +; Check the highest aligned offset that allows LDY for both halves. +define double @f5(i64 %src) { +; CHECK: f5: +; CHECK: ldy %f0, 524272(%r2) +; CHECK: ldy %f2, 524280(%r2) +; CHECK: br %r14 + %add = add i64 %src, 524272 + %ptr = inttoptr i64 %add to fp128 * + %val = load fp128 *%ptr + %trunc = fptrunc fp128 %val to double + ret double %trunc +} + +; Check the next doubleword up, which requires separate address logic. +; Other sequences besides this one would be OK. +define double @f6(i64 %src) { +; CHECK: f6: +; CHECK: lay %r1, 524280(%r2) +; CHECK: ld %f0, 0(%r1) +; CHECK: ld %f2, 8(%r1) +; CHECK: br %r14 + %add = add i64 %src, 524280 + %ptr = inttoptr i64 %add to fp128 * + %val = load fp128 *%ptr + %trunc = fptrunc fp128 %val to double + ret double %trunc +} + +; Check the highest aligned negative offset, which needs a combination of +; LDY and LD. +define double @f7(i64 %src) { +; CHECK: f7: +; CHECK: ldy %f0, -8(%r2) +; CHECK: ld %f2, 0(%r2) +; CHECK: br %r14 + %add = add i64 %src, -8 + %ptr = inttoptr i64 %add to fp128 * + %val = load fp128 *%ptr + %trunc = fptrunc fp128 %val to double + ret double %trunc +} + +; Check the next doubleword down, which requires LDY for both halves. +define double @f8(i64 %src) { +; CHECK: f8: +; CHECK: ldy %f0, -16(%r2) +; CHECK: ldy %f2, -8(%r2) +; CHECK: br %r14 + %add = add i64 %src, -16 + %ptr = inttoptr i64 %add to fp128 * + %val = load fp128 *%ptr + %trunc = fptrunc fp128 %val to double + ret double %trunc +} + +; Check the lowest offset that allows LDY for both halves. 
+define double @f9(i64 %src) { +; CHECK: f9: +; CHECK: ldy %f0, -524288(%r2) +; CHECK: ldy %f2, -524280(%r2) +; CHECK: br %r14 + %add = add i64 %src, -524288 + %ptr = inttoptr i64 %add to fp128 * + %val = load fp128 *%ptr + %trunc = fptrunc fp128 %val to double + ret double %trunc +} + +; Check the next doubleword down, which requires separate address logic. +; Other sequences besides this one would be OK. +define double @f10(i64 %src) { +; CHECK: f10: +; CHECK: agfi %r2, -524296 +; CHECK: ld %f0, 0(%r2) +; CHECK: ld %f2, 8(%r2) +; CHECK: br %r14 + %add = add i64 %src, -524296 + %ptr = inttoptr i64 %add to fp128 * + %val = load fp128 *%ptr + %trunc = fptrunc fp128 %val to double + ret double %trunc +} + +; Check that indices are allowed. +define double @f11(i64 %src, i64 %index) { +; CHECK: f11: +; CHECK: ld %f0, 4088({{%r2,%r3|%r3,%r2}}) +; CHECK: ldy %f2, 4096({{%r2,%r3|%r3,%r2}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4088 + %ptr = inttoptr i64 %add2 to fp128 * + %val = load fp128 *%ptr + %trunc = fptrunc fp128 %val to double + ret double %trunc +} diff --git a/test/CodeGen/SystemZ/fp-move-06.ll b/test/CodeGen/SystemZ/fp-move-06.ll new file mode 100644 index 0000000..b660c2a --- /dev/null +++ b/test/CodeGen/SystemZ/fp-move-06.ll @@ -0,0 +1,110 @@ +; Test 32-bit floating-point stores. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test the low end of the STE range. +define void @f1(float *%ptr, float %val) { +; CHECK: f1: +; CHECK: ste %f0, 0(%r2) +; CHECK: br %r14 + store float %val, float *%ptr + ret void +} + +; Test the high end of the aligned STE range. +define void @f2(float *%src, float %val) { +; CHECK: f2: +; CHECK: ste %f0, 4092(%r2) +; CHECK: br %r14 + %ptr = getelementptr float *%src, i64 1023 + store float %val, float *%ptr + ret void +} + +; Check the next word up, which should use STEY instead of STE. 
+define void @f3(float *%src, float %val) { +; CHECK: f3: +; CHECK: stey %f0, 4096(%r2) +; CHECK: br %r14 + %ptr = getelementptr float *%src, i64 1024 + store float %val, float *%ptr + ret void +} + +; Check the high end of the aligned STEY range. +define void @f4(float *%src, float %val) { +; CHECK: f4: +; CHECK: stey %f0, 524284(%r2) +; CHECK: br %r14 + %ptr = getelementptr float *%src, i64 131071 + store float %val, float *%ptr + ret void +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f5(float *%src, float %val) { +; CHECK: f5: +; CHECK: agfi %r2, 524288 +; CHECK: ste %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr float *%src, i64 131072 + store float %val, float *%ptr + ret void +} + +; Check the high end of the negative aligned STEY range. +define void @f6(float *%src, float %val) { +; CHECK: f6: +; CHECK: stey %f0, -4(%r2) +; CHECK: br %r14 + %ptr = getelementptr float *%src, i64 -1 + store float %val, float *%ptr + ret void +} + +; Check the low end of the STEY range. +define void @f7(float *%src, float %val) { +; CHECK: f7: +; CHECK: stey %f0, -524288(%r2) +; CHECK: br %r14 + %ptr = getelementptr float *%src, i64 -131072 + store float %val, float *%ptr + ret void +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f8(float *%src, float %val) { +; CHECK: f8: +; CHECK: agfi %r2, -524292 +; CHECK: ste %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr float *%src, i64 -131073 + store float %val, float *%ptr + ret void +} + +; Check that STE allows an index. +define void @f9(i64 %src, i64 %index, float %val) { +; CHECK: f9: +; CHECK: ste %f0, 4092({{%r3,%r2|%r2,%r3}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4092 + %ptr = inttoptr i64 %add2 to float * + store float %val, float *%ptr + ret void +} + +; Check that STEY allows an index. 
+define void @f10(i64 %src, i64 %index, float %val) { +; CHECK: f10: +; CHECK: stey %f0, 4096({{%r3,%r2|%r2,%r3}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to float * + store float %val, float *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/fp-move-07.ll b/test/CodeGen/SystemZ/fp-move-07.ll new file mode 100644 index 0000000..0cb0474 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-move-07.ll @@ -0,0 +1,110 @@ +; Test 64-bit floating-point stores. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test the low end of the STD range. +define void @f1(double *%src, double %val) { +; CHECK: f1: +; CHECK: std %f0, 0(%r2) +; CHECK: br %r14 + store double %val, double *%src + ret void +} + +; Test the high end of the aligned STD range. +define void @f2(double *%src, double %val) { +; CHECK: f2: +; CHECK: std %f0, 4088(%r2) +; CHECK: br %r14 + %ptr = getelementptr double *%src, i64 511 + store double %val, double *%ptr + ret void +} + +; Check the next doubleword up, which should use STDY instead of STD. +define void @f3(double *%src, double %val) { +; CHECK: f3: +; CHECK: stdy %f0, 4096(%r2) +; CHECK: br %r14 + %ptr = getelementptr double *%src, i64 512 + store double %val, double *%ptr + ret void +} + +; Check the high end of the aligned STDY range. +define void @f4(double *%src, double %val) { +; CHECK: f4: +; CHECK: stdy %f0, 524280(%r2) +; CHECK: br %r14 + %ptr = getelementptr double *%src, i64 65535 + store double %val, double *%ptr + ret void +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f5(double *%src, double %val) { +; CHECK: f5: +; CHECK: agfi %r2, 524288 +; CHECK: std %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr double *%src, i64 65536 + store double %val, double *%ptr + ret void +} + +; Check the high end of the negative aligned STDY range. 
+define void @f6(double *%src, double %val) { +; CHECK: f6: +; CHECK: stdy %f0, -8(%r2) +; CHECK: br %r14 + %ptr = getelementptr double *%src, i64 -1 + store double %val, double *%ptr + ret void +} + +; Check the low end of the STDY range. +define void @f7(double *%src, double %val) { +; CHECK: f7: +; CHECK: stdy %f0, -524288(%r2) +; CHECK: br %r14 + %ptr = getelementptr double *%src, i64 -65536 + store double %val, double *%ptr + ret void +} + +; Check the next doubleword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f8(double *%src, double %val) { +; CHECK: f8: +; CHECK: agfi %r2, -524296 +; CHECK: std %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr double *%src, i64 -65537 + store double %val, double *%ptr + ret void +} + +; Check that STD allows an index. +define void @f9(i64 %src, i64 %index, double %val) { +; CHECK: f9: +; CHECK: std %f0, 4095({{%r3,%r2|%r2,%r3}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4095 + %ptr = inttoptr i64 %add2 to double * + store double %val, double *%ptr + ret void +} + +; Check that STDY allows an index. +define void @f10(i64 %src, i64 %index, double %val) { +; CHECK: f10: +; CHECK: stdy %f0, 4096({{%r3,%r2|%r2,%r3}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to double * + store double %val, double *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/fp-move-08.ll b/test/CodeGen/SystemZ/fp-move-08.ll new file mode 100644 index 0000000..448d2ac --- /dev/null +++ b/test/CodeGen/SystemZ/fp-move-08.ll @@ -0,0 +1,151 @@ +; Test 128-bit floating-point stores. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check stores with no offset. 
+define void @f1(i64 %src, double %val) { +; CHECK: f1: +; CHECK: std %f0, 0(%r2) +; CHECK: std %f2, 8(%r2) +; CHECK: br %r14 + %ptr = inttoptr i64 %src to fp128 * + %ext = fpext double %val to fp128 + store fp128 %ext, fp128 *%ptr + ret void +} + +; Check the highest aligned offset that allows STD for both halves. +define void @f2(i64 %src, double %val) { +; CHECK: f2: +; CHECK: std %f0, 4080(%r2) +; CHECK: std %f2, 4088(%r2) +; CHECK: br %r14 + %add = add i64 %src, 4080 + %ptr = inttoptr i64 %add to fp128 * + %ext = fpext double %val to fp128 + store fp128 %ext, fp128 *%ptr + ret void +} + +; Check the next doubleword up, which requires a mixture of STD and STDY. +define void @f3(i64 %src, double %val) { +; CHECK: f3: +; CHECK: std %f0, 4088(%r2) +; CHECK: stdy %f2, 4096(%r2) +; CHECK: br %r14 + %add = add i64 %src, 4088 + %ptr = inttoptr i64 %add to fp128 * + %ext = fpext double %val to fp128 + store fp128 %ext, fp128 *%ptr + ret void +} + +; Check the next doubleword after that, which requires STDY for both halves. +define void @f4(i64 %src, double %val) { +; CHECK: f4: +; CHECK: stdy %f0, 4096(%r2) +; CHECK: stdy %f2, 4104(%r2) +; CHECK: br %r14 + %add = add i64 %src, 4096 + %ptr = inttoptr i64 %add to fp128 * + %ext = fpext double %val to fp128 + store fp128 %ext, fp128 *%ptr + ret void +} + +; Check the highest aligned offset that allows STDY for both halves. +define void @f5(i64 %src, double %val) { +; CHECK: f5: +; CHECK: stdy %f0, 524272(%r2) +; CHECK: stdy %f2, 524280(%r2) +; CHECK: br %r14 + %add = add i64 %src, 524272 + %ptr = inttoptr i64 %add to fp128 * + %ext = fpext double %val to fp128 + store fp128 %ext, fp128 *%ptr + ret void +} + +; Check the next doubleword up, which requires separate address logic. +; Other sequences besides this one would be OK. 
+define void @f6(i64 %src, double %val) { +; CHECK: f6: +; CHECK: lay %r1, 524280(%r2) +; CHECK: std %f0, 0(%r1) +; CHECK: std %f2, 8(%r1) +; CHECK: br %r14 + %add = add i64 %src, 524280 + %ptr = inttoptr i64 %add to fp128 * + %ext = fpext double %val to fp128 + store fp128 %ext, fp128 *%ptr + ret void +} + +; Check the highest aligned negative offset, which needs a combination of +; STDY and STD. +define void @f7(i64 %src, double %val) { +; CHECK: f7: +; CHECK: stdy %f0, -8(%r2) +; CHECK: std %f2, 0(%r2) +; CHECK: br %r14 + %add = add i64 %src, -8 + %ptr = inttoptr i64 %add to fp128 * + %ext = fpext double %val to fp128 + store fp128 %ext, fp128 *%ptr + ret void +} + +; Check the next doubleword down, which requires STDY for both halves. +define void @f8(i64 %src, double %val) { +; CHECK: f8: +; CHECK: stdy %f0, -16(%r2) +; CHECK: stdy %f2, -8(%r2) +; CHECK: br %r14 + %add = add i64 %src, -16 + %ptr = inttoptr i64 %add to fp128 * + %ext = fpext double %val to fp128 + store fp128 %ext, fp128 *%ptr + ret void +} + +; Check the lowest offset that allows STDY for both halves. +define void @f9(i64 %src, double %val) { +; CHECK: f9: +; CHECK: stdy %f0, -524288(%r2) +; CHECK: stdy %f2, -524280(%r2) +; CHECK: br %r14 + %add = add i64 %src, -524288 + %ptr = inttoptr i64 %add to fp128 * + %ext = fpext double %val to fp128 + store fp128 %ext, fp128 *%ptr + ret void +} + +; Check the next doubleword down, which requires separate address logic. +; Other sequences besides this one would be OK. +define void @f10(i64 %src, double %val) { +; CHECK: f10: +; CHECK: agfi %r2, -524296 +; CHECK: std %f0, 0(%r2) +; CHECK: std %f2, 8(%r2) +; CHECK: br %r14 + %add = add i64 %src, -524296 + %ptr = inttoptr i64 %add to fp128 * + %ext = fpext double %val to fp128 + store fp128 %ext, fp128 *%ptr + ret void +} + +; Check that indices are allowed. 
+define void @f11(i64 %src, i64 %index, double %val) { +; CHECK: f11: +; CHECK: std %f0, 4088({{%r2,%r3|%r3,%r2}}) +; CHECK: stdy %f2, 4096({{%r2,%r3|%r3,%r2}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4088 + %ptr = inttoptr i64 %add2 to fp128 * + %ext = fpext double %val to fp128 + store fp128 %ext, fp128 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/fp-mul-01.ll b/test/CodeGen/SystemZ/fp-mul-01.ll new file mode 100644 index 0000000..68c78ee --- /dev/null +++ b/test/CodeGen/SystemZ/fp-mul-01.ll @@ -0,0 +1,71 @@ +; Test multiplication of two f32s, producing an f32 result. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check register multiplication. +define float @f1(float %f1, float %f2) { +; CHECK: f1: +; CHECK: meebr %f0, %f2 +; CHECK: br %r14 + %res = fmul float %f1, %f2 + ret float %res +} + +; Check the low end of the MEEB range. +define float @f2(float %f1, float *%ptr) { +; CHECK: f2: +; CHECK: meeb %f0, 0(%r2) +; CHECK: br %r14 + %f2 = load float *%ptr + %res = fmul float %f1, %f2 + ret float %res +} + +; Check the high end of the aligned MEEB range. +define float @f3(float %f1, float *%base) { +; CHECK: f3: +; CHECK: meeb %f0, 4092(%r2) +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 1023 + %f2 = load float *%ptr + %res = fmul float %f1, %f2 + ret float %res +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define float @f4(float %f1, float *%base) { +; CHECK: f4: +; CHECK: aghi %r2, 4096 +; CHECK: meeb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 1024 + %f2 = load float *%ptr + %res = fmul float %f1, %f2 + ret float %res +} + +; Check negative displacements, which also need separate address logic. 
+define float @f5(float %f1, float *%base) { +; CHECK: f5: +; CHECK: aghi %r2, -4 +; CHECK: meeb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 -1 + %f2 = load float *%ptr + %res = fmul float %f1, %f2 + ret float %res +} + +; Check that MEEB allows indices. +define float @f6(float %f1, float *%base, i64 %index) { +; CHECK: f6: +; CHECK: sllg %r1, %r3, 2 +; CHECK: meeb %f0, 400(%r1,%r2) +; CHECK: br %r14 + %ptr1 = getelementptr float *%base, i64 %index + %ptr2 = getelementptr float *%ptr1, i64 100 + %f2 = load float *%ptr2 + %res = fmul float %f1, %f2 + ret float %res +} diff --git a/test/CodeGen/SystemZ/fp-mul-02.ll b/test/CodeGen/SystemZ/fp-mul-02.ll new file mode 100644 index 0000000..ec51a4c --- /dev/null +++ b/test/CodeGen/SystemZ/fp-mul-02.ll @@ -0,0 +1,83 @@ +; Test multiplication of two f32s, producing an f64 result. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check register multiplication. +define double @f1(float %f1, float %f2) { +; CHECK: f1: +; CHECK: mdebr %f0, %f2 +; CHECK: br %r14 + %f1x = fpext float %f1 to double + %f2x = fpext float %f2 to double + %res = fmul double %f1x, %f2x + ret double %res +} + +; Check the low end of the MDEB range. +define double @f2(float %f1, float *%ptr) { +; CHECK: f2: +; CHECK: mdeb %f0, 0(%r2) +; CHECK: br %r14 + %f2 = load float *%ptr + %f1x = fpext float %f1 to double + %f2x = fpext float %f2 to double + %res = fmul double %f1x, %f2x + ret double %res +} + +; Check the high end of the aligned MDEB range. +define double @f3(float %f1, float *%base) { +; CHECK: f3: +; CHECK: mdeb %f0, 4092(%r2) +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 1023 + %f2 = load float *%ptr + %f1x = fpext float %f1 to double + %f2x = fpext float %f2 to double + %res = fmul double %f1x, %f2x + ret double %res +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define double @f4(float %f1, float *%base) { +; CHECK: f4: +; CHECK: aghi %r2, 4096 +; CHECK: mdeb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 1024 + %f2 = load float *%ptr + %f1x = fpext float %f1 to double + %f2x = fpext float %f2 to double + %res = fmul double %f1x, %f2x + ret double %res +} + +; Check negative displacements, which also need separate address logic. +define double @f5(float %f1, float *%base) { +; CHECK: f5: +; CHECK: aghi %r2, -4 +; CHECK: mdeb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 -1 + %f2 = load float *%ptr + %f1x = fpext float %f1 to double + %f2x = fpext float %f2 to double + %res = fmul double %f1x, %f2x + ret double %res +} + +; Check that MDEB allows indices. +define double @f6(float %f1, float *%base, i64 %index) { +; CHECK: f6: +; CHECK: sllg %r1, %r3, 2 +; CHECK: mdeb %f0, 400(%r1,%r2) +; CHECK: br %r14 + %ptr1 = getelementptr float *%base, i64 %index + %ptr2 = getelementptr float *%ptr1, i64 100 + %f2 = load float *%ptr2 + %f1x = fpext float %f1 to double + %f2x = fpext float %f2 to double + %res = fmul double %f1x, %f2x + ret double %res +} diff --git a/test/CodeGen/SystemZ/fp-mul-03.ll b/test/CodeGen/SystemZ/fp-mul-03.ll new file mode 100644 index 0000000..9849247 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-mul-03.ll @@ -0,0 +1,71 @@ +; Test multiplication of two f64s, producing an f64 result. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check register multiplication. +define double @f1(double %f1, double %f2) { +; CHECK: f1: +; CHECK: mdbr %f0, %f2 +; CHECK: br %r14 + %res = fmul double %f1, %f2 + ret double %res +} + +; Check the low end of the MDB range. +define double @f2(double %f1, double *%ptr) { +; CHECK: f2: +; CHECK: mdb %f0, 0(%r2) +; CHECK: br %r14 + %f2 = load double *%ptr + %res = fmul double %f1, %f2 + ret double %res +} + +; Check the high end of the aligned MDB range. 
+define double @f3(double %f1, double *%base) { +; CHECK: f3: +; CHECK: mdb %f0, 4088(%r2) +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 511 + %f2 = load double *%ptr + %res = fmul double %f1, %f2 + ret double %res +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define double @f4(double %f1, double *%base) { +; CHECK: f4: +; CHECK: aghi %r2, 4096 +; CHECK: mdb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 512 + %f2 = load double *%ptr + %res = fmul double %f1, %f2 + ret double %res +} + +; Check negative displacements, which also need separate address logic. +define double @f5(double %f1, double *%base) { +; CHECK: f5: +; CHECK: aghi %r2, -8 +; CHECK: mdb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 -1 + %f2 = load double *%ptr + %res = fmul double %f1, %f2 + ret double %res +} + +; Check that MDB allows indices. +define double @f6(double %f1, double *%base, i64 %index) { +; CHECK: f6: +; CHECK: sllg %r1, %r3, 3 +; CHECK: mdb %f0, 800(%r1,%r2) +; CHECK: br %r14 + %ptr1 = getelementptr double *%base, i64 %index + %ptr2 = getelementptr double *%ptr1, i64 100 + %f2 = load double *%ptr2 + %res = fmul double %f1, %f2 + ret double %res +} diff --git a/test/CodeGen/SystemZ/fp-mul-04.ll b/test/CodeGen/SystemZ/fp-mul-04.ll new file mode 100644 index 0000000..712ead8 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-mul-04.ll @@ -0,0 +1,103 @@ +; Test multiplication of two f64s, producing an f128 result. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check register multiplication. "mxdbr %f0, %f2" is not valid from LLVM's +; point of view, because %f2 is the low register of the FP128 %f0. Pass the +; multiplier in %f4 instead. 
+define void @f1(double %f1, double %dummy, double %f2, fp128 *%dst) { +; CHECK: f1: +; CHECK: mxdbr %f0, %f4 +; CHECK: std %f0, 0(%r2) +; CHECK: std %f2, 8(%r2) +; CHECK: br %r14 + %f1x = fpext double %f1 to fp128 + %f2x = fpext double %f2 to fp128 + %res = fmul fp128 %f1x, %f2x + store fp128 %res, fp128 *%dst + ret void +} + +; Check the low end of the MXDB range. +define void @f2(double %f1, double *%ptr, fp128 *%dst) { +; CHECK: f2: +; CHECK: mxdb %f0, 0(%r2) +; CHECK: std %f0, 0(%r3) +; CHECK: std %f2, 8(%r3) +; CHECK: br %r14 + %f2 = load double *%ptr + %f1x = fpext double %f1 to fp128 + %f2x = fpext double %f2 to fp128 + %res = fmul fp128 %f1x, %f2x + store fp128 %res, fp128 *%dst + ret void +} + +; Check the high end of the aligned MXDB range. +define void @f3(double %f1, double *%base, fp128 *%dst) { +; CHECK: f3: +; CHECK: mxdb %f0, 4088(%r2) +; CHECK: std %f0, 0(%r3) +; CHECK: std %f2, 8(%r3) +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 511 + %f2 = load double *%ptr + %f1x = fpext double %f1 to fp128 + %f2x = fpext double %f2 to fp128 + %res = fmul fp128 %f1x, %f2x + store fp128 %res, fp128 *%dst + ret void +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f4(double %f1, double *%base, fp128 *%dst) { +; CHECK: f4: +; CHECK: aghi %r2, 4096 +; CHECK: mxdb %f0, 0(%r2) +; CHECK: std %f0, 0(%r3) +; CHECK: std %f2, 8(%r3) +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 512 + %f2 = load double *%ptr + %f1x = fpext double %f1 to fp128 + %f2x = fpext double %f2 to fp128 + %res = fmul fp128 %f1x, %f2x + store fp128 %res, fp128 *%dst + ret void +} + +; Check negative displacements, which also need separate address logic. 
+define void @f5(double %f1, double *%base, fp128 *%dst) { +; CHECK: f5: +; CHECK: aghi %r2, -8 +; CHECK: mxdb %f0, 0(%r2) +; CHECK: std %f0, 0(%r3) +; CHECK: std %f2, 8(%r3) +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 -1 + %f2 = load double *%ptr + %f1x = fpext double %f1 to fp128 + %f2x = fpext double %f2 to fp128 + %res = fmul fp128 %f1x, %f2x + store fp128 %res, fp128 *%dst + ret void +} + +; Check that MXDB allows indices. +define void @f6(double %f1, double *%base, i64 %index, fp128 *%dst) { +; CHECK: f6: +; CHECK: sllg %r1, %r3, 3 +; CHECK: mxdb %f0, 800(%r1,%r2) +; CHECK: std %f0, 0(%r4) +; CHECK: std %f2, 8(%r4) +; CHECK: br %r14 + %ptr1 = getelementptr double *%base, i64 %index + %ptr2 = getelementptr double *%ptr1, i64 100 + %f2 = load double *%ptr2 + %f1x = fpext double %f1 to fp128 + %f2x = fpext double %f2 to fp128 + %res = fmul fp128 %f1x, %f2x + store fp128 %res, fp128 *%dst + ret void +} diff --git a/test/CodeGen/SystemZ/fp-mul-05.ll b/test/CodeGen/SystemZ/fp-mul-05.ll new file mode 100644 index 0000000..df5bc4e --- /dev/null +++ b/test/CodeGen/SystemZ/fp-mul-05.ll @@ -0,0 +1,20 @@ +; Test multiplication of two f128s. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; There is no memory form of 128-bit multiplication. 
+define void @f1(fp128 *%ptr, float %f2) { +; CHECK: f1: +; CHECK: lxebr %f0, %f0 +; CHECK: ld %f1, 0(%r2) +; CHECK: ld %f3, 8(%r2) +; CHECK: mxbr %f1, %f0 +; CHECK: std %f1, 0(%r2) +; CHECK: std %f3, 8(%r2) +; CHECK: br %r14 + %f1 = load fp128 *%ptr + %f2x = fpext float %f2 to fp128 + %diff = fmul fp128 %f1, %f2x + store fp128 %diff, fp128 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/fp-mul-06.ll b/test/CodeGen/SystemZ/fp-mul-06.ll new file mode 100644 index 0000000..8124c68 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-mul-06.ll @@ -0,0 +1,102 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare float @llvm.fma.f32(float %f1, float %f2, float %f3) + +define float @f1(float %f1, float %f2, float %acc) { +; CHECK: f1: +; CHECK: maebr %f4, %f0, %f2 +; CHECK: ler %f0, %f4 +; CHECK: br %r14 + %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc) + ret float %res +} + +define float @f2(float %f1, float *%ptr, float %acc) { +; CHECK: f2: +; CHECK: maeb %f2, %f0, 0(%r2) +; CHECK: ler %f0, %f2 +; CHECK: br %r14 + %f2 = load float *%ptr + %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc) + ret float %res +} + +define float @f3(float %f1, float *%base, float %acc) { +; CHECK: f3: +; CHECK: maeb %f2, %f0, 4092(%r2) +; CHECK: ler %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 1023 + %f2 = load float *%ptr + %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc) + ret float %res +} + +define float @f4(float %f1, float *%base, float %acc) { +; The important thing here is that we don't generate an out-of-range +; displacement. Other sequences besides this one would be OK. 
+; +; CHECK: f4: +; CHECK: aghi %r2, 4096 +; CHECK: maeb %f2, %f0, 0(%r2) +; CHECK: ler %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 1024 + %f2 = load float *%ptr + %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc) + ret float %res +} + +define float @f5(float %f1, float *%base, float %acc) { +; Here too the important thing is that we don't generate an out-of-range +; displacement. Other sequences besides this one would be OK. +; +; CHECK: f5: +; CHECK: aghi %r2, -4 +; CHECK: maeb %f2, %f0, 0(%r2) +; CHECK: ler %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 -1 + %f2 = load float *%ptr + %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc) + ret float %res +} + +define float @f6(float %f1, float *%base, i64 %index, float %acc) { +; CHECK: f6: +; CHECK: sllg %r1, %r3, 2 +; CHECK: maeb %f2, %f0, 0(%r1,%r2) +; CHECK: ler %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 %index + %f2 = load float *%ptr + %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc) + ret float %res +} + +define float @f7(float %f1, float *%base, i64 %index, float %acc) { +; CHECK: f7: +; CHECK: sllg %r1, %r3, 2 +; CHECK: maeb %f2, %f0, 4092({{%r1,%r2|%r2,%r1}}) +; CHECK: ler %f0, %f2 +; CHECK: br %r14 + %index2 = add i64 %index, 1023 + %ptr = getelementptr float *%base, i64 %index2 + %f2 = load float *%ptr + %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc) + ret float %res +} + +define float @f8(float %f1, float *%base, i64 %index, float %acc) { +; CHECK: f8: +; CHECK: sllg %r1, %r3, 2 +; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}}) +; CHECK: maeb %f2, %f0, 0(%r1) +; CHECK: ler %f0, %f2 +; CHECK: br %r14 + %index2 = add i64 %index, 1024 + %ptr = getelementptr float *%base, i64 %index2 + %f2 = load float *%ptr + %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc) + ret float %res +} diff --git a/test/CodeGen/SystemZ/fp-mul-07.ll 
b/test/CodeGen/SystemZ/fp-mul-07.ll new file mode 100644 index 0000000..b8e4483 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-mul-07.ll @@ -0,0 +1,102 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare double @llvm.fma.f64(double %f1, double %f2, double %f3) + +define double @f1(double %f1, double %f2, double %acc) { +; CHECK: f1: +; CHECK: madbr %f4, %f0, %f2 +; CHECK: ldr %f0, %f4 +; CHECK: br %r14 + %res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc) + ret double %res +} + +define double @f2(double %f1, double *%ptr, double %acc) { +; CHECK: f2: +; CHECK: madb %f2, %f0, 0(%r2) +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %f2 = load double *%ptr + %res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc) + ret double %res +} + +define double @f3(double %f1, double *%base, double %acc) { +; CHECK: f3: +; CHECK: madb %f2, %f0, 4088(%r2) +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 511 + %f2 = load double *%ptr + %res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc) + ret double %res +} + +define double @f4(double %f1, double *%base, double %acc) { +; The important thing here is that we don't generate an out-of-range +; displacement. Other sequences besides this one would be OK. +; +; CHECK: f4: +; CHECK: aghi %r2, 4096 +; CHECK: madb %f2, %f0, 0(%r2) +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 512 + %f2 = load double *%ptr + %res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc) + ret double %res +} + +define double @f5(double %f1, double *%base, double %acc) { +; Here too the important thing is that we don't generate an out-of-range +; displacement. Other sequences besides this one would be OK. 
+; +; CHECK: f5: +; CHECK: aghi %r2, -8 +; CHECK: madb %f2, %f0, 0(%r2) +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 -1 + %f2 = load double *%ptr + %res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc) + ret double %res +} + +define double @f6(double %f1, double *%base, i64 %index, double %acc) { +; CHECK: f6: +; CHECK: sllg %r1, %r3, 3 +; CHECK: madb %f2, %f0, 0(%r1,%r2) +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 %index + %f2 = load double *%ptr + %res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc) + ret double %res +} + +define double @f7(double %f1, double *%base, i64 %index, double %acc) { +; CHECK: f7: +; CHECK: sllg %r1, %r3, 3 +; CHECK: madb %f2, %f0, 4088({{%r1,%r2|%r2,%r1}}) +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %index2 = add i64 %index, 511 + %ptr = getelementptr double *%base, i64 %index2 + %f2 = load double *%ptr + %res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc) + ret double %res +} + +define double @f8(double %f1, double *%base, i64 %index, double %acc) { +; CHECK: f8: +; CHECK: sllg %r1, %r3, 3 +; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}}) +; CHECK: madb %f2, %f0, 0(%r1) +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %index2 = add i64 %index, 512 + %ptr = getelementptr double *%base, i64 %index2 + %f2 = load double *%ptr + %res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc) + ret double %res +} diff --git a/test/CodeGen/SystemZ/fp-mul-08.ll b/test/CodeGen/SystemZ/fp-mul-08.ll new file mode 100644 index 0000000..5c14740 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-mul-08.ll @@ -0,0 +1,110 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare float @llvm.fma.f32(float %f1, float %f2, float %f3) + +define float @f1(float %f1, float %f2, float %acc) { +; CHECK: f1: +; CHECK: msebr %f4, %f0, %f2 +; CHECK: ler %f0, %f4 +; CHECK: br %r14 + %negacc = fsub float -0.0, %acc + %res = call 
float @llvm.fma.f32 (float %f1, float %f2, float %negacc) + ret float %res +} + +define float @f2(float %f1, float *%ptr, float %acc) { +; CHECK: f2: +; CHECK: mseb %f2, %f0, 0(%r2) +; CHECK: ler %f0, %f2 +; CHECK: br %r14 + %f2 = load float *%ptr + %negacc = fsub float -0.0, %acc + %res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc) + ret float %res +} + +define float @f3(float %f1, float *%base, float %acc) { +; CHECK: f3: +; CHECK: mseb %f2, %f0, 4092(%r2) +; CHECK: ler %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 1023 + %f2 = load float *%ptr + %negacc = fsub float -0.0, %acc + %res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc) + ret float %res +} + +define float @f4(float %f1, float *%base, float %acc) { +; The important thing here is that we don't generate an out-of-range +; displacement. Other sequences besides this one would be OK. +; +; CHECK: f4: +; CHECK: aghi %r2, 4096 +; CHECK: mseb %f2, %f0, 0(%r2) +; CHECK: ler %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 1024 + %f2 = load float *%ptr + %negacc = fsub float -0.0, %acc + %res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc) + ret float %res +} + +define float @f5(float %f1, float *%base, float %acc) { +; Here too the important thing is that we don't generate an out-of-range +; displacement. Other sequences besides this one would be OK. 
+; +; CHECK: f5: +; CHECK: aghi %r2, -4 +; CHECK: mseb %f2, %f0, 0(%r2) +; CHECK: ler %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 -1 + %f2 = load float *%ptr + %negacc = fsub float -0.0, %acc + %res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc) + ret float %res +} + +define float @f6(float %f1, float *%base, i64 %index, float %acc) { +; CHECK: f6: +; CHECK: sllg %r1, %r3, 2 +; CHECK: mseb %f2, %f0, 0(%r1,%r2) +; CHECK: ler %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 %index + %f2 = load float *%ptr + %negacc = fsub float -0.0, %acc + %res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc) + ret float %res +} + +define float @f7(float %f1, float *%base, i64 %index, float %acc) { +; CHECK: f7: +; CHECK: sllg %r1, %r3, 2 +; CHECK: mseb %f2, %f0, 4092({{%r1,%r2|%r2,%r1}}) +; CHECK: ler %f0, %f2 +; CHECK: br %r14 + %index2 = add i64 %index, 1023 + %ptr = getelementptr float *%base, i64 %index2 + %f2 = load float *%ptr + %negacc = fsub float -0.0, %acc + %res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc) + ret float %res +} + +define float @f8(float %f1, float *%base, i64 %index, float %acc) { +; CHECK: f8: +; CHECK: sllg %r1, %r3, 2 +; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}}) +; CHECK: mseb %f2, %f0, 0(%r1) +; CHECK: ler %f0, %f2 +; CHECK: br %r14 + %index2 = add i64 %index, 1024 + %ptr = getelementptr float *%base, i64 %index2 + %f2 = load float *%ptr + %negacc = fsub float -0.0, %acc + %res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc) + ret float %res +} diff --git a/test/CodeGen/SystemZ/fp-mul-09.ll b/test/CodeGen/SystemZ/fp-mul-09.ll new file mode 100644 index 0000000..bcae1e3 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-mul-09.ll @@ -0,0 +1,110 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare double @llvm.fma.f64(double %f1, double %f2, double %f3) + +define double @f1(double %f1, double %f2, double %acc) { +; CHECK: f1: +; 
CHECK: msdbr %f4, %f0, %f2 +; CHECK: ldr %f0, %f4 +; CHECK: br %r14 + %negacc = fsub double -0.0, %acc + %res = call double @llvm.fma.f64 (double %f1, double %f2, double %negacc) + ret double %res +} + +define double @f2(double %f1, double *%ptr, double %acc) { +; CHECK: f2: +; CHECK: msdb %f2, %f0, 0(%r2) +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %f2 = load double *%ptr + %negacc = fsub double -0.0, %acc + %res = call double @llvm.fma.f64 (double %f1, double %f2, double %negacc) + ret double %res +} + +define double @f3(double %f1, double *%base, double %acc) { +; CHECK: f3: +; CHECK: msdb %f2, %f0, 4088(%r2) +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 511 + %f2 = load double *%ptr + %negacc = fsub double -0.0, %acc + %res = call double @llvm.fma.f64 (double %f1, double %f2, double %negacc) + ret double %res +} + +define double @f4(double %f1, double *%base, double %acc) { +; The important thing here is that we don't generate an out-of-range +; displacement. Other sequences besides this one would be OK. +; +; CHECK: f4: +; CHECK: aghi %r2, 4096 +; CHECK: msdb %f2, %f0, 0(%r2) +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 512 + %f2 = load double *%ptr + %negacc = fsub double -0.0, %acc + %res = call double @llvm.fma.f64 (double %f1, double %f2, double %negacc) + ret double %res +} + +define double @f5(double %f1, double *%base, double %acc) { +; Here too the important thing is that we don't generate an out-of-range +; displacement. Other sequences besides this one would be OK. 
+; +; CHECK: f5: +; CHECK: aghi %r2, -8 +; CHECK: msdb %f2, %f0, 0(%r2) +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 -1 + %f2 = load double *%ptr + %negacc = fsub double -0.0, %acc + %res = call double @llvm.fma.f64 (double %f1, double %f2, double %negacc) + ret double %res +} + +define double @f6(double %f1, double *%base, i64 %index, double %acc) { +; CHECK: f6: +; CHECK: sllg %r1, %r3, 3 +; CHECK: msdb %f2, %f0, 0(%r1,%r2) +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 %index + %f2 = load double *%ptr + %negacc = fsub double -0.0, %acc + %res = call double @llvm.fma.f64 (double %f1, double %f2, double %negacc) + ret double %res +} + +define double @f7(double %f1, double *%base, i64 %index, double %acc) { +; CHECK: f7: +; CHECK: sllg %r1, %r3, 3 +; CHECK: msdb %f2, %f0, 4088({{%r1,%r2|%r2,%r1}}) +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %index2 = add i64 %index, 511 + %ptr = getelementptr double *%base, i64 %index2 + %f2 = load double *%ptr + %negacc = fsub double -0.0, %acc + %res = call double @llvm.fma.f64 (double %f1, double %f2, double %negacc) + ret double %res +} + +define double @f8(double %f1, double *%base, i64 %index, double %acc) { +; CHECK: f8: +; CHECK: sllg %r1, %r3, 3 +; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}}) +; CHECK: msdb %f2, %f0, 0(%r1) +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %index2 = add i64 %index, 512 + %ptr = getelementptr double *%base, i64 %index2 + %f2 = load double *%ptr + %negacc = fsub double -0.0, %acc + %res = call double @llvm.fma.f64 (double %f1, double %f2, double %negacc) + ret double %res +} diff --git a/test/CodeGen/SystemZ/fp-neg-01.ll b/test/CodeGen/SystemZ/fp-neg-01.ll new file mode 100644 index 0000000..09a4a53 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-neg-01.ll @@ -0,0 +1,38 @@ +; Test floating-point negation. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test f32. 
+define float @f1(float %f) { +; CHECK: f1: +; CHECK: lcebr %f0, %f0 +; CHECK: br %r14 + %res = fsub float -0.0, %f + ret float %res +} + +; Test f64. +define double @f2(double %f) { +; CHECK: f2: +; CHECK: lcdbr %f0, %f0 +; CHECK: br %r14 + %res = fsub double -0.0, %f + ret double %res +} + +; Test f128. With the loads and stores, a pure negation would probably +; be better implemented using an XI on the upper byte. Do some extra +; processing so that using FPRs is unequivocally better. +define void @f3(fp128 *%ptr, fp128 *%ptr2) { +; CHECK: f3: +; CHECK: lcxbr +; CHECK: dxbr +; CHECK: br %r14 + %orig = load fp128 *%ptr + %negzero = fpext float -0.0 to fp128 + %neg = fsub fp128 0xL00000000000000008000000000000000, %orig + %op2 = load fp128 *%ptr2 + %res = fdiv fp128 %neg, %op2 + store fp128 %res, fp128 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/fp-round-01.ll b/test/CodeGen/SystemZ/fp-round-01.ll new file mode 100644 index 0000000..20325c3 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-round-01.ll @@ -0,0 +1,36 @@ +; Test rint()-like rounding, with non-integer values triggering an +; inexact condition. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test f32. +declare float @llvm.rint.f32(float %f) +define float @f1(float %f) { +; CHECK: f1: +; CHECK: fiebr %f0, 0, %f0 +; CHECK: br %r14 + %res = call float @llvm.rint.f32(float %f) + ret float %res +} + +; Test f64. +declare double @llvm.rint.f64(double %f) +define double @f2(double %f) { +; CHECK: f2: +; CHECK: fidbr %f0, 0, %f0 +; CHECK: br %r14 + %res = call double @llvm.rint.f64(double %f) + ret double %res +} + +; Test f128. 
+declare fp128 @llvm.rint.f128(fp128 %f) +define void @f3(fp128 *%ptr) { +; CHECK: f3: +; CHECK: fixbr %f0, 0, %f0 +; CHECK: br %r14 + %src = load fp128 *%ptr + %res = call fp128 @llvm.rint.f128(fp128 %src) + store fp128 %res, fp128 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/fp-sqrt-01.ll b/test/CodeGen/SystemZ/fp-sqrt-01.ll new file mode 100644 index 0000000..7ed27f5 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-sqrt-01.ll @@ -0,0 +1,73 @@ +; Test 32-bit square root. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare float @llvm.sqrt.f32(float %f) + +; Check register square root. +define float @f1(float %val) { +; CHECK: f1: +; CHECK: sqebr %f0, %f0 +; CHECK: br %r14 + %res = call float @llvm.sqrt.f32(float %val) + ret float %res +} + +; Check the low end of the SQEB range. +define float @f2(float *%ptr) { +; CHECK: f2: +; CHECK: sqeb %f0, 0(%r2) +; CHECK: br %r14 + %val = load float *%ptr + %res = call float @llvm.sqrt.f32(float %val) + ret float %res +} + +; Check the high end of the aligned SQEB range. +define float @f3(float *%base) { +; CHECK: f3: +; CHECK: sqeb %f0, 4092(%r2) +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 1023 + %val = load float *%ptr + %res = call float @llvm.sqrt.f32(float %val) + ret float %res +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define float @f4(float *%base) { +; CHECK: f4: +; CHECK: aghi %r2, 4096 +; CHECK: sqeb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 1024 + %val = load float *%ptr + %res = call float @llvm.sqrt.f32(float %val) + ret float %res +} + +; Check negative displacements, which also need separate address logic. 
+define float @f5(float *%base) { +; CHECK: f5: +; CHECK: aghi %r2, -4 +; CHECK: sqeb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 -1 + %val = load float *%ptr + %res = call float @llvm.sqrt.f32(float %val) + ret float %res +} + +; Check that SQEB allows indices. +define float @f6(float *%base, i64 %index) { +; CHECK: f6: +; CHECK: sllg %r1, %r3, 2 +; CHECK: sqeb %f0, 400(%r1,%r2) +; CHECK: br %r14 + %ptr1 = getelementptr float *%base, i64 %index + %ptr2 = getelementptr float *%ptr1, i64 100 + %val = load float *%ptr2 + %res = call float @llvm.sqrt.f32(float %val) + ret float %res +} diff --git a/test/CodeGen/SystemZ/fp-sqrt-02.ll b/test/CodeGen/SystemZ/fp-sqrt-02.ll new file mode 100644 index 0000000..22a91ad --- /dev/null +++ b/test/CodeGen/SystemZ/fp-sqrt-02.ll @@ -0,0 +1,73 @@ +; Test 64-bit square root. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare double @llvm.sqrt.f64(double %f) + +; Check register square root. +define double @f1(double %val) { +; CHECK: f1: +; CHECK: sqdbr %f0, %f0 +; CHECK: br %r14 + %res = call double @llvm.sqrt.f64(double %val) + ret double %res +} + +; Check the low end of the SQDB range. +define double @f2(double *%ptr) { +; CHECK: f2: +; CHECK: sqdb %f0, 0(%r2) +; CHECK: br %r14 + %val = load double *%ptr + %res = call double @llvm.sqrt.f64(double %val) + ret double %res +} + +; Check the high end of the aligned SQDB range. +define double @f3(double *%base) { +; CHECK: f3: +; CHECK: sqdb %f0, 4088(%r2) +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 511 + %val = load double *%ptr + %res = call double @llvm.sqrt.f64(double %val) + ret double %res +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define double @f4(double *%base) { +; CHECK: f4: +; CHECK: aghi %r2, 4096 +; CHECK: sqdb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 512 + %val = load double *%ptr + %res = call double @llvm.sqrt.f64(double %val) + ret double %res +} + +; Check negative displacements, which also need separate address logic. +define double @f5(double *%base) { +; CHECK: f5: +; CHECK: aghi %r2, -8 +; CHECK: sqdb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 -1 + %val = load double *%ptr + %res = call double @llvm.sqrt.f64(double %val) + ret double %res +} + +; Check that SQDB allows indices. +define double @f6(double *%base, i64 %index) { +; CHECK: f6: +; CHECK: sllg %r1, %r3, 3 +; CHECK: sqdb %f0, 800(%r1,%r2) +; CHECK: br %r14 + %ptr1 = getelementptr double *%base, i64 %index + %ptr2 = getelementptr double *%ptr1, i64 100 + %val = load double *%ptr2 + %res = call double @llvm.sqrt.f64(double %val) + ret double %res +} diff --git a/test/CodeGen/SystemZ/fp-sqrt-03.ll b/test/CodeGen/SystemZ/fp-sqrt-03.ll new file mode 100644 index 0000000..1b49af4 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-sqrt-03.ll @@ -0,0 +1,20 @@ +; Test 128-bit square root. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare fp128 @llvm.sqrt.f128(fp128 %f) + +; There's no memory form of SQXBR. +define void @f1(fp128 *%ptr) { +; CHECK: f1: +; CHECK: ld %f0, 0(%r2) +; CHECK: ld %f2, 8(%r2) +; CHECK: sqxbr %f0, %f0 +; CHECK: std %f0, 0(%r2) +; CHECK: std %f2, 8(%r2) +; CHECK: br %r14 + %orig = load fp128 *%ptr + %sqrt = call fp128 @llvm.sqrt.f128(fp128 %orig) + store fp128 %sqrt, fp128 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/fp-sub-01.ll b/test/CodeGen/SystemZ/fp-sub-01.ll new file mode 100644 index 0000000..b03f04b --- /dev/null +++ b/test/CodeGen/SystemZ/fp-sub-01.ll @@ -0,0 +1,71 @@ +; Test 32-bit floating-point subtraction. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check register subtraction. 
+define float @f1(float %f1, float %f2) { +; CHECK: f1: +; CHECK: sebr %f0, %f2 +; CHECK: br %r14 + %res = fsub float %f1, %f2 + ret float %res +} + +; Check the low end of the SEB range. +define float @f2(float %f1, float *%ptr) { +; CHECK: f2: +; CHECK: seb %f0, 0(%r2) +; CHECK: br %r14 + %f2 = load float *%ptr + %res = fsub float %f1, %f2 + ret float %res +} + +; Check the high end of the aligned SEB range. +define float @f3(float %f1, float *%base) { +; CHECK: f3: +; CHECK: seb %f0, 4092(%r2) +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 1023 + %f2 = load float *%ptr + %res = fsub float %f1, %f2 + ret float %res +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define float @f4(float %f1, float *%base) { +; CHECK: f4: +; CHECK: aghi %r2, 4096 +; CHECK: seb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 1024 + %f2 = load float *%ptr + %res = fsub float %f1, %f2 + ret float %res +} + +; Check negative displacements, which also need separate address logic. +define float @f5(float %f1, float *%base) { +; CHECK: f5: +; CHECK: aghi %r2, -4 +; CHECK: seb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 -1 + %f2 = load float *%ptr + %res = fsub float %f1, %f2 + ret float %res +} + +; Check that SEB allows indices. +define float @f6(float %f1, float *%base, i64 %index) { +; CHECK: f6: +; CHECK: sllg %r1, %r3, 2 +; CHECK: seb %f0, 400(%r1,%r2) +; CHECK: br %r14 + %ptr1 = getelementptr float *%base, i64 %index + %ptr2 = getelementptr float *%ptr1, i64 100 + %f2 = load float *%ptr2 + %res = fsub float %f1, %f2 + ret float %res +} diff --git a/test/CodeGen/SystemZ/fp-sub-02.ll b/test/CodeGen/SystemZ/fp-sub-02.ll new file mode 100644 index 0000000..bf9848c --- /dev/null +++ b/test/CodeGen/SystemZ/fp-sub-02.ll @@ -0,0 +1,71 @@ +; Test 64-bit floating-point subtraction. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check register subtraction. +define double @f1(double %f1, double %f2) { +; CHECK: f1: +; CHECK: sdbr %f0, %f2 +; CHECK: br %r14 + %res = fsub double %f1, %f2 + ret double %res +} + +; Check the low end of the SDB range. +define double @f2(double %f1, double *%ptr) { +; CHECK: f2: +; CHECK: sdb %f0, 0(%r2) +; CHECK: br %r14 + %f2 = load double *%ptr + %res = fsub double %f1, %f2 + ret double %res +} + +; Check the high end of the aligned SDB range. +define double @f3(double %f1, double *%base) { +; CHECK: f3: +; CHECK: sdb %f0, 4088(%r2) +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 511 + %f2 = load double *%ptr + %res = fsub double %f1, %f2 + ret double %res +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define double @f4(double %f1, double *%base) { +; CHECK: f4: +; CHECK: aghi %r2, 4096 +; CHECK: sdb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 512 + %f2 = load double *%ptr + %res = fsub double %f1, %f2 + ret double %res +} + +; Check negative displacements, which also need separate address logic. +define double @f5(double %f1, double *%base) { +; CHECK: f5: +; CHECK: aghi %r2, -8 +; CHECK: sdb %f0, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 -1 + %f2 = load double *%ptr + %res = fsub double %f1, %f2 + ret double %res +} + +; Check that SDB allows indices. 
+define double @f6(double %f1, double *%base, i64 %index) { +; CHECK: f6: +; CHECK: sllg %r1, %r3, 3 +; CHECK: sdb %f0, 800(%r1,%r2) +; CHECK: br %r14 + %ptr1 = getelementptr double *%base, i64 %index + %ptr2 = getelementptr double *%ptr1, i64 100 + %f2 = load double *%ptr2 + %res = fsub double %f1, %f2 + ret double %res +} diff --git a/test/CodeGen/SystemZ/fp-sub-03.ll b/test/CodeGen/SystemZ/fp-sub-03.ll new file mode 100644 index 0000000..82bb94d --- /dev/null +++ b/test/CodeGen/SystemZ/fp-sub-03.ll @@ -0,0 +1,20 @@ +; Test 128-bit floating-point subtraction. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; There is no memory form of 128-bit subtraction. +define void @f1(fp128 *%ptr, float %f2) { +; CHECK: f1: +; CHECK: lxebr %f0, %f0 +; CHECK: ld %f1, 0(%r2) +; CHECK: ld %f3, 8(%r2) +; CHECK: sxbr %f1, %f0 +; CHECK: std %f1, 0(%r2) +; CHECK: std %f3, 8(%r2) +; CHECK: br %r14 + %f1 = load fp128 *%ptr + %f2x = fpext float %f2 to fp128 + %sum = fsub fp128 %f1, %f2x + store fp128 %sum, fp128 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/frame-01.ll b/test/CodeGen/SystemZ/frame-01.ll new file mode 100644 index 0000000..0d34312 --- /dev/null +++ b/test/CodeGen/SystemZ/frame-01.ll @@ -0,0 +1,110 @@ +; Test the allocation of frames in cases where we do not need to save +; registers in the prologue. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; The CFA offset is 160 (the caller-allocated part of the frame) + 168. +define void @f1(i64 %x) { +; CHECK: f1: +; CHECK: aghi %r15, -168 +; CHECK: .cfi_def_cfa_offset 328 +; CHECK: stg %r2, 160(%r15) +; CHECK: aghi %r15, 168 +; CHECK: br %r14 + %y = alloca i64, align 8 + store volatile i64 %x, i64* %y + ret void +} + +; Check frames of size 32760, which is the largest size that can be both +; allocated and freed using AGHI. This size is big enough to require +; an emergency spill slot at 160(%r15), for instructions with unsigned +; 12-bit offsets that end up being out of range. 
Fill the remaining +; 32760 - 168 bytes by allocating (32760 - 168) / 8 = 4074 doublewords. +define void @f2(i64 %x) { +; CHECK: f2: +; CHECK: aghi %r15, -32760 +; CHECK: .cfi_def_cfa_offset 32920 +; CHECK: stg %r2, 168(%r15) +; CHECK: aghi %r15, 32760 +; CHECK: br %r14 + %y = alloca [4074 x i64], align 8 + %ptr = getelementptr inbounds [4074 x i64]* %y, i64 0, i64 0 + store volatile i64 %x, i64* %ptr + ret void +} + +; Allocate one more doubleword. This is the one frame size that we can +; allocate using AGHI but must free using AGFI. +define void @f3(i64 %x) { +; CHECK: f3: +; CHECK: aghi %r15, -32768 +; CHECK: .cfi_def_cfa_offset 32928 +; CHECK: stg %r2, 168(%r15) +; CHECK: agfi %r15, 32768 +; CHECK: br %r14 + %y = alloca [4075 x i64], align 8 + %ptr = getelementptr inbounds [4075 x i64]* %y, i64 0, i64 0 + store volatile i64 %x, i64* %ptr + ret void +} + +; Allocate another doubleword on top of that. The allocation and free +; must both use AGFI. +define void @f4(i64 %x) { +; CHECK: f4: +; CHECK: agfi %r15, -32776 +; CHECK: .cfi_def_cfa_offset 32936 +; CHECK: stg %r2, 168(%r15) +; CHECK: agfi %r15, 32776 +; CHECK: br %r14 + %y = alloca [4076 x i64], align 8 + %ptr = getelementptr inbounds [4076 x i64]* %y, i64 0, i64 0 + store volatile i64 %x, i64* %ptr + ret void +} + +; The largest size that can be both allocated and freed using AGFI. +; At this point the frame is too big to represent properly in the CFI. +define void @f5(i64 %x) { +; CHECK: f5: +; CHECK: agfi %r15, -2147483640 +; CHECK: stg %r2, 168(%r15) +; CHECK: agfi %r15, 2147483640 +; CHECK: br %r14 + %y = alloca [268435434 x i64], align 8 + %ptr = getelementptr inbounds [268435434 x i64]* %y, i64 0, i64 0 + store volatile i64 %x, i64* %ptr + ret void +} + +; The only frame size that can be allocated using a single AGFI but which +; must be freed using two instructions. 
+define void @f6(i64 %x) { +; CHECK: f6: +; CHECK: agfi %r15, -2147483648 +; CHECK: stg %r2, 168(%r15) +; CHECK: agfi %r15, 2147483640 +; CHECK: aghi %r15, 8 +; CHECK: br %r14 + %y = alloca [268435435 x i64], align 8 + %ptr = getelementptr inbounds [268435435 x i64]* %y, i64 0, i64 0 + store volatile i64 %x, i64* %ptr + ret void +} + +; The smallest frame size that needs two instructions to both allocate +; and free the frame. +define void @f7(i64 %x) { +; CHECK: f7: +; CHECK: agfi %r15, -2147483648 +; CHECK: aghi %r15, -8 +; CHECK: stg %r2, 168(%r15) +; CHECK: agfi %r15, 2147483640 +; CHECK: aghi %r15, 16 +; CHECK: br %r14 + %y = alloca [268435436 x i64], align 8 + %ptr = getelementptr inbounds [268435436 x i64]* %y, i64 0, i64 0 + store volatile i64 %x, i64* %ptr + ret void +} diff --git a/test/CodeGen/SystemZ/frame-02.ll b/test/CodeGen/SystemZ/frame-02.ll new file mode 100644 index 0000000..589703e --- /dev/null +++ b/test/CodeGen/SystemZ/frame-02.ll @@ -0,0 +1,257 @@ +; Test saving and restoring of call-saved FPRs. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; This function should require all FPRs, but no other spill slots. +; We need to save and restore 8 of the 16 FPRs, so the frame size +; should be exactly 160 + 8 * 8 = 224. The CFA offset is 160 +; (the caller-allocated part of the frame) + 224. +define void @f1(float *%ptr) { +; CHECK: f1: +; CHECK: aghi %r15, -224 +; CHECK: .cfi_def_cfa_offset 384 +; CHECK: std %f8, 216(%r15) +; CHECK: std %f9, 208(%r15) +; CHECK: std %f10, 200(%r15) +; CHECK: std %f11, 192(%r15) +; CHECK: std %f12, 184(%r15) +; CHECK: std %f13, 176(%r15) +; CHECK: std %f14, 168(%r15) +; CHECK: std %f15, 160(%r15) +; CHECK: .cfi_offset %f8, -168 +; CHECK: .cfi_offset %f9, -176 +; CHECK: .cfi_offset %f10, -184 +; CHECK: .cfi_offset %f11, -192 +; CHECK: .cfi_offset %f12, -200 +; CHECK: .cfi_offset %f13, -208 +; CHECK: .cfi_offset %f14, -216 +; CHECK: .cfi_offset %f15, -224 +; ...main function body... 
+; CHECK: ld %f8, 216(%r15) +; CHECK: ld %f9, 208(%r15) +; CHECK: ld %f10, 200(%r15) +; CHECK: ld %f11, 192(%r15) +; CHECK: ld %f12, 184(%r15) +; CHECK: ld %f13, 176(%r15) +; CHECK: ld %f14, 168(%r15) +; CHECK: ld %f15, 160(%r15) +; CHECK: aghi %r15, 224 +; CHECK: br %r14 + %l0 = load volatile float *%ptr + %l1 = load volatile float *%ptr + %l2 = load volatile float *%ptr + %l3 = load volatile float *%ptr + %l4 = load volatile float *%ptr + %l5 = load volatile float *%ptr + %l6 = load volatile float *%ptr + %l7 = load volatile float *%ptr + %l8 = load volatile float *%ptr + %l9 = load volatile float *%ptr + %l10 = load volatile float *%ptr + %l11 = load volatile float *%ptr + %l12 = load volatile float *%ptr + %l13 = load volatile float *%ptr + %l14 = load volatile float *%ptr + %l15 = load volatile float *%ptr + %add0 = fadd float %l0, %l0 + %add1 = fadd float %l1, %add0 + %add2 = fadd float %l2, %add1 + %add3 = fadd float %l3, %add2 + %add4 = fadd float %l4, %add3 + %add5 = fadd float %l5, %add4 + %add6 = fadd float %l6, %add5 + %add7 = fadd float %l7, %add6 + %add8 = fadd float %l8, %add7 + %add9 = fadd float %l9, %add8 + %add10 = fadd float %l10, %add9 + %add11 = fadd float %l11, %add10 + %add12 = fadd float %l12, %add11 + %add13 = fadd float %l13, %add12 + %add14 = fadd float %l14, %add13 + %add15 = fadd float %l15, %add14 + store volatile float %add0, float *%ptr + store volatile float %add1, float *%ptr + store volatile float %add2, float *%ptr + store volatile float %add3, float *%ptr + store volatile float %add4, float *%ptr + store volatile float %add5, float *%ptr + store volatile float %add6, float *%ptr + store volatile float %add7, float *%ptr + store volatile float %add8, float *%ptr + store volatile float %add9, float *%ptr + store volatile float %add10, float *%ptr + store volatile float %add11, float *%ptr + store volatile float %add12, float *%ptr + store volatile float %add13, float *%ptr + store volatile float %add14, float *%ptr + store 
volatile float %add15, float *%ptr + ret void +} + +; Like f1, but requires one fewer FPR. We allocate in numerical order, +; so %f15 is the one that gets dropped. +define void @f2(float *%ptr) { +; CHECK: f2: +; CHECK: aghi %r15, -216 +; CHECK: .cfi_def_cfa_offset 376 +; CHECK: std %f8, 208(%r15) +; CHECK: std %f9, 200(%r15) +; CHECK: std %f10, 192(%r15) +; CHECK: std %f11, 184(%r15) +; CHECK: std %f12, 176(%r15) +; CHECK: std %f13, 168(%r15) +; CHECK: std %f14, 160(%r15) +; CHECK: .cfi_offset %f8, -168 +; CHECK: .cfi_offset %f9, -176 +; CHECK: .cfi_offset %f10, -184 +; CHECK: .cfi_offset %f11, -192 +; CHECK: .cfi_offset %f12, -200 +; CHECK: .cfi_offset %f13, -208 +; CHECK: .cfi_offset %f14, -216 +; CHECK-NOT: %f15 +; ...main function body... +; CHECK: ld %f8, 208(%r15) +; CHECK: ld %f9, 200(%r15) +; CHECK: ld %f10, 192(%r15) +; CHECK: ld %f11, 184(%r15) +; CHECK: ld %f12, 176(%r15) +; CHECK: ld %f13, 168(%r15) +; CHECK: ld %f14, 160(%r15) +; CHECK: aghi %r15, 216 +; CHECK: br %r14 + %l0 = load volatile float *%ptr + %l1 = load volatile float *%ptr + %l2 = load volatile float *%ptr + %l3 = load volatile float *%ptr + %l4 = load volatile float *%ptr + %l5 = load volatile float *%ptr + %l6 = load volatile float *%ptr + %l7 = load volatile float *%ptr + %l8 = load volatile float *%ptr + %l9 = load volatile float *%ptr + %l10 = load volatile float *%ptr + %l11 = load volatile float *%ptr + %l12 = load volatile float *%ptr + %l13 = load volatile float *%ptr + %l14 = load volatile float *%ptr + %add0 = fadd float %l0, %l0 + %add1 = fadd float %l1, %add0 + %add2 = fadd float %l2, %add1 + %add3 = fadd float %l3, %add2 + %add4 = fadd float %l4, %add3 + %add5 = fadd float %l5, %add4 + %add6 = fadd float %l6, %add5 + %add7 = fadd float %l7, %add6 + %add8 = fadd float %l8, %add7 + %add9 = fadd float %l9, %add8 + %add10 = fadd float %l10, %add9 + %add11 = fadd float %l11, %add10 + %add12 = fadd float %l12, %add11 + %add13 = fadd float %l13, %add12 + %add14 = fadd float %l14, 
%add13 + store volatile float %add0, float *%ptr + store volatile float %add1, float *%ptr + store volatile float %add2, float *%ptr + store volatile float %add3, float *%ptr + store volatile float %add4, float *%ptr + store volatile float %add5, float *%ptr + store volatile float %add6, float *%ptr + store volatile float %add7, float *%ptr + store volatile float %add8, float *%ptr + store volatile float %add9, float *%ptr + store volatile float %add10, float *%ptr + store volatile float %add11, float *%ptr + store volatile float %add12, float *%ptr + store volatile float %add13, float *%ptr + store volatile float %add14, float *%ptr + ret void +} + +; Like f1, but should require only one call-saved FPR. +define void @f3(float *%ptr) { +; CHECK: f3: +; CHECK: aghi %r15, -168 +; CHECK: .cfi_def_cfa_offset 328 +; CHECK: std %f8, 160(%r15) +; CHECK: .cfi_offset %f8, -168 +; CHECK-NOT: %f9 +; CHECK-NOT: %f10 +; CHECK-NOT: %f11 +; CHECK-NOT: %f12 +; CHECK-NOT: %f13 +; CHECK-NOT: %f14 +; CHECK-NOT: %f15 +; ...main function body... 
+; CHECK: ld %f8, 160(%r15) +; CHECK: aghi %r15, 168 +; CHECK: br %r14 + %l0 = load volatile float *%ptr + %l1 = load volatile float *%ptr + %l2 = load volatile float *%ptr + %l3 = load volatile float *%ptr + %l4 = load volatile float *%ptr + %l5 = load volatile float *%ptr + %l6 = load volatile float *%ptr + %l7 = load volatile float *%ptr + %l8 = load volatile float *%ptr + %add0 = fadd float %l0, %l0 + %add1 = fadd float %l1, %add0 + %add2 = fadd float %l2, %add1 + %add3 = fadd float %l3, %add2 + %add4 = fadd float %l4, %add3 + %add5 = fadd float %l5, %add4 + %add6 = fadd float %l6, %add5 + %add7 = fadd float %l7, %add6 + %add8 = fadd float %l8, %add7 + store volatile float %add0, float *%ptr + store volatile float %add1, float *%ptr + store volatile float %add2, float *%ptr + store volatile float %add3, float *%ptr + store volatile float %add4, float *%ptr + store volatile float %add5, float *%ptr + store volatile float %add6, float *%ptr + store volatile float %add7, float *%ptr + store volatile float %add8, float *%ptr + ret void +} + +; This function should use all call-clobbered FPRs but no call-saved ones. +; It shouldn't need to create a frame. 
+define void @f4(float *%ptr) { +; CHECK: f4: +; CHECK-NOT: %r15 +; CHECK-NOT: %f8 +; CHECK-NOT: %f9 +; CHECK-NOT: %f10 +; CHECK-NOT: %f11 +; CHECK-NOT: %f12 +; CHECK-NOT: %f13 +; CHECK-NOT: %f14 +; CHECK-NOT: %f15 +; CHECK: br %r14 + %l0 = load volatile float *%ptr + %l1 = load volatile float *%ptr + %l2 = load volatile float *%ptr + %l3 = load volatile float *%ptr + %l4 = load volatile float *%ptr + %l5 = load volatile float *%ptr + %l6 = load volatile float *%ptr + %l7 = load volatile float *%ptr + %add0 = fadd float %l0, %l0 + %add1 = fadd float %l1, %add0 + %add2 = fadd float %l2, %add1 + %add3 = fadd float %l3, %add2 + %add4 = fadd float %l4, %add3 + %add5 = fadd float %l5, %add4 + %add6 = fadd float %l6, %add5 + %add7 = fadd float %l7, %add6 + store volatile float %add0, float *%ptr + store volatile float %add1, float *%ptr + store volatile float %add2, float *%ptr + store volatile float %add3, float *%ptr + store volatile float %add4, float *%ptr + store volatile float %add5, float *%ptr + store volatile float %add6, float *%ptr + store volatile float %add7, float *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/frame-03.ll b/test/CodeGen/SystemZ/frame-03.ll new file mode 100644 index 0000000..3c4a499 --- /dev/null +++ b/test/CodeGen/SystemZ/frame-03.ll @@ -0,0 +1,259 @@ +; Like frame-02.ll, but with doubles rather than floats. Internally this +; uses a different register class, but the set of saved and restored +; registers should be the same. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; This function should require all FPRs, but no other spill slots. +; We need to save and restore 8 of the 16 FPRs, so the frame size +; should be exactly 160 + 8 * 8 = 224. The CFA offset is 160 +; (the caller-allocated part of the frame) + 224. 
+define void @f1(double *%ptr) { +; CHECK: f1: +; CHECK: aghi %r15, -224 +; CHECK: .cfi_def_cfa_offset 384 +; CHECK: std %f8, 216(%r15) +; CHECK: std %f9, 208(%r15) +; CHECK: std %f10, 200(%r15) +; CHECK: std %f11, 192(%r15) +; CHECK: std %f12, 184(%r15) +; CHECK: std %f13, 176(%r15) +; CHECK: std %f14, 168(%r15) +; CHECK: std %f15, 160(%r15) +; CHECK: .cfi_offset %f8, -168 +; CHECK: .cfi_offset %f9, -176 +; CHECK: .cfi_offset %f10, -184 +; CHECK: .cfi_offset %f11, -192 +; CHECK: .cfi_offset %f12, -200 +; CHECK: .cfi_offset %f13, -208 +; CHECK: .cfi_offset %f14, -216 +; CHECK: .cfi_offset %f15, -224 +; ...main function body... +; CHECK: ld %f8, 216(%r15) +; CHECK: ld %f9, 208(%r15) +; CHECK: ld %f10, 200(%r15) +; CHECK: ld %f11, 192(%r15) +; CHECK: ld %f12, 184(%r15) +; CHECK: ld %f13, 176(%r15) +; CHECK: ld %f14, 168(%r15) +; CHECK: ld %f15, 160(%r15) +; CHECK: aghi %r15, 224 +; CHECK: br %r14 + %l0 = load volatile double *%ptr + %l1 = load volatile double *%ptr + %l2 = load volatile double *%ptr + %l3 = load volatile double *%ptr + %l4 = load volatile double *%ptr + %l5 = load volatile double *%ptr + %l6 = load volatile double *%ptr + %l7 = load volatile double *%ptr + %l8 = load volatile double *%ptr + %l9 = load volatile double *%ptr + %l10 = load volatile double *%ptr + %l11 = load volatile double *%ptr + %l12 = load volatile double *%ptr + %l13 = load volatile double *%ptr + %l14 = load volatile double *%ptr + %l15 = load volatile double *%ptr + %add0 = fadd double %l0, %l0 + %add1 = fadd double %l1, %add0 + %add2 = fadd double %l2, %add1 + %add3 = fadd double %l3, %add2 + %add4 = fadd double %l4, %add3 + %add5 = fadd double %l5, %add4 + %add6 = fadd double %l6, %add5 + %add7 = fadd double %l7, %add6 + %add8 = fadd double %l8, %add7 + %add9 = fadd double %l9, %add8 + %add10 = fadd double %l10, %add9 + %add11 = fadd double %l11, %add10 + %add12 = fadd double %l12, %add11 + %add13 = fadd double %l13, %add12 + %add14 = fadd double %l14, %add13 + %add15 = fadd 
double %l15, %add14 + store volatile double %add0, double *%ptr + store volatile double %add1, double *%ptr + store volatile double %add2, double *%ptr + store volatile double %add3, double *%ptr + store volatile double %add4, double *%ptr + store volatile double %add5, double *%ptr + store volatile double %add6, double *%ptr + store volatile double %add7, double *%ptr + store volatile double %add8, double *%ptr + store volatile double %add9, double *%ptr + store volatile double %add10, double *%ptr + store volatile double %add11, double *%ptr + store volatile double %add12, double *%ptr + store volatile double %add13, double *%ptr + store volatile double %add14, double *%ptr + store volatile double %add15, double *%ptr + ret void +} + +; Like f1, but requires one fewer FPR. We allocate in numerical order, +; so %f15 is the one that gets dropped. +define void @f2(double *%ptr) { +; CHECK: f2: +; CHECK: aghi %r15, -216 +; CHECK: .cfi_def_cfa_offset 376 +; CHECK: std %f8, 208(%r15) +; CHECK: std %f9, 200(%r15) +; CHECK: std %f10, 192(%r15) +; CHECK: std %f11, 184(%r15) +; CHECK: std %f12, 176(%r15) +; CHECK: std %f13, 168(%r15) +; CHECK: std %f14, 160(%r15) +; CHECK: .cfi_offset %f8, -168 +; CHECK: .cfi_offset %f9, -176 +; CHECK: .cfi_offset %f10, -184 +; CHECK: .cfi_offset %f11, -192 +; CHECK: .cfi_offset %f12, -200 +; CHECK: .cfi_offset %f13, -208 +; CHECK: .cfi_offset %f14, -216 +; CHECK-NOT: %f15 +; ...main function body... 
+; CHECK: ld %f8, 208(%r15) +; CHECK: ld %f9, 200(%r15) +; CHECK: ld %f10, 192(%r15) +; CHECK: ld %f11, 184(%r15) +; CHECK: ld %f12, 176(%r15) +; CHECK: ld %f13, 168(%r15) +; CHECK: ld %f14, 160(%r15) +; CHECK: aghi %r15, 216 +; CHECK: br %r14 + %l0 = load volatile double *%ptr + %l1 = load volatile double *%ptr + %l2 = load volatile double *%ptr + %l3 = load volatile double *%ptr + %l4 = load volatile double *%ptr + %l5 = load volatile double *%ptr + %l6 = load volatile double *%ptr + %l7 = load volatile double *%ptr + %l8 = load volatile double *%ptr + %l9 = load volatile double *%ptr + %l10 = load volatile double *%ptr + %l11 = load volatile double *%ptr + %l12 = load volatile double *%ptr + %l13 = load volatile double *%ptr + %l14 = load volatile double *%ptr + %add0 = fadd double %l0, %l0 + %add1 = fadd double %l1, %add0 + %add2 = fadd double %l2, %add1 + %add3 = fadd double %l3, %add2 + %add4 = fadd double %l4, %add3 + %add5 = fadd double %l5, %add4 + %add6 = fadd double %l6, %add5 + %add7 = fadd double %l7, %add6 + %add8 = fadd double %l8, %add7 + %add9 = fadd double %l9, %add8 + %add10 = fadd double %l10, %add9 + %add11 = fadd double %l11, %add10 + %add12 = fadd double %l12, %add11 + %add13 = fadd double %l13, %add12 + %add14 = fadd double %l14, %add13 + store volatile double %add0, double *%ptr + store volatile double %add1, double *%ptr + store volatile double %add2, double *%ptr + store volatile double %add3, double *%ptr + store volatile double %add4, double *%ptr + store volatile double %add5, double *%ptr + store volatile double %add6, double *%ptr + store volatile double %add7, double *%ptr + store volatile double %add8, double *%ptr + store volatile double %add9, double *%ptr + store volatile double %add10, double *%ptr + store volatile double %add11, double *%ptr + store volatile double %add12, double *%ptr + store volatile double %add13, double *%ptr + store volatile double %add14, double *%ptr + ret void +} + +; Like f1, but should require only 
one call-saved FPR. +define void @f3(double *%ptr) { +; CHECK: f3: +; CHECK: aghi %r15, -168 +; CHECK: .cfi_def_cfa_offset 328 +; CHECK: std %f8, 160(%r15) +; CHECK: .cfi_offset %f8, -168 +; CHECK-NOT: %f9 +; CHECK-NOT: %f10 +; CHECK-NOT: %f11 +; CHECK-NOT: %f12 +; CHECK-NOT: %f13 +; CHECK-NOT: %f14 +; CHECK-NOT: %f15 +; ...main function body... +; CHECK: ld %f8, 160(%r15) +; CHECK: aghi %r15, 168 +; CHECK: br %r14 + %l0 = load volatile double *%ptr + %l1 = load volatile double *%ptr + %l2 = load volatile double *%ptr + %l3 = load volatile double *%ptr + %l4 = load volatile double *%ptr + %l5 = load volatile double *%ptr + %l6 = load volatile double *%ptr + %l7 = load volatile double *%ptr + %l8 = load volatile double *%ptr + %add0 = fadd double %l0, %l0 + %add1 = fadd double %l1, %add0 + %add2 = fadd double %l2, %add1 + %add3 = fadd double %l3, %add2 + %add4 = fadd double %l4, %add3 + %add5 = fadd double %l5, %add4 + %add6 = fadd double %l6, %add5 + %add7 = fadd double %l7, %add6 + %add8 = fadd double %l8, %add7 + store volatile double %add0, double *%ptr + store volatile double %add1, double *%ptr + store volatile double %add2, double *%ptr + store volatile double %add3, double *%ptr + store volatile double %add4, double *%ptr + store volatile double %add5, double *%ptr + store volatile double %add6, double *%ptr + store volatile double %add7, double *%ptr + store volatile double %add8, double *%ptr + ret void +} + +; This function should use all call-clobbered FPRs but no call-saved ones. +; It shouldn't need to create a frame. 
+define void @f4(double *%ptr) { +; CHECK: f4: +; CHECK-NOT: %r15 +; CHECK-NOT: %f8 +; CHECK-NOT: %f9 +; CHECK-NOT: %f10 +; CHECK-NOT: %f11 +; CHECK-NOT: %f12 +; CHECK-NOT: %f13 +; CHECK-NOT: %f14 +; CHECK-NOT: %f15 +; CHECK: br %r14 + %l0 = load volatile double *%ptr + %l1 = load volatile double *%ptr + %l2 = load volatile double *%ptr + %l3 = load volatile double *%ptr + %l4 = load volatile double *%ptr + %l5 = load volatile double *%ptr + %l6 = load volatile double *%ptr + %l7 = load volatile double *%ptr + %add0 = fadd double %l0, %l0 + %add1 = fadd double %l1, %add0 + %add2 = fadd double %l2, %add1 + %add3 = fadd double %l3, %add2 + %add4 = fadd double %l4, %add3 + %add5 = fadd double %l5, %add4 + %add6 = fadd double %l6, %add5 + %add7 = fadd double %l7, %add6 + store volatile double %add0, double *%ptr + store volatile double %add1, double *%ptr + store volatile double %add2, double *%ptr + store volatile double %add3, double *%ptr + store volatile double %add4, double *%ptr + store volatile double %add5, double *%ptr + store volatile double %add6, double *%ptr + store volatile double %add7, double *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/frame-04.ll b/test/CodeGen/SystemZ/frame-04.ll new file mode 100644 index 0000000..360f85c --- /dev/null +++ b/test/CodeGen/SystemZ/frame-04.ll @@ -0,0 +1,187 @@ +; Like frame-02.ll, but with long doubles rather than floats. Some of the +; cases are slightly different because we need to allocate pairs of FPRs. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; This function should require all FPRs, but no other spill slots. +; We need to save and restore 8 of the 16 FPRs, so the frame size +; should be exactly 160 + 8 * 8 = 224. The CFA offset is 160 +; (the caller-allocated part of the frame) + 224. 
+define void @f1(fp128 *%ptr) { +; CHECK: f1: +; CHECK: aghi %r15, -224 +; CHECK: .cfi_def_cfa_offset 384 +; CHECK: std %f8, 216(%r15) +; CHECK: std %f9, 208(%r15) +; CHECK: std %f10, 200(%r15) +; CHECK: std %f11, 192(%r15) +; CHECK: std %f12, 184(%r15) +; CHECK: std %f13, 176(%r15) +; CHECK: std %f14, 168(%r15) +; CHECK: std %f15, 160(%r15) +; CHECK: .cfi_offset %f8, -168 +; CHECK: .cfi_offset %f9, -176 +; CHECK: .cfi_offset %f10, -184 +; CHECK: .cfi_offset %f11, -192 +; CHECK: .cfi_offset %f12, -200 +; CHECK: .cfi_offset %f13, -208 +; CHECK: .cfi_offset %f14, -216 +; CHECK: .cfi_offset %f15, -224 +; ...main function body... +; CHECK: ld %f8, 216(%r15) +; CHECK: ld %f9, 208(%r15) +; CHECK: ld %f10, 200(%r15) +; CHECK: ld %f11, 192(%r15) +; CHECK: ld %f12, 184(%r15) +; CHECK: ld %f13, 176(%r15) +; CHECK: ld %f14, 168(%r15) +; CHECK: ld %f15, 160(%r15) +; CHECK: aghi %r15, 224 +; CHECK: br %r14 + %l0 = load volatile fp128 *%ptr + %l1 = load volatile fp128 *%ptr + %l4 = load volatile fp128 *%ptr + %l5 = load volatile fp128 *%ptr + %l8 = load volatile fp128 *%ptr + %l9 = load volatile fp128 *%ptr + %l12 = load volatile fp128 *%ptr + %l13 = load volatile fp128 *%ptr + %add0 = fadd fp128 %l0, %l0 + %add1 = fadd fp128 %l1, %add0 + %add4 = fadd fp128 %l4, %add1 + %add5 = fadd fp128 %l5, %add4 + %add8 = fadd fp128 %l8, %add5 + %add9 = fadd fp128 %l9, %add8 + %add12 = fadd fp128 %l12, %add9 + %add13 = fadd fp128 %l13, %add12 + store volatile fp128 %add0, fp128 *%ptr + store volatile fp128 %add1, fp128 *%ptr + store volatile fp128 %add4, fp128 *%ptr + store volatile fp128 %add5, fp128 *%ptr + store volatile fp128 %add8, fp128 *%ptr + store volatile fp128 %add9, fp128 *%ptr + store volatile fp128 %add12, fp128 *%ptr + store volatile fp128 %add13, fp128 *%ptr + ret void +} + +; Like f1, but requires one fewer FPR pair. We allocate in numerical order, +; so %f13+%f15 is the pair that gets dropped. 
+define void @f2(fp128 *%ptr) { +; CHECK: f2: +; CHECK: aghi %r15, -208 +; CHECK: .cfi_def_cfa_offset 368 +; CHECK: std %f8, 200(%r15) +; CHECK: std %f9, 192(%r15) +; CHECK: std %f10, 184(%r15) +; CHECK: std %f11, 176(%r15) +; CHECK: std %f12, 168(%r15) +; CHECK: std %f14, 160(%r15) +; CHECK: .cfi_offset %f8, -168 +; CHECK: .cfi_offset %f9, -176 +; CHECK: .cfi_offset %f10, -184 +; CHECK: .cfi_offset %f11, -192 +; CHECK: .cfi_offset %f12, -200 +; CHECK: .cfi_offset %f14, -208 +; CHECK-NOT: %f13 +; CHECK-NOT: %f15 +; ...main function body... +; CHECK: ld %f8, 200(%r15) +; CHECK: ld %f9, 192(%r15) +; CHECK: ld %f10, 184(%r15) +; CHECK: ld %f11, 176(%r15) +; CHECK: ld %f12, 168(%r15) +; CHECK: ld %f14, 160(%r15) +; CHECK: aghi %r15, 208 +; CHECK: br %r14 + %l0 = load volatile fp128 *%ptr + %l1 = load volatile fp128 *%ptr + %l4 = load volatile fp128 *%ptr + %l5 = load volatile fp128 *%ptr + %l8 = load volatile fp128 *%ptr + %l9 = load volatile fp128 *%ptr + %l12 = load volatile fp128 *%ptr + %add0 = fadd fp128 %l0, %l0 + %add1 = fadd fp128 %l1, %add0 + %add4 = fadd fp128 %l4, %add1 + %add5 = fadd fp128 %l5, %add4 + %add8 = fadd fp128 %l8, %add5 + %add9 = fadd fp128 %l9, %add8 + %add12 = fadd fp128 %l12, %add9 + store volatile fp128 %add0, fp128 *%ptr + store volatile fp128 %add1, fp128 *%ptr + store volatile fp128 %add4, fp128 *%ptr + store volatile fp128 %add5, fp128 *%ptr + store volatile fp128 %add8, fp128 *%ptr + store volatile fp128 %add9, fp128 *%ptr + store volatile fp128 %add12, fp128 *%ptr + ret void +} + +; Like f1, but requires only one call-saved FPR pair. We allocate in +; numerical order so the pair should be %f8+%f10. 
+define void @f3(fp128 *%ptr) { +; CHECK: f3: +; CHECK: aghi %r15, -176 +; CHECK: .cfi_def_cfa_offset 336 +; CHECK: std %f8, 168(%r15) +; CHECK: std %f10, 160(%r15) +; CHECK: .cfi_offset %f8, -168 +; CHECK: .cfi_offset %f10, -176 +; CHECK-NOT: %f9 +; CHECK-NOT: %f11 +; CHECK-NOT: %f12 +; CHECK-NOT: %f13 +; CHECK-NOT: %f14 +; CHECK-NOT: %f15 +; ...main function body... +; CHECK: ld %f8, 168(%r15) +; CHECK: ld %f10, 160(%r15) +; CHECK: aghi %r15, 176 +; CHECK: br %r14 + %l0 = load volatile fp128 *%ptr + %l1 = load volatile fp128 *%ptr + %l4 = load volatile fp128 *%ptr + %l5 = load volatile fp128 *%ptr + %l8 = load volatile fp128 *%ptr + %add0 = fadd fp128 %l0, %l0 + %add1 = fadd fp128 %l1, %add0 + %add4 = fadd fp128 %l4, %add1 + %add5 = fadd fp128 %l5, %add4 + %add8 = fadd fp128 %l8, %add5 + store volatile fp128 %add0, fp128 *%ptr + store volatile fp128 %add1, fp128 *%ptr + store volatile fp128 %add4, fp128 *%ptr + store volatile fp128 %add5, fp128 *%ptr + store volatile fp128 %add8, fp128 *%ptr + ret void +} + +; This function should use all call-clobbered FPRs but no call-saved ones. +; It shouldn't need to create a frame. 
+define void @f4(fp128 *%ptr) { +; CHECK: f4: +; CHECK-NOT: %r15 +; CHECK-NOT: %f8 +; CHECK-NOT: %f9 +; CHECK-NOT: %f10 +; CHECK-NOT: %f11 +; CHECK-NOT: %f12 +; CHECK-NOT: %f13 +; CHECK-NOT: %f14 +; CHECK-NOT: %f15 +; CHECK: br %r14 + %l0 = load volatile fp128 *%ptr + %l1 = load volatile fp128 *%ptr + %l4 = load volatile fp128 *%ptr + %l5 = load volatile fp128 *%ptr + %add0 = fadd fp128 %l0, %l0 + %add1 = fadd fp128 %l1, %add0 + %add4 = fadd fp128 %l4, %add1 + %add5 = fadd fp128 %l5, %add4 + store volatile fp128 %add0, fp128 *%ptr + store volatile fp128 %add1, fp128 *%ptr + store volatile fp128 %add4, fp128 *%ptr + store volatile fp128 %add5, fp128 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/frame-05.ll b/test/CodeGen/SystemZ/frame-05.ll new file mode 100644 index 0000000..3a159fc --- /dev/null +++ b/test/CodeGen/SystemZ/frame-05.ll @@ -0,0 +1,219 @@ +; Test saving and restoring of call-saved GPRs. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; This function should require all GPRs, but no other spill slots. The caller +; allocates room for the GPR save slots, so we shouldn't need to allocate any +; extra space. +; +; The function only modifies the low 32 bits of each register, which in +; itself would allow STM and LM to be used instead of STMG and LMG. +; However, the ABI defines the offset of each register, so we always +; use the 64-bit form. +; +; Use a different address for the final store, so that we can check that +; %r15 isn't referenced again until after that. +define void @f1(i32 *%ptr) { +; CHECK: f1: +; CHECK: stmg %r6, %r15, 48(%r15) +; CHECK-NOT: %r15 +; CHECK: .cfi_offset %r6, -112 +; CHECK: .cfi_offset %r7, -104 +; CHECK: .cfi_offset %r8, -96 +; CHECK: .cfi_offset %r9, -88 +; CHECK: .cfi_offset %r10, -80 +; CHECK: .cfi_offset %r11, -72 +; CHECK: .cfi_offset %r12, -64 +; CHECK: .cfi_offset %r13, -56 +; CHECK: .cfi_offset %r14, -48 +; CHECK: .cfi_offset %r15, -40 +; ...main function body... 
+; CHECK-NOT: %r15 +; CHECK: st {{.*}}, 4(%r2) +; CHECK: lmg %r6, %r15, 48(%r15) +; CHECK: br %r14 + %l0 = load volatile i32 *%ptr + %l1 = load volatile i32 *%ptr + %l3 = load volatile i32 *%ptr + %l4 = load volatile i32 *%ptr + %l5 = load volatile i32 *%ptr + %l6 = load volatile i32 *%ptr + %l7 = load volatile i32 *%ptr + %l8 = load volatile i32 *%ptr + %l9 = load volatile i32 *%ptr + %l10 = load volatile i32 *%ptr + %l11 = load volatile i32 *%ptr + %l12 = load volatile i32 *%ptr + %l13 = load volatile i32 *%ptr + %l14 = load volatile i32 *%ptr + %add0 = add i32 %l0, %l0 + %add1 = add i32 %l1, %add0 + %add3 = add i32 %l3, %add1 + %add4 = add i32 %l4, %add3 + %add5 = add i32 %l5, %add4 + %add6 = add i32 %l6, %add5 + %add7 = add i32 %l7, %add6 + %add8 = add i32 %l8, %add7 + %add9 = add i32 %l9, %add8 + %add10 = add i32 %l10, %add9 + %add11 = add i32 %l11, %add10 + %add12 = add i32 %l12, %add11 + %add13 = add i32 %l13, %add12 + %add14 = add i32 %l14, %add13 + store volatile i32 %add0, i32 *%ptr + store volatile i32 %add1, i32 *%ptr + store volatile i32 %add3, i32 *%ptr + store volatile i32 %add4, i32 *%ptr + store volatile i32 %add5, i32 *%ptr + store volatile i32 %add6, i32 *%ptr + store volatile i32 %add7, i32 *%ptr + store volatile i32 %add8, i32 *%ptr + store volatile i32 %add9, i32 *%ptr + store volatile i32 %add10, i32 *%ptr + store volatile i32 %add11, i32 *%ptr + store volatile i32 %add12, i32 *%ptr + store volatile i32 %add13, i32 *%ptr + %final = getelementptr i32 *%ptr, i32 1 + store volatile i32 %add14, i32 *%final + ret void +} + +; Like f1, but requires one fewer GPR. We allocate the call-saved GPRs +; from %r14 down, so that the STMG/LMG sequences aren't any longer than +; they need to be. 
+define void @f2(i32 *%ptr) { +; CHECK: f2: +; CHECK: stmg %r7, %r15, 56(%r15) +; CHECK-NOT: %r15 +; CHECK: .cfi_offset %r7, -104 +; CHECK: .cfi_offset %r8, -96 +; CHECK: .cfi_offset %r9, -88 +; CHECK: .cfi_offset %r10, -80 +; CHECK: .cfi_offset %r11, -72 +; CHECK: .cfi_offset %r12, -64 +; CHECK: .cfi_offset %r13, -56 +; CHECK: .cfi_offset %r14, -48 +; CHECK: .cfi_offset %r15, -40 +; ...main function body... +; CHECK-NOT: %r15 +; CHECK-NOT: %r6 +; CHECK: st {{.*}}, 4(%r2) +; CHECK: lmg %r7, %r15, 56(%r15) +; CHECK: br %r14 + %l0 = load volatile i32 *%ptr + %l1 = load volatile i32 *%ptr + %l3 = load volatile i32 *%ptr + %l4 = load volatile i32 *%ptr + %l5 = load volatile i32 *%ptr + %l7 = load volatile i32 *%ptr + %l8 = load volatile i32 *%ptr + %l9 = load volatile i32 *%ptr + %l10 = load volatile i32 *%ptr + %l11 = load volatile i32 *%ptr + %l12 = load volatile i32 *%ptr + %l13 = load volatile i32 *%ptr + %l14 = load volatile i32 *%ptr + %add0 = add i32 %l0, %l0 + %add1 = add i32 %l1, %add0 + %add3 = add i32 %l3, %add1 + %add4 = add i32 %l4, %add3 + %add5 = add i32 %l5, %add4 + %add7 = add i32 %l7, %add5 + %add8 = add i32 %l8, %add7 + %add9 = add i32 %l9, %add8 + %add10 = add i32 %l10, %add9 + %add11 = add i32 %l11, %add10 + %add12 = add i32 %l12, %add11 + %add13 = add i32 %l13, %add12 + %add14 = add i32 %l14, %add13 + store volatile i32 %add0, i32 *%ptr + store volatile i32 %add1, i32 *%ptr + store volatile i32 %add3, i32 *%ptr + store volatile i32 %add4, i32 *%ptr + store volatile i32 %add5, i32 *%ptr + store volatile i32 %add7, i32 *%ptr + store volatile i32 %add8, i32 *%ptr + store volatile i32 %add9, i32 *%ptr + store volatile i32 %add10, i32 *%ptr + store volatile i32 %add11, i32 *%ptr + store volatile i32 %add12, i32 *%ptr + store volatile i32 %add13, i32 *%ptr + %final = getelementptr i32 *%ptr, i32 1 + store volatile i32 %add14, i32 *%final + ret void +} + +; Like f1, but only needs one call-saved GPR, which ought to be %r14. 
+define void @f3(i32 *%ptr) { +; CHECK: f3: +; CHECK: stmg %r14, %r15, 112(%r15) +; CHECK-NOT: %r15 +; CHECK: .cfi_offset %r14, -48 +; CHECK: .cfi_offset %r15, -40 +; ...main function body... +; CHECK-NOT: %r15 +; CHECK-NOT: %r6 +; CHECK-NOT: %r7 +; CHECK-NOT: %r8 +; CHECK-NOT: %r9 +; CHECK-NOT: %r10 +; CHECK-NOT: %r11 +; CHECK-NOT: %r12 +; CHECK-NOT: %r13 +; CHECK: st {{.*}}, 4(%r2) +; CHECK: lmg %r14, %r15, 112(%r15) +; CHECK: br %r14 + %l0 = load volatile i32 *%ptr + %l1 = load volatile i32 *%ptr + %l3 = load volatile i32 *%ptr + %l4 = load volatile i32 *%ptr + %l5 = load volatile i32 *%ptr + %l14 = load volatile i32 *%ptr + %add0 = add i32 %l0, %l0 + %add1 = add i32 %l1, %add0 + %add3 = add i32 %l3, %add1 + %add4 = add i32 %l4, %add3 + %add5 = add i32 %l5, %add4 + %add14 = add i32 %l14, %add5 + store volatile i32 %add0, i32 *%ptr + store volatile i32 %add1, i32 *%ptr + store volatile i32 %add3, i32 *%ptr + store volatile i32 %add4, i32 *%ptr + store volatile i32 %add5, i32 *%ptr + %final = getelementptr i32 *%ptr, i32 1 + store volatile i32 %add14, i32 *%final + ret void +} + +; This function should use all call-clobbered GPRs but no call-saved ones. +; It shouldn't need to touch the stack at all. 
+define void @f4(i32 *%ptr) { +; CHECK: f4: +; CHECK-NOT: %r15 +; CHECK-NOT: %r6 +; CHECK-NOT: %r7 +; CHECK-NOT: %r8 +; CHECK-NOT: %r9 +; CHECK-NOT: %r10 +; CHECK-NOT: %r11 +; CHECK-NOT: %r12 +; CHECK-NOT: %r13 +; CHECK: br %r14 + %l0 = load volatile i32 *%ptr + %l1 = load volatile i32 *%ptr + %l3 = load volatile i32 *%ptr + %l4 = load volatile i32 *%ptr + %l5 = load volatile i32 *%ptr + %add0 = add i32 %l0, %l0 + %add1 = add i32 %l1, %add0 + %add3 = add i32 %l3, %add1 + %add4 = add i32 %l4, %add3 + %add5 = add i32 %l5, %add4 + store volatile i32 %add0, i32 *%ptr + store volatile i32 %add1, i32 *%ptr + store volatile i32 %add3, i32 *%ptr + store volatile i32 %add4, i32 *%ptr + %final = getelementptr i32 *%ptr, i32 1 + store volatile i32 %add5, i32 *%final + ret void +} diff --git a/test/CodeGen/SystemZ/frame-06.ll b/test/CodeGen/SystemZ/frame-06.ll new file mode 100644 index 0000000..4c361f1 --- /dev/null +++ b/test/CodeGen/SystemZ/frame-06.ll @@ -0,0 +1,216 @@ +; Like frame-05.ll, but with i64s rather than i32s. Internally this +; uses a different register class, but the set of saved and restored +; registers should be the same. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; This function should require all GPRs, but no other spill slots. The caller +; allocates room for the GPR save slots, so we shouldn't need to allocate any +; extra space. +; +; Use a different address for the final store, so that we can check that +; %r15 isn't referenced again until after that. +define void @f1(i64 *%ptr) { +; CHECK: f1: +; CHECK: stmg %r6, %r15, 48(%r15) +; CHECK-NOT: %r15 +; CHECK: .cfi_offset %r6, -112 +; CHECK: .cfi_offset %r7, -104 +; CHECK: .cfi_offset %r8, -96 +; CHECK: .cfi_offset %r9, -88 +; CHECK: .cfi_offset %r10, -80 +; CHECK: .cfi_offset %r11, -72 +; CHECK: .cfi_offset %r12, -64 +; CHECK: .cfi_offset %r13, -56 +; CHECK: .cfi_offset %r14, -48 +; CHECK: .cfi_offset %r15, -40 +; ...main function body... 
+; CHECK-NOT: %r15 +; CHECK: stg {{.*}}, 8(%r2) +; CHECK: lmg %r6, %r15, 48(%r15) +; CHECK: br %r14 + %l0 = load volatile i64 *%ptr + %l1 = load volatile i64 *%ptr + %l3 = load volatile i64 *%ptr + %l4 = load volatile i64 *%ptr + %l5 = load volatile i64 *%ptr + %l6 = load volatile i64 *%ptr + %l7 = load volatile i64 *%ptr + %l8 = load volatile i64 *%ptr + %l9 = load volatile i64 *%ptr + %l10 = load volatile i64 *%ptr + %l11 = load volatile i64 *%ptr + %l12 = load volatile i64 *%ptr + %l13 = load volatile i64 *%ptr + %l14 = load volatile i64 *%ptr + %add0 = add i64 %l0, %l0 + %add1 = add i64 %l1, %add0 + %add3 = add i64 %l3, %add1 + %add4 = add i64 %l4, %add3 + %add5 = add i64 %l5, %add4 + %add6 = add i64 %l6, %add5 + %add7 = add i64 %l7, %add6 + %add8 = add i64 %l8, %add7 + %add9 = add i64 %l9, %add8 + %add10 = add i64 %l10, %add9 + %add11 = add i64 %l11, %add10 + %add12 = add i64 %l12, %add11 + %add13 = add i64 %l13, %add12 + %add14 = add i64 %l14, %add13 + store volatile i64 %add0, i64 *%ptr + store volatile i64 %add1, i64 *%ptr + store volatile i64 %add3, i64 *%ptr + store volatile i64 %add4, i64 *%ptr + store volatile i64 %add5, i64 *%ptr + store volatile i64 %add6, i64 *%ptr + store volatile i64 %add7, i64 *%ptr + store volatile i64 %add8, i64 *%ptr + store volatile i64 %add9, i64 *%ptr + store volatile i64 %add10, i64 *%ptr + store volatile i64 %add11, i64 *%ptr + store volatile i64 %add12, i64 *%ptr + store volatile i64 %add13, i64 *%ptr + %final = getelementptr i64 *%ptr, i64 1 + store volatile i64 %add14, i64 *%final + ret void +} + +; Like f1, but requires one fewer GPR. We allocate the call-saved GPRs +; from %r14 down, so that the STMG/LMG sequences aren't any longer than +; they need to be. 
+define void @f2(i64 *%ptr) { +; CHECK: f2: +; CHECK: stmg %r7, %r15, 56(%r15) +; CHECK-NOT: %r15 +; CHECK: .cfi_offset %r7, -104 +; CHECK: .cfi_offset %r8, -96 +; CHECK: .cfi_offset %r9, -88 +; CHECK: .cfi_offset %r10, -80 +; CHECK: .cfi_offset %r11, -72 +; CHECK: .cfi_offset %r12, -64 +; CHECK: .cfi_offset %r13, -56 +; CHECK: .cfi_offset %r14, -48 +; CHECK: .cfi_offset %r15, -40 +; ...main function body... +; CHECK-NOT: %r15 +; CHECK-NOT: %r6 +; CHECK: stg {{.*}}, 8(%r2) +; CHECK: lmg %r7, %r15, 56(%r15) +; CHECK: br %r14 + %l0 = load volatile i64 *%ptr + %l1 = load volatile i64 *%ptr + %l3 = load volatile i64 *%ptr + %l4 = load volatile i64 *%ptr + %l5 = load volatile i64 *%ptr + %l7 = load volatile i64 *%ptr + %l8 = load volatile i64 *%ptr + %l9 = load volatile i64 *%ptr + %l10 = load volatile i64 *%ptr + %l11 = load volatile i64 *%ptr + %l12 = load volatile i64 *%ptr + %l13 = load volatile i64 *%ptr + %l14 = load volatile i64 *%ptr + %add0 = add i64 %l0, %l0 + %add1 = add i64 %l1, %add0 + %add3 = add i64 %l3, %add1 + %add4 = add i64 %l4, %add3 + %add5 = add i64 %l5, %add4 + %add7 = add i64 %l7, %add5 + %add8 = add i64 %l8, %add7 + %add9 = add i64 %l9, %add8 + %add10 = add i64 %l10, %add9 + %add11 = add i64 %l11, %add10 + %add12 = add i64 %l12, %add11 + %add13 = add i64 %l13, %add12 + %add14 = add i64 %l14, %add13 + store volatile i64 %add0, i64 *%ptr + store volatile i64 %add1, i64 *%ptr + store volatile i64 %add3, i64 *%ptr + store volatile i64 %add4, i64 *%ptr + store volatile i64 %add5, i64 *%ptr + store volatile i64 %add7, i64 *%ptr + store volatile i64 %add8, i64 *%ptr + store volatile i64 %add9, i64 *%ptr + store volatile i64 %add10, i64 *%ptr + store volatile i64 %add11, i64 *%ptr + store volatile i64 %add12, i64 *%ptr + store volatile i64 %add13, i64 *%ptr + %final = getelementptr i64 *%ptr, i64 1 + store volatile i64 %add14, i64 *%final + ret void +} + +; Like f1, but only needs one call-saved GPR, which ought to be %r14. 
+define void @f3(i64 *%ptr) { +; CHECK: f3: +; CHECK: stmg %r14, %r15, 112(%r15) +; CHECK-NOT: %r15 +; CHECK: .cfi_offset %r14, -48 +; CHECK: .cfi_offset %r15, -40 +; ...main function body... +; CHECK-NOT: %r15 +; CHECK-NOT: %r6 +; CHECK-NOT: %r7 +; CHECK-NOT: %r8 +; CHECK-NOT: %r9 +; CHECK-NOT: %r10 +; CHECK-NOT: %r11 +; CHECK-NOT: %r12 +; CHECK-NOT: %r13 +; CHECK: stg {{.*}}, 8(%r2) +; CHECK: lmg %r14, %r15, 112(%r15) +; CHECK: br %r14 + %l0 = load volatile i64 *%ptr + %l1 = load volatile i64 *%ptr + %l3 = load volatile i64 *%ptr + %l4 = load volatile i64 *%ptr + %l5 = load volatile i64 *%ptr + %l14 = load volatile i64 *%ptr + %add0 = add i64 %l0, %l0 + %add1 = add i64 %l1, %add0 + %add3 = add i64 %l3, %add1 + %add4 = add i64 %l4, %add3 + %add5 = add i64 %l5, %add4 + %add14 = add i64 %l14, %add5 + store volatile i64 %add0, i64 *%ptr + store volatile i64 %add1, i64 *%ptr + store volatile i64 %add3, i64 *%ptr + store volatile i64 %add4, i64 *%ptr + store volatile i64 %add5, i64 *%ptr + %final = getelementptr i64 *%ptr, i64 1 + store volatile i64 %add14, i64 *%final + ret void +} + +; This function should use all call-clobbered GPRs but no call-saved ones. +; It shouldn't need to touch the stack at all. 
+define void @f4(i64 *%ptr) { +; CHECK: f4: +; CHECK-NOT: %r15 +; CHECK-NOT: %r6 +; CHECK-NOT: %r7 +; CHECK-NOT: %r8 +; CHECK-NOT: %r9 +; CHECK-NOT: %r10 +; CHECK-NOT: %r11 +; CHECK-NOT: %r12 +; CHECK-NOT: %r13 +; CHECK: br %r14 + %l0 = load volatile i64 *%ptr + %l1 = load volatile i64 *%ptr + %l3 = load volatile i64 *%ptr + %l4 = load volatile i64 *%ptr + %l5 = load volatile i64 *%ptr + %add0 = add i64 %l0, %l0 + %add1 = add i64 %l1, %add0 + %add3 = add i64 %l3, %add1 + %add4 = add i64 %l4, %add3 + %add5 = add i64 %l5, %add4 + store volatile i64 %add0, i64 *%ptr + store volatile i64 %add1, i64 *%ptr + store volatile i64 %add3, i64 *%ptr + store volatile i64 %add4, i64 *%ptr + %final = getelementptr i64 *%ptr, i64 1 + store volatile i64 %add5, i64 *%final + ret void +} diff --git a/test/CodeGen/SystemZ/frame-07.ll b/test/CodeGen/SystemZ/frame-07.ll new file mode 100644 index 0000000..cfe9f86 --- /dev/null +++ b/test/CodeGen/SystemZ/frame-07.ll @@ -0,0 +1,249 @@ +; Test the saving and restoring of FPRs in large frames. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefix=CHECK-NOFP %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s + +; Test a frame size that requires some FPRs to be saved and loaded using +; the 20-bit STDY and LDY while others can use the 12-bit STD and LD. +; The frame is big enough to require an emergency spill slot at 160(%r15), +; as well as the 8 FPR save slots. Get a frame of size 4128 by allocating +; (4128 - 168 - 8 * 8) / 8 = 487 extra doublewords. 
+define void @f1(double *%ptr, i64 %x) { +; CHECK-NOFP: f1: +; CHECK-NOFP: aghi %r15, -4128 +; CHECK-NOFP: .cfi_def_cfa_offset 4288 +; CHECK-NOFP: stdy %f8, 4120(%r15) +; CHECK-NOFP: stdy %f9, 4112(%r15) +; CHECK-NOFP: stdy %f10, 4104(%r15) +; CHECK-NOFP: stdy %f11, 4096(%r15) +; CHECK-NOFP: std %f12, 4088(%r15) +; CHECK-NOFP: std %f13, 4080(%r15) +; CHECK-NOFP: std %f14, 4072(%r15) +; CHECK-NOFP: std %f15, 4064(%r15) +; CHECK-NOFP: .cfi_offset %f8, -168 +; CHECK-NOFP: .cfi_offset %f9, -176 +; CHECK-NOFP: .cfi_offset %f10, -184 +; CHECK-NOFP: .cfi_offset %f11, -192 +; CHECK-NOFP: .cfi_offset %f12, -200 +; CHECK-NOFP: .cfi_offset %f13, -208 +; CHECK-NOFP: .cfi_offset %f14, -216 +; CHECK-NOFP: .cfi_offset %f15, -224 +; ...main function body... +; CHECK-NOFP: ldy %f8, 4120(%r15) +; CHECK-NOFP: ldy %f9, 4112(%r15) +; CHECK-NOFP: ldy %f10, 4104(%r15) +; CHECK-NOFP: ldy %f11, 4096(%r15) +; CHECK-NOFP: ld %f12, 4088(%r15) +; CHECK-NOFP: ld %f13, 4080(%r15) +; CHECK-NOFP: ld %f14, 4072(%r15) +; CHECK-NOFP: ld %f15, 4064(%r15) +; CHECK-NOFP: aghi %r15, 4128 +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f1: +; CHECK-FP: stmg %r11, %r15, 88(%r15) +; CHECK-FP: aghi %r15, -4128 +; CHECK-FP: .cfi_def_cfa_offset 4288 +; CHECK-FP: lgr %r11, %r15 +; CHECK-FP: .cfi_def_cfa_register %r11 +; CHECK-FP: stdy %f8, 4120(%r11) +; CHECK-FP: stdy %f9, 4112(%r11) +; CHECK-FP: stdy %f10, 4104(%r11) +; CHECK-FP: stdy %f11, 4096(%r11) +; CHECK-FP: std %f12, 4088(%r11) +; CHECK-FP: std %f13, 4080(%r11) +; CHECK-FP: std %f14, 4072(%r11) +; CHECK-FP: std %f15, 4064(%r11) +; ...main function body... 
+; CHECK-FP: ldy %f8, 4120(%r11) +; CHECK-FP: ldy %f9, 4112(%r11) +; CHECK-FP: ldy %f10, 4104(%r11) +; CHECK-FP: ldy %f11, 4096(%r11) +; CHECK-FP: ld %f12, 4088(%r11) +; CHECK-FP: ld %f13, 4080(%r11) +; CHECK-FP: ld %f14, 4072(%r11) +; CHECK-FP: ld %f15, 4064(%r11) +; CHECK-FP: lmg %r11, %r15, 4216(%r11) +; CHECK-FP: br %r14 + %y = alloca [487 x i64], align 8 + %elem = getelementptr inbounds [487 x i64]* %y, i64 0, i64 0 + store volatile i64 %x, i64* %elem + %l0 = load volatile double *%ptr + %l1 = load volatile double *%ptr + %l2 = load volatile double *%ptr + %l3 = load volatile double *%ptr + %l4 = load volatile double *%ptr + %l5 = load volatile double *%ptr + %l6 = load volatile double *%ptr + %l7 = load volatile double *%ptr + %l8 = load volatile double *%ptr + %l9 = load volatile double *%ptr + %l10 = load volatile double *%ptr + %l11 = load volatile double *%ptr + %l12 = load volatile double *%ptr + %l13 = load volatile double *%ptr + %l14 = load volatile double *%ptr + %l15 = load volatile double *%ptr + %add0 = fadd double %l0, %l0 + %add1 = fadd double %l1, %add0 + %add2 = fadd double %l2, %add1 + %add3 = fadd double %l3, %add2 + %add4 = fadd double %l4, %add3 + %add5 = fadd double %l5, %add4 + %add6 = fadd double %l6, %add5 + %add7 = fadd double %l7, %add6 + %add8 = fadd double %l8, %add7 + %add9 = fadd double %l9, %add8 + %add10 = fadd double %l10, %add9 + %add11 = fadd double %l11, %add10 + %add12 = fadd double %l12, %add11 + %add13 = fadd double %l13, %add12 + %add14 = fadd double %l14, %add13 + %add15 = fadd double %l15, %add14 + store volatile double %add0, double *%ptr + store volatile double %add1, double *%ptr + store volatile double %add2, double *%ptr + store volatile double %add3, double *%ptr + store volatile double %add4, double *%ptr + store volatile double %add5, double *%ptr + store volatile double %add6, double *%ptr + store volatile double %add7, double *%ptr + store volatile double %add8, double *%ptr + store volatile double %add9, 
double *%ptr + store volatile double %add10, double *%ptr + store volatile double %add11, double *%ptr + store volatile double %add12, double *%ptr + store volatile double %add13, double *%ptr + store volatile double %add14, double *%ptr + store volatile double %add15, double *%ptr + ret void +} + +; Test a frame size that requires some FPRs to be saved and loaded using +; an indexed STD and LD while others can use the 20-bit STDY and LDY. +; The index can be any call-clobbered GPR except %r0. +; +; Don't require the accesses to share the same LLILH; that would be a +; good optimisation but is really a different test. +; +; As above, get a frame of size 524320 by allocating +; (524320 - 168 - 8 * 8) / 8 = 65511 extra doublewords. +define void @f2(double *%ptr, i64 %x) { +; CHECK-NOFP: f2: +; CHECK-NOFP: agfi %r15, -524320 +; CHECK-NOFP: .cfi_def_cfa_offset 524480 +; CHECK-NOFP: llilh [[INDEX:%r[1-5]]], 8 +; CHECK-NOFP: std %f8, 24([[INDEX]],%r15) +; CHECK-NOFP: std %f9, 16({{%r[1-5]}},%r15) +; CHECK-NOFP: std %f10, 8({{%r[1-5]}},%r15) +; CHECK-NOFP: std %f11, 0({{%r[1-5]}},%r15) +; CHECK-NOFP: stdy %f12, 524280(%r15) +; CHECK-NOFP: stdy %f13, 524272(%r15) +; CHECK-NOFP: stdy %f14, 524264(%r15) +; CHECK-NOFP: stdy %f15, 524256(%r15) +; CHECK-NOFP: .cfi_offset %f8, -168 +; CHECK-NOFP: .cfi_offset %f9, -176 +; CHECK-NOFP: .cfi_offset %f10, -184 +; CHECK-NOFP: .cfi_offset %f11, -192 +; CHECK-NOFP: .cfi_offset %f12, -200 +; CHECK-NOFP: .cfi_offset %f13, -208 +; CHECK-NOFP: .cfi_offset %f14, -216 +; CHECK-NOFP: .cfi_offset %f15, -224 +; ...main function body... 
+; CHECK-NOFP: ld %f8, 24({{%r[1-5]}},%r15) +; CHECK-NOFP: ld %f9, 16({{%r[1-5]}},%r15) +; CHECK-NOFP: ld %f10, 8({{%r[1-5]}},%r15) +; CHECK-NOFP: ld %f11, 0({{%r[1-5]}},%r15) +; CHECK-NOFP: ldy %f12, 524280(%r15) +; CHECK-NOFP: ldy %f13, 524272(%r15) +; CHECK-NOFP: ldy %f14, 524264(%r15) +; CHECK-NOFP: ldy %f15, 524256(%r15) +; CHECK-NOFP: agfi %r15, 524320 +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f2: +; CHECK-FP: stmg %r11, %r15, 88(%r15) +; CHECK-FP: agfi %r15, -524320 +; CHECK-FP: .cfi_def_cfa_offset 524480 +; CHECK-FP: llilh [[INDEX:%r[1-5]]], 8 +; CHECK-FP: std %f8, 24([[INDEX]],%r11) +; CHECK-FP: std %f9, 16({{%r[1-5]}},%r11) +; CHECK-FP: std %f10, 8({{%r[1-5]}},%r11) +; CHECK-FP: std %f11, 0({{%r[1-5]}},%r11) +; CHECK-FP: stdy %f12, 524280(%r11) +; CHECK-FP: stdy %f13, 524272(%r11) +; CHECK-FP: stdy %f14, 524264(%r11) +; CHECK-FP: stdy %f15, 524256(%r11) +; CHECK-FP: .cfi_offset %f8, -168 +; CHECK-FP: .cfi_offset %f9, -176 +; CHECK-FP: .cfi_offset %f10, -184 +; CHECK-FP: .cfi_offset %f11, -192 +; CHECK-FP: .cfi_offset %f12, -200 +; CHECK-FP: .cfi_offset %f13, -208 +; CHECK-FP: .cfi_offset %f14, -216 +; CHECK-FP: .cfi_offset %f15, -224 +; ...main function body... 
+; CHECK-FP: ld %f8, 24({{%r[1-5]}},%r11) +; CHECK-FP: ld %f9, 16({{%r[1-5]}},%r11) +; CHECK-FP: ld %f10, 8({{%r[1-5]}},%r11) +; CHECK-FP: ld %f11, 0({{%r[1-5]}},%r11) +; CHECK-FP: ldy %f12, 524280(%r11) +; CHECK-FP: ldy %f13, 524272(%r11) +; CHECK-FP: ldy %f14, 524264(%r11) +; CHECK-FP: ldy %f15, 524256(%r11) +; CHECK-FP: aghi %r11, 128 +; CHECK-FP: lmg %r11, %r15, 524280(%r11) +; CHECK-FP: br %r14 + %y = alloca [65511 x i64], align 8 + %elem = getelementptr inbounds [65511 x i64]* %y, i64 0, i64 0 + store volatile i64 %x, i64* %elem + %l0 = load volatile double *%ptr + %l1 = load volatile double *%ptr + %l2 = load volatile double *%ptr + %l3 = load volatile double *%ptr + %l4 = load volatile double *%ptr + %l5 = load volatile double *%ptr + %l6 = load volatile double *%ptr + %l7 = load volatile double *%ptr + %l8 = load volatile double *%ptr + %l9 = load volatile double *%ptr + %l10 = load volatile double *%ptr + %l11 = load volatile double *%ptr + %l12 = load volatile double *%ptr + %l13 = load volatile double *%ptr + %l14 = load volatile double *%ptr + %l15 = load volatile double *%ptr + %add0 = fadd double %l0, %l0 + %add1 = fadd double %l1, %add0 + %add2 = fadd double %l2, %add1 + %add3 = fadd double %l3, %add2 + %add4 = fadd double %l4, %add3 + %add5 = fadd double %l5, %add4 + %add6 = fadd double %l6, %add5 + %add7 = fadd double %l7, %add6 + %add8 = fadd double %l8, %add7 + %add9 = fadd double %l9, %add8 + %add10 = fadd double %l10, %add9 + %add11 = fadd double %l11, %add10 + %add12 = fadd double %l12, %add11 + %add13 = fadd double %l13, %add12 + %add14 = fadd double %l14, %add13 + %add15 = fadd double %l15, %add14 + store volatile double %add0, double *%ptr + store volatile double %add1, double *%ptr + store volatile double %add2, double *%ptr + store volatile double %add3, double *%ptr + store volatile double %add4, double *%ptr + store volatile double %add5, double *%ptr + store volatile double %add6, double *%ptr + store volatile double %add7, double 
*%ptr + store volatile double %add8, double *%ptr + store volatile double %add9, double *%ptr + store volatile double %add10, double *%ptr + store volatile double %add11, double *%ptr + store volatile double %add12, double *%ptr + store volatile double %add13, double *%ptr + store volatile double %add14, double *%ptr + store volatile double %add15, double *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/frame-08.ll b/test/CodeGen/SystemZ/frame-08.ll new file mode 100644 index 0000000..6cf6378 --- /dev/null +++ b/test/CodeGen/SystemZ/frame-08.ll @@ -0,0 +1,277 @@ +; Test the saving and restoring of GPRs in large frames. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; This is the largest frame size that can use a plain LMG for %r6 and above. +; It is big enough to require an emergency spill slot at 160(%r15), +; so get a frame of size 524232 by allocating (524232 - 168) / 8 = 65508 +; extra doublewords. +define void @f1(i32 *%ptr, i64 %x) { +; CHECK: f1: +; CHECK: stmg %r6, %r15, 48(%r15) +; CHECK: .cfi_offset %r6, -112 +; CHECK: .cfi_offset %r7, -104 +; CHECK: .cfi_offset %r8, -96 +; CHECK: .cfi_offset %r9, -88 +; CHECK: .cfi_offset %r10, -80 +; CHECK: .cfi_offset %r11, -72 +; CHECK: .cfi_offset %r12, -64 +; CHECK: .cfi_offset %r13, -56 +; CHECK: .cfi_offset %r14, -48 +; CHECK: .cfi_offset %r15, -40 +; CHECK: agfi %r15, -524232 +; CHECK: .cfi_def_cfa_offset 524392 +; ...main function body... 
+; CHECK-NOT: ag +; CHECK: lmg %r6, %r15, 524280(%r15) +; CHECK: br %r14 + %l0 = load volatile i32 *%ptr + %l1 = load volatile i32 *%ptr + %l4 = load volatile i32 *%ptr + %l5 = load volatile i32 *%ptr + %l6 = load volatile i32 *%ptr + %l7 = load volatile i32 *%ptr + %l8 = load volatile i32 *%ptr + %l9 = load volatile i32 *%ptr + %l10 = load volatile i32 *%ptr + %l11 = load volatile i32 *%ptr + %l12 = load volatile i32 *%ptr + %l13 = load volatile i32 *%ptr + %l14 = load volatile i32 *%ptr + %add0 = add i32 %l0, %l0 + %add1 = add i32 %l1, %add0 + %add4 = add i32 %l4, %add1 + %add5 = add i32 %l5, %add4 + %add6 = add i32 %l6, %add5 + %add7 = add i32 %l7, %add6 + %add8 = add i32 %l8, %add7 + %add9 = add i32 %l9, %add8 + %add10 = add i32 %l10, %add9 + %add11 = add i32 %l11, %add10 + %add12 = add i32 %l12, %add11 + %add13 = add i32 %l13, %add12 + %add14 = add i32 %l14, %add13 + store volatile i32 %add0, i32 *%ptr + store volatile i32 %add1, i32 *%ptr + store volatile i32 %add4, i32 *%ptr + store volatile i32 %add5, i32 *%ptr + store volatile i32 %add6, i32 *%ptr + store volatile i32 %add7, i32 *%ptr + store volatile i32 %add8, i32 *%ptr + store volatile i32 %add9, i32 *%ptr + store volatile i32 %add10, i32 *%ptr + store volatile i32 %add11, i32 *%ptr + store volatile i32 %add12, i32 *%ptr + store volatile i32 %add13, i32 *%ptr + store volatile i32 %add14, i32 *%ptr + %y = alloca [65508 x i64], align 8 + %entry = getelementptr inbounds [65508 x i64]* %y, i64 0, i64 0 + store volatile i64 %x, i64* %entry + ret void +} + +; This is the largest frame size that can use a plain LMG for %r14 and above. +; It is big enough to require an emergency spill slot at 160(%r15), +; so get a frame of size 524168 by allocating (524168 - 168) / 8 = 65500 +; extra doublewords. 
+define void @f2(i32 *%ptr, i64 %x) { +; CHECK: f2: +; CHECK: stmg %r14, %r15, 112(%r15) +; CHECK: .cfi_offset %r14, -48 +; CHECK: .cfi_offset %r15, -40 +; CHECK: agfi %r15, -524168 +; CHECK: .cfi_def_cfa_offset 524328 +; ...main function body... +; CHECK-NOT: ag +; CHECK: lmg %r14, %r15, 524280(%r15) +; CHECK: br %r14 + %l0 = load volatile i32 *%ptr + %l1 = load volatile i32 *%ptr + %l4 = load volatile i32 *%ptr + %l5 = load volatile i32 *%ptr + %l14 = load volatile i32 *%ptr + %add0 = add i32 %l0, %l0 + %add1 = add i32 %l1, %add0 + %add4 = add i32 %l4, %add1 + %add5 = add i32 %l5, %add4 + %add14 = add i32 %l14, %add5 + store volatile i32 %add0, i32 *%ptr + store volatile i32 %add1, i32 *%ptr + store volatile i32 %add4, i32 *%ptr + store volatile i32 %add5, i32 *%ptr + store volatile i32 %add14, i32 *%ptr + %y = alloca [65500 x i64], align 8 + %entry = getelementptr inbounds [65500 x i64]* %y, i64 0, i64 0 + store volatile i64 %x, i64* %entry + ret void +} + +; Like f1 but with a frame that is 8 bytes bigger. This is the smallest +; frame size that needs two instructions to perform the final LMG for +; %r6 and above. +define void @f3(i32 *%ptr, i64 %x) { +; CHECK: f3: +; CHECK: stmg %r6, %r15, 48(%r15) +; CHECK: .cfi_offset %r6, -112 +; CHECK: .cfi_offset %r7, -104 +; CHECK: .cfi_offset %r8, -96 +; CHECK: .cfi_offset %r9, -88 +; CHECK: .cfi_offset %r10, -80 +; CHECK: .cfi_offset %r11, -72 +; CHECK: .cfi_offset %r12, -64 +; CHECK: .cfi_offset %r13, -56 +; CHECK: .cfi_offset %r14, -48 +; CHECK: .cfi_offset %r15, -40 +; CHECK: agfi %r15, -524240 +; CHECK: .cfi_def_cfa_offset 524400 +; ...main function body... 
+; CHECK: aghi %r15, 8 +; CHECK: lmg %r6, %r15, 524280(%r15) +; CHECK: br %r14 + %l0 = load volatile i32 *%ptr + %l1 = load volatile i32 *%ptr + %l4 = load volatile i32 *%ptr + %l5 = load volatile i32 *%ptr + %l6 = load volatile i32 *%ptr + %l7 = load volatile i32 *%ptr + %l8 = load volatile i32 *%ptr + %l9 = load volatile i32 *%ptr + %l10 = load volatile i32 *%ptr + %l11 = load volatile i32 *%ptr + %l12 = load volatile i32 *%ptr + %l13 = load volatile i32 *%ptr + %l14 = load volatile i32 *%ptr + %add0 = add i32 %l0, %l0 + %add1 = add i32 %l1, %add0 + %add4 = add i32 %l4, %add1 + %add5 = add i32 %l5, %add4 + %add6 = add i32 %l6, %add5 + %add7 = add i32 %l7, %add6 + %add8 = add i32 %l8, %add7 + %add9 = add i32 %l9, %add8 + %add10 = add i32 %l10, %add9 + %add11 = add i32 %l11, %add10 + %add12 = add i32 %l12, %add11 + %add13 = add i32 %l13, %add12 + %add14 = add i32 %l14, %add13 + store volatile i32 %add0, i32 *%ptr + store volatile i32 %add1, i32 *%ptr + store volatile i32 %add4, i32 *%ptr + store volatile i32 %add5, i32 *%ptr + store volatile i32 %add6, i32 *%ptr + store volatile i32 %add7, i32 *%ptr + store volatile i32 %add8, i32 *%ptr + store volatile i32 %add9, i32 *%ptr + store volatile i32 %add10, i32 *%ptr + store volatile i32 %add11, i32 *%ptr + store volatile i32 %add12, i32 *%ptr + store volatile i32 %add13, i32 *%ptr + store volatile i32 %add14, i32 *%ptr + %y = alloca [65509 x i64], align 8 + %entry = getelementptr inbounds [65509 x i64]* %y, i64 0, i64 0 + store volatile i64 %x, i64* %entry + ret void +} + +; Like f2 but with a frame that is 8 bytes bigger. This is the smallest +; frame size that needs two instructions to perform the final LMG for +; %r14 and %r15. +define void @f4(i32 *%ptr, i64 %x) { +; CHECK: f4: +; CHECK: stmg %r14, %r15, 112(%r15) +; CHECK: .cfi_offset %r14, -48 +; CHECK: .cfi_offset %r15, -40 +; CHECK: agfi %r15, -524176 +; CHECK: .cfi_def_cfa_offset 524336 +; ...main function body... 
+; CHECK: aghi %r15, 8 +; CHECK: lmg %r14, %r15, 524280(%r15) +; CHECK: br %r14 + %l0 = load volatile i32 *%ptr + %l1 = load volatile i32 *%ptr + %l4 = load volatile i32 *%ptr + %l5 = load volatile i32 *%ptr + %l14 = load volatile i32 *%ptr + %add0 = add i32 %l0, %l0 + %add1 = add i32 %l1, %add0 + %add4 = add i32 %l4, %add1 + %add5 = add i32 %l5, %add4 + %add14 = add i32 %l14, %add5 + store volatile i32 %add0, i32 *%ptr + store volatile i32 %add1, i32 *%ptr + store volatile i32 %add4, i32 *%ptr + store volatile i32 %add5, i32 *%ptr + store volatile i32 %add14, i32 *%ptr + %y = alloca [65501 x i64], align 8 + %entry = getelementptr inbounds [65501 x i64]* %y, i64 0, i64 0 + store volatile i64 %x, i64* %entry + ret void +} + +; This is the largest frame size for which the preparatory increment for +; "lmg %r14, %r15, ..." can be done using AGHI. +define void @f5(i32 *%ptr, i64 %x) { +; CHECK: f5: +; CHECK: stmg %r14, %r15, 112(%r15) +; CHECK: .cfi_offset %r14, -48 +; CHECK: .cfi_offset %r15, -40 +; CHECK: agfi %r15, -556928 +; CHECK: .cfi_def_cfa_offset 557088 +; ...main function body... +; CHECK: aghi %r15, 32760 +; CHECK: lmg %r14, %r15, 524280(%r15) +; CHECK: br %r14 + %l0 = load volatile i32 *%ptr + %l1 = load volatile i32 *%ptr + %l4 = load volatile i32 *%ptr + %l5 = load volatile i32 *%ptr + %l14 = load volatile i32 *%ptr + %add0 = add i32 %l0, %l0 + %add1 = add i32 %l1, %add0 + %add4 = add i32 %l4, %add1 + %add5 = add i32 %l5, %add4 + %add14 = add i32 %l14, %add5 + store volatile i32 %add0, i32 *%ptr + store volatile i32 %add1, i32 *%ptr + store volatile i32 %add4, i32 *%ptr + store volatile i32 %add5, i32 *%ptr + store volatile i32 %add14, i32 *%ptr + %y = alloca [69595 x i64], align 8 + %entry = getelementptr inbounds [69595 x i64]* %y, i64 0, i64 0 + store volatile i64 %x, i64* %entry + ret void +} + +; This is the smallest frame size for which the preparatory increment for +; "lmg %r14, %r15, ..." needs to be done using AGFI. 
+define void @f6(i32 *%ptr, i64 %x) { +; CHECK: f6: +; CHECK: stmg %r14, %r15, 112(%r15) +; CHECK: .cfi_offset %r14, -48 +; CHECK: .cfi_offset %r15, -40 +; CHECK: agfi %r15, -556936 +; CHECK: .cfi_def_cfa_offset 557096 +; ...main function body... +; CHECK: agfi %r15, 32768 +; CHECK: lmg %r14, %r15, 524280(%r15) +; CHECK: br %r14 + %l0 = load volatile i32 *%ptr + %l1 = load volatile i32 *%ptr + %l4 = load volatile i32 *%ptr + %l5 = load volatile i32 *%ptr + %l14 = load volatile i32 *%ptr + %add0 = add i32 %l0, %l0 + %add1 = add i32 %l1, %add0 + %add4 = add i32 %l4, %add1 + %add5 = add i32 %l5, %add4 + %add14 = add i32 %l14, %add5 + store volatile i32 %add0, i32 *%ptr + store volatile i32 %add1, i32 *%ptr + store volatile i32 %add4, i32 *%ptr + store volatile i32 %add5, i32 *%ptr + store volatile i32 %add14, i32 *%ptr + %y = alloca [69596 x i64], align 8 + %entry = getelementptr inbounds [69596 x i64]* %y, i64 0, i64 0 + store volatile i64 %x, i64* %entry + ret void +} diff --git a/test/CodeGen/SystemZ/frame-09.ll b/test/CodeGen/SystemZ/frame-09.ll new file mode 100644 index 0000000..eac6336 --- /dev/null +++ b/test/CodeGen/SystemZ/frame-09.ll @@ -0,0 +1,153 @@ +; Test the handling of the frame pointer (%r11). +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-fp-elim | FileCheck %s + +; We should always initialise %r11 when FP elimination is disabled. +; We don't need to allocate any more than the caller-provided 160-byte +; area though. +define i32 @f1(i32 %x) { +; CHECK: f1: +; CHECK: stmg %r11, %r15, 88(%r15) +; CHECK: .cfi_offset %r11, -72 +; CHECK: .cfi_offset %r15, -40 +; CHECK-NOT: ag +; CHECK: lgr %r11, %r15 +; CHECK: .cfi_def_cfa_register %r11 +; CHECK: lmg %r11, %r15, 88(%r11) +; CHECK: br %r14 + %y = add i32 %x, 1 + ret i32 %y +} + +; Make sure that frame accesses after the initial allocation are relative +; to %r11 rather than %r15. 
+define void @f2(i64 %x) { +; CHECK: f2: +; CHECK: stmg %r11, %r15, 88(%r15) +; CHECK: .cfi_offset %r11, -72 +; CHECK: .cfi_offset %r15, -40 +; CHECK: aghi %r15, -168 +; CHECK: .cfi_def_cfa_offset 328 +; CHECK: lgr %r11, %r15 +; CHECK: .cfi_def_cfa_register %r11 +; CHECK: stg %r2, 160(%r11) +; CHECK: lmg %r11, %r15, 256(%r11) +; CHECK: br %r14 + %y = alloca i64, align 8 + store volatile i64 %x, i64* %y + ret void +} + +; This function should require all GPRs but no other spill slots. +; It shouldn't need to allocate its own frame. +define void @f3(i32 *%ptr) { +; CHECK: f3: +; CHECK: stmg %r6, %r15, 48(%r15) +; CHECK-NOT: %r15 +; CHECK-NOT: %r11 +; CHECK: .cfi_offset %r6, -112 +; CHECK: .cfi_offset %r7, -104 +; CHECK: .cfi_offset %r8, -96 +; CHECK: .cfi_offset %r9, -88 +; CHECK: .cfi_offset %r10, -80 +; CHECK: .cfi_offset %r11, -72 +; CHECK: .cfi_offset %r12, -64 +; CHECK: .cfi_offset %r13, -56 +; CHECK: .cfi_offset %r14, -48 +; CHECK: .cfi_offset %r15, -40 +; CHECK-NOT: ag +; CHECK: lgr %r11, %r15 +; CHECK: .cfi_def_cfa_register %r11 +; ...main function body... 
+; CHECK-NOT: %r15 +; CHECK-NOT: %r11 +; CHECK: st {{.*}}, 4(%r2) +; CHECK: lmg %r6, %r15, 48(%r11) +; CHECK: br %r14 + %l0 = load volatile i32 *%ptr + %l1 = load volatile i32 *%ptr + %l3 = load volatile i32 *%ptr + %l4 = load volatile i32 *%ptr + %l5 = load volatile i32 *%ptr + %l6 = load volatile i32 *%ptr + %l7 = load volatile i32 *%ptr + %l8 = load volatile i32 *%ptr + %l9 = load volatile i32 *%ptr + %l10 = load volatile i32 *%ptr + %l12 = load volatile i32 *%ptr + %l13 = load volatile i32 *%ptr + %l14 = load volatile i32 *%ptr + %add0 = add i32 %l0, %l0 + %add1 = add i32 %l1, %add0 + %add3 = add i32 %l3, %add1 + %add4 = add i32 %l4, %add3 + %add5 = add i32 %l5, %add4 + %add6 = add i32 %l6, %add5 + %add7 = add i32 %l7, %add6 + %add8 = add i32 %l8, %add7 + %add9 = add i32 %l9, %add8 + %add10 = add i32 %l10, %add9 + %add12 = add i32 %l12, %add10 + %add13 = add i32 %l13, %add12 + %add14 = add i32 %l14, %add13 + store volatile i32 %add0, i32 *%ptr + store volatile i32 %add1, i32 *%ptr + store volatile i32 %add3, i32 *%ptr + store volatile i32 %add4, i32 *%ptr + store volatile i32 %add5, i32 *%ptr + store volatile i32 %add6, i32 *%ptr + store volatile i32 %add7, i32 *%ptr + store volatile i32 %add8, i32 *%ptr + store volatile i32 %add9, i32 *%ptr + store volatile i32 %add10, i32 *%ptr + store volatile i32 %add12, i32 *%ptr + store volatile i32 %add13, i32 *%ptr + %final = getelementptr i32 *%ptr, i32 1 + store volatile i32 %add14, i32 *%final + ret void +} + +; The largest frame for which the LMG is in range. This frame has an +; emergency spill slot at 160(%r11), so create a frame of size 524192 +; by allocating (524192 - 168) / 8 = 65503 doublewords. 
+define void @f4(i64 %x) { +; CHECK: f4: +; CHECK: stmg %r11, %r15, 88(%r15) +; CHECK: .cfi_offset %r11, -72 +; CHECK: .cfi_offset %r15, -40 +; CHECK: agfi %r15, -524192 +; CHECK: .cfi_def_cfa_offset 524352 +; CHECK: lgr %r11, %r15 +; CHECK: .cfi_def_cfa_register %r11 +; CHECK: stg %r2, 168(%r11) +; CHECK-NOT: ag +; CHECK: lmg %r11, %r15, 524280(%r11) +; CHECK: br %r14 + %y = alloca [65503 x i64], align 8 + %ptr = getelementptr inbounds [65503 x i64]* %y, i64 0, i64 0 + store volatile i64 %x, i64* %ptr + ret void +} + +; The next frame size larger than f4. +define void @f5(i64 %x) { +; CHECK: f5: +; CHECK: stmg %r11, %r15, 88(%r15) +; CHECK: .cfi_offset %r11, -72 +; CHECK: .cfi_offset %r15, -40 +; CHECK: agfi %r15, -524200 +; CHECK: .cfi_def_cfa_offset 524360 +; CHECK: lgr %r11, %r15 +; CHECK: .cfi_def_cfa_register %r11 +; CHECK: stg %r2, 168(%r11) +; CHECK: aghi %r11, 8 +; CHECK: lmg %r11, %r15, 524280(%r11) +; CHECK: br %r14 + %y = alloca [65504 x i64], align 8 + %ptr = getelementptr inbounds [65504 x i64]* %y, i64 0, i64 0 + store volatile i64 %x, i64* %ptr + ret void +} + +; The tests above establish that %r11 is handled like %r15 for LMG. +; Rely on the %r15-based tests in frame-08.ll for other cases. diff --git a/test/CodeGen/SystemZ/frame-10.ll b/test/CodeGen/SystemZ/frame-10.ll new file mode 100644 index 0000000..399a412 --- /dev/null +++ b/test/CodeGen/SystemZ/frame-10.ll @@ -0,0 +1,14 @@ +; Test the stacksave builtin. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i8 *@llvm.stacksave() + +define void @f1(i8 **%dest) { +; CHECK: f1: +; CHECK: stg %r15, 0(%r2) +; CHECK: br %r14 + %addr = call i8 *@llvm.stacksave() + store volatile i8 *%addr, i8 **%dest + ret void +} diff --git a/test/CodeGen/SystemZ/frame-11.ll b/test/CodeGen/SystemZ/frame-11.ll new file mode 100644 index 0000000..8422205 --- /dev/null +++ b/test/CodeGen/SystemZ/frame-11.ll @@ -0,0 +1,18 @@ +; Test the stackrestore builtin. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare void @llvm.stackrestore(i8 *) + +; We should use a frame pointer and tear down the frame based on %r11 +; rather than %r15. +define void @f1(i8 *%src) { +; CHECK: f1: +; CHECK: stmg %r11, %r15, 88(%r15) +; CHECK: lgr %r11, %r15 +; CHECK: lgr %r15, %r2 +; CHECK: lmg %r11, %r15, 88(%r11) +; CHECK: br %r14 + call void @llvm.stackrestore(i8 *%src) + ret void +} diff --git a/test/CodeGen/SystemZ/frame-13.ll b/test/CodeGen/SystemZ/frame-13.ll new file mode 100644 index 0000000..fa6b845 --- /dev/null +++ b/test/CodeGen/SystemZ/frame-13.ll @@ -0,0 +1,299 @@ +; Test the handling of base + 12-bit displacement addresses for large frames, +; in cases where no 20-bit form exists. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefix=CHECK-NOFP %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s + +; This file tests what happens when a displacement is converted from +; being relative to the start of a frame object to being relative to +; the frame itself. In some cases the test is only possible if two +; objects are allocated. +; +; Rather than rely on a particular order for those objects, the tests +; instead allocate two objects of the same size and apply the test to +; both of them. For consistency, all tests follow this model, even if +; one object would actually be enough. + +; First check the highest in-range offset after conversion, which is 4092 +; for word-addressing instructions like MVHI. +; +; The last in-range doubleword offset is 4088. Since the frame has an +; emergency spill slot at 160(%r15), the amount that we need to allocate +; in order to put another object at offset 4088 is (4088 - 168) / 4 = 980 +; words. 
+define void @f1() { +; CHECK-NOFP: f1: +; CHECK-NOFP: mvhi 4092(%r15), 42 +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f1: +; CHECK-FP: mvhi 4092(%r11), 42 +; CHECK-FP: br %r14 + %region1 = alloca [980 x i32], align 8 + %region2 = alloca [980 x i32], align 8 + %ptr1 = getelementptr inbounds [980 x i32]* %region1, i64 0, i64 1 + %ptr2 = getelementptr inbounds [980 x i32]* %region2, i64 0, i64 1 + store volatile i32 42, i32 *%ptr1 + store volatile i32 42, i32 *%ptr2 + ret void +} + +; Test the first out-of-range offset. We cannot use an index register here. +define void @f2() { +; CHECK-NOFP: f2: +; CHECK-NOFP: lay %r1, 4096(%r15) +; CHECK-NOFP: mvhi 0(%r1), 42 +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f2: +; CHECK-FP: lay %r1, 4096(%r11) +; CHECK-FP: mvhi 0(%r1), 42 +; CHECK-FP: br %r14 + %region1 = alloca [980 x i32], align 8 + %region2 = alloca [980 x i32], align 8 + %ptr1 = getelementptr inbounds [980 x i32]* %region1, i64 0, i64 2 + %ptr2 = getelementptr inbounds [980 x i32]* %region2, i64 0, i64 2 + store volatile i32 42, i32 *%ptr1 + store volatile i32 42, i32 *%ptr2 + ret void +} + +; Test the next offset after that. +define void @f3() { +; CHECK-NOFP: f3: +; CHECK-NOFP: lay %r1, 4096(%r15) +; CHECK-NOFP: mvhi 4(%r1), 42 +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f3: +; CHECK-FP: lay %r1, 4096(%r11) +; CHECK-FP: mvhi 4(%r1), 42 +; CHECK-FP: br %r14 + %region1 = alloca [980 x i32], align 8 + %region2 = alloca [980 x i32], align 8 + %ptr1 = getelementptr inbounds [980 x i32]* %region1, i64 0, i64 3 + %ptr2 = getelementptr inbounds [980 x i32]* %region2, i64 0, i64 3 + store volatile i32 42, i32 *%ptr1 + store volatile i32 42, i32 *%ptr2 + ret void +} + +; Add 4096 bytes (1024 words) to the size of each object and repeat. 
+define void @f4() { +; CHECK-NOFP: f4: +; CHECK-NOFP: lay %r1, 4096(%r15) +; CHECK-NOFP: mvhi 4092(%r1), 42 +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f4: +; CHECK-FP: lay %r1, 4096(%r11) +; CHECK-FP: mvhi 4092(%r1), 42 +; CHECK-FP: br %r14 + %region1 = alloca [2004 x i32], align 8 + %region2 = alloca [2004 x i32], align 8 + %ptr1 = getelementptr inbounds [2004 x i32]* %region1, i64 0, i64 1 + %ptr2 = getelementptr inbounds [2004 x i32]* %region2, i64 0, i64 1 + store volatile i32 42, i32 *%ptr1 + store volatile i32 42, i32 *%ptr2 + ret void +} + +; ...as above. +define void @f5() { +; CHECK-NOFP: f5: +; CHECK-NOFP: lay %r1, 8192(%r15) +; CHECK-NOFP: mvhi 0(%r1), 42 +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f5: +; CHECK-FP: lay %r1, 8192(%r11) +; CHECK-FP: mvhi 0(%r1), 42 +; CHECK-FP: br %r14 + %region1 = alloca [2004 x i32], align 8 + %region2 = alloca [2004 x i32], align 8 + %ptr1 = getelementptr inbounds [2004 x i32]* %region1, i64 0, i64 2 + %ptr2 = getelementptr inbounds [2004 x i32]* %region2, i64 0, i64 2 + store volatile i32 42, i32 *%ptr1 + store volatile i32 42, i32 *%ptr2 + ret void +} + +; ...as above. +define void @f6() { +; CHECK-NOFP: f6: +; CHECK-NOFP: lay %r1, 8192(%r15) +; CHECK-NOFP: mvhi 4(%r1), 42 +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f6: +; CHECK-FP: lay %r1, 8192(%r11) +; CHECK-FP: mvhi 4(%r1), 42 +; CHECK-FP: br %r14 + %region1 = alloca [2004 x i32], align 8 + %region2 = alloca [2004 x i32], align 8 + %ptr1 = getelementptr inbounds [2004 x i32]* %region1, i64 0, i64 3 + %ptr2 = getelementptr inbounds [2004 x i32]* %region2, i64 0, i64 3 + store volatile i32 42, i32 *%ptr1 + store volatile i32 42, i32 *%ptr2 + ret void +} + +; Now try an offset of 4092 from the start of the object, with the object +; being at offset 8192. This time we need objects of (8192 - 168) / 4 = 2006 +; words. 
+define void @f7() { +; CHECK-NOFP: f7: +; CHECK-NOFP: lay %r1, 8192(%r15) +; CHECK-NOFP: mvhi 4092(%r1), 42 +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f7: +; CHECK-FP: lay %r1, 8192(%r11) +; CHECK-FP: mvhi 4092(%r1), 42 +; CHECK-FP: br %r14 + %region1 = alloca [2006 x i32], align 8 + %region2 = alloca [2006 x i32], align 8 + %ptr1 = getelementptr inbounds [2006 x i32]* %region1, i64 0, i64 1023 + %ptr2 = getelementptr inbounds [2006 x i32]* %region2, i64 0, i64 1023 + store volatile i32 42, i32 *%ptr1 + store volatile i32 42, i32 *%ptr2 + ret void +} + +; Keep the object-relative offset the same but bump the size of the +; objects by one doubleword. +define void @f8() { +; CHECK-NOFP: f8: +; CHECK-NOFP: lay %r1, 12288(%r15) +; CHECK-NOFP: mvhi 4(%r1), 42 +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f8: +; CHECK-FP: lay %r1, 12288(%r11) +; CHECK-FP: mvhi 4(%r1), 42 +; CHECK-FP: br %r14 + %region1 = alloca [2008 x i32], align 8 + %region2 = alloca [2008 x i32], align 8 + %ptr1 = getelementptr inbounds [2008 x i32]* %region1, i64 0, i64 1023 + %ptr2 = getelementptr inbounds [2008 x i32]* %region2, i64 0, i64 1023 + store volatile i32 42, i32 *%ptr1 + store volatile i32 42, i32 *%ptr2 + ret void +} + +; Check a case where the original displacement is out of range. The backend +; should force an LAY from the outset. We don't yet do any kind of anchor +; optimization, so there should be no offset on the MVHI itself. 
+define void @f9() { +; CHECK-NOFP: f9: +; CHECK-NOFP: lay %r1, 12296(%r15) +; CHECK-NOFP: mvhi 0(%r1), 42 +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f9: +; CHECK-FP: lay %r1, 12296(%r11) +; CHECK-FP: mvhi 0(%r1), 42 +; CHECK-FP: br %r14 + %region1 = alloca [2008 x i32], align 8 + %region2 = alloca [2008 x i32], align 8 + %ptr1 = getelementptr inbounds [2008 x i32]* %region1, i64 0, i64 1024 + %ptr2 = getelementptr inbounds [2008 x i32]* %region2, i64 0, i64 1024 + store volatile i32 42, i32 *%ptr1 + store volatile i32 42, i32 *%ptr2 + ret void +} + +; Repeat f2 in a case that needs the emergency spill slot (because all +; call-clobbered registers are live and no call-saved ones have been +; allocated). +define void @f10(i32 *%vptr) { +; CHECK-NOFP: f10: +; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r15) +; CHECK-NOFP: lay [[REGISTER]], 4096(%r15) +; CHECK-NOFP: mvhi 0([[REGISTER]]), 42 +; CHECK-NOFP: lg [[REGISTER]], 160(%r15) +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f10: +; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r11) +; CHECK-FP: lay [[REGISTER]], 4096(%r11) +; CHECK-FP: mvhi 0([[REGISTER]]), 42 +; CHECK-FP: lg [[REGISTER]], 160(%r11) +; CHECK-FP: br %r14 + %i0 = load volatile i32 *%vptr + %i1 = load volatile i32 *%vptr + %i3 = load volatile i32 *%vptr + %i4 = load volatile i32 *%vptr + %i5 = load volatile i32 *%vptr + %region1 = alloca [980 x i32], align 8 + %region2 = alloca [980 x i32], align 8 + %ptr1 = getelementptr inbounds [980 x i32]* %region1, i64 0, i64 2 + %ptr2 = getelementptr inbounds [980 x i32]* %region2, i64 0, i64 2 + store volatile i32 42, i32 *%ptr1 + store volatile i32 42, i32 *%ptr2 + store volatile i32 %i0, i32 *%vptr + store volatile i32 %i1, i32 *%vptr + store volatile i32 %i3, i32 *%vptr + store volatile i32 %i4, i32 *%vptr + store volatile i32 %i5, i32 *%vptr + ret void +} + +; And again with maximum register pressure. The only spill slot that the +; NOFP case needs is the emergency one, so the offsets are the same as for f2. 
+; However, the FP case uses %r11 as the frame pointer and must therefore +; spill a second register. This leads to an extra displacement of 8. +define void @f11(i32 *%vptr) { +; CHECK-NOFP: f11: +; CHECK-NOFP: stmg %r6, %r15, +; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r15) +; CHECK-NOFP: lay [[REGISTER]], 4096(%r15) +; CHECK-NOFP: mvhi 0([[REGISTER]]), 42 +; CHECK-NOFP: lg [[REGISTER]], 160(%r15) +; CHECK-NOFP: lmg %r6, %r15, +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f11: +; CHECK-FP: stmg %r6, %r15, +; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r11) +; CHECK-FP: lay [[REGISTER]], 4096(%r11) +; CHECK-FP: mvhi 8([[REGISTER]]), 42 +; CHECK-FP: lg [[REGISTER]], 160(%r11) +; CHECK-FP: lmg %r6, %r15, +; CHECK-FP: br %r14 + %i0 = load volatile i32 *%vptr + %i1 = load volatile i32 *%vptr + %i3 = load volatile i32 *%vptr + %i4 = load volatile i32 *%vptr + %i5 = load volatile i32 *%vptr + %i6 = load volatile i32 *%vptr + %i7 = load volatile i32 *%vptr + %i8 = load volatile i32 *%vptr + %i9 = load volatile i32 *%vptr + %i10 = load volatile i32 *%vptr + %i11 = load volatile i32 *%vptr + %i12 = load volatile i32 *%vptr + %i13 = load volatile i32 *%vptr + %i14 = load volatile i32 *%vptr + %region1 = alloca [980 x i32], align 8 + %region2 = alloca [980 x i32], align 8 + %ptr1 = getelementptr inbounds [980 x i32]* %region1, i64 0, i64 2 + %ptr2 = getelementptr inbounds [980 x i32]* %region2, i64 0, i64 2 + store volatile i32 42, i32 *%ptr1 + store volatile i32 42, i32 *%ptr2 + store volatile i32 %i0, i32 *%vptr + store volatile i32 %i1, i32 *%vptr + store volatile i32 %i3, i32 *%vptr + store volatile i32 %i4, i32 *%vptr + store volatile i32 %i5, i32 *%vptr + store volatile i32 %i6, i32 *%vptr + store volatile i32 %i7, i32 *%vptr + store volatile i32 %i8, i32 *%vptr + store volatile i32 %i9, i32 *%vptr + store volatile i32 %i10, i32 *%vptr + store volatile i32 %i11, i32 *%vptr + store volatile i32 %i12, i32 *%vptr + store volatile i32 %i13, i32 *%vptr + store volatile 
i32 %i14, i32 *%vptr + ret void +} diff --git a/test/CodeGen/SystemZ/frame-14.ll b/test/CodeGen/SystemZ/frame-14.ll new file mode 100644 index 0000000..d8ff0a5 --- /dev/null +++ b/test/CodeGen/SystemZ/frame-14.ll @@ -0,0 +1,322 @@ +; Test the handling of base + displacement addresses for large frames, +; in cases where both 12-bit and 20-bit displacements are allowed. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefix=CHECK-NOFP %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s + +; This file tests what happens when a displacement is converted from +; being relative to the start of a frame object to being relative to +; the frame itself. In some cases the test is only possible if two +; objects are allocated. +; +; Rather than rely on a particular order for those objects, the tests +; instead allocate two objects of the same size and apply the test to +; both of them. For consistency, all tests follow this model, even if +; one object would actually be enough. + +; First check the highest offset that is in range of the 12-bit form. +; +; The last in-range doubleword offset is 4088. Since the frame has an +; emergency spill slot at 160(%r15), the amount that we need to allocate +; in order to put another object at offset 4088 is 4088 - 168 = 3920 bytes. +define void @f1() { +; CHECK-NOFP: f1: +; CHECK-NOFP: mvi 4095(%r15), 42 +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f1: +; CHECK-FP: mvi 4095(%r11), 42 +; CHECK-FP: br %r14 + %region1 = alloca [3920 x i8], align 8 + %region2 = alloca [3920 x i8], align 8 + %ptr1 = getelementptr inbounds [3920 x i8]* %region1, i64 0, i64 7 + %ptr2 = getelementptr inbounds [3920 x i8]* %region2, i64 0, i64 7 + store volatile i8 42, i8 *%ptr1 + store volatile i8 42, i8 *%ptr2 + ret void +} + +; Test the first offset that is out-of-range of the 12-bit form. 
+define void @f2() { +; CHECK-NOFP: f2: +; CHECK-NOFP: mviy 4096(%r15), 42 +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f2: +; CHECK-FP: mviy 4096(%r11), 42 +; CHECK-FP: br %r14 + %region1 = alloca [3920 x i8], align 8 + %region2 = alloca [3920 x i8], align 8 + %ptr1 = getelementptr inbounds [3920 x i8]* %region1, i64 0, i64 8 + %ptr2 = getelementptr inbounds [3920 x i8]* %region2, i64 0, i64 8 + store volatile i8 42, i8 *%ptr1 + store volatile i8 42, i8 *%ptr2 + ret void +} + +; Test the last offset that is in range of the 20-bit form. +; +; The last in-range doubleword offset is 524280, so by the same reasoning +; as above, we need to allocate objects of 524280 - 168 = 524112 bytes. +define void @f3() { +; CHECK-NOFP: f3: +; CHECK-NOFP: mviy 524287(%r15), 42 +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f3: +; CHECK-FP: mviy 524287(%r11), 42 +; CHECK-FP: br %r14 + %region1 = alloca [524112 x i8], align 8 + %region2 = alloca [524112 x i8], align 8 + %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 7 + %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 7 + store volatile i8 42, i8 *%ptr1 + store volatile i8 42, i8 *%ptr2 + ret void +} + +; Test the first out-of-range offset. We can't use an index register here, +; and the offset is also out of LAY's range, so expect a constant load +; followed by an addition. 
+define void @f4() { +; CHECK-NOFP: f4: +; CHECK-NOFP: llilh %r1, 8 +; CHECK-NOFP: agr %r1, %r15 +; CHECK-NOFP: mvi 0(%r1), 42 +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f4: +; CHECK-FP: llilh %r1, 8 +; CHECK-FP: agr %r1, %r11 +; CHECK-FP: mvi 0(%r1), 42 +; CHECK-FP: br %r14 + %region1 = alloca [524112 x i8], align 8 + %region2 = alloca [524112 x i8], align 8 + %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 8 + %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 8 + store volatile i8 42, i8 *%ptr1 + store volatile i8 42, i8 *%ptr2 + ret void +} + +; Add 4095 to the previous offset, to test the other end of the MVI range. +; The instruction will actually be STCY before frame lowering. +define void @f5() { +; CHECK-NOFP: f5: +; CHECK-NOFP: llilh %r1, 8 +; CHECK-NOFP: agr %r1, %r15 +; CHECK-NOFP: mvi 4095(%r1), 42 +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f5: +; CHECK-FP: llilh %r1, 8 +; CHECK-FP: agr %r1, %r11 +; CHECK-FP: mvi 4095(%r1), 42 +; CHECK-FP: br %r14 + %region1 = alloca [524112 x i8], align 8 + %region2 = alloca [524112 x i8], align 8 + %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 4103 + %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 4103 + store volatile i8 42, i8 *%ptr1 + store volatile i8 42, i8 *%ptr2 + ret void +} + +; Test the next offset after that, which uses MVIY instead of MVI. 
+define void @f6() { +; CHECK-NOFP: f6: +; CHECK-NOFP: llilh %r1, 8 +; CHECK-NOFP: agr %r1, %r15 +; CHECK-NOFP: mviy 4096(%r1), 42 +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f6: +; CHECK-FP: llilh %r1, 8 +; CHECK-FP: agr %r1, %r11 +; CHECK-FP: mviy 4096(%r1), 42 +; CHECK-FP: br %r14 + %region1 = alloca [524112 x i8], align 8 + %region2 = alloca [524112 x i8], align 8 + %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 4104 + %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 4104 + store volatile i8 42, i8 *%ptr1 + store volatile i8 42, i8 *%ptr2 + ret void +} + +; Now try an offset of 524287 from the start of the object, with the +; object being at offset 1048576 (1 << 20). The backend prefers to create +; anchors 0x10000 bytes apart, so that the high part can be loaded using +; LLILH while still using MVI in more cases than 0x40000 anchors would. +define void @f7() { +; CHECK-NOFP: f7: +; CHECK-NOFP: llilh %r1, 23 +; CHECK-NOFP: agr %r1, %r15 +; CHECK-NOFP: mviy 65535(%r1), 42 +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f7: +; CHECK-FP: llilh %r1, 23 +; CHECK-FP: agr %r1, %r11 +; CHECK-FP: mviy 65535(%r1), 42 +; CHECK-FP: br %r14 + %region1 = alloca [1048408 x i8], align 8 + %region2 = alloca [1048408 x i8], align 8 + %ptr1 = getelementptr inbounds [1048408 x i8]* %region1, i64 0, i64 524287 + %ptr2 = getelementptr inbounds [1048408 x i8]* %region2, i64 0, i64 524287 + store volatile i8 42, i8 *%ptr1 + store volatile i8 42, i8 *%ptr2 + ret void +} + +; Keep the object-relative offset the same but bump the size of the +; objects by one doubleword. 
+define void @f8() { +; CHECK-NOFP: f8: +; CHECK-NOFP: llilh %r1, 24 +; CHECK-NOFP: agr %r1, %r15 +; CHECK-NOFP: mvi 7(%r1), 42 +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f8: +; CHECK-FP: llilh %r1, 24 +; CHECK-FP: agr %r1, %r11 +; CHECK-FP: mvi 7(%r1), 42 +; CHECK-FP: br %r14 + %region1 = alloca [1048416 x i8], align 8 + %region2 = alloca [1048416 x i8], align 8 + %ptr1 = getelementptr inbounds [1048416 x i8]* %region1, i64 0, i64 524287 + %ptr2 = getelementptr inbounds [1048416 x i8]* %region2, i64 0, i64 524287 + store volatile i8 42, i8 *%ptr1 + store volatile i8 42, i8 *%ptr2 + ret void +} + +; Check a case where the original displacement is out of range. The backend +; should force separate address logic from the outset. We don't yet do any +; kind of anchor optimization, so there should be no offset on the MVI itself. +; +; Before frame lowering this is an LA followed by the AGFI seen below. +; The LA then gets lowered into the LLILH/LA form. The exact sequence +; isn't that important though. +define void @f9() { +; CHECK-NOFP: f9: +; CHECK-NOFP: llilh [[R1:%r[1-5]]], 16 +; CHECK-NOFP: la [[R2:%r[1-5]]], 8([[R1]],%r15) +; CHECK-NOFP: agfi [[R2]], 524288 +; CHECK-NOFP: mvi 0([[R2]]), 42 +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f9: +; CHECK-FP: llilh [[R1:%r[1-5]]], 16 +; CHECK-FP: la [[R2:%r[1-5]]], 8([[R1]],%r11) +; CHECK-FP: agfi [[R2]], 524288 +; CHECK-FP: mvi 0([[R2]]), 42 +; CHECK-FP: br %r14 + %region1 = alloca [1048416 x i8], align 8 + %region2 = alloca [1048416 x i8], align 8 + %ptr1 = getelementptr inbounds [1048416 x i8]* %region1, i64 0, i64 524288 + %ptr2 = getelementptr inbounds [1048416 x i8]* %region2, i64 0, i64 524288 + store volatile i8 42, i8 *%ptr1 + store volatile i8 42, i8 *%ptr2 + ret void +} + +; Repeat f4 in a case that needs the emergency spill slot (because all +; call-clobbered registers are live and no call-saved ones have been +; allocated). 
+define void @f10(i32 *%vptr) { +; CHECK-NOFP: f10: +; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r15) +; CHECK-NOFP: llilh [[REGISTER]], 8 +; CHECK-NOFP: agr [[REGISTER]], %r15 +; CHECK-NOFP: mvi 0([[REGISTER]]), 42 +; CHECK-NOFP: lg [[REGISTER]], 160(%r15) +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f10: +; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r11) +; CHECK-FP: llilh [[REGISTER]], 8 +; CHECK-FP: agr [[REGISTER]], %r11 +; CHECK-FP: mvi 0([[REGISTER]]), 42 +; CHECK-FP: lg [[REGISTER]], 160(%r11) +; CHECK-FP: br %r14 + %i0 = load volatile i32 *%vptr + %i1 = load volatile i32 *%vptr + %i3 = load volatile i32 *%vptr + %i4 = load volatile i32 *%vptr + %i5 = load volatile i32 *%vptr + %region1 = alloca [524112 x i8], align 8 + %region2 = alloca [524112 x i8], align 8 + %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 8 + %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 8 + store volatile i8 42, i8 *%ptr1 + store volatile i8 42, i8 *%ptr2 + store volatile i32 %i0, i32 *%vptr + store volatile i32 %i1, i32 *%vptr + store volatile i32 %i3, i32 *%vptr + store volatile i32 %i4, i32 *%vptr + store volatile i32 %i5, i32 *%vptr + ret void +} + +; And again with maximum register pressure. The only spill slot that the +; NOFP case needs is the emergency one, so the offsets are the same as for f4. +; However, the FP case uses %r11 as the frame pointer and must therefore +; spill a second register. This leads to an extra displacement of 8. 
+define void @f11(i32 *%vptr) { +; CHECK-NOFP: f11: +; CHECK-NOFP: stmg %r6, %r15, +; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r15) +; CHECK-NOFP: llilh [[REGISTER]], 8 +; CHECK-NOFP: agr [[REGISTER]], %r15 +; CHECK-NOFP: mvi 0([[REGISTER]]), 42 +; CHECK-NOFP: lg [[REGISTER]], 160(%r15) +; CHECK-NOFP: lmg %r6, %r15, +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f11: +; CHECK-FP: stmg %r6, %r15, +; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r11) +; CHECK-FP: llilh [[REGISTER]], 8 +; CHECK-FP: agr [[REGISTER]], %r11 +; CHECK-FP: mvi 8([[REGISTER]]), 42 +; CHECK-FP: lg [[REGISTER]], 160(%r11) +; CHECK-FP: lmg %r6, %r15, +; CHECK-FP: br %r14 + %i0 = load volatile i32 *%vptr + %i1 = load volatile i32 *%vptr + %i3 = load volatile i32 *%vptr + %i4 = load volatile i32 *%vptr + %i5 = load volatile i32 *%vptr + %i6 = load volatile i32 *%vptr + %i7 = load volatile i32 *%vptr + %i8 = load volatile i32 *%vptr + %i9 = load volatile i32 *%vptr + %i10 = load volatile i32 *%vptr + %i11 = load volatile i32 *%vptr + %i12 = load volatile i32 *%vptr + %i13 = load volatile i32 *%vptr + %i14 = load volatile i32 *%vptr + %region1 = alloca [524112 x i8], align 8 + %region2 = alloca [524112 x i8], align 8 + %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 8 + %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 8 + store volatile i8 42, i8 *%ptr1 + store volatile i8 42, i8 *%ptr2 + store volatile i32 %i0, i32 *%vptr + store volatile i32 %i1, i32 *%vptr + store volatile i32 %i3, i32 *%vptr + store volatile i32 %i4, i32 *%vptr + store volatile i32 %i5, i32 *%vptr + store volatile i32 %i6, i32 *%vptr + store volatile i32 %i7, i32 *%vptr + store volatile i32 %i8, i32 *%vptr + store volatile i32 %i9, i32 *%vptr + store volatile i32 %i10, i32 *%vptr + store volatile i32 %i11, i32 *%vptr + store volatile i32 %i12, i32 *%vptr + store volatile i32 %i13, i32 *%vptr + store volatile i32 %i14, i32 *%vptr + ret void +} diff --git a/test/CodeGen/SystemZ/frame-15.ll 
b/test/CodeGen/SystemZ/frame-15.ll new file mode 100644 index 0000000..bc87e17 --- /dev/null +++ b/test/CodeGen/SystemZ/frame-15.ll @@ -0,0 +1,352 @@ +; Test the handling of base + index + 12-bit displacement addresses for +; large frames, in cases where no 20-bit form exists. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefix=CHECK-NOFP %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s + +declare void @foo(float *%ptr1, float *%ptr2) + +; This file tests what happens when a displacement is converted from +; being relative to the start of a frame object to being relative to +; the frame itself. In some cases the test is only possible if two +; objects are allocated. +; +; Rather than rely on a particular order for those objects, the tests +; instead allocate two objects of the same size and apply the test to +; both of them. For consistency, all tests follow this model, even if +; one object would actually be enough. + +; First check the highest in-range offset after conversion, which is 4092 +; for word-addressing instructions like LDEB. +; +; The last in-range doubleword offset is 4088. Since the frame has an +; emergency spill slot at 160(%r15), the amount that we need to allocate +; in order to put another object at offset 4088 is (4088 - 168) / 4 = 980 +; words. 
+define void @f1(double *%dst) { +; CHECK-NOFP: f1: +; CHECK-NOFP: ldeb {{%f[0-7]}}, 4092(%r15) +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f1: +; CHECK-FP: ldeb {{%f[0-7]}}, 4092(%r11) +; CHECK-FP: br %r14 + %region1 = alloca [980 x float], align 8 + %region2 = alloca [980 x float], align 8 + %start1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 0 + %start2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 0 + call void @foo(float *%start1, float *%start2) + %ptr1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 1 + %ptr2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 1 + %float1 = load float *%ptr1 + %float2 = load float *%ptr2 + %double1 = fpext float %float1 to double + %double2 = fpext float %float2 to double + store volatile double %double1, double *%dst + store volatile double %double2, double *%dst + ret void +} + +; Test the first out-of-range offset. +define void @f2(double *%dst) { +; CHECK-NOFP: f2: +; CHECK-NOFP: lghi %r1, 4096 +; CHECK-NOFP: ldeb {{%f[0-7]}}, 0(%r1,%r15) +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f2: +; CHECK-FP: lghi %r1, 4096 +; CHECK-FP: ldeb {{%f[0-7]}}, 0(%r1,%r11) +; CHECK-FP: br %r14 + %region1 = alloca [980 x float], align 8 + %region2 = alloca [980 x float], align 8 + %start1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 0 + %start2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 0 + call void @foo(float *%start1, float *%start2) + %ptr1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 2 + %ptr2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 2 + %float1 = load float *%ptr1 + %float2 = load float *%ptr2 + %double1 = fpext float %float1 to double + %double2 = fpext float %float2 to double + store volatile double %double1, double *%dst + store volatile double %double2, double *%dst + ret void +} + +; Test the next offset after that. 
+define void @f3(double *%dst) { +; CHECK-NOFP: f3: +; CHECK-NOFP: lghi %r1, 4096 +; CHECK-NOFP: ldeb {{%f[0-7]}}, 4(%r1,%r15) +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f3: +; CHECK-FP: lghi %r1, 4096 +; CHECK-FP: ldeb {{%f[0-7]}}, 4(%r1,%r11) +; CHECK-FP: br %r14 + %region1 = alloca [980 x float], align 8 + %region2 = alloca [980 x float], align 8 + %start1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 0 + %start2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 0 + call void @foo(float *%start1, float *%start2) + %ptr1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 3 + %ptr2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 3 + %float1 = load float *%ptr1 + %float2 = load float *%ptr2 + %double1 = fpext float %float1 to double + %double2 = fpext float %float2 to double + store volatile double %double1, double *%dst + store volatile double %double2, double *%dst + ret void +} + +; Add 4096 bytes (1024 words) to the size of each object and repeat. +define void @f4(double *%dst) { +; CHECK-NOFP: f4: +; CHECK-NOFP: lghi %r1, 4096 +; CHECK-NOFP: ldeb {{%f[0-7]}}, 4092(%r1,%r15) +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f4: +; CHECK-FP: lghi %r1, 4096 +; CHECK-FP: ldeb {{%f[0-7]}}, 4092(%r1,%r11) +; CHECK-FP: br %r14 + %region1 = alloca [2004 x float], align 8 + %region2 = alloca [2004 x float], align 8 + %start1 = getelementptr inbounds [2004 x float]* %region1, i64 0, i64 0 + %start2 = getelementptr inbounds [2004 x float]* %region2, i64 0, i64 0 + call void @foo(float *%start1, float *%start2) + %ptr1 = getelementptr inbounds [2004 x float]* %region1, i64 0, i64 1 + %ptr2 = getelementptr inbounds [2004 x float]* %region2, i64 0, i64 1 + %float1 = load float *%ptr1 + %float2 = load float *%ptr2 + %double1 = fpext float %float1 to double + %double2 = fpext float %float2 to double + store volatile double %double1, double *%dst + store volatile double %double2, double *%dst + ret void +} + +; ...as above. 
+define void @f5(double *%dst) { +; CHECK-NOFP: f5: +; CHECK-NOFP: lghi %r1, 8192 +; CHECK-NOFP: ldeb {{%f[0-7]}}, 0(%r1,%r15) +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f5: +; CHECK-FP: lghi %r1, 8192 +; CHECK-FP: ldeb {{%f[0-7]}}, 0(%r1,%r11) +; CHECK-FP: br %r14 + %region1 = alloca [2004 x float], align 8 + %region2 = alloca [2004 x float], align 8 + %start1 = getelementptr inbounds [2004 x float]* %region1, i64 0, i64 0 + %start2 = getelementptr inbounds [2004 x float]* %region2, i64 0, i64 0 + call void @foo(float *%start1, float *%start2) + %ptr1 = getelementptr inbounds [2004 x float]* %region1, i64 0, i64 2 + %ptr2 = getelementptr inbounds [2004 x float]* %region2, i64 0, i64 2 + %float1 = load float *%ptr1 + %float2 = load float *%ptr2 + %double1 = fpext float %float1 to double + %double2 = fpext float %float2 to double + store volatile double %double1, double *%dst + store volatile double %double2, double *%dst + ret void +} + +; ...as above. +define void @f6(double *%dst) { +; CHECK-NOFP: f6: +; CHECK-NOFP: lghi %r1, 8192 +; CHECK-NOFP: ldeb {{%f[0-7]}}, 4(%r1,%r15) +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f6: +; CHECK-FP: lghi %r1, 8192 +; CHECK-FP: ldeb {{%f[0-7]}}, 4(%r1,%r11) +; CHECK-FP: br %r14 + %region1 = alloca [2004 x float], align 8 + %region2 = alloca [2004 x float], align 8 + %start1 = getelementptr inbounds [2004 x float]* %region1, i64 0, i64 0 + %start2 = getelementptr inbounds [2004 x float]* %region2, i64 0, i64 0 + call void @foo(float *%start1, float *%start2) + %ptr1 = getelementptr inbounds [2004 x float]* %region1, i64 0, i64 3 + %ptr2 = getelementptr inbounds [2004 x float]* %region2, i64 0, i64 3 + %float1 = load float *%ptr1 + %float2 = load float *%ptr2 + %double1 = fpext float %float1 to double + %double2 = fpext float %float2 to double + store volatile double %double1, double *%dst + store volatile double %double2, double *%dst + ret void +} + +; Now try an offset of 4092 from the start of the object, with the object +; being at 
offset 8192. This time we need objects of (8192 - 168) / 4 = 2006 +; words. +define void @f7(double *%dst) { +; CHECK-NOFP: f7: +; CHECK-NOFP: lghi %r1, 8192 +; CHECK-NOFP: ldeb {{%f[0-7]}}, 4092(%r1,%r15) +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f7: +; CHECK-FP: lghi %r1, 8192 +; CHECK-FP: ldeb {{%f[0-7]}}, 4092(%r1,%r11) +; CHECK-FP: br %r14 + %region1 = alloca [2006 x float], align 8 + %region2 = alloca [2006 x float], align 8 + %start1 = getelementptr inbounds [2006 x float]* %region1, i64 0, i64 0 + %start2 = getelementptr inbounds [2006 x float]* %region2, i64 0, i64 0 + call void @foo(float *%start1, float *%start2) + %ptr1 = getelementptr inbounds [2006 x float]* %region1, i64 0, i64 1023 + %ptr2 = getelementptr inbounds [2006 x float]* %region2, i64 0, i64 1023 + %float1 = load float *%ptr1 + %float2 = load float *%ptr2 + %double1 = fpext float %float1 to double + %double2 = fpext float %float2 to double + store volatile double %double1, double *%dst + store volatile double %double2, double *%dst + ret void +} + +; Keep the object-relative offset the same but bump the size of the +; objects by one doubleword. 
+define void @f8(double *%dst) { +; CHECK-NOFP: f8: +; CHECK-NOFP: lghi %r1, 12288 +; CHECK-NOFP: ldeb {{%f[0-7]}}, 4(%r1,%r15) +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f8: +; CHECK-FP: lghi %r1, 12288 +; CHECK-FP: ldeb {{%f[0-7]}}, 4(%r1,%r11) +; CHECK-FP: br %r14 + %region1 = alloca [2008 x float], align 8 + %region2 = alloca [2008 x float], align 8 + %start1 = getelementptr inbounds [2008 x float]* %region1, i64 0, i64 0 + %start2 = getelementptr inbounds [2008 x float]* %region2, i64 0, i64 0 + call void @foo(float *%start1, float *%start2) + %ptr1 = getelementptr inbounds [2008 x float]* %region1, i64 0, i64 1023 + %ptr2 = getelementptr inbounds [2008 x float]* %region2, i64 0, i64 1023 + %float1 = load float *%ptr1 + %float2 = load float *%ptr2 + %double1 = fpext float %float1 to double + %double2 = fpext float %float2 to double + store volatile double %double1, double *%dst + store volatile double %double2, double *%dst + ret void +} + +; Check a case where the original displacement is out of range. The backend +; should force an LAY from the outset. We don't yet do any kind of anchor +; optimization, so there should be no offset on the LDEB itself. 
+define void @f9(double *%dst) { +; CHECK-NOFP: f9: +; CHECK-NOFP: lay %r1, 12296(%r15) +; CHECK-NOFP: ldeb {{%f[0-7]}}, 0(%r1) +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f9: +; CHECK-FP: lay %r1, 12296(%r11) +; CHECK-FP: ldeb {{%f[0-7]}}, 0(%r1) +; CHECK-FP: br %r14 + %region1 = alloca [2008 x float], align 8 + %region2 = alloca [2008 x float], align 8 + %start1 = getelementptr inbounds [2008 x float]* %region1, i64 0, i64 0 + %start2 = getelementptr inbounds [2008 x float]* %region2, i64 0, i64 0 + call void @foo(float *%start1, float *%start2) + %ptr1 = getelementptr inbounds [2008 x float]* %region1, i64 0, i64 1024 + %ptr2 = getelementptr inbounds [2008 x float]* %region2, i64 0, i64 1024 + %float1 = load float *%ptr1 + %float2 = load float *%ptr2 + %double1 = fpext float %float1 to double + %double2 = fpext float %float2 to double + store volatile double %double1, double *%dst + store volatile double %double2, double *%dst + ret void +} + +; Repeat f2 in a case that needs the emergency spill slot, because all +; call-clobbered and allocated call-saved registers are live. Note that +; %vptr and %dst are copied to call-saved registers, freeing up %r2 and +; %r3 during the main test. 
+define void @f10(i32 *%vptr, double *%dst) { +; CHECK-NOFP: f10: +; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r15) +; CHECK-NOFP: lghi [[REGISTER]], 4096 +; CHECK-NOFP: ldeb {{%f[0-7]}}, 0([[REGISTER]],%r15) +; CHECK-NOFP: lg [[REGISTER]], 160(%r15) +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f10: +; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r11) +; CHECK-FP: lghi [[REGISTER]], 4096 +; CHECK-FP: ldeb {{%f[0-7]}}, 0([[REGISTER]],%r11) +; CHECK-FP: lg [[REGISTER]], 160(%r11) +; CHECK-FP: br %r14 + %region1 = alloca [980 x float], align 8 + %region2 = alloca [980 x float], align 8 + %start1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 0 + %start2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 0 + call void @foo(float *%start1, float *%start2) + %ptr1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 2 + %ptr2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 2 + %i0 = load volatile i32 *%vptr + %i1 = load volatile i32 *%vptr + %i2 = load volatile i32 *%vptr + %i3 = load volatile i32 *%vptr + %i4 = load volatile i32 *%vptr + %i5 = load volatile i32 *%vptr + %i14 = load volatile i32 *%vptr + %float1 = load float *%ptr1 + %float2 = load float *%ptr2 + %double1 = fpext float %float1 to double + %double2 = fpext float %float2 to double + store volatile double %double1, double *%dst + store volatile double %double2, double *%dst + store volatile i32 %i0, i32 *%vptr + store volatile i32 %i1, i32 *%vptr + store volatile i32 %i2, i32 *%vptr + store volatile i32 %i3, i32 *%vptr + store volatile i32 %i4, i32 *%vptr + store volatile i32 %i5, i32 *%vptr + store volatile i32 %i14, i32 *%vptr + ret void +} + +; Repeat f2 in a case where the index register is already occupied. 
+define void @f11(double *%dst, i64 %index) { +; CHECK-NOFP: f11: +; CHECK-NOFP: lgr [[REGISTER:%r[1-9][0-5]?]], %r3 +; CHECK-NOFP: lay %r1, 4096(%r15) +; CHECK-NOFP: ldeb {{%f[0-7]}}, 0([[REGISTER]],%r1) +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f11: +; CHECK-FP: lgr [[REGISTER:%r[1-9][0-5]?]], %r3 +; CHECK-FP: lay %r1, 4096(%r11) +; CHECK-FP: ldeb {{%f[0-7]}}, 0([[REGISTER]],%r1) +; CHECK-FP: br %r14 + %region1 = alloca [980 x float], align 8 + %region2 = alloca [980 x float], align 8 + %start1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 0 + %start2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 0 + call void @foo(float *%start1, float *%start2) + %elem1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 2 + %elem2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 2 + %base1 = ptrtoint float *%elem1 to i64 + %base2 = ptrtoint float *%elem2 to i64 + %addr1 = add i64 %base1, %index + %addr2 = add i64 %base2, %index + %ptr1 = inttoptr i64 %addr1 to float * + %ptr2 = inttoptr i64 %addr2 to float * + %float1 = load float *%ptr1 + %float2 = load float *%ptr2 + %double1 = fpext float %float1 to double + %double2 = fpext float %float2 to double + store volatile double %double1, double *%dst + store volatile double %double2, double *%dst + ret void +} diff --git a/test/CodeGen/SystemZ/frame-16.ll b/test/CodeGen/SystemZ/frame-16.ll new file mode 100644 index 0000000..cc5529f --- /dev/null +++ b/test/CodeGen/SystemZ/frame-16.ll @@ -0,0 +1,327 @@ +; Test the handling of base + index + displacement addresses for large frames, +; in cases where both 12-bit and 20-bit displacements are allowed. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefix=CHECK-NOFP %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s + +; This file tests what happens when a displacement is converted from +; being relative to the start of a frame object to being relative to +; the frame itself. In some cases the test is only possible if two +; objects are allocated. +; +; Rather than rely on a particular order for those objects, the tests +; instead allocate two objects of the same size and apply the test to +; both of them. For consistency, all tests follow this model, even if +; one object would actually be enough. + +; First check the highest offset that is in range of the 12-bit form. +; +; The last in-range doubleword offset is 4088. Since the frame has an +; emergency spill slot at 160(%r15), the amount that we need to allocate +; in order to put another object at offset 4088 is 4088 - 168 = 3920 bytes. +define void @f1(i8 %byte) { +; CHECK-NOFP: f1: +; CHECK-NOFP: stc %r2, 4095(%r15) +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f1: +; CHECK-FP: stc %r2, 4095(%r11) +; CHECK-FP: br %r14 + %region1 = alloca [3920 x i8], align 8 + %region2 = alloca [3920 x i8], align 8 + %ptr1 = getelementptr inbounds [3920 x i8]* %region1, i64 0, i64 7 + %ptr2 = getelementptr inbounds [3920 x i8]* %region2, i64 0, i64 7 + store volatile i8 %byte, i8 *%ptr1 + store volatile i8 %byte, i8 *%ptr2 + ret void +} + +; Test the first offset that is out-of-range of the 12-bit form. 
+define void @f2(i8 %byte) { +; CHECK-NOFP: f2: +; CHECK-NOFP: stcy %r2, 4096(%r15) +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f2: +; CHECK-FP: stcy %r2, 4096(%r11) +; CHECK-FP: br %r14 + %region1 = alloca [3920 x i8], align 8 + %region2 = alloca [3920 x i8], align 8 + %ptr1 = getelementptr inbounds [3920 x i8]* %region1, i64 0, i64 8 + %ptr2 = getelementptr inbounds [3920 x i8]* %region2, i64 0, i64 8 + store volatile i8 %byte, i8 *%ptr1 + store volatile i8 %byte, i8 *%ptr2 + ret void +} + +; Test the last offset that is in range of the 20-bit form. +; +; The last in-range doubleword offset is 524280, so by the same reasoning +; as above, we need to allocate objects of 524280 - 168 = 524112 bytes. +define void @f3(i8 %byte) { +; CHECK-NOFP: f3: +; CHECK-NOFP: stcy %r2, 524287(%r15) +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f3: +; CHECK-FP: stcy %r2, 524287(%r11) +; CHECK-FP: br %r14 + %region1 = alloca [524112 x i8], align 8 + %region2 = alloca [524112 x i8], align 8 + %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 7 + %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 7 + store volatile i8 %byte, i8 *%ptr1 + store volatile i8 %byte, i8 *%ptr2 + ret void +} + +; Test the first out-of-range offset. The offset is out of range of the +; 20-bit displacement form, so expect the high part to be loaded by LLILH +; and used as an index register. 
+define void @f4(i8 %byte) { +; CHECK-NOFP: f4: +; CHECK-NOFP: llilh %r1, 8 +; CHECK-NOFP: stc %r2, 0(%r1,%r15) +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f4: +; CHECK-FP: llilh %r1, 8 +; CHECK-FP: stc %r2, 0(%r1,%r11) +; CHECK-FP: br %r14 + %region1 = alloca [524112 x i8], align 8 + %region2 = alloca [524112 x i8], align 8 + %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 8 + %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 8 + store volatile i8 %byte, i8 *%ptr1 + store volatile i8 %byte, i8 *%ptr2 + ret void +} + +; Add 4095 to the previous offset, to test the other end of the STC range. +; The instruction will actually be STCY before frame lowering. +define void @f5(i8 %byte) { +; CHECK-NOFP: f5: +; CHECK-NOFP: llilh %r1, 8 +; CHECK-NOFP: stc %r2, 4095(%r1,%r15) +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f5: +; CHECK-FP: llilh %r1, 8 +; CHECK-FP: stc %r2, 4095(%r1,%r11) +; CHECK-FP: br %r14 + %region1 = alloca [524112 x i8], align 8 + %region2 = alloca [524112 x i8], align 8 + %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 4103 + %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 4103 + store volatile i8 %byte, i8 *%ptr1 + store volatile i8 %byte, i8 *%ptr2 + ret void +} + +; Test the next offset after that, which uses STCY instead of STC. 
+define void @f6(i8 %byte) { +; CHECK-NOFP: f6: +; CHECK-NOFP: llilh %r1, 8 +; CHECK-NOFP: stcy %r2, 4096(%r1,%r15) +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f6: +; CHECK-FP: llilh %r1, 8 +; CHECK-FP: stcy %r2, 4096(%r1,%r11) +; CHECK-FP: br %r14 + %region1 = alloca [524112 x i8], align 8 + %region2 = alloca [524112 x i8], align 8 + %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 4104 + %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 4104 + store volatile i8 %byte, i8 *%ptr1 + store volatile i8 %byte, i8 *%ptr2 + ret void +} + +; Now try an offset of 524287 from the start of the object, with the +; object being at offset 1048576 (1 << 20). The backend prefers to create +; anchors 0x10000 bytes apart, so that the high part can be loaded using +; LLILH while still using STC in more cases than 0x40000 anchors would. +define void @f7(i8 %byte) { +; CHECK-NOFP: f7: +; CHECK-NOFP: llilh %r1, 23 +; CHECK-NOFP: stcy %r2, 65535(%r1,%r15) +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f7: +; CHECK-FP: llilh %r1, 23 +; CHECK-FP: stcy %r2, 65535(%r1,%r11) +; CHECK-FP: br %r14 + %region1 = alloca [1048408 x i8], align 8 + %region2 = alloca [1048408 x i8], align 8 + %ptr1 = getelementptr inbounds [1048408 x i8]* %region1, i64 0, i64 524287 + %ptr2 = getelementptr inbounds [1048408 x i8]* %region2, i64 0, i64 524287 + store volatile i8 %byte, i8 *%ptr1 + store volatile i8 %byte, i8 *%ptr2 + ret void +} + +; Keep the object-relative offset the same but bump the size of the +; objects by one doubleword. 
+define void @f8(i8 %byte) { +; CHECK-NOFP: f8: +; CHECK-NOFP: llilh %r1, 24 +; CHECK-NOFP: stc %r2, 7(%r1,%r15) +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f8: +; CHECK-FP: llilh %r1, 24 +; CHECK-FP: stc %r2, 7(%r1,%r11) +; CHECK-FP: br %r14 + %region1 = alloca [1048416 x i8], align 8 + %region2 = alloca [1048416 x i8], align 8 + %ptr1 = getelementptr inbounds [1048416 x i8]* %region1, i64 0, i64 524287 + %ptr2 = getelementptr inbounds [1048416 x i8]* %region2, i64 0, i64 524287 + store volatile i8 %byte, i8 *%ptr1 + store volatile i8 %byte, i8 *%ptr2 + ret void +} + +; Check a case where the original displacement is out of range. The backend +; should force separate address logic from the outset. We don't yet do any +; kind of anchor optimization, so there should be no offset on the STC itself. +; +; Before frame lowering this is an LA followed by the AGFI seen below. +; The LA then gets lowered into the LLILH/LA form. The exact sequence +; isn't that important though. +define void @f9(i8 %byte) { +; CHECK-NOFP: f9: +; CHECK-NOFP: llilh [[R1:%r[1-5]]], 16 +; CHECK-NOFP: la [[R2:%r[1-5]]], 8([[R1]],%r15) +; CHECK-NOFP: agfi [[R2]], 524288 +; CHECK-NOFP: stc %r2, 0([[R2]]) +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f9: +; CHECK-FP: llilh [[R1:%r[1-5]]], 16 +; CHECK-FP: la [[R2:%r[1-5]]], 8([[R1]],%r11) +; CHECK-FP: agfi [[R2]], 524288 +; CHECK-FP: stc %r2, 0([[R2]]) +; CHECK-FP: br %r14 + %region1 = alloca [1048416 x i8], align 8 + %region2 = alloca [1048416 x i8], align 8 + %ptr1 = getelementptr inbounds [1048416 x i8]* %region1, i64 0, i64 524288 + %ptr2 = getelementptr inbounds [1048416 x i8]* %region2, i64 0, i64 524288 + store volatile i8 %byte, i8 *%ptr1 + store volatile i8 %byte, i8 *%ptr2 + ret void +} + +; Repeat f4 in a case that needs the emergency spill slot (because all +; call-clobbered registers are live and no call-saved ones have been +; allocated). 
+define void @f10(i32 *%vptr, i8 %byte) { +; CHECK-NOFP: f10: +; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r15) +; CHECK-NOFP: llilh [[REGISTER]], 8 +; CHECK-NOFP: stc %r3, 0([[REGISTER]],%r15) +; CHECK-NOFP: lg [[REGISTER]], 160(%r15) +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f10: +; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r11) +; CHECK-FP: llilh [[REGISTER]], 8 +; CHECK-FP: stc %r3, 0([[REGISTER]],%r11) +; CHECK-FP: lg [[REGISTER]], 160(%r11) +; CHECK-FP: br %r14 + %i0 = load volatile i32 *%vptr + %i1 = load volatile i32 *%vptr + %i4 = load volatile i32 *%vptr + %i5 = load volatile i32 *%vptr + %region1 = alloca [524112 x i8], align 8 + %region2 = alloca [524112 x i8], align 8 + %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 8 + %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 8 + store volatile i8 %byte, i8 *%ptr1 + store volatile i8 %byte, i8 *%ptr2 + store volatile i32 %i0, i32 *%vptr + store volatile i32 %i1, i32 *%vptr + store volatile i32 %i4, i32 *%vptr + store volatile i32 %i5, i32 *%vptr + ret void +} + +; And again with maximum register pressure. The only spill slot that the +; NOFP case needs is the emergency one, so the offsets are the same as for f4. +; However, the FP case uses %r11 as the frame pointer and must therefore +; spill a second register. This leads to an extra displacement of 8. 
+define void @f11(i32 *%vptr, i8 %byte) { +; CHECK-NOFP: f11: +; CHECK-NOFP: stmg %r6, %r15, +; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r15) +; CHECK-NOFP: llilh [[REGISTER]], 8 +; CHECK-NOFP: stc %r3, 0([[REGISTER]],%r15) +; CHECK-NOFP: lg [[REGISTER]], 160(%r15) +; CHECK-NOFP: lmg %r6, %r15, +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f11: +; CHECK-FP: stmg %r6, %r15, +; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r11) +; CHECK-FP: llilh [[REGISTER]], 8 +; CHECK-FP: stc %r3, 8([[REGISTER]],%r11) +; CHECK-FP: lg [[REGISTER]], 160(%r11) +; CHECK-FP: lmg %r6, %r15, +; CHECK-FP: br %r14 + %i0 = load volatile i32 *%vptr + %i1 = load volatile i32 *%vptr + %i4 = load volatile i32 *%vptr + %i5 = load volatile i32 *%vptr + %i6 = load volatile i32 *%vptr + %i7 = load volatile i32 *%vptr + %i8 = load volatile i32 *%vptr + %i9 = load volatile i32 *%vptr + %i10 = load volatile i32 *%vptr + %i11 = load volatile i32 *%vptr + %i12 = load volatile i32 *%vptr + %i13 = load volatile i32 *%vptr + %i14 = load volatile i32 *%vptr + %region1 = alloca [524112 x i8], align 8 + %region2 = alloca [524112 x i8], align 8 + %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 8 + %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 8 + store volatile i8 %byte, i8 *%ptr1 + store volatile i8 %byte, i8 *%ptr2 + store volatile i32 %i0, i32 *%vptr + store volatile i32 %i1, i32 *%vptr + store volatile i32 %i4, i32 *%vptr + store volatile i32 %i5, i32 *%vptr + store volatile i32 %i6, i32 *%vptr + store volatile i32 %i7, i32 *%vptr + store volatile i32 %i8, i32 *%vptr + store volatile i32 %i9, i32 *%vptr + store volatile i32 %i10, i32 *%vptr + store volatile i32 %i11, i32 *%vptr + store volatile i32 %i12, i32 *%vptr + store volatile i32 %i13, i32 *%vptr + store volatile i32 %i14, i32 *%vptr + ret void +} + +; Repeat f4 in a case where the index register is already occupied. 
+define void @f12(i8 %byte, i64 %index) { +; CHECK-NOFP: f12: +; CHECK-NOFP: llilh %r1, 8 +; CHECK-NOFP: agr %r1, %r15 +; CHECK-NOFP: stc %r2, 0(%r3,%r1) +; CHECK-NOFP: br %r14 +; +; CHECK-FP: f12: +; CHECK-FP: llilh %r1, 8 +; CHECK-FP: agr %r1, %r11 +; CHECK-FP: stc %r2, 0(%r3,%r1) +; CHECK-FP: br %r14 + %region1 = alloca [524112 x i8], align 8 + %region2 = alloca [524112 x i8], align 8 + %index1 = add i64 %index, 8 + %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 %index1 + %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 %index1 + store volatile i8 %byte, i8 *%ptr1 + store volatile i8 %byte, i8 *%ptr2 + ret void +} diff --git a/test/CodeGen/SystemZ/frame-17.ll b/test/CodeGen/SystemZ/frame-17.ll new file mode 100644 index 0000000..613d9f8 --- /dev/null +++ b/test/CodeGen/SystemZ/frame-17.ll @@ -0,0 +1,177 @@ +; Test spilling of FPRs. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; We need to save and restore 8 of the 16 FPRs and allocate an additional +; 4-byte spill slot, rounded to 8 bytes. The frame size should be exactly +; 160 + 8 + 8 * 8 = 232. 
+define void @f1(float *%ptr) { +; CHECK: f1: +; CHECK: aghi %r15, -232 +; CHECK: std %f8, 224(%r15) +; CHECK: std %f9, 216(%r15) +; CHECK: std %f10, 208(%r15) +; CHECK: std %f11, 200(%r15) +; CHECK: std %f12, 192(%r15) +; CHECK: std %f13, 184(%r15) +; CHECK: std %f14, 176(%r15) +; CHECK: std %f15, 168(%r15) +; CHECK-NOT: 160(%r15) +; CHECK: ste [[REGISTER:%f[0-9]+]], 164(%r15) +; CHECK-NOT: 160(%r15) +; CHECK: le [[REGISTER]], 164(%r15) +; CHECK-NOT: 160(%r15) +; CHECK: ld %f8, 224(%r15) +; CHECK: ld %f9, 216(%r15) +; CHECK: ld %f10, 208(%r15) +; CHECK: ld %f11, 200(%r15) +; CHECK: ld %f12, 192(%r15) +; CHECK: ld %f13, 184(%r15) +; CHECK: ld %f14, 176(%r15) +; CHECK: ld %f15, 168(%r15) +; CHECK: aghi %r15, 232 +; CHECK: br %r14 + %l0 = load volatile float *%ptr + %l1 = load volatile float *%ptr + %l2 = load volatile float *%ptr + %l3 = load volatile float *%ptr + %l4 = load volatile float *%ptr + %l5 = load volatile float *%ptr + %l6 = load volatile float *%ptr + %l7 = load volatile float *%ptr + %l8 = load volatile float *%ptr + %l9 = load volatile float *%ptr + %l10 = load volatile float *%ptr + %l11 = load volatile float *%ptr + %l12 = load volatile float *%ptr + %l13 = load volatile float *%ptr + %l14 = load volatile float *%ptr + %l15 = load volatile float *%ptr + %lx = load volatile float *%ptr + store volatile float %lx, float *%ptr + store volatile float %l15, float *%ptr + store volatile float %l14, float *%ptr + store volatile float %l13, float *%ptr + store volatile float %l12, float *%ptr + store volatile float %l11, float *%ptr + store volatile float %l10, float *%ptr + store volatile float %l9, float *%ptr + store volatile float %l8, float *%ptr + store volatile float %l7, float *%ptr + store volatile float %l6, float *%ptr + store volatile float %l5, float *%ptr + store volatile float %l4, float *%ptr + store volatile float %l3, float *%ptr + store volatile float %l2, float *%ptr + store volatile float %l1, float *%ptr + store volatile float %l0, 
float *%ptr + ret void +} + +; Same for doubles, except that the full spill slot is used. +define void @f2(double *%ptr) { +; CHECK: f2: +; CHECK: aghi %r15, -232 +; CHECK: std %f8, 224(%r15) +; CHECK: std %f9, 216(%r15) +; CHECK: std %f10, 208(%r15) +; CHECK: std %f11, 200(%r15) +; CHECK: std %f12, 192(%r15) +; CHECK: std %f13, 184(%r15) +; CHECK: std %f14, 176(%r15) +; CHECK: std %f15, 168(%r15) +; CHECK: std [[REGISTER:%f[0-9]+]], 160(%r15) +; CHECK: ld [[REGISTER]], 160(%r15) +; CHECK: ld %f8, 224(%r15) +; CHECK: ld %f9, 216(%r15) +; CHECK: ld %f10, 208(%r15) +; CHECK: ld %f11, 200(%r15) +; CHECK: ld %f12, 192(%r15) +; CHECK: ld %f13, 184(%r15) +; CHECK: ld %f14, 176(%r15) +; CHECK: ld %f15, 168(%r15) +; CHECK: aghi %r15, 232 +; CHECK: br %r14 + %l0 = load volatile double *%ptr + %l1 = load volatile double *%ptr + %l2 = load volatile double *%ptr + %l3 = load volatile double *%ptr + %l4 = load volatile double *%ptr + %l5 = load volatile double *%ptr + %l6 = load volatile double *%ptr + %l7 = load volatile double *%ptr + %l8 = load volatile double *%ptr + %l9 = load volatile double *%ptr + %l10 = load volatile double *%ptr + %l11 = load volatile double *%ptr + %l12 = load volatile double *%ptr + %l13 = load volatile double *%ptr + %l14 = load volatile double *%ptr + %l15 = load volatile double *%ptr + %lx = load volatile double *%ptr + store volatile double %lx, double *%ptr + store volatile double %l15, double *%ptr + store volatile double %l14, double *%ptr + store volatile double %l13, double *%ptr + store volatile double %l12, double *%ptr + store volatile double %l11, double *%ptr + store volatile double %l10, double *%ptr + store volatile double %l9, double *%ptr + store volatile double %l8, double *%ptr + store volatile double %l7, double *%ptr + store volatile double %l6, double *%ptr + store volatile double %l5, double *%ptr + store volatile double %l4, double *%ptr + store volatile double %l3, double *%ptr + store volatile double %l2, double *%ptr + 
store volatile double %l1, double *%ptr + store volatile double %l0, double *%ptr + ret void +} + +; The long double case needs a 16-byte spill slot. +define void @f3(fp128 *%ptr) { +; CHECK: f3: +; CHECK: aghi %r15, -240 +; CHECK: std %f8, 232(%r15) +; CHECK: std %f9, 224(%r15) +; CHECK: std %f10, 216(%r15) +; CHECK: std %f11, 208(%r15) +; CHECK: std %f12, 200(%r15) +; CHECK: std %f13, 192(%r15) +; CHECK: std %f14, 184(%r15) +; CHECK: std %f15, 176(%r15) +; CHECK: std [[REGISTER1:%f[0-9]+]], 160(%r15) +; CHECK: std [[REGISTER2:%f[0-9]+]], 168(%r15) +; CHECK: ld [[REGISTER1]], 160(%r15) +; CHECK: ld [[REGISTER2]], 168(%r15) +; CHECK: ld %f8, 232(%r15) +; CHECK: ld %f9, 224(%r15) +; CHECK: ld %f10, 216(%r15) +; CHECK: ld %f11, 208(%r15) +; CHECK: ld %f12, 200(%r15) +; CHECK: ld %f13, 192(%r15) +; CHECK: ld %f14, 184(%r15) +; CHECK: ld %f15, 176(%r15) +; CHECK: aghi %r15, 240 +; CHECK: br %r14 + %l0 = load volatile fp128 *%ptr + %l1 = load volatile fp128 *%ptr + %l4 = load volatile fp128 *%ptr + %l5 = load volatile fp128 *%ptr + %l8 = load volatile fp128 *%ptr + %l9 = load volatile fp128 *%ptr + %l12 = load volatile fp128 *%ptr + %l13 = load volatile fp128 *%ptr + %lx = load volatile fp128 *%ptr + store volatile fp128 %lx, fp128 *%ptr + store volatile fp128 %l13, fp128 *%ptr + store volatile fp128 %l12, fp128 *%ptr + store volatile fp128 %l9, fp128 *%ptr + store volatile fp128 %l8, fp128 *%ptr + store volatile fp128 %l5, fp128 *%ptr + store volatile fp128 %l4, fp128 *%ptr + store volatile fp128 %l1, fp128 *%ptr + store volatile fp128 %l0, fp128 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/frame-18.ll b/test/CodeGen/SystemZ/frame-18.ll new file mode 100644 index 0000000..a9977ed --- /dev/null +++ b/test/CodeGen/SystemZ/frame-18.ll @@ -0,0 +1,91 @@ +; Test spilling of GPRs. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; We need to allocate a 4-byte spill slot, rounded to 8 bytes. The frame +; size should be exactly 160 + 8 = 168. 
+define void @f1(i32 *%ptr) { +; CHECK: f1: +; CHECK: stmg %r6, %r15, 48(%r15) +; CHECK: aghi %r15, -168 +; CHECK-NOT: 160(%r15) +; CHECK: st [[REGISTER:%r[0-9]+]], 164(%r15) +; CHECK-NOT: 160(%r15) +; CHECK: l [[REGISTER]], 164(%r15) +; CHECK-NOT: 160(%r15) +; CHECK: lmg %r6, %r15, 216(%r15) +; CHECK: br %r14 + %l0 = load volatile i32 *%ptr + %l1 = load volatile i32 *%ptr + %l3 = load volatile i32 *%ptr + %l4 = load volatile i32 *%ptr + %l5 = load volatile i32 *%ptr + %l6 = load volatile i32 *%ptr + %l7 = load volatile i32 *%ptr + %l8 = load volatile i32 *%ptr + %l9 = load volatile i32 *%ptr + %l10 = load volatile i32 *%ptr + %l11 = load volatile i32 *%ptr + %l12 = load volatile i32 *%ptr + %l13 = load volatile i32 *%ptr + %l14 = load volatile i32 *%ptr + %lx = load volatile i32 *%ptr + store volatile i32 %lx, i32 *%ptr + store volatile i32 %l14, i32 *%ptr + store volatile i32 %l13, i32 *%ptr + store volatile i32 %l12, i32 *%ptr + store volatile i32 %l11, i32 *%ptr + store volatile i32 %l10, i32 *%ptr + store volatile i32 %l9, i32 *%ptr + store volatile i32 %l8, i32 *%ptr + store volatile i32 %l7, i32 *%ptr + store volatile i32 %l6, i32 *%ptr + store volatile i32 %l5, i32 *%ptr + store volatile i32 %l4, i32 *%ptr + store volatile i32 %l3, i32 *%ptr + store volatile i32 %l1, i32 *%ptr + store volatile i32 %l0, i32 *%ptr + ret void +} + +; Same for i64, except that the full spill slot is used. 
+define void @f2(i64 *%ptr) { +; CHECK: f2: +; CHECK: stmg %r6, %r15, 48(%r15) +; CHECK: aghi %r15, -168 +; CHECK: stg [[REGISTER:%r[0-9]+]], 160(%r15) +; CHECK: lg [[REGISTER]], 160(%r15) +; CHECK: lmg %r6, %r15, 216(%r15) +; CHECK: br %r14 + %l0 = load volatile i64 *%ptr + %l1 = load volatile i64 *%ptr + %l3 = load volatile i64 *%ptr + %l4 = load volatile i64 *%ptr + %l5 = load volatile i64 *%ptr + %l6 = load volatile i64 *%ptr + %l7 = load volatile i64 *%ptr + %l8 = load volatile i64 *%ptr + %l9 = load volatile i64 *%ptr + %l10 = load volatile i64 *%ptr + %l11 = load volatile i64 *%ptr + %l12 = load volatile i64 *%ptr + %l13 = load volatile i64 *%ptr + %l14 = load volatile i64 *%ptr + %lx = load volatile i64 *%ptr + store volatile i64 %lx, i64 *%ptr + store volatile i64 %l14, i64 *%ptr + store volatile i64 %l13, i64 *%ptr + store volatile i64 %l12, i64 *%ptr + store volatile i64 %l11, i64 *%ptr + store volatile i64 %l10, i64 *%ptr + store volatile i64 %l9, i64 *%ptr + store volatile i64 %l8, i64 *%ptr + store volatile i64 %l7, i64 *%ptr + store volatile i64 %l6, i64 *%ptr + store volatile i64 %l5, i64 *%ptr + store volatile i64 %l4, i64 *%ptr + store volatile i64 %l3, i64 *%ptr + store volatile i64 %l1, i64 *%ptr + store volatile i64 %l0, i64 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/insert-01.ll b/test/CodeGen/SystemZ/insert-01.ll new file mode 100644 index 0000000..98ddf56 --- /dev/null +++ b/test/CodeGen/SystemZ/insert-01.ll @@ -0,0 +1,230 @@ +; Test insertions of memory into the low byte of an i32. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check a plain insertion with (or (and ... -0x100) (zext (load ....))). +; The whole sequence can be performed by IC. 
+define i32 @f1(i32 %orig, i8 *%ptr) { +; CHECK: f1: +; CHECK-NOT: ni +; CHECK: ic %r2, 0(%r3) +; CHECK: br %r14 + %val = load i8 *%ptr + %ptr2 = zext i8 %val to i32 + %ptr1 = and i32 %orig, -256 + %or = or i32 %ptr1, %ptr2 + ret i32 %or +} + +; Like f1, but with the operands reversed. +define i32 @f2(i32 %orig, i8 *%ptr) { +; CHECK: f2: +; CHECK-NOT: ni +; CHECK: ic %r2, 0(%r3) +; CHECK: br %r14 + %val = load i8 *%ptr + %ptr2 = zext i8 %val to i32 + %ptr1 = and i32 %orig, -256 + %or = or i32 %ptr2, %ptr1 + ret i32 %or +} + +; Check a case where more bits than lower 8 are masked out of the +; register value. We can use IC but must keep the original mask. +define i32 @f3(i32 %orig, i8 *%ptr) { +; CHECK: f3: +; CHECK: nill %r2, 65024 +; CHECK: ic %r2, 0(%r3) +; CHECK: br %r14 + %val = load i8 *%ptr + %ptr2 = zext i8 %val to i32 + %ptr1 = and i32 %orig, -512 + %or = or i32 %ptr1, %ptr2 + ret i32 %or +} + +; Like f3, but with the operands reversed. +define i32 @f4(i32 %orig, i8 *%ptr) { +; CHECK: f4: +; CHECK: nill %r2, 65024 +; CHECK: ic %r2, 0(%r3) +; CHECK: br %r14 + %val = load i8 *%ptr + %ptr2 = zext i8 %val to i32 + %ptr1 = and i32 %orig, -512 + %or = or i32 %ptr2, %ptr1 + ret i32 %or +} + +; Check a case where the low 8 bits are cleared by a shift left. +define i32 @f5(i32 %orig, i8 *%ptr) { +; CHECK: f5: +; CHECK: sll %r2, 8 +; CHECK: ic %r2, 0(%r3) +; CHECK: br %r14 + %val = load i8 *%ptr + %ptr2 = zext i8 %val to i32 + %ptr1 = shl i32 %orig, 8 + %or = or i32 %ptr1, %ptr2 + ret i32 %or +} + +; Like f5, but with the operands reversed. +define i32 @f6(i32 %orig, i8 *%ptr) { +; CHECK: f6: +; CHECK: sll %r2, 8 +; CHECK: ic %r2, 0(%r3) +; CHECK: br %r14 + %val = load i8 *%ptr + %ptr2 = zext i8 %val to i32 + %ptr1 = shl i32 %orig, 8 + %or = or i32 %ptr2, %ptr1 + ret i32 %or +} + +; Check insertions into a constant. 
+define i32 @f7(i32 %orig, i8 *%ptr) { +; CHECK: f7: +; CHECK: lhi %r2, 256 +; CHECK: ic %r2, 0(%r3) +; CHECK: br %r14 + %val = load i8 *%ptr + %ptr2 = zext i8 %val to i32 + %or = or i32 %ptr2, 256 + ret i32 %or +} + +; Like f7, but with the operands reversed. +define i32 @f8(i32 %orig, i8 *%ptr) { +; CHECK: f8: +; CHECK: lhi %r2, 256 +; CHECK: ic %r2, 0(%r3) +; CHECK: br %r14 + %val = load i8 *%ptr + %ptr2 = zext i8 %val to i32 + %or = or i32 256, %ptr2 + ret i32 %or +} + +; Check the high end of the IC range. +define i32 @f9(i32 %orig, i8 *%src) { +; CHECK: f9: +; CHECK: ic %r2, 4095(%r3) +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 4095 + %val = load i8 *%ptr + %src2 = zext i8 %val to i32 + %src1 = and i32 %orig, -256 + %or = or i32 %src2, %src1 + ret i32 %or +} + +; Check the next byte up, which should use ICY instead of IC. +define i32 @f10(i32 %orig, i8 *%src) { +; CHECK: f10: +; CHECK: icy %r2, 4096(%r3) +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 4096 + %val = load i8 *%ptr + %src2 = zext i8 %val to i32 + %src1 = and i32 %orig, -256 + %or = or i32 %src2, %src1 + ret i32 %or +} + +; Check the high end of the ICY range. +define i32 @f11(i32 %orig, i8 *%src) { +; CHECK: f11: +; CHECK: icy %r2, 524287(%r3) +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 524287 + %val = load i8 *%ptr + %src2 = zext i8 %val to i32 + %src1 = and i32 %orig, -256 + %or = or i32 %src2, %src1 + ret i32 %or +} + +; Check the next byte up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i32 @f12(i32 %orig, i8 *%src) { +; CHECK: f12: +; CHECK: agfi %r3, 524288 +; CHECK: ic %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 524288 + %val = load i8 *%ptr + %src2 = zext i8 %val to i32 + %src1 = and i32 %orig, -256 + %or = or i32 %src2, %src1 + ret i32 %or +} + +; Check the high end of the negative ICY range. 
+define i32 @f13(i32 %orig, i8 *%src) { +; CHECK: f13: +; CHECK: icy %r2, -1(%r3) +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 -1 + %val = load i8 *%ptr + %src2 = zext i8 %val to i32 + %src1 = and i32 %orig, -256 + %or = or i32 %src2, %src1 + ret i32 %or +} + +; Check the low end of the ICY range. +define i32 @f14(i32 %orig, i8 *%src) { +; CHECK: f14: +; CHECK: icy %r2, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 -524288 + %val = load i8 *%ptr + %src2 = zext i8 %val to i32 + %src1 = and i32 %orig, -256 + %or = or i32 %src2, %src1 + ret i32 %or +} + +; Check the next byte down, which needs separate address logic. +; Other sequences besides this one would be OK. +define i32 @f15(i32 %orig, i8 *%src) { +; CHECK: f15: +; CHECK: agfi %r3, -524289 +; CHECK: ic %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 -524289 + %val = load i8 *%ptr + %src2 = zext i8 %val to i32 + %src1 = and i32 %orig, -256 + %or = or i32 %src2, %src1 + ret i32 %or +} + +; Check that IC allows an index. +define i32 @f16(i32 %orig, i8 *%src, i64 %index) { +; CHECK: f16: +; CHECK: ic %r2, 4095({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %ptr1 = getelementptr i8 *%src, i64 %index + %ptr2 = getelementptr i8 *%ptr1, i64 4095 + %val = load i8 *%ptr2 + %src2 = zext i8 %val to i32 + %src1 = and i32 %orig, -256 + %or = or i32 %src2, %src1 + ret i32 %or +} + +; Check that ICY allows an index. 
+define i32 @f17(i32 %orig, i8 *%src, i64 %index) { +; CHECK: f17: +; CHECK: icy %r2, 4096({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %ptr1 = getelementptr i8 *%src, i64 %index + %ptr2 = getelementptr i8 *%ptr1, i64 4096 + %val = load i8 *%ptr2 + %src2 = zext i8 %val to i32 + %src1 = and i32 %orig, -256 + %or = or i32 %src2, %src1 + ret i32 %or +} diff --git a/test/CodeGen/SystemZ/insert-02.ll b/test/CodeGen/SystemZ/insert-02.ll new file mode 100644 index 0000000..471889d --- /dev/null +++ b/test/CodeGen/SystemZ/insert-02.ll @@ -0,0 +1,230 @@ +; Test insertions of memory into the low byte of an i64. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check a plain insertion with (or (and ... -0x100) (zext (load ....))). +; The whole sequence can be performed by IC. +define i64 @f1(i64 %orig, i8 *%ptr) { +; CHECK: f1: +; CHECK-NOT: ni +; CHECK: ic %r2, 0(%r3) +; CHECK: br %r14 + %val = load i8 *%ptr + %ptr2 = zext i8 %val to i64 + %ptr1 = and i64 %orig, -256 + %or = or i64 %ptr1, %ptr2 + ret i64 %or +} + +; Like f1, but with the operands reversed. +define i64 @f2(i64 %orig, i8 *%ptr) { +; CHECK: f2: +; CHECK-NOT: ni +; CHECK: ic %r2, 0(%r3) +; CHECK: br %r14 + %val = load i8 *%ptr + %ptr2 = zext i8 %val to i64 + %ptr1 = and i64 %orig, -256 + %or = or i64 %ptr2, %ptr1 + ret i64 %or +} + +; Check a case where more bits than lower 8 are masked out of the +; register value. We can use IC but must keep the original mask. +define i64 @f3(i64 %orig, i8 *%ptr) { +; CHECK: f3: +; CHECK: nill %r2, 65024 +; CHECK: ic %r2, 0(%r3) +; CHECK: br %r14 + %val = load i8 *%ptr + %ptr2 = zext i8 %val to i64 + %ptr1 = and i64 %orig, -512 + %or = or i64 %ptr1, %ptr2 + ret i64 %or +} + +; Like f3, but with the operands reversed. 
+define i64 @f4(i64 %orig, i8 *%ptr) { +; CHECK: f4: +; CHECK: nill %r2, 65024 +; CHECK: ic %r2, 0(%r3) +; CHECK: br %r14 + %val = load i8 *%ptr + %ptr2 = zext i8 %val to i64 + %ptr1 = and i64 %orig, -512 + %or = or i64 %ptr2, %ptr1 + ret i64 %or +} + +; Check a case where the low 8 bits are cleared by a shift left. +define i64 @f5(i64 %orig, i8 *%ptr) { +; CHECK: f5: +; CHECK: sllg %r2, %r2, 8 +; CHECK: ic %r2, 0(%r3) +; CHECK: br %r14 + %val = load i8 *%ptr + %ptr2 = zext i8 %val to i64 + %ptr1 = shl i64 %orig, 8 + %or = or i64 %ptr1, %ptr2 + ret i64 %or +} + +; Like f5, but with the operands reversed. +define i64 @f6(i64 %orig, i8 *%ptr) { +; CHECK: f6: +; CHECK: sllg %r2, %r2, 8 +; CHECK: ic %r2, 0(%r3) +; CHECK: br %r14 + %val = load i8 *%ptr + %ptr2 = zext i8 %val to i64 + %ptr1 = shl i64 %orig, 8 + %or = or i64 %ptr2, %ptr1 + ret i64 %or +} + +; Check insertions into a constant. +define i64 @f7(i64 %orig, i8 *%ptr) { +; CHECK: f7: +; CHECK: lghi %r2, 256 +; CHECK: ic %r2, 0(%r3) +; CHECK: br %r14 + %val = load i8 *%ptr + %ptr2 = zext i8 %val to i64 + %or = or i64 %ptr2, 256 + ret i64 %or +} + +; Like f7, but with the operands reversed. +define i64 @f8(i64 %orig, i8 *%ptr) { +; CHECK: f8: +; CHECK: lghi %r2, 256 +; CHECK: ic %r2, 0(%r3) +; CHECK: br %r14 + %val = load i8 *%ptr + %ptr2 = zext i8 %val to i64 + %or = or i64 256, %ptr2 + ret i64 %or +} + +; Check the high end of the IC range. +define i64 @f9(i64 %orig, i8 *%src) { +; CHECK: f9: +; CHECK: ic %r2, 4095(%r3) +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 4095 + %val = load i8 *%ptr + %src2 = zext i8 %val to i64 + %src1 = and i64 %orig, -256 + %or = or i64 %src2, %src1 + ret i64 %or +} + +; Check the next byte up, which should use ICY instead of IC. 
+define i64 @f10(i64 %orig, i8 *%src) { +; CHECK: f10: +; CHECK: icy %r2, 4096(%r3) +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 4096 + %val = load i8 *%ptr + %src2 = zext i8 %val to i64 + %src1 = and i64 %orig, -256 + %or = or i64 %src2, %src1 + ret i64 %or +} + +; Check the high end of the ICY range. +define i64 @f11(i64 %orig, i8 *%src) { +; CHECK: f11: +; CHECK: icy %r2, 524287(%r3) +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 524287 + %val = load i8 *%ptr + %src2 = zext i8 %val to i64 + %src1 = and i64 %orig, -256 + %or = or i64 %src2, %src1 + ret i64 %or +} + +; Check the next byte up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f12(i64 %orig, i8 *%src) { +; CHECK: f12: +; CHECK: agfi %r3, 524288 +; CHECK: ic %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 524288 + %val = load i8 *%ptr + %src2 = zext i8 %val to i64 + %src1 = and i64 %orig, -256 + %or = or i64 %src2, %src1 + ret i64 %or +} + +; Check the high end of the negative ICY range. +define i64 @f13(i64 %orig, i8 *%src) { +; CHECK: f13: +; CHECK: icy %r2, -1(%r3) +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 -1 + %val = load i8 *%ptr + %src2 = zext i8 %val to i64 + %src1 = and i64 %orig, -256 + %or = or i64 %src2, %src1 + ret i64 %or +} + +; Check the low end of the ICY range. +define i64 @f14(i64 %orig, i8 *%src) { +; CHECK: f14: +; CHECK: icy %r2, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 -524288 + %val = load i8 *%ptr + %src2 = zext i8 %val to i64 + %src1 = and i64 %orig, -256 + %or = or i64 %src2, %src1 + ret i64 %or +} + +; Check the next byte down, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define i64 @f15(i64 %orig, i8 *%src) { +; CHECK: f15: +; CHECK: agfi %r3, -524289 +; CHECK: ic %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 -524289 + %val = load i8 *%ptr + %src2 = zext i8 %val to i64 + %src1 = and i64 %orig, -256 + %or = or i64 %src2, %src1 + ret i64 %or +} + +; Check that IC allows an index. +define i64 @f16(i64 %orig, i8 *%src, i64 %index) { +; CHECK: f16: +; CHECK: ic %r2, 4095({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %ptr1 = getelementptr i8 *%src, i64 %index + %ptr2 = getelementptr i8 *%ptr1, i64 4095 + %val = load i8 *%ptr2 + %src2 = zext i8 %val to i64 + %src1 = and i64 %orig, -256 + %or = or i64 %src2, %src1 + ret i64 %or +} + +; Check that ICY allows an index. +define i64 @f17(i64 %orig, i8 *%src, i64 %index) { +; CHECK: f17: +; CHECK: icy %r2, 4096({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %ptr1 = getelementptr i8 *%src, i64 %index + %ptr2 = getelementptr i8 *%ptr1, i64 4096 + %val = load i8 *%ptr2 + %src2 = zext i8 %val to i64 + %src1 = and i64 %orig, -256 + %or = or i64 %src2, %src1 + ret i64 %or +} diff --git a/test/CodeGen/SystemZ/insert-03.ll b/test/CodeGen/SystemZ/insert-03.ll new file mode 100644 index 0000000..261eabd --- /dev/null +++ b/test/CodeGen/SystemZ/insert-03.ll @@ -0,0 +1,71 @@ +; Test insertions of 16-bit constants into one half of an i32. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the lowest useful IILL value. (We use NILL rather than IILL +; to clear 16 bits.) +define i32 @f1(i32 %a) { +; CHECK: f1: +; CHECK-NOT: ni +; CHECK: iill %r2, 1 +; CHECK: br %r14 + %and = and i32 %a, 4294901760 + %or = or i32 %and, 1 + ret i32 %or +} + +; Check a middle value. +define i32 @f2(i32 %a) { +; CHECK: f2: +; CHECK-NOT: ni +; CHECK: iill %r2, 32769 +; CHECK: br %r14 + %and = and i32 %a, -65536 + %or = or i32 %and, 32769 + ret i32 %or +} + +; Check the highest useful IILL value. (We use OILL rather than IILL +; to set 16 bits.) 
+define i32 @f3(i32 %a) { +; CHECK: f3: +; CHECK-NOT: ni +; CHECK: iill %r2, 65534 +; CHECK: br %r14 + %and = and i32 %a, 4294901760 + %or = or i32 %and, 65534 + ret i32 %or +} + +; Check the lowest useful IILH value. +define i32 @f4(i32 %a) { +; CHECK: f4: +; CHECK-NOT: ni +; CHECK: iilh %r2, 1 +; CHECK: br %r14 + %and = and i32 %a, 65535 + %or = or i32 %and, 65536 + ret i32 %or +} + +; Check a middle value. +define i32 @f5(i32 %a) { +; CHECK: f5: +; CHECK-NOT: ni +; CHECK: iilh %r2, 32767 +; CHECK: br %r14 + %and = and i32 %a, 65535 + %or = or i32 %and, 2147418112 + ret i32 %or +} + +; Check the highest useful IILH value. +define i32 @f6(i32 %a) { +; CHECK: f6: +; CHECK-NOT: ni +; CHECK: iilh %r2, 65534 +; CHECK: br %r14 + %and = and i32 %a, 65535 + %or = or i32 %and, -131072 + ret i32 %or +} diff --git a/test/CodeGen/SystemZ/insert-04.ll b/test/CodeGen/SystemZ/insert-04.ll new file mode 100644 index 0000000..07f88b9 --- /dev/null +++ b/test/CodeGen/SystemZ/insert-04.ll @@ -0,0 +1,137 @@ +; Test insertions of 16-bit constants into an i64. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the lowest useful IILL value. (We use NILL rather than IILL +; to clear 16 bits.) +define i64 @f1(i64 %a) { +; CHECK: f1: +; CHECK-NOT: ni +; CHECK: iill %r2, 1 +; CHECK: br %r14 + %and = and i64 %a, 18446744073709486080 + %or = or i64 %and, 1 + ret i64 %or +} + +; Check a middle value. +define i64 @f2(i64 %a) { +; CHECK: f2: +; CHECK-NOT: ni +; CHECK: iill %r2, 32769 +; CHECK: br %r14 + %and = and i64 %a, -65536 + %or = or i64 %and, 32769 + ret i64 %or +} + +; Check the highest useful IILL value. (We use OILL rather than IILL +; to set 16 bits.) +define i64 @f3(i64 %a) { +; CHECK: f3: +; CHECK-NOT: ni +; CHECK: iill %r2, 65534 +; CHECK: br %r14 + %and = and i64 %a, 18446744073709486080 + %or = or i64 %and, 65534 + ret i64 %or +} + +; Check the lowest useful IILH value. 
+define i64 @f4(i64 %a) { +; CHECK: f4: +; CHECK-NOT: ni +; CHECK: iilh %r2, 1 +; CHECK: br %r14 + %and = and i64 %a, 18446744069414649855 + %or = or i64 %and, 65536 + ret i64 %or +} + +; Check a middle value. +define i64 @f5(i64 %a) { +; CHECK: f5: +; CHECK-NOT: ni +; CHECK: iilh %r2, 32767 +; CHECK: br %r14 + %and = and i64 %a, -4294901761 + %or = or i64 %and, 2147418112 + ret i64 %or +} + +; Check the highest useful IILH value. +define i64 @f6(i64 %a) { +; CHECK: f6: +; CHECK-NOT: ni +; CHECK: iilh %r2, 65534 +; CHECK: br %r14 + %and = and i64 %a, 18446744069414649855 + %or = or i64 %and, 4294836224 + ret i64 %or +} + +; Check the lowest useful IIHL value. +define i64 @f7(i64 %a) { +; CHECK: f7: +; CHECK-NOT: ni +; CHECK: iihl %r2, 1 +; CHECK: br %r14 + %and = and i64 %a, 18446462603027808255 + %or = or i64 %and, 4294967296 + ret i64 %or +} + +; Check a middle value. +define i64 @f8(i64 %a) { +; CHECK: f8: +; CHECK-NOT: ni +; CHECK: iihl %r2, 32767 +; CHECK: br %r14 + %and = and i64 %a, -281470681743361 + %or = or i64 %and, 140733193388032 + ret i64 %or +} + +; Check the highest useful IIHL value. +define i64 @f9(i64 %a) { +; CHECK: f9: +; CHECK-NOT: ni +; CHECK: iihl %r2, 65534 +; CHECK: br %r14 + %and = and i64 %a, 18446462603027808255 + %or = or i64 %and, 281466386776064 + ret i64 %or +} + +; Check the lowest useful IIHH value. +define i64 @f10(i64 %a) { +; CHECK: f10: +; CHECK-NOT: ni +; CHECK: iihh %r2, 1 +; CHECK: br %r14 + %and = and i64 %a, 281474976710655 + %or = or i64 %and, 281474976710656 + ret i64 %or +} + +; Check a middle value. +define i64 @f11(i64 %a) { +; CHECK: f11: +; CHECK-NOT: ni +; CHECK: iihh %r2, 32767 +; CHECK: br %r14 + %and = and i64 %a, 281474976710655 + %or = or i64 %and, 9223090561878065152 + ret i64 %or +} + +; Check the highest useful IIHH value. 
+define i64 @f12(i64 %a) { +; CHECK: f12: +; CHECK-NOT: ni +; CHECK: iihh %r2, 65534 +; CHECK: br %r14 + %and = and i64 %a, 281474976710655 + %or = or i64 %and, 18446181123756130304 + ret i64 %or +} diff --git a/test/CodeGen/SystemZ/insert-05.ll b/test/CodeGen/SystemZ/insert-05.ll new file mode 100644 index 0000000..da51676 --- /dev/null +++ b/test/CodeGen/SystemZ/insert-05.ll @@ -0,0 +1,224 @@ +; Test insertions of 32-bit constants into one half of an i64. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Prefer LHI over IILF for signed 16-bit constants. +define i64 @f1(i64 %a) { +; CHECK: f1: +; CHECK-NOT: ni +; CHECK: lhi %r2, 1 +; CHECK: br %r14 + %and = and i64 %a, 18446744069414584320 + %or = or i64 %and, 1 + ret i64 %or +} + +; Check the high end of the LHI range. +define i64 @f2(i64 %a) { +; CHECK: f2: +; CHECK-NOT: ni +; CHECK: lhi %r2, 32767 +; CHECK: br %r14 + %and = and i64 %a, 18446744069414584320 + %or = or i64 %and, 32767 + ret i64 %or +} + +; Check the next value up, which should use IILF instead. +define i64 @f3(i64 %a) { +; CHECK: f3: +; CHECK-NOT: ni +; CHECK: iilf %r2, 32768 +; CHECK: br %r14 + %and = and i64 %a, 18446744069414584320 + %or = or i64 %and, 32768 + ret i64 %or +} + +; Check a value in which the lower 16 bits are clear. +define i64 @f4(i64 %a) { +; CHECK: f4: +; CHECK-NOT: ni +; CHECK: iilf %r2, 65536 +; CHECK: br %r14 + %and = and i64 %a, 18446744069414584320 + %or = or i64 %and, 65536 + ret i64 %or +} + +; Check the highest useful IILF value (-0x8001). +define i64 @f5(i64 %a) { +; CHECK: f5: +; CHECK-NOT: ni +; CHECK: iilf %r2, 4294934527 +; CHECK: br %r14 + %and = and i64 %a, 18446744069414584320 + %or = or i64 %and, 4294934527 + ret i64 %or +} + +; Check the next value up, which should use LHI instead. 
+define i64 @f6(i64 %a) { +; CHECK: f6: +; CHECK-NOT: ni +; CHECK: lhi %r2, -32768 +; CHECK: br %r14 + %and = and i64 %a, 18446744069414584320 + %or = or i64 %and, 4294934528 + ret i64 %or +} + +; Check the highest useful LHI value. (We use OILF for -1 instead, although +; LHI might be better there too.) +define i64 @f7(i64 %a) { +; CHECK: f7: +; CHECK-NOT: ni +; CHECK: lhi %r2, -2 +; CHECK: br %r14 + %and = and i64 %a, 18446744069414584320 + %or = or i64 %and, 4294967294 + ret i64 %or +} + +; Check that SRLG is still used if some of the high bits are known to be 0 +; (and so might be removed from the mask). +define i64 @f8(i64 %a) { +; CHECK: f8: +; CHECK: srlg %r2, %r2, 1 +; CHECK-NEXT: iilf %r2, 32768 +; CHECK: br %r14 + %shifted = lshr i64 %a, 1 + %and = and i64 %shifted, 18446744069414584320 + %or = or i64 %and, 32768 + ret i64 %or +} + +; Repeat f8 with addition, which is known to be equivalent to OR in this case. +define i64 @f9(i64 %a) { +; CHECK: f9: +; CHECK: srlg %r2, %r2, 1 +; CHECK-NEXT: iilf %r2, 32768 +; CHECK: br %r14 + %shifted = lshr i64 %a, 1 + %and = and i64 %shifted, 18446744069414584320 + %or = add i64 %and, 32768 + ret i64 %or +} + +; Repeat f8 with already-zero bits removed from the mask. +define i64 @f10(i64 %a) { +; CHECK: f10: +; CHECK: srlg %r2, %r2, 1 +; CHECK-NEXT: iilf %r2, 32768 +; CHECK: br %r14 + %shifted = lshr i64 %a, 1 + %and = and i64 %shifted, 9223372032559808512 + %or = or i64 %and, 32768 + ret i64 %or +} + +; Repeat f10 with addition, which is known to be equivalent to OR in this case. +define i64 @f11(i64 %a) { +; CHECK: f11: +; CHECK: srlg %r2, %r2, 1 +; CHECK-NEXT: iilf %r2, 32768 +; CHECK: br %r14 + %shifted = lshr i64 %a, 1 + %and = and i64 %shifted, 9223372032559808512 + %or = add i64 %and, 32768 + ret i64 %or +} + +; Check the lowest useful IIHF value. 
+define i64 @f12(i64 %a) { +; CHECK: f12: +; CHECK-NOT: ni +; CHECK: iihf %r2, 1 +; CHECK: br %r14 + %and = and i64 %a, 4294967295 + %or = or i64 %and, 4294967296 + ret i64 %or +} + +; Check a value in which the lower 16 bits are clear. +define i64 @f13(i64 %a) { +; CHECK: f13: +; CHECK-NOT: ni +; CHECK: iihf %r2, 2147483648 +; CHECK: br %r14 + %and = and i64 %a, 4294967295 + %or = or i64 %and, 9223372036854775808 + ret i64 %or +} + +; Check the highest useful IIHF value (0xfffffffe). +define i64 @f14(i64 %a) { +; CHECK: f14: +; CHECK-NOT: ni +; CHECK: iihf %r2, 4294967294 +; CHECK: br %r14 + %and = and i64 %a, 4294967295 + %or = or i64 %and, 18446744065119617024 + ret i64 %or +} + +; Check a case in which some of the low 32 bits are known to be clear, +; and so could be removed from the AND mask. +define i64 @f15(i64 %a) { +; CHECK: f15: +; CHECK: sllg %r2, %r2, 1 +; CHECK-NEXT: iihf %r2, 1 +; CHECK: br %r14 + %shifted = shl i64 %a, 1 + %and = and i64 %shifted, 4294967295 + %or = or i64 %and, 4294967296 + ret i64 %or +} + +; Repeat f15 with the zero bits explicitly removed from the mask. +define i64 @f16(i64 %a) { +; CHECK: f16: +; CHECK: sllg %r2, %r2, 1 +; CHECK-NEXT: iihf %r2, 1 +; CHECK: br %r14 + %shifted = shl i64 %a, 1 + %and = and i64 %shifted, 4294967294 + %or = or i64 %and, 4294967296 + ret i64 %or +} + +; Check concatenation of two i32s. +define i64 @f17(i32 %a) { +; CHECK: f17: +; CHECK: msr %r2, %r2 +; CHECK-NEXT: iihf %r2, 1 +; CHECK: br %r14 + %mul = mul i32 %a, %a + %ext = zext i32 %mul to i64 + %or = or i64 %ext, 4294967296 + ret i64 %or +} + +; Repeat f17 with the operands reversed. +define i64 @f18(i32 %a) { +; CHECK: f18: +; CHECK: msr %r2, %r2 +; CHECK-NEXT: iihf %r2, 1 +; CHECK: br %r14 + %mul = mul i32 %a, %a + %ext = zext i32 %mul to i64 + %or = or i64 4294967296, %ext + ret i64 %or +} + +; The truncation here isn't free; we need an explicit zero extension. 
+define i64 @f19(i32 %a) { +; CHECK: f19: +; CHECK: llgcr %r2, %r2 +; CHECK: oihl %r2, 1 +; CHECK: br %r14 + %trunc = trunc i32 %a to i8 + %ext = zext i8 %trunc to i64 + %or = or i64 %ext, 4294967296 + ret i64 %or +} diff --git a/test/CodeGen/SystemZ/insert-06.ll b/test/CodeGen/SystemZ/insert-06.ll new file mode 100644 index 0000000..4a13ef4 --- /dev/null +++ b/test/CodeGen/SystemZ/insert-06.ll @@ -0,0 +1,167 @@ +; Test insertions of i32s into the low half of an i64. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Insertion of an i32 can be done using LR. +define i64 @f1(i64 %a, i32 %b) { +; CHECK: f1: +; CHECK-NOT: {{%r[23]}} +; CHECK: lr %r2, %r3 +; CHECK: br %r14 + %low = zext i32 %b to i64 + %high = and i64 %a, -4294967296 + %res = or i64 %high, %low + ret i64 %res +} + +; ... and again with the operands reversed. +define i64 @f2(i64 %a, i32 %b) { +; CHECK: f2: +; CHECK-NOT: {{%r[23]}} +; CHECK: lr %r2, %r3 +; CHECK: br %r14 + %low = zext i32 %b to i64 + %high = and i64 %a, -4294967296 + %res = or i64 %low, %high + ret i64 %res +} + +; Like f1, but with "in register" zero extension. +define i64 @f3(i64 %a, i64 %b) { +; CHECK: f3: +; CHECK-NOT: {{%r[23]}} +; CHECK: lr %r2, %r3 +; CHECK: br %r14 + %low = and i64 %b, 4294967295 + %high = and i64 %a, -4294967296 + %res = or i64 %high, %low + ret i64 %res +} + +; ... and again with the operands reversed. +define i64 @f4(i64 %a, i64 %b) { +; CHECK: f4: +; CHECK-NOT: {{%r[23]}} +; CHECK: lr %r2, %r3 +; CHECK: br %r14 + %low = and i64 %b, 4294967295 + %high = and i64 %a, -4294967296 + %res = or i64 %low, %high + ret i64 %res +} + +; Unary operations can be done directly into the low half. +define i64 @f5(i64 %a, i32 %b) { +; CHECK: f5: +; CHECK-NOT: {{%r[23]}} +; CHECK: lcr %r2, %r3 +; CHECK: br %r14 + %neg = sub i32 0, %b + %low = zext i32 %neg to i64 + %high = and i64 %a, -4294967296 + %res = or i64 %high, %low + ret i64 %res +} + +; ...likewise three-operand binary operations like RLL. 
+define i64 @f6(i64 %a, i32 %b) { +; CHECK: f6: +; CHECK-NOT: {{%r[23]}} +; CHECK: rll %r2, %r3, 1 +; CHECK: br %r14 + %parta = shl i32 %b, 1 + %partb = lshr i32 %b, 31 + %rot = or i32 %parta, %partb + %low = zext i32 %rot to i64 + %high = and i64 %a, -4294967296 + %res = or i64 %low, %high + ret i64 %res +} + +; Loads can be done directly into the low half. The range of L is checked +; in the move tests. +define i64 @f7(i64 %a, i32 *%src) { +; CHECK: f7: +; CHECK-NOT: {{%r[23]}} +; CHECK: l %r2, 0(%r3) +; CHECK: br %r14 + %b = load i32 *%src + %low = zext i32 %b to i64 + %high = and i64 %a, -4294967296 + %res = or i64 %high, %low + ret i64 %res +} + +; ...likewise extending loads. +define i64 @f8(i64 %a, i8 *%src) { +; CHECK: f8: +; CHECK-NOT: {{%r[23]}} +; CHECK: lb %r2, 0(%r3) +; CHECK: br %r14 + %byte = load i8 *%src + %b = sext i8 %byte to i32 + %low = zext i32 %b to i64 + %high = and i64 %a, -4294967296 + %res = or i64 %high, %low + ret i64 %res +} + +; Check a case like f1 in which there is no AND. We simply know from context +; that the upper half of one OR operand and the lower half of the other are +; both clear. +define i64 @f9(i64 %a, i32 %b) { +; CHECK: f9: +; CHECK: sllg %r2, %r2, 32 +; CHECK: lr %r2, %r3 +; CHECK: br %r14 + %shift = shl i64 %a, 32 + %low = zext i32 %b to i64 + %or = or i64 %shift, %low + ret i64 %or +} + +; ...and again with the operands reversed. +define i64 @f10(i64 %a, i32 %b) { +; CHECK: f10: +; CHECK: sllg %r2, %r2, 32 +; CHECK: lr %r2, %r3 +; CHECK: br %r14 + %shift = shl i64 %a, 32 + %low = zext i32 %b to i64 + %or = or i64 %low, %shift + ret i64 %or +} + +; Like f9, but with "in register" zero extension. +define i64 @f11(i64 %a, i64 %b) { +; CHECK: f11: +; CHECK: lr %r2, %r3 +; CHECK: br %r14 + %shift = shl i64 %a, 32 + %low = and i64 %b, 4294967295 + %or = or i64 %shift, %low + ret i64 %or +} + +; ...and again with the operands reversed. 
+define i64 @f12(i64 %a, i64 %b) { +; CHECK: f12: +; CHECK: lr %r2, %r3 +; CHECK: br %r14 + %shift = shl i64 %a, 32 + %low = and i64 %b, 4294967295 + %or = or i64 %low, %shift + ret i64 %or +} + +; Like f9, but for larger shifts than 32. +define i64 @f13(i64 %a, i32 %b) { +; CHECK: f13: +; CHECK: sllg %r2, %r2, 60 +; CHECK: lr %r2, %r3 +; CHECK: br %r14 + %shift = shl i64 %a, 60 + %low = zext i32 %b to i64 + %or = or i64 %shift, %low + ret i64 %or +} diff --git a/test/CodeGen/SystemZ/int-add-01.ll b/test/CodeGen/SystemZ/int-add-01.ll new file mode 100644 index 0000000..d12ac22 --- /dev/null +++ b/test/CodeGen/SystemZ/int-add-01.ll @@ -0,0 +1,131 @@ +; Test 32-bit addition in which the second operand is a sign-extended +; i16 memory value. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the low end of the AH range. +define i32 @f1(i32 %lhs, i16 *%src) { +; CHECK: f1: +; CHECK: ah %r2, 0(%r3) +; CHECK: br %r14 + %half = load i16 *%src + %rhs = sext i16 %half to i32 + %res = add i32 %lhs, %rhs + ret i32 %res +} + +; Check the high end of the aligned AH range. +define i32 @f2(i32 %lhs, i16 *%src) { +; CHECK: f2: +; CHECK: ah %r2, 4094(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 2047 + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %res = add i32 %lhs, %rhs + ret i32 %res +} + +; Check the next halfword up, which should use AHY instead of AH. +define i32 @f3(i32 %lhs, i16 *%src) { +; CHECK: f3: +; CHECK: ahy %r2, 4096(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 2048 + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %res = add i32 %lhs, %rhs + ret i32 %res +} + +; Check the high end of the aligned AHY range. 
+define i32 @f4(i32 %lhs, i16 *%src) { +; CHECK: f4: +; CHECK: ahy %r2, 524286(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 262143 + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %res = add i32 %lhs, %rhs + ret i32 %res +} + +; Check the next halfword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i32 @f5(i32 %lhs, i16 *%src) { +; CHECK: f5: +; CHECK: agfi %r3, 524288 +; CHECK: ah %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 262144 + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %res = add i32 %lhs, %rhs + ret i32 %res +} + +; Check the high end of the negative aligned AHY range. +define i32 @f6(i32 %lhs, i16 *%src) { +; CHECK: f6: +; CHECK: ahy %r2, -2(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 -1 + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %res = add i32 %lhs, %rhs + ret i32 %res +} + +; Check the low end of the AHY range. +define i32 @f7(i32 %lhs, i16 *%src) { +; CHECK: f7: +; CHECK: ahy %r2, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 -262144 + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %res = add i32 %lhs, %rhs + ret i32 %res +} + +; Check the next halfword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define i32 @f8(i32 %lhs, i16 *%src) { +; CHECK: f8: +; CHECK: agfi %r3, -524290 +; CHECK: ah %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 -262145 + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %res = add i32 %lhs, %rhs + ret i32 %res +} + +; Check that AH allows an index. 
+define i32 @f9(i32 %lhs, i64 %src, i64 %index) { +; CHECK: f9: +; CHECK: ah %r2, 4094({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4094 + %ptr = inttoptr i64 %add2 to i16 * + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %res = add i32 %lhs, %rhs + ret i32 %res +} + +; Check that AHY allows an index. +define i32 @f10(i32 %lhs, i64 %src, i64 %index) { +; CHECK: f10: +; CHECK: ahy %r2, 4096({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i16 * + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %res = add i32 %lhs, %rhs + ret i32 %res +} diff --git a/test/CodeGen/SystemZ/int-add-02.ll b/test/CodeGen/SystemZ/int-add-02.ll new file mode 100644 index 0000000..568ad1c --- /dev/null +++ b/test/CodeGen/SystemZ/int-add-02.ll @@ -0,0 +1,129 @@ +; Test 32-bit addition in which the second operand is variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check AR. +define i32 @f1(i32 %a, i32 %b) { +; CHECK: f1: +; CHECK: ar %r2, %r3 +; CHECK: br %r14 + %add = add i32 %a, %b + ret i32 %add +} + +; Check the low end of the A range. +define i32 @f2(i32 %a, i32 *%src) { +; CHECK: f2: +; CHECK: a %r2, 0(%r3) +; CHECK: br %r14 + %b = load i32 *%src + %add = add i32 %a, %b + ret i32 %add +} + +; Check the high end of the aligned A range. +define i32 @f3(i32 %a, i32 *%src) { +; CHECK: f3: +; CHECK: a %r2, 4092(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 1023 + %b = load i32 *%ptr + %add = add i32 %a, %b + ret i32 %add +} + +; Check the next word up, which should use AY instead of A. +define i32 @f4(i32 %a, i32 *%src) { +; CHECK: f4: +; CHECK: ay %r2, 4096(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 1024 + %b = load i32 *%ptr + %add = add i32 %a, %b + ret i32 %add +} + +; Check the high end of the aligned AY range. 
+define i32 @f5(i32 %a, i32 *%src) { +; CHECK: f5: +; CHECK: ay %r2, 524284(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131071 + %b = load i32 *%ptr + %add = add i32 %a, %b + ret i32 %add +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i32 @f6(i32 %a, i32 *%src) { +; CHECK: f6: +; CHECK: agfi %r3, 524288 +; CHECK: a %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131072 + %b = load i32 *%ptr + %add = add i32 %a, %b + ret i32 %add +} + +; Check the high end of the negative aligned AY range. +define i32 @f7(i32 %a, i32 *%src) { +; CHECK: f7: +; CHECK: ay %r2, -4(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -1 + %b = load i32 *%ptr + %add = add i32 %a, %b + ret i32 %add +} + +; Check the low end of the AY range. +define i32 @f8(i32 %a, i32 *%src) { +; CHECK: f8: +; CHECK: ay %r2, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131072 + %b = load i32 *%ptr + %add = add i32 %a, %b + ret i32 %add +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define i32 @f9(i32 %a, i32 *%src) { +; CHECK: f9: +; CHECK: agfi %r3, -524292 +; CHECK: a %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131073 + %b = load i32 *%ptr + %add = add i32 %a, %b + ret i32 %add +} + +; Check that A allows an index. +define i32 @f10(i32 %a, i64 %src, i64 %index) { +; CHECK: f10: +; CHECK: a %r2, 4092({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4092 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32 *%ptr + %add = add i32 %a, %b + ret i32 %add +} + +; Check that AY allows an index. 
+define i32 @f11(i32 %a, i64 %src, i64 %index) { +; CHECK: f11: +; CHECK: ay %r2, 4096({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32 *%ptr + %add = add i32 %a, %b + ret i32 %add +} diff --git a/test/CodeGen/SystemZ/int-add-03.ll b/test/CodeGen/SystemZ/int-add-03.ll new file mode 100644 index 0000000..4610357 --- /dev/null +++ b/test/CodeGen/SystemZ/int-add-03.ll @@ -0,0 +1,102 @@ +; Test additions between an i64 and a sign-extended i32. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check AGFR. +define i64 @f1(i64 %a, i32 %b) { +; CHECK: f1: +; CHECK: agfr %r2, %r3 +; CHECK: br %r14 + %bext = sext i32 %b to i64 + %add = add i64 %a, %bext + ret i64 %add +} + +; Check AGF with no displacement. +define i64 @f2(i64 %a, i32 *%src) { +; CHECK: f2: +; CHECK: agf %r2, 0(%r3) +; CHECK: br %r14 + %b = load i32 *%src + %bext = sext i32 %b to i64 + %add = add i64 %a, %bext + ret i64 %add +} + +; Check the high end of the aligned AGF range. +define i64 @f3(i64 %a, i32 *%src) { +; CHECK: f3: +; CHECK: agf %r2, 524284(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131071 + %b = load i32 *%ptr + %bext = sext i32 %b to i64 + %add = add i64 %a, %bext + ret i64 %add +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f4(i64 %a, i32 *%src) { +; CHECK: f4: +; CHECK: agfi %r3, 524288 +; CHECK: agf %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131072 + %b = load i32 *%ptr + %bext = sext i32 %b to i64 + %add = add i64 %a, %bext + ret i64 %add +} + +; Check the high end of the negative aligned AGF range. 
+define i64 @f5(i64 %a, i32 *%src) { +; CHECK: f5: +; CHECK: agf %r2, -4(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -1 + %b = load i32 *%ptr + %bext = sext i32 %b to i64 + %add = add i64 %a, %bext + ret i64 %add +} + +; Check the low end of the AGF range. +define i64 @f6(i64 %a, i32 *%src) { +; CHECK: f6: +; CHECK: agf %r2, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131072 + %b = load i32 *%ptr + %bext = sext i32 %b to i64 + %add = add i64 %a, %bext + ret i64 %add +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f7(i64 %a, i32 *%src) { +; CHECK: f7: +; CHECK: agfi %r3, -524292 +; CHECK: agf %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131073 + %b = load i32 *%ptr + %bext = sext i32 %b to i64 + %add = add i64 %a, %bext + ret i64 %add +} + +; Check that AGF allows an index. +define i64 @f8(i64 %a, i64 %src, i64 %index) { +; CHECK: f8: +; CHECK: agf %r2, 524284({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524284 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32 *%ptr + %bext = sext i32 %b to i64 + %add = add i64 %a, %bext + ret i64 %add +} diff --git a/test/CodeGen/SystemZ/int-add-04.ll b/test/CodeGen/SystemZ/int-add-04.ll new file mode 100644 index 0000000..1c2dc76 --- /dev/null +++ b/test/CodeGen/SystemZ/int-add-04.ll @@ -0,0 +1,102 @@ +; Test additions between an i64 and a zero-extended i32. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check ALGFR. +define i64 @f1(i64 %a, i32 %b) { +; CHECK: f1: +; CHECK: algfr %r2, %r3 +; CHECK: br %r14 + %bext = zext i32 %b to i64 + %add = add i64 %a, %bext + ret i64 %add +} + +; Check ALGF with no displacement. 
+define i64 @f2(i64 %a, i32 *%src) { +; CHECK: f2: +; CHECK: algf %r2, 0(%r3) +; CHECK: br %r14 + %b = load i32 *%src + %bext = zext i32 %b to i64 + %add = add i64 %a, %bext + ret i64 %add +} + +; Check the high end of the aligned ALGF range. +define i64 @f3(i64 %a, i32 *%src) { +; CHECK: f3: +; CHECK: algf %r2, 524284(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131071 + %b = load i32 *%ptr + %bext = zext i32 %b to i64 + %add = add i64 %a, %bext + ret i64 %add +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f4(i64 %a, i32 *%src) { +; CHECK: f4: +; CHECK: agfi %r3, 524288 +; CHECK: algf %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131072 + %b = load i32 *%ptr + %bext = zext i32 %b to i64 + %add = add i64 %a, %bext + ret i64 %add +} + +; Check the high end of the negative aligned ALGF range. +define i64 @f5(i64 %a, i32 *%src) { +; CHECK: f5: +; CHECK: algf %r2, -4(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -1 + %b = load i32 *%ptr + %bext = zext i32 %b to i64 + %add = add i64 %a, %bext + ret i64 %add +} + +; Check the low end of the ALGF range. +define i64 @f6(i64 %a, i32 *%src) { +; CHECK: f6: +; CHECK: algf %r2, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131072 + %b = load i32 *%ptr + %bext = zext i32 %b to i64 + %add = add i64 %a, %bext + ret i64 %add +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f7(i64 %a, i32 *%src) { +; CHECK: f7: +; CHECK: agfi %r3, -524292 +; CHECK: algf %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131073 + %b = load i32 *%ptr + %bext = zext i32 %b to i64 + %add = add i64 %a, %bext + ret i64 %add +} + +; Check that ALGF allows an index. 
+define i64 @f8(i64 %a, i64 %src, i64 %index) { +; CHECK: f8: +; CHECK: algf %r2, 524284({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524284 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32 *%ptr + %bext = zext i32 %b to i64 + %add = add i64 %a, %bext + ret i64 %add +} diff --git a/test/CodeGen/SystemZ/int-add-05.ll b/test/CodeGen/SystemZ/int-add-05.ll new file mode 100644 index 0000000..ae32cc4 --- /dev/null +++ b/test/CodeGen/SystemZ/int-add-05.ll @@ -0,0 +1,94 @@ +; Test 64-bit addition in which the second operand is variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check AGR. +define i64 @f1(i64 %a, i64 %b) { +; CHECK: f1: +; CHECK: agr %r2, %r3 +; CHECK: br %r14 + %add = add i64 %a, %b + ret i64 %add +} + +; Check AG with no displacement. +define i64 @f2(i64 %a, i64 *%src) { +; CHECK: f2: +; CHECK: ag %r2, 0(%r3) +; CHECK: br %r14 + %b = load i64 *%src + %add = add i64 %a, %b + ret i64 %add +} + +; Check the high end of the aligned AG range. +define i64 @f3(i64 %a, i64 *%src) { +; CHECK: f3: +; CHECK: ag %r2, 524280(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65535 + %b = load i64 *%ptr + %add = add i64 %a, %b + ret i64 %add +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f4(i64 %a, i64 *%src) { +; CHECK: f4: +; CHECK: agfi %r3, 524288 +; CHECK: ag %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65536 + %b = load i64 *%ptr + %add = add i64 %a, %b + ret i64 %add +} + +; Check the high end of the negative aligned AG range. +define i64 @f5(i64 %a, i64 *%src) { +; CHECK: f5: +; CHECK: ag %r2, -8(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -1 + %b = load i64 *%ptr + %add = add i64 %a, %b + ret i64 %add +} + +; Check the low end of the AG range. 
+define i64 @f6(i64 %a, i64 *%src) { +; CHECK: f6: +; CHECK: ag %r2, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65536 + %b = load i64 *%ptr + %add = add i64 %a, %b + ret i64 %add +} + +; Check the next doubleword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f7(i64 %a, i64 *%src) { +; CHECK: f7: +; CHECK: agfi %r3, -524296 +; CHECK: ag %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65537 + %b = load i64 *%ptr + %add = add i64 %a, %b + ret i64 %add +} + +; Check that AG allows an index. +define i64 @f8(i64 %a, i64 %src, i64 %index) { +; CHECK: f8: +; CHECK: ag %r2, 524280({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524280 + %ptr = inttoptr i64 %add2 to i64 * + %b = load i64 *%ptr + %add = add i64 %a, %b + ret i64 %add +} diff --git a/test/CodeGen/SystemZ/int-add-06.ll b/test/CodeGen/SystemZ/int-add-06.ll new file mode 100644 index 0000000..3a9c698 --- /dev/null +++ b/test/CodeGen/SystemZ/int-add-06.ll @@ -0,0 +1,93 @@ +; Test 32-bit addition in which the second operand is constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check additions of 1. +define i32 @f1(i32 %a) { +; CHECK: f1: +; CHECK: ahi %r2, 1 +; CHECK: br %r14 + %add = add i32 %a, 1 + ret i32 %add +} + +; Check the high end of the AHI range. +define i32 @f2(i32 %a) { +; CHECK: f2: +; CHECK: ahi %r2, 32767 +; CHECK: br %r14 + %add = add i32 %a, 32767 + ret i32 %add +} + +; Check the next value up, which must use AFI instead. +define i32 @f3(i32 %a) { +; CHECK: f3: +; CHECK: afi %r2, 32768 +; CHECK: br %r14 + %add = add i32 %a, 32768 + ret i32 %add +} + +; Check the high end of the signed 32-bit range. +define i32 @f4(i32 %a) { +; CHECK: f4: +; CHECK: afi %r2, 2147483647 +; CHECK: br %r14 + %add = add i32 %a, 2147483647 + ret i32 %add +} + +; Check the next value up, which is treated as a negative value. 
+define i32 @f5(i32 %a) { +; CHECK: f5: +; CHECK: afi %r2, -2147483648 +; CHECK: br %r14 + %add = add i32 %a, 2147483648 + ret i32 %add +} + +; Check the high end of the negative AHI range. +define i32 @f6(i32 %a) { +; CHECK: f6: +; CHECK: ahi %r2, -1 +; CHECK: br %r14 + %add = add i32 %a, -1 + ret i32 %add +} + +; Check the low end of the AHI range. +define i32 @f7(i32 %a) { +; CHECK: f7: +; CHECK: ahi %r2, -32768 +; CHECK: br %r14 + %add = add i32 %a, -32768 + ret i32 %add +} + +; Check the next value down, which must use AFI instead. +define i32 @f8(i32 %a) { +; CHECK: f8: +; CHECK: afi %r2, -32769 +; CHECK: br %r14 + %add = add i32 %a, -32769 + ret i32 %add +} + +; Check the low end of the signed 32-bit range. +define i32 @f9(i32 %a) { +; CHECK: f9: +; CHECK: afi %r2, -2147483648 +; CHECK: br %r14 + %add = add i32 %a, -2147483648 + ret i32 %add +} + +; Check the next value down, which is treated as a positive value. +define i32 @f10(i32 %a) { +; CHECK: f10: +; CHECK: afi %r2, 2147483647 +; CHECK: br %r14 + %add = add i32 %a, -2147483649 + ret i32 %add +} diff --git a/test/CodeGen/SystemZ/int-add-07.ll b/test/CodeGen/SystemZ/int-add-07.ll new file mode 100644 index 0000000..a065bb2 --- /dev/null +++ b/test/CodeGen/SystemZ/int-add-07.ll @@ -0,0 +1,131 @@ +; Test 64-bit addition in which the second operand is constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check additions of 1. +define i64 @f1(i64 %a) { +; CHECK: f1: +; CHECK: {{aghi %r2, 1|la %r[0-5], 1\(%r2\)}} +; CHECK: br %r14 + %add = add i64 %a, 1 + ret i64 %add +} + +; Check the high end of the AGHI range. +define i64 @f2(i64 %a) { +; CHECK: f2: +; CHECK: aghi %r2, 32767 +; CHECK: br %r14 + %add = add i64 %a, 32767 + ret i64 %add +} + +; Check the next value up, which must use AGFI instead. 
+define i64 @f3(i64 %a) { +; CHECK: f3: +; CHECK: {{agfi %r2, 32768|lay %r[0-5], 32768\(%r2\)}} +; CHECK: br %r14 + %add = add i64 %a, 32768 + ret i64 %add +} + +; Check the high end of the AGFI range. +define i64 @f4(i64 %a) { +; CHECK: f4: +; CHECK: agfi %r2, 2147483647 +; CHECK: br %r14 + %add = add i64 %a, 2147483647 + ret i64 %add +} + +; Check the next value up, which must use ALGFI instead. +define i64 @f5(i64 %a) { +; CHECK: f5: +; CHECK: algfi %r2, 2147483648 +; CHECK: br %r14 + %add = add i64 %a, 2147483648 + ret i64 %add +} + +; Check the high end of the ALGFI range. +define i64 @f6(i64 %a) { +; CHECK: f6: +; CHECK: algfi %r2, 4294967295 +; CHECK: br %r14 + %add = add i64 %a, 4294967295 + ret i64 %add +} + +; Check the next value up, which must be loaded into a register first. +define i64 @f7(i64 %a) { +; CHECK: f7: +; CHECK: llihl %r0, 1 +; CHECK: agr +; CHECK: br %r14 + %add = add i64 %a, 4294967296 + ret i64 %add +} + +; Check the high end of the negative AGHI range. +define i64 @f8(i64 %a) { +; CHECK: f8: +; CHECK: aghi %r2, -1 +; CHECK: br %r14 + %add = add i64 %a, -1 + ret i64 %add +} + +; Check the low end of the AGHI range. +define i64 @f9(i64 %a) { +; CHECK: f9: +; CHECK: aghi %r2, -32768 +; CHECK: br %r14 + %add = add i64 %a, -32768 + ret i64 %add +} + +; Check the next value down, which must use AGFI instead. +define i64 @f10(i64 %a) { +; CHECK: f10: +; CHECK: {{agfi %r2, -32769|lay %r[0-5], -32769\(%r2\)}} +; CHECK: br %r14 + %add = add i64 %a, -32769 + ret i64 %add +} + +; Check the low end of the AGFI range. +define i64 @f11(i64 %a) { +; CHECK: f11: +; CHECK: agfi %r2, -2147483648 +; CHECK: br %r14 + %add = add i64 %a, -2147483648 + ret i64 %add +} + +; Check the next value down, which must use SLGFI instead. +define i64 @f12(i64 %a) { +; CHECK: f12: +; CHECK: slgfi %r2, 2147483649 +; CHECK: br %r14 + %add = add i64 %a, -2147483649 + ret i64 %add +} + +; Check the low end of the SLGFI range.
+define i64 @f13(i64 %a) { +; CHECK: f13: +; CHECK: slgfi %r2, 4294967295 +; CHECK: br %r14 + %add = add i64 %a, -4294967295 + ret i64 %add +} + +; Check the next value down, which must use register addition instead. +define i64 @f14(i64 %a) { +; CHECK: f14: +; CHECK: llihf %r0, 4294967295 +; CHECK: agr +; CHECK: br %r14 + %add = add i64 %a, -4294967296 + ret i64 %add +} diff --git a/test/CodeGen/SystemZ/int-add-08.ll b/test/CodeGen/SystemZ/int-add-08.ll new file mode 100644 index 0000000..b1f820f --- /dev/null +++ b/test/CodeGen/SystemZ/int-add-08.ll @@ -0,0 +1,110 @@ +; Test 128-bit addition in which the second operand is variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test register addition. +define void @f1(i128 *%ptr) { +; CHECK: f1: +; CHECK: algr +; CHECK: alcgr +; CHECK: br %r14 + %value = load i128 *%ptr + %add = add i128 %value, %value + store i128 %add, i128 *%ptr + ret void +} + +; Test memory addition with no offset. Making the load of %a volatile +; should force the memory operand to be %b. +define void @f2(i128 *%aptr, i64 %addr) { +; CHECK: f2: +; CHECK: alg {{%r[0-5]}}, 8(%r3) +; CHECK: alcg {{%r[0-5]}}, 0(%r3) +; CHECK: br %r14 + %bptr = inttoptr i64 %addr to i128 * + %a = load volatile i128 *%aptr + %b = load i128 *%bptr + %add = add i128 %a, %b + store i128 %add, i128 *%aptr + ret void +} + +; Test the highest aligned offset that is in range of both ALG and ALCG. +define void @f3(i128 *%aptr, i64 %base) { +; CHECK: f3: +; CHECK: alg {{%r[0-5]}}, 524280(%r3) +; CHECK: alcg {{%r[0-5]}}, 524272(%r3) +; CHECK: br %r14 + %addr = add i64 %base, 524272 + %bptr = inttoptr i64 %addr to i128 * + %a = load volatile i128 *%aptr + %b = load i128 *%bptr + %add = add i128 %a, %b + store i128 %add, i128 *%aptr + ret void +} + +; Test the next doubleword up, which requires separate address logic for ALG. 
+define void @f4(i128 *%aptr, i64 %base) { +; CHECK: f4: +; CHECK: lgr [[BASE:%r[1-5]]], %r3 +; CHECK: agfi [[BASE]], 524288 +; CHECK: alg {{%r[0-5]}}, 0([[BASE]]) +; CHECK: alcg {{%r[0-5]}}, 524280(%r3) +; CHECK: br %r14 + %addr = add i64 %base, 524280 + %bptr = inttoptr i64 %addr to i128 * + %a = load volatile i128 *%aptr + %b = load i128 *%bptr + %add = add i128 %a, %b + store i128 %add, i128 *%aptr + ret void +} + +; Test the next doubleword after that, which requires separate logic for +; both instructions. It would be better to create an anchor at 524288 +; that both instructions can use, but that isn't implemented yet. +define void @f5(i128 *%aptr, i64 %base) { +; CHECK: f5: +; CHECK: alg {{%r[0-5]}}, 0({{%r[1-5]}}) +; CHECK: alcg {{%r[0-5]}}, 0({{%r[1-5]}}) +; CHECK: br %r14 + %addr = add i64 %base, 524288 + %bptr = inttoptr i64 %addr to i128 * + %a = load volatile i128 *%aptr + %b = load i128 *%bptr + %add = add i128 %a, %b + store i128 %add, i128 *%aptr + ret void +} + +; Test the lowest displacement that is in range of both ALG and ALCG. +define void @f6(i128 *%aptr, i64 %base) { +; CHECK: f6: +; CHECK: alg {{%r[0-5]}}, -524280(%r3) +; CHECK: alcg {{%r[0-5]}}, -524288(%r3) +; CHECK: br %r14 + %addr = add i64 %base, -524288 + %bptr = inttoptr i64 %addr to i128 * + %a = load volatile i128 *%aptr + %b = load i128 *%bptr + %add = add i128 %a, %b + store i128 %add, i128 *%aptr + ret void +} + +; Test the next doubleword down, which is out of range of the ALCG. 
+define void @f7(i128 *%aptr, i64 %base) { +; CHECK: f7: +; CHECK: alg {{%r[0-5]}}, -524288(%r3) +; CHECK: alcg {{%r[0-5]}}, 0({{%r[1-5]}}) +; CHECK: br %r14 + %addr = add i64 %base, -524296 + %bptr = inttoptr i64 %addr to i128 * + %a = load volatile i128 *%aptr + %b = load i128 *%bptr + %add = add i128 %a, %b + store i128 %add, i128 *%aptr + ret void +} + diff --git a/test/CodeGen/SystemZ/int-add-09.ll b/test/CodeGen/SystemZ/int-add-09.ll new file mode 100644 index 0000000..bfe6338 --- /dev/null +++ b/test/CodeGen/SystemZ/int-add-09.ll @@ -0,0 +1,56 @@ +; Test 128-bit addition in which the second operand is constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check additions of 1. The XOR ensures that we don't instead load the +; constant into a register and use memory addition. +define void @f1(i128 *%aptr) { +; CHECK: f1: +; CHECK: algfi {{%r[0-5]}}, 1 +; CHECK: alcgr +; CHECK: br %r14 + %a = load i128 *%aptr + %xor = xor i128 %a, 128 + %add = add i128 %xor, 1 + store i128 %add, i128 *%aptr + ret void +} + +; Check the high end of the ALGFI range. +define void @f2(i128 *%aptr) { +; CHECK: f2: +; CHECK: algfi {{%r[0-5]}}, 4294967295 +; CHECK: alcgr +; CHECK: br %r14 + %a = load i128 *%aptr + %xor = xor i128 %a, 128 + %add = add i128 %xor, 4294967295 + store i128 %add, i128 *%aptr + ret void +} + +; Check the next value up, which must use register addition. +define void @f3(i128 *%aptr) { +; CHECK: f3: +; CHECK: algr +; CHECK: alcgr +; CHECK: br %r14 + %a = load i128 *%aptr + %xor = xor i128 %a, 128 + %add = add i128 %xor, 4294967296 + store i128 %add, i128 *%aptr + ret void +} + +; Check addition of -1, which must also use register addition. 
+define void @f4(i128 *%aptr) { +; CHECK: f4: +; CHECK: algr +; CHECK: alcgr +; CHECK: br %r14 + %a = load i128 *%aptr + %xor = xor i128 %a, 128 + %add = add i128 %xor, -1 + store i128 %add, i128 *%aptr + ret void +} diff --git a/test/CodeGen/SystemZ/int-add-10.ll b/test/CodeGen/SystemZ/int-add-10.ll new file mode 100644 index 0000000..17cfdbe --- /dev/null +++ b/test/CodeGen/SystemZ/int-add-10.ll @@ -0,0 +1,165 @@ +; Test 128-bit addition in which the second operand is a zero-extended i32. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check register additions. The XOR ensures that we don't instead zero-extend +; %b into a register and use memory addition. +define void @f1(i128 *%aptr, i32 %b) { +; CHECK: f1: +; CHECK: algfr {{%r[0-5]}}, %r3 +; CHECK: alcgr +; CHECK: br %r14 + %a = load i128 *%aptr + %xor = xor i128 %a, 127 + %bext = zext i32 %b to i128 + %add = add i128 %xor, %bext + store i128 %add, i128 *%aptr + ret void +} + +; Like f1, but using an "in-register" extension. +define void @f2(i128 *%aptr, i64 %b) { +; CHECK: f2: +; CHECK: algfr {{%r[0-5]}}, %r3 +; CHECK: alcgr +; CHECK: br %r14 + %a = load i128 *%aptr + %xor = xor i128 %a, 127 + %trunc = trunc i64 %b to i32 + %bext = zext i32 %trunc to i128 + %add = add i128 %xor, %bext + store i128 %add, i128 *%aptr + ret void +} + +; Test register addition in cases where the second operand is zero extended +; from i64 rather than i32, but is later masked to i32 range. +define void @f3(i128 *%aptr, i64 %b) { +; CHECK: f3: +; CHECK: algfr {{%r[0-5]}}, %r3 +; CHECK: alcgr +; CHECK: br %r14 + %a = load i128 *%aptr + %xor = xor i128 %a, 127 + %bext = zext i64 %b to i128 + %and = and i128 %bext, 4294967295 + %add = add i128 %xor, %and + store i128 %add, i128 *%aptr + ret void +} + +; Test ALGF with no offset. 
+define void @f4(i128 *%aptr, i32 *%bsrc) { +; CHECK: f4: +; CHECK: algf {{%r[0-5]}}, 0(%r3) +; CHECK: alcgr +; CHECK: br %r14 + %a = load i128 *%aptr + %xor = xor i128 %a, 127 + %b = load i32 *%bsrc + %bext = zext i32 %b to i128 + %add = add i128 %xor, %bext + store i128 %add, i128 *%aptr + ret void +} + +; Check the high end of the ALGF range. +define void @f5(i128 *%aptr, i32 *%bsrc) { +; CHECK: f5: +; CHECK: algf {{%r[0-5]}}, 524284(%r3) +; CHECK: alcgr +; CHECK: br %r14 + %a = load i128 *%aptr + %xor = xor i128 %a, 127 + %ptr = getelementptr i32 *%bsrc, i64 131071 + %b = load i32 *%ptr + %bext = zext i32 %b to i128 + %add = add i128 %xor, %bext + store i128 %add, i128 *%aptr + ret void +} + +; Check the next word up, which must use separate address logic. +; Other sequences besides this one would be OK. +define void @f6(i128 *%aptr, i32 *%bsrc) { +; CHECK: f6: +; CHECK: agfi %r3, 524288 +; CHECK: algf {{%r[0-5]}}, 0(%r3) +; CHECK: alcgr +; CHECK: br %r14 + %a = load i128 *%aptr + %xor = xor i128 %a, 127 + %ptr = getelementptr i32 *%bsrc, i64 131072 + %b = load i32 *%ptr + %bext = zext i32 %b to i128 + %add = add i128 %xor, %bext + store i128 %add, i128 *%aptr + ret void +} + +; Check the high end of the negative aligned ALGF range. +define void @f7(i128 *%aptr, i32 *%bsrc) { +; CHECK: f7: +; CHECK: algf {{%r[0-5]}}, -4(%r3) +; CHECK: alcgr +; CHECK: br %r14 + %a = load i128 *%aptr + %xor = xor i128 %a, 127 + %ptr = getelementptr i32 *%bsrc, i128 -1 + %b = load i32 *%ptr + %bext = zext i32 %b to i128 + %add = add i128 %xor, %bext + store i128 %add, i128 *%aptr + ret void +} + +; Check the low end of the ALGF range. 
+define void @f8(i128 *%aptr, i32 *%bsrc) { +; CHECK: f8: +; CHECK: algf {{%r[0-5]}}, -524288(%r3) +; CHECK: alcgr +; CHECK: br %r14 + %a = load i128 *%aptr + %xor = xor i128 %a, 127 + %ptr = getelementptr i32 *%bsrc, i128 -131072 + %b = load i32 *%ptr + %bext = zext i32 %b to i128 + %add = add i128 %xor, %bext + store i128 %add, i128 *%aptr + ret void +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f9(i128 *%aptr, i32 *%bsrc) { +; CHECK: f9: +; CHECK: agfi %r3, -524292 +; CHECK: algf {{%r[0-5]}}, 0(%r3) +; CHECK: alcgr +; CHECK: br %r14 + %a = load i128 *%aptr + %xor = xor i128 %a, 127 + %ptr = getelementptr i32 *%bsrc, i128 -131073 + %b = load i32 *%ptr + %bext = zext i32 %b to i128 + %add = add i128 %xor, %bext + store i128 %add, i128 *%aptr + ret void +} + +; Check that ALGF allows an index. +define void @f10(i128 *%aptr, i64 %src, i64 %index) { +; CHECK: f10: +; CHECK: algf {{%r[0-5]}}, 524284({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %a = load i128 *%aptr + %xor = xor i128 %a, 127 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524284 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32 *%ptr + %bext = zext i32 %b to i128 + %add = add i128 %xor, %bext + store i128 %add, i128 *%aptr + ret void +} diff --git a/test/CodeGen/SystemZ/int-add-11.ll b/test/CodeGen/SystemZ/int-add-11.ll new file mode 100644 index 0000000..47a776e --- /dev/null +++ b/test/CodeGen/SystemZ/int-add-11.ll @@ -0,0 +1,128 @@ +; Test 32-bit additions of constants to memory. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check additions of 1. +define void @f1(i32 *%ptr) { +; CHECK: f1: +; CHECK: asi 0(%r2), 1 +; CHECK: br %r14 + %val = load i32 *%ptr + %add = add i32 %val, 1 + store i32 %add, i32 *%ptr + ret void +} + +; Check the high end of the constant range.
+define void @f2(i32 *%ptr) { +; CHECK: f2: +; CHECK: asi 0(%r2), 127 +; CHECK: br %r14 + %val = load i32 *%ptr + %add = add i32 %val, 127 + store i32 %add, i32 *%ptr + ret void +} + +; Check the next constant up, which must use an addition and a store. +; Both L/AHI and LHI/A would be OK. +define void @f3(i32 *%ptr) { +; CHECK: f3: +; CHECK-NOT: asi +; CHECK: st %r0, 0(%r2) +; CHECK: br %r14 + %val = load i32 *%ptr + %add = add i32 %val, 128 + store i32 %add, i32 *%ptr + ret void +} + +; Check the low end of the constant range. +define void @f4(i32 *%ptr) { +; CHECK: f4: +; CHECK: asi 0(%r2), -128 +; CHECK: br %r14 + %val = load i32 *%ptr + %add = add i32 %val, -128 + store i32 %add, i32 *%ptr + ret void +} + +; Check the next value down, with the same comment as f3. +define void @f5(i32 *%ptr) { +; CHECK: f5: +; CHECK-NOT: asi +; CHECK: st %r0, 0(%r2) +; CHECK: br %r14 + %val = load i32 *%ptr + %add = add i32 %val, -129 + store i32 %add, i32 *%ptr + ret void +} + +; Check the high end of the aligned ASI range. +define void @f6(i32 *%base) { +; CHECK: f6: +; CHECK: asi 524284(%r2), 1 +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 131071 + %val = load i32 *%ptr + %add = add i32 %val, 1 + store i32 %add, i32 *%ptr + ret void +} + +; Check the next word up, which must use separate address logic. +; Other sequences besides this one would be OK. +define void @f7(i32 *%base) { +; CHECK: f7: +; CHECK: agfi %r2, 524288 +; CHECK: asi 0(%r2), 1 +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 131072 + %val = load i32 *%ptr + %add = add i32 %val, 1 + store i32 %add, i32 *%ptr + ret void +} + +; Check the low end of the ASI range. +define void @f8(i32 *%base) { +; CHECK: f8: +; CHECK: asi -524288(%r2), 1 +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 -131072 + %val = load i32 *%ptr + %add = add i32 %val, 1 + store i32 %add, i32 *%ptr + ret void +} + +; Check the next word down, which must use separate address logic. 
+; Other sequences besides this one would be OK. +define void @f9(i32 *%base) { +; CHECK: f9: +; CHECK: agfi %r2, -524292 +; CHECK: asi 0(%r2), 1 +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 -131073 + %val = load i32 *%ptr + %add = add i32 %val, 1 + store i32 %add, i32 *%ptr + ret void +} + +; Check that ASI does not allow indices. +define void @f10(i64 %base, i64 %index) { +; CHECK: f10: +; CHECK: agr %r2, %r3 +; CHECK: asi 4(%r2), 1 +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 4 + %ptr = inttoptr i64 %add2 to i32 * + %val = load i32 *%ptr + %add = add i32 %val, 1 + store i32 %add, i32 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/int-add-12.ll b/test/CodeGen/SystemZ/int-add-12.ll new file mode 100644 index 0000000..ae1c1f7 --- /dev/null +++ b/test/CodeGen/SystemZ/int-add-12.ll @@ -0,0 +1,128 @@ +; Test 64-bit additions of constants to memory. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check additions of 1. +define void @f1(i64 *%ptr) { +; CHECK: f1: +; CHECK: agsi 0(%r2), 1 +; CHECK: br %r14 + %val = load i64 *%ptr + %add = add i64 %val, 1 + store i64 %add, i64 *%ptr + ret void +} + +; Check the high end of the constant range. +define void @f2(i64 *%ptr) { +; CHECK: f2: +; CHECK: agsi 0(%r2), 127 +; CHECK: br %r14 + %val = load i64 *%ptr + %add = add i64 %val, 127 + store i64 %add, i64 *%ptr + ret void +} + +; Check the next constant up, which must use an addition and a store. +; Both LG/AGHI and LGHI/AG would be OK. +define void @f3(i64 *%ptr) { +; CHECK: f3: +; CHECK-NOT: agsi +; CHECK: stg %r0, 0(%r2) +; CHECK: br %r14 + %val = load i64 *%ptr + %add = add i64 %val, 128 + store i64 %add, i64 *%ptr + ret void +} + +; Check the low end of the constant range.
+define void @f4(i64 *%ptr) { +; CHECK: f4: +; CHECK: agsi 0(%r2), -128 +; CHECK: br %r14 + %val = load i64 *%ptr + %add = add i64 %val, -128 + store i64 %add, i64 *%ptr + ret void +} + +; Check the next value down, with the same comment as f3. +define void @f5(i64 *%ptr) { +; CHECK: f5: +; CHECK-NOT: agsi +; CHECK: stg %r0, 0(%r2) +; CHECK: br %r14 + %val = load i64 *%ptr + %add = add i64 %val, -129 + store i64 %add, i64 *%ptr + ret void +} + +; Check the high end of the aligned AGSI range. +define void @f6(i64 *%base) { +; CHECK: f6: +; CHECK: agsi 524280(%r2), 1 +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 65535 + %val = load i64 *%ptr + %add = add i64 %val, 1 + store i64 %add, i64 *%ptr + ret void +} + +; Check the next doubleword up, which must use separate address logic. +; Other sequences besides this one would be OK. +define void @f7(i64 *%base) { +; CHECK: f7: +; CHECK: agfi %r2, 524288 +; CHECK: agsi 0(%r2), 1 +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 65536 + %val = load i64 *%ptr + %add = add i64 %val, 1 + store i64 %add, i64 *%ptr + ret void +} + +; Check the low end of the AGSI range. +define void @f8(i64 *%base) { +; CHECK: f8: +; CHECK: agsi -524288(%r2), 1 +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 -65536 + %val = load i64 *%ptr + %add = add i64 %val, 1 + store i64 %add, i64 *%ptr + ret void +} + +; Check the next doubleword down, which must use separate address logic. +; Other sequences besides this one would be OK. +define void @f9(i64 *%base) { +; CHECK: f9: +; CHECK: agfi %r2, -524296 +; CHECK: agsi 0(%r2), 1 +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 -65537 + %val = load i64 *%ptr + %add = add i64 %val, 1 + store i64 %add, i64 *%ptr + ret void +} + +; Check that AGSI does not allow indices. 
+define void @f10(i64 %base, i64 %index) { +; CHECK: f10: +; CHECK: agr %r2, %r3 +; CHECK: agsi 8(%r2), 1 +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 8 + %ptr = inttoptr i64 %add2 to i64 * + %val = load i64 *%ptr + %add = add i64 %val, 1 + store i64 %add, i64 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/int-cmp-01.ll b/test/CodeGen/SystemZ/int-cmp-01.ll new file mode 100644 index 0000000..aa432f0 --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-01.ll @@ -0,0 +1,151 @@ +; Test 32-bit signed comparison in which the second operand is sign-extended +; from an i16 memory value. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the low end of the CH range. +define void @f1(i32 %lhs, i16 *%src, i32 *%dst) { +; CHECK: f1: +; CHECK: ch %r2, 0(%r3) +; CHECK: br %r14 + %half = load i16 *%src + %rhs = sext i16 %half to i32 + %cond = icmp slt i32 %lhs, %rhs + %res = select i1 %cond, i32 100, i32 200 + store i32 %res, i32 *%dst + ret void +} + +; Check the high end of the aligned CH range. +define void @f2(i32 %lhs, i16 *%src, i32 *%dst) { +; CHECK: f2: +; CHECK: ch %r2, 4094(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 2047 + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %cond = icmp slt i32 %lhs, %rhs + %res = select i1 %cond, i32 100, i32 200 + store i32 %res, i32 *%dst + ret void +} + +; Check the next halfword up, which should use CHY instead of CH. +define void @f3(i32 %lhs, i16 *%src, i32 *%dst) { +; CHECK: f3: +; CHECK: chy %r2, 4096(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 2048 + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %cond = icmp slt i32 %lhs, %rhs + %res = select i1 %cond, i32 100, i32 200 + store i32 %res, i32 *%dst + ret void +} + +; Check the high end of the aligned CHY range. 
+define void @f4(i32 %lhs, i16 *%src, i32 *%dst) { +; CHECK: f4: +; CHECK: chy %r2, 524286(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 262143 + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %cond = icmp slt i32 %lhs, %rhs + %res = select i1 %cond, i32 100, i32 200 + store i32 %res, i32 *%dst + ret void +} + +; Check the next halfword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f5(i32 %lhs, i16 *%src, i32 *%dst) { +; CHECK: f5: +; CHECK: agfi %r3, 524288 +; CHECK: ch %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 262144 + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %cond = icmp slt i32 %lhs, %rhs + %res = select i1 %cond, i32 100, i32 200 + store i32 %res, i32 *%dst + ret void +} + +; Check the high end of the negative aligned CHY range. +define void @f6(i32 %lhs, i16 *%src, i32 *%dst) { +; CHECK: f6: +; CHECK: chy %r2, -2(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 -1 + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %cond = icmp slt i32 %lhs, %rhs + %res = select i1 %cond, i32 100, i32 200 + store i32 %res, i32 *%dst + ret void +} + +; Check the low end of the CHY range. +define void @f7(i32 %lhs, i16 *%src, i32 *%dst) { +; CHECK: f7: +; CHECK: chy %r2, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 -262144 + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %cond = icmp slt i32 %lhs, %rhs + %res = select i1 %cond, i32 100, i32 200 + store i32 %res, i32 *%dst + ret void +} + +; Check the next halfword down, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define void @f8(i32 %lhs, i16 *%src, i32 *%dst) { +; CHECK: f8: +; CHECK: agfi %r3, -524290 +; CHECK: ch %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 -262145 + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %cond = icmp slt i32 %lhs, %rhs + %res = select i1 %cond, i32 100, i32 200 + store i32 %res, i32 *%dst + ret void +} + +; Check that CH allows an index. +define void @f9(i32 %lhs, i64 %base, i64 %index, i32 *%dst) { +; CHECK: f9: +; CHECK: ch %r2, 4094({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 4094 + %ptr = inttoptr i64 %add2 to i16 * + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %cond = icmp slt i32 %lhs, %rhs + %res = select i1 %cond, i32 100, i32 200 + store i32 %res, i32 *%dst + ret void +} + +; Check that CHY allows an index. +define void @f10(i32 %lhs, i64 %base, i64 %index, i32 *%dst) { +; CHECK: f10: +; CHECK: chy %r2, 4096({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i16 * + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %cond = icmp slt i32 %lhs, %rhs + %res = select i1 %cond, i32 100, i32 200 + store i32 %res, i32 *%dst + ret void +} diff --git a/test/CodeGen/SystemZ/int-cmp-02.ll b/test/CodeGen/SystemZ/int-cmp-02.ll new file mode 100644 index 0000000..c158fb4 --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-02.ll @@ -0,0 +1,162 @@ +; Test 32-bit signed comparison in which the second operand is a variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check register comparison. +define double @f1(double %a, double %b, i32 %i1, i32 %i2) { +; CHECK: f1: +; CHECK: cr %r2, %r3 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp slt i32 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the low end of the C range. 
+define double @f2(double %a, double %b, i32 %i1, i32 *%ptr) { +; CHECK: f2: +; CHECK: c %r2, 0(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %i2 = load i32 *%ptr + %cond = icmp slt i32 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the aligned C range. +define double @f3(double %a, double %b, i32 %i1, i32 *%base) { +; CHECK: f3: +; CHECK: c %r2, 4092(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 1023 + %i2 = load i32 *%ptr + %cond = icmp slt i32 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next word up, which should use CY instead of C. +define double @f4(double %a, double %b, i32 %i1, i32 *%base) { +; CHECK: f4: +; CHECK: cy %r2, 4096(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 1024 + %i2 = load i32 *%ptr + %cond = icmp slt i32 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the aligned CY range. +define double @f5(double %a, double %b, i32 %i1, i32 *%base) { +; CHECK: f5: +; CHECK: cy %r2, 524284(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 131071 + %i2 = load i32 *%ptr + %cond = icmp slt i32 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define double @f6(double %a, double %b, i32 %i1, i32 *%base) { +; CHECK: f6: +; CHECK: agfi %r3, 524288 +; CHECK: c %r2, 0(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 131072 + %i2 = load i32 *%ptr + %cond = icmp slt i32 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the negative aligned CY range. +define double @f7(double %a, double %b, i32 %i1, i32 *%base) { +; CHECK: f7: +; CHECK: cy %r2, -4(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 -1 + %i2 = load i32 *%ptr + %cond = icmp slt i32 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the low end of the CY range. +define double @f8(double %a, double %b, i32 %i1, i32 *%base) { +; CHECK: f8: +; CHECK: cy %r2, -524288(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 -131072 + %i2 = load i32 *%ptr + %cond = icmp slt i32 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define double @f9(double %a, double %b, i32 %i1, i32 *%base) { +; CHECK: f9: +; CHECK: agfi %r3, -524292 +; CHECK: c %r2, 0(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 -131073 + %i2 = load i32 *%ptr + %cond = icmp slt i32 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check that C allows an index. 
+define double @f10(double %a, double %b, i32 %i1, i64 %base, i64 %index) { +; CHECK: f10: +; CHECK: c %r2, 4092({{%r4,%r3|%r3,%r4}}) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 4092 + %ptr = inttoptr i64 %add2 to i32 * + %i2 = load i32 *%ptr + %cond = icmp slt i32 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check that CY allows an index. +define double @f11(double %a, double %b, i32 %i1, i64 %base, i64 %index) { +; CHECK: f11: +; CHECK: cy %r2, 4096({{%r4,%r3|%r3,%r4}}) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i32 * + %i2 = load i32 *%ptr + %cond = icmp slt i32 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-03.ll b/test/CodeGen/SystemZ/int-cmp-03.ll new file mode 100644 index 0000000..4203bee --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-03.ll @@ -0,0 +1,162 @@ +; Test 32-bit unsigned comparison in which the second operand is a variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check register comparison. +define double @f1(double %a, double %b, i32 %i1, i32 %i2) { +; CHECK: f1: +; CHECK: clr %r2, %r3 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp ult i32 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the low end of the CL range. +define double @f2(double %a, double %b, i32 %i1, i32 *%ptr) { +; CHECK: f2: +; CHECK: cl %r2, 0(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %i2 = load i32 *%ptr + %cond = icmp ult i32 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the aligned CL range. 
+define double @f3(double %a, double %b, i32 %i1, i32 *%base) { +; CHECK: f3: +; CHECK: cl %r2, 4092(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 1023 + %i2 = load i32 *%ptr + %cond = icmp ult i32 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next word up, which should use CLY instead of CL. +define double @f4(double %a, double %b, i32 %i1, i32 *%base) { +; CHECK: f4: +; CHECK: cly %r2, 4096(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 1024 + %i2 = load i32 *%ptr + %cond = icmp ult i32 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the aligned CLY range. +define double @f5(double %a, double %b, i32 %i1, i32 *%base) { +; CHECK: f5: +; CHECK: cly %r2, 524284(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 131071 + %i2 = load i32 *%ptr + %cond = icmp ult i32 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define double @f6(double %a, double %b, i32 %i1, i32 *%base) { +; CHECK: f6: +; CHECK: agfi %r3, 524288 +; CHECK: cl %r2, 0(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 131072 + %i2 = load i32 *%ptr + %cond = icmp ult i32 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the negative aligned CLY range. 
+define double @f7(double %a, double %b, i32 %i1, i32 *%base) { +; CHECK: f7: +; CHECK: cly %r2, -4(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 -1 + %i2 = load i32 *%ptr + %cond = icmp ult i32 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the low end of the CLY range. +define double @f8(double %a, double %b, i32 %i1, i32 *%base) { +; CHECK: f8: +; CHECK: cly %r2, -524288(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 -131072 + %i2 = load i32 *%ptr + %cond = icmp ult i32 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define double @f9(double %a, double %b, i32 %i1, i32 *%base) { +; CHECK: f9: +; CHECK: agfi %r3, -524292 +; CHECK: cl %r2, 0(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 -131073 + %i2 = load i32 *%ptr + %cond = icmp ult i32 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check that CL allows an index. +define double @f10(double %a, double %b, i32 %i1, i64 %base, i64 %index) { +; CHECK: f10: +; CHECK: cl %r2, 4092({{%r4,%r3|%r3,%r4}}) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 4092 + %ptr = inttoptr i64 %add2 to i32 * + %i2 = load i32 *%ptr + %cond = icmp ult i32 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check that CLY allows an index. 
+define double @f11(double %a, double %b, i32 %i1, i64 %base, i64 %index) { +; CHECK: f11: +; CHECK: cly %r2, 4096({{%r4,%r3|%r3,%r4}}) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i32 * + %i2 = load i32 *%ptr + %cond = icmp ult i32 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-04.ll b/test/CodeGen/SystemZ/int-cmp-04.ll new file mode 100644 index 0000000..d0625fb --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-04.ll @@ -0,0 +1,107 @@ +; Test 64-bit signed comparison in which the second operand is sign-extended +; from an i16 memory value. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check CGH with no displacement. +define void @f1(i64 %lhs, i16 *%src, i64 *%dst) { +; CHECK: f1: +; CHECK: cgh %r2, 0(%r3) +; CHECK: br %r14 + %half = load i16 *%src + %rhs = sext i16 %half to i64 + %cond = icmp slt i64 %lhs, %rhs + %res = select i1 %cond, i64 100, i64 200 + store i64 %res, i64 *%dst + ret void +} + +; Check the high end of the aligned CGH range. +define void @f2(i64 %lhs, i16 *%src, i64 *%dst) { +; CHECK: f2: +; CHECK: cgh %r2, 524286(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 262143 + %half = load i16 *%ptr + %rhs = sext i16 %half to i64 + %cond = icmp slt i64 %lhs, %rhs + %res = select i1 %cond, i64 100, i64 200 + store i64 %res, i64 *%dst + ret void +} + +; Check the next halfword up, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define void @f3(i64 %lhs, i16 *%src, i64 *%dst) { +; CHECK: f3: +; CHECK: agfi %r3, 524288 +; CHECK: cgh %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 262144 + %half = load i16 *%ptr + %rhs = sext i16 %half to i64 + %cond = icmp slt i64 %lhs, %rhs + %res = select i1 %cond, i64 100, i64 200 + store i64 %res, i64 *%dst + ret void +} + +; Check the high end of the negative aligned CGH range. +define void @f4(i64 %lhs, i16 *%src, i64 *%dst) { +; CHECK: f4: +; CHECK: cgh %r2, -2(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 -1 + %half = load i16 *%ptr + %rhs = sext i16 %half to i64 + %cond = icmp slt i64 %lhs, %rhs + %res = select i1 %cond, i64 100, i64 200 + store i64 %res, i64 *%dst + ret void +} + +; Check the low end of the CGH range. +define void @f5(i64 %lhs, i16 *%src, i64 *%dst) { +; CHECK: f5: +; CHECK: cgh %r2, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 -262144 + %half = load i16 *%ptr + %rhs = sext i16 %half to i64 + %cond = icmp slt i64 %lhs, %rhs + %res = select i1 %cond, i64 100, i64 200 + store i64 %res, i64 *%dst + ret void +} + +; Check the next halfword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f6(i64 %lhs, i16 *%src, i64 *%dst) { +; CHECK: f6: +; CHECK: agfi %r3, -524290 +; CHECK: cgh %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 -262145 + %half = load i16 *%ptr + %rhs = sext i16 %half to i64 + %cond = icmp slt i64 %lhs, %rhs + %res = select i1 %cond, i64 100, i64 200 + store i64 %res, i64 *%dst + ret void +} + +; Check that CGH allows an index. 
+define void @f7(i64 %lhs, i64 %base, i64 %index, i64 *%dst) { +; CHECK: f7: +; CHECK: cgh %r2, 4096({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i16 * + %half = load i16 *%ptr + %rhs = sext i16 %half to i64 + %cond = icmp slt i64 %lhs, %rhs + %res = select i1 %cond, i64 100, i64 200 + store i64 %res, i64 *%dst + ret void +} diff --git a/test/CodeGen/SystemZ/int-cmp-05.ll b/test/CodeGen/SystemZ/int-cmp-05.ll new file mode 100644 index 0000000..2ab64d5 --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-05.ll @@ -0,0 +1,203 @@ +; Test 64-bit comparison in which the second operand is a sign-extended i32. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check signed register comparison. +define double @f1(double %a, double %b, i64 %i1, i32 %unext) { +; CHECK: f1: +; CHECK: cgfr %r2, %r3 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %i2 = sext i32 %unext to i64 + %cond = icmp slt i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check unsigned register comparison, which can't use CGFR. +define double @f2(double %a, double %b, i64 %i1, i32 %unext) { +; CHECK: f2: +; CHECK-NOT: cgfr +; CHECK: br %r14 + %i2 = sext i32 %unext to i64 + %cond = icmp ult i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check register equality. +define double @f3(double %a, double %b, i64 %i1, i32 %unext) { +; CHECK: f3: +; CHECK: cgfr %r2, %r3 +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %i2 = sext i32 %unext to i64 + %cond = icmp eq i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check register inequality. 
+define double @f4(double %a, double %b, i64 %i1, i32 %unext) { +; CHECK: f4: +; CHECK: cgfr %r2, %r3 +; CHECK-NEXT: j{{g?}}lh +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %i2 = sext i32 %unext to i64 + %cond = icmp ne i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check signed comparison with memory. +define double @f5(double %a, double %b, i64 %i1, i32 *%ptr) { +; CHECK: f5: +; CHECK: cgf %r2, 0(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %unext = load i32 *%ptr + %i2 = sext i32 %unext to i64 + %cond = icmp slt i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check unsigned comparison with memory. +define double @f6(double %a, double %b, i64 %i1, i32 *%ptr) { +; CHECK: f6: +; CHECK-NOT: cgf +; CHECK: br %r14 + %unext = load i32 *%ptr + %i2 = sext i32 %unext to i64 + %cond = icmp ult i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check memory equality. +define double @f7(double %a, double %b, i64 %i1, i32 *%ptr) { +; CHECK: f7: +; CHECK: cgf %r2, 0(%r3) +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %unext = load i32 *%ptr + %i2 = sext i32 %unext to i64 + %cond = icmp eq i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check memory inequality. +define double @f8(double %a, double %b, i64 %i1, i32 *%ptr) { +; CHECK: f8: +; CHECK: cgf %r2, 0(%r3) +; CHECK-NEXT: j{{g?}}lh +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %unext = load i32 *%ptr + %i2 = sext i32 %unext to i64 + %cond = icmp ne i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the aligned CGF range. 
+define double @f9(double %a, double %b, i64 %i1, i32 *%base) { +; CHECK: f9: +; CHECK: cgf %r2, 524284(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 131071 + %unext = load i32 *%ptr + %i2 = sext i32 %unext to i64 + %cond = icmp slt i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define double @f10(double %a, double %b, i64 %i1, i32 *%base) { +; CHECK: f10: +; CHECK: agfi %r3, 524288 +; CHECK: cgf %r2, 0(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 131072 + %unext = load i32 *%ptr + %i2 = sext i32 %unext to i64 + %cond = icmp slt i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the negative aligned CGF range. +define double @f11(double %a, double %b, i64 %i1, i32 *%base) { +; CHECK: f11: +; CHECK: cgf %r2, -4(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 -1 + %unext = load i32 *%ptr + %i2 = sext i32 %unext to i64 + %cond = icmp slt i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the low end of the CGF range. +define double @f12(double %a, double %b, i64 %i1, i32 *%base) { +; CHECK: f12: +; CHECK: cgf %r2, -524288(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 -131072 + %unext = load i32 *%ptr + %i2 = sext i32 %unext to i64 + %cond = icmp slt i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define double @f13(double %a, double %b, i64 %i1, i32 *%base) { +; CHECK: f13: +; CHECK: agfi %r3, -524292 +; CHECK: cgf %r2, 0(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 -131073 + %unext = load i32 *%ptr + %i2 = sext i32 %unext to i64 + %cond = icmp slt i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check that CGF allows an index. +define double @f14(double %a, double %b, i64 %i1, i64 %base, i64 %index) { +; CHECK: f14: +; CHECK: cgf %r2, 524284({{%r4,%r3|%r3,%r4}}) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 524284 + %ptr = inttoptr i64 %add2 to i32 * + %unext = load i32 *%ptr + %i2 = sext i32 %unext to i64 + %cond = icmp slt i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-06.ll b/test/CodeGen/SystemZ/int-cmp-06.ll new file mode 100644 index 0000000..26f6dbf --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-06.ll @@ -0,0 +1,253 @@ +; Test 64-bit comparison in which the second operand is a zero-extended i32. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check unsigned register comparison. +define double @f1(double %a, double %b, i64 %i1, i32 %unext) { +; CHECK: f1: +; CHECK: clgfr %r2, %r3 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %i2 = zext i32 %unext to i64 + %cond = icmp ult i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; ...and again with a different representation. +define double @f2(double %a, double %b, i64 %i1, i64 %unext) { +; CHECK: f2: +; CHECK: clgfr %r2, %r3 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %i2 = and i64 %unext, 4294967295 + %cond = icmp ult i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check signed register comparison, which can't use CLGFR. 
+define double @f3(double %a, double %b, i64 %i1, i32 %unext) { +; CHECK: f3: +; CHECK-NOT: clgfr +; CHECK: br %r14 + %i2 = zext i32 %unext to i64 + %cond = icmp slt i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; ...and again with a different representation +define double @f4(double %a, double %b, i64 %i1, i64 %unext) { +; CHECK: f4: +; CHECK-NOT: clgfr +; CHECK: br %r14 + %i2 = and i64 %unext, 4294967295 + %cond = icmp slt i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check register equality. +define double @f5(double %a, double %b, i64 %i1, i32 %unext) { +; CHECK: f5: +; CHECK: clgfr %r2, %r3 +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %i2 = zext i32 %unext to i64 + %cond = icmp eq i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; ...and again with a different representation +define double @f6(double %a, double %b, i64 %i1, i64 %unext) { +; CHECK: f6: +; CHECK: clgfr %r2, %r3 +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %i2 = and i64 %unext, 4294967295 + %cond = icmp eq i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check register inequality. +define double @f7(double %a, double %b, i64 %i1, i32 %unext) { +; CHECK: f7: +; CHECK: clgfr %r2, %r3 +; CHECK-NEXT: j{{g?}}lh +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %i2 = zext i32 %unext to i64 + %cond = icmp ne i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; ...and again with a different representation +define double @f8(double %a, double %b, i64 %i1, i64 %unext) { +; CHECK: f8: +; CHECK: clgfr %r2, %r3 +; CHECK-NEXT: j{{g?}}lh +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %i2 = and i64 %unext, 4294967295 + %cond = icmp ne i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check unsigned comparison with memory. 
+define double @f9(double %a, double %b, i64 %i1, i32 *%ptr) { +; CHECK: f9: +; CHECK: clgf %r2, 0(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %unext = load i32 *%ptr + %i2 = zext i32 %unext to i64 + %cond = icmp ult i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check signed comparison with memory. +define double @f10(double %a, double %b, i64 %i1, i32 *%ptr) { +; CHECK: f10: +; CHECK-NOT: clgf +; CHECK: br %r14 + %unext = load i32 *%ptr + %i2 = zext i32 %unext to i64 + %cond = icmp slt i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check memory equality. +define double @f11(double %a, double %b, i64 %i1, i32 *%ptr) { +; CHECK: f11: +; CHECK: clgf %r2, 0(%r3) +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %unext = load i32 *%ptr + %i2 = zext i32 %unext to i64 + %cond = icmp eq i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check memory inequality. +define double @f12(double %a, double %b, i64 %i1, i32 *%ptr) { +; CHECK: f12: +; CHECK: clgf %r2, 0(%r3) +; CHECK-NEXT: j{{g?}}lh +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %unext = load i32 *%ptr + %i2 = zext i32 %unext to i64 + %cond = icmp ne i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the aligned CLGF range. +define double @f13(double %a, double %b, i64 %i1, i32 *%base) { +; CHECK: f13: +; CHECK: clgf %r2, 524284(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 131071 + %unext = load i32 *%ptr + %i2 = zext i32 %unext to i64 + %cond = icmp ult i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define double @f14(double %a, double %b, i64 %i1, i32 *%base) { +; CHECK: f14: +; CHECK: agfi %r3, 524288 +; CHECK: clgf %r2, 0(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 131072 + %unext = load i32 *%ptr + %i2 = zext i32 %unext to i64 + %cond = icmp ult i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the negative aligned CLGF range. +define double @f15(double %a, double %b, i64 %i1, i32 *%base) { +; CHECK: f15: +; CHECK: clgf %r2, -4(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 -1 + %unext = load i32 *%ptr + %i2 = zext i32 %unext to i64 + %cond = icmp ult i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the low end of the CLGF range. +define double @f16(double %a, double %b, i64 %i1, i32 *%base) { +; CHECK: f16: +; CHECK: clgf %r2, -524288(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 -131072 + %unext = load i32 *%ptr + %i2 = zext i32 %unext to i64 + %cond = icmp ult i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define double @f17(double %a, double %b, i64 %i1, i32 *%base) { +; CHECK: f17: +; CHECK: agfi %r3, -524292 +; CHECK: clgf %r2, 0(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 -131073 + %unext = load i32 *%ptr + %i2 = zext i32 %unext to i64 + %cond = icmp ult i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check that CLGF allows an index. 
+define double @f18(double %a, double %b, i64 %i1, i64 %base, i64 %index) { +; CHECK: f18: +; CHECK: clgf %r2, 524284({{%r4,%r3|%r3,%r4}}) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 524284 + %ptr = inttoptr i64 %add2 to i32 * + %unext = load i32 *%ptr + %i2 = zext i32 %unext to i64 + %cond = icmp ult i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-07.ll b/test/CodeGen/SystemZ/int-cmp-07.ll new file mode 100644 index 0000000..1a6f622 --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-07.ll @@ -0,0 +1,118 @@ +; Test 64-bit signed comparison in which the second operand is a variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check CGR. +define double @f1(double %a, double %b, i64 %i1, i64 %i2) { +; CHECK: f1: +; CHECK: cgr %r2, %r3 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp slt i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check CG with no displacement. +define double @f2(double %a, double %b, i64 %i1, i64 *%ptr) { +; CHECK: f2: +; CHECK: cg %r2, 0(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %i2 = load i64 *%ptr + %cond = icmp slt i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the aligned CG range. +define double @f3(double %a, double %b, i64 %i1, i64 *%base) { +; CHECK: f3: +; CHECK: cg %r2, 524280(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 65535 + %i2 = load i64 *%ptr + %cond = icmp slt i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define double @f4(double %a, double %b, i64 %i1, i64 *%base) { +; CHECK: f4: +; CHECK: agfi %r3, 524288 +; CHECK: cg %r2, 0(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 65536 + %i2 = load i64 *%ptr + %cond = icmp slt i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the negative aligned CG range. +define double @f5(double %a, double %b, i64 %i1, i64 *%base) { +; CHECK: f5: +; CHECK: cg %r2, -8(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 -1 + %i2 = load i64 *%ptr + %cond = icmp slt i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the low end of the CG range. +define double @f6(double %a, double %b, i64 %i1, i64 *%base) { +; CHECK: f6: +; CHECK: cg %r2, -524288(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 -65536 + %i2 = load i64 *%ptr + %cond = icmp slt i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next doubleword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define double @f7(double %a, double %b, i64 %i1, i64 *%base) { +; CHECK: f7: +; CHECK: agfi %r3, -524296 +; CHECK: cg %r2, 0(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 -65537 + %i2 = load i64 *%ptr + %cond = icmp slt i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check that CG allows an index. 
+define double @f8(double %a, double %b, i64 %i1, i64 %base, i64 %index) { +; CHECK: f8: +; CHECK: cg %r2, 524280({{%r4,%r3|%r3,%r4}}) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 524280 + %ptr = inttoptr i64 %add2 to i64 * + %i2 = load i64 *%ptr + %cond = icmp slt i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-08.ll b/test/CodeGen/SystemZ/int-cmp-08.ll new file mode 100644 index 0000000..6e9a13e --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-08.ll @@ -0,0 +1,118 @@ +; Test 64-bit unsigned comparison in which the second operand is a variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check CLGR. +define double @f1(double %a, double %b, i64 %i1, i64 %i2) { +; CHECK: f1: +; CHECK: clgr %r2, %r3 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp ult i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check CLG with no displacement. +define double @f2(double %a, double %b, i64 %i1, i64 *%ptr) { +; CHECK: f2: +; CHECK: clg %r2, 0(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %i2 = load i64 *%ptr + %cond = icmp ult i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the aligned CLG range. +define double @f3(double %a, double %b, i64 %i1, i64 *%base) { +; CHECK: f3: +; CHECK: clg %r2, 524280(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 65535 + %i2 = load i64 *%ptr + %cond = icmp ult i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define double @f4(double %a, double %b, i64 %i1, i64 *%base) { +; CHECK: f4: +; CHECK: agfi %r3, 524288 +; CHECK: clg %r2, 0(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 65536 + %i2 = load i64 *%ptr + %cond = icmp ult i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the negative aligned CLG range. +define double @f5(double %a, double %b, i64 %i1, i64 *%base) { +; CHECK: f5: +; CHECK: clg %r2, -8(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 -1 + %i2 = load i64 *%ptr + %cond = icmp ult i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the low end of the CLG range. +define double @f6(double %a, double %b, i64 %i1, i64 *%base) { +; CHECK: f6: +; CHECK: clg %r2, -524288(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 -65536 + %i2 = load i64 *%ptr + %cond = icmp ult i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next doubleword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define double @f7(double %a, double %b, i64 %i1, i64 *%base) { +; CHECK: f7: +; CHECK: agfi %r3, -524296 +; CHECK: clg %r2, 0(%r3) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 -65537 + %i2 = load i64 *%ptr + %cond = icmp ult i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check that CLG allows an index. 
+define double @f8(double %a, double %b, i64 %i1, i64 %base, i64 %index) { +; CHECK: f8: +; CHECK: clg %r2, 524280({{%r4,%r3|%r3,%r4}}) +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 524280 + %ptr = inttoptr i64 %add2 to i64 * + %i2 = load i64 *%ptr + %cond = icmp ult i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-09.ll b/test/CodeGen/SystemZ/int-cmp-09.ll new file mode 100644 index 0000000..bb7213c --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-09.ll @@ -0,0 +1,135 @@ +; Test 32-bit signed comparison in which the second operand is constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check comparisons with 0. +define double @f1(double %a, double %b, i32 %i1) { +; CHECK: f1: +; CHECK: chi %r2, 0 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp slt i32 %i1, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check comparisons with 1. +define double @f2(double %a, double %b, i32 %i1) { +; CHECK: f2: +; CHECK: chi %r2, 1 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp slt i32 %i1, 1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the CHI range. +define double @f3(double %a, double %b, i32 %i1) { +; CHECK: f3: +; CHECK: chi %r2, 32767 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp slt i32 %i1, 32767 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, which must use CFI. +define double @f4(double %a, double %b, i32 %i1) { +; CHECK: f4: +; CHECK: cfi %r2, 32768 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp slt i32 %i1, 32768 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the signed 32-bit range. 
+define double @f5(double %a, double %b, i32 %i1) { +; CHECK: f5: +; CHECK: cfi %r2, 2147483647 +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp eq i32 %i1, 2147483647 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, which should be treated as a negative value. +define double @f6(double %a, double %b, i32 %i1) { +; CHECK: f6: +; CHECK: cfi %r2, -2147483648 +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp eq i32 %i1, 2147483648 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the negative CHI range. +define double @f7(double %a, double %b, i32 %i1) { +; CHECK: f7: +; CHECK: chi %r2, -1 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp slt i32 %i1, -1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the low end of the CHI range. +define double @f8(double %a, double %b, i32 %i1) { +; CHECK: f8: +; CHECK: chi %r2, -32768 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp slt i32 %i1, -32768 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value down, which must use CFI instead. +define double @f9(double %a, double %b, i32 %i1) { +; CHECK: f9: +; CHECK: cfi %r2, -32769 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp slt i32 %i1, -32769 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the low end of the signed 32-bit range. +define double @f10(double %a, double %b, i32 %i1) { +; CHECK: f10: +; CHECK: cfi %r2, -2147483648 +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp eq i32 %i1, -2147483648 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value down, which should be treated as a positive value. 
+define double @f11(double %a, double %b, i32 %i1) { +; CHECK: f11: +; CHECK: cfi %r2, 2147483647 +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp eq i32 %i1, -2147483649 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-10.ll b/test/CodeGen/SystemZ/int-cmp-10.ll new file mode 100644 index 0000000..f2d3ccd --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-10.ll @@ -0,0 +1,28 @@ +; Test 32-bit unsigned comparisons in which the second operand is constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check a value near the low end of the range. We use signed forms for comparisons +; with zero, or things that are equivalent to them. +define double @f1(double %a, double %b, i32 %i1) { +; CHECK: f1: +; CHECK: clfi %r2, 1 +; CHECK-NEXT: j{{g?}}h +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp ugt i32 %i1, 1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check a value near the high end of the range. +define double @f2(double %a, double %b, i32 %i1) { +; CHECK: f2: +; CHECK: clfi %r2, 4294967280 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp ult i32 %i1, 4294967280 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-11.ll b/test/CodeGen/SystemZ/int-cmp-11.ll new file mode 100644 index 0000000..1bfb0c6 --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-11.ll @@ -0,0 +1,135 @@ +; Test 64-bit signed comparisons in which the second operand is a constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check comparisons with 0. +define double @f1(double %a, double %b, i64 %i1) { +; CHECK: f1: +; CHECK: cghi %r2, 0 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp slt i64 %i1, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check comparisons with 1. 
+define double @f2(double %a, double %b, i64 %i1) { +; CHECK: f2: +; CHECK: cghi %r2, 1 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp slt i64 %i1, 1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the CGHI range. +define double @f3(double %a, double %b, i64 %i1) { +; CHECK: f3: +; CHECK: cghi %r2, 32767 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp slt i64 %i1, 32767 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, which must use CGFI. +define double @f4(double %a, double %b, i64 %i1) { +; CHECK: f4: +; CHECK: cgfi %r2, 32768 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp slt i64 %i1, 32768 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the CGFI range. +define double @f5(double %a, double %b, i64 %i1) { +; CHECK: f5: +; CHECK: cgfi %r2, 2147483647 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp slt i64 %i1, 2147483647 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, which must use register comparison. +define double @f6(double %a, double %b, i64 %i1) { +; CHECK: f6: +; CHECK: cgr +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp slt i64 %i1, 2147483648 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the negative CGHI range. +define double @f7(double %a, double %b, i64 %i1) { +; CHECK: f7: +; CHECK: cghi %r2, -1 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp slt i64 %i1, -1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the low end of the CGHI range. 
+define double @f8(double %a, double %b, i64 %i1) { +; CHECK: f8: +; CHECK: cghi %r2, -32768 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp slt i64 %i1, -32768 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value down, which must use CGFI instead. +define double @f9(double %a, double %b, i64 %i1) { +; CHECK: f9: +; CHECK: cgfi %r2, -32769 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp slt i64 %i1, -32769 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the low end of the CGFI range. +define double @f10(double %a, double %b, i64 %i1) { +; CHECK: f10: +; CHECK: cgfi %r2, -2147483648 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp slt i64 %i1, -2147483648 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value down, which must use register comparison. +define double @f11(double %a, double %b, i64 %i1) { +; CHECK: f11: +; CHECK: cgr +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp slt i64 %i1, -2147483649 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-12.ll b/test/CodeGen/SystemZ/int-cmp-12.ll new file mode 100644 index 0000000..0288730 --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-12.ll @@ -0,0 +1,40 @@ +; Test 64-bit unsigned comparisons in which the second operand is constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check a value near the low end of the range. We use signed forms for comparisons +; with zero, or things that are equivalent to them. +define double @f1(double %a, double %b, i64 %i1) { +; CHECK: f1: +; CHECK: clgfi %r2, 1 +; CHECK-NEXT: j{{g?}}h +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp ugt i64 %i1, 1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the CLGFI range. 
+define double @f2(double %a, double %b, i64 %i1) { +; CHECK: f2: +; CHECK: clgfi %r2, 4294967295 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp ult i64 %i1, 4294967295 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, which must use a register comparison. +define double @f3(double %a, double %b, i64 %i1) { +; CHECK: f3: +; CHECK: clgr %r2, +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp ult i64 %i1, 4294967296 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-13.ll b/test/CodeGen/SystemZ/int-cmp-13.ll new file mode 100644 index 0000000..c180831 --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-13.ll @@ -0,0 +1,147 @@ +; Test 64-bit equality comparisons in which the second operand is a constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check comparisons with 0. +define double @f1(double %a, double %b, i64 %i1) { +; CHECK: f1: +; CHECK: cghi %r2, 0 +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp eq i64 %i1, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the CGHI range. +define double @f2(double %a, double %b, i64 %i1) { +; CHECK: f2: +; CHECK: cghi %r2, 32767 +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp eq i64 %i1, 32767 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, which must use CGFI. +define double @f3(double %a, double %b, i64 %i1) { +; CHECK: f3: +; CHECK: cgfi %r2, 32768 +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp eq i64 %i1, 32768 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the CGFI range. 
+define double @f4(double %a, double %b, i64 %i1) { +; CHECK: f4: +; CHECK: cgfi %r2, 2147483647 +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp eq i64 %i1, 2147483647 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, which should use CLGFI instead. +define double @f5(double %a, double %b, i64 %i1) { +; CHECK: f5: +; CHECK: clgfi %r2, 2147483648 +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp eq i64 %i1, 2147483648 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the CLGFI range. +define double @f6(double %a, double %b, i64 %i1) { +; CHECK: f6: +; CHECK: clgfi %r2, 4294967295 +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp eq i64 %i1, 4294967295 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, which must use a register comparison. +define double @f7(double %a, double %b, i64 %i1) { +; CHECK: f7: +; CHECK: cgr %r2, +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp eq i64 %i1, 4294967296 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the negative CGHI range. +define double @f8(double %a, double %b, i64 %i1) { +; CHECK: f8: +; CHECK: cghi %r2, -1 +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp eq i64 %i1, -1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the low end of the CGHI range. +define double @f9(double %a, double %b, i64 %i1) { +; CHECK: f9: +; CHECK: cghi %r2, -32768 +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp eq i64 %i1, -32768 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value down, which must use CGFI instead. 
+define double @f10(double %a, double %b, i64 %i1) { +; CHECK: f10: +; CHECK: cgfi %r2, -32769 +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp eq i64 %i1, -32769 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the low end of the CGFI range. +define double @f11(double %a, double %b, i64 %i1) { +; CHECK: f11: +; CHECK: cgfi %r2, -2147483648 +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp eq i64 %i1, -2147483648 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value down, which must use register comparison. +define double @f12(double %a, double %b, i64 %i1) { +; CHECK: f12: +; CHECK: cgr +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp eq i64 %i1, -2147483649 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-14.ll b/test/CodeGen/SystemZ/int-cmp-14.ll new file mode 100644 index 0000000..6a7e0e6 --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-14.ll @@ -0,0 +1,147 @@ +; Test 64-bit inequality comparisons in which the second operand is a constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check comparisons with 0. +define double @f1(double %a, double %b, i64 %i1) { +; CHECK: f1: +; CHECK: cghi %r2, 0 +; CHECK-NEXT: j{{g?}}lh +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp ne i64 %i1, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the CGHI range. +define double @f2(double %a, double %b, i64 %i1) { +; CHECK: f2: +; CHECK: cghi %r2, 32767 +; CHECK-NEXT: j{{g?}}lh +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp ne i64 %i1, 32767 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, which must use CGFI. 
+define double @f3(double %a, double %b, i64 %i1) { +; CHECK: f3: +; CHECK: cgfi %r2, 32768 +; CHECK-NEXT: j{{g?}}lh +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp ne i64 %i1, 32768 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the CGFI range. +define double @f4(double %a, double %b, i64 %i1) { +; CHECK: f4: +; CHECK: cgfi %r2, 2147483647 +; CHECK-NEXT: j{{g?}}lh +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp ne i64 %i1, 2147483647 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, which should use CLGFI instead. +define double @f5(double %a, double %b, i64 %i1) { +; CHECK: f5: +; CHECK: clgfi %r2, 2147483648 +; CHECK-NEXT: j{{g?}}lh +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp ne i64 %i1, 2147483648 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the CLGFI range. +define double @f6(double %a, double %b, i64 %i1) { +; CHECK: f6: +; CHECK: clgfi %r2, 4294967295 +; CHECK-NEXT: j{{g?}}lh +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp ne i64 %i1, 4294967295 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, which must use a register comparison. +define double @f7(double %a, double %b, i64 %i1) { +; CHECK: f7: +; CHECK: cgr %r2, +; CHECK-NEXT: j{{g?}}lh +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp ne i64 %i1, 4294967296 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the negative CGHI range. +define double @f8(double %a, double %b, i64 %i1) { +; CHECK: f8: +; CHECK: cghi %r2, -1 +; CHECK-NEXT: j{{g?}}lh +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp ne i64 %i1, -1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the low end of the CGHI range. 
+define double @f9(double %a, double %b, i64 %i1) { +; CHECK: f9: +; CHECK: cghi %r2, -32768 +; CHECK-NEXT: j{{g?}}lh +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp ne i64 %i1, -32768 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value down, which must use CGFI instead. +define double @f10(double %a, double %b, i64 %i1) { +; CHECK: f10: +; CHECK: cgfi %r2, -32769 +; CHECK-NEXT: j{{g?}}lh +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp ne i64 %i1, -32769 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the low end of the CGFI range. +define double @f11(double %a, double %b, i64 %i1) { +; CHECK: f11: +; CHECK: cgfi %r2, -2147483648 +; CHECK-NEXT: j{{g?}}lh +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp ne i64 %i1, -2147483648 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value down, which must use register comparison. +define double @f12(double %a, double %b, i64 %i1) { +; CHECK: f12: +; CHECK: cgr +; CHECK-NEXT: j{{g?}}lh +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp ne i64 %i1, -2147483649 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-15.ll b/test/CodeGen/SystemZ/int-cmp-15.ll new file mode 100644 index 0000000..6bb7e2b --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-15.ll @@ -0,0 +1,241 @@ +; Test 8-bit unsigned comparisons between memory and constants. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check ordered comparisons near the low end of the unsigned 8-bit range. +define double @f1(double %a, double %b, i8 *%ptr) { +; CHECK: f1: +; CHECK: cli 0(%r2), 1 +; CHECK-NEXT: j{{g?}}h +; CHECK: br %r14 + %val = load i8 *%ptr + %cond = icmp ugt i8 %val, 1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check ordered comparisons near the high end of the unsigned 8-bit range. 
+define double @f2(double %a, double %b, i8 *%ptr) { +; CHECK: f2: +; CHECK: cli 0(%r2), 254 +; CHECK-NEXT: j{{g?}}l +; CHECK: br %r14 + %val = load i8 *%ptr + %cond = icmp ult i8 %val, 254 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check tests for negative bytes. +define double @f3(double %a, double %b, i8 *%ptr) { +; CHECK: f3: +; CHECK: cli 0(%r2), 127 +; CHECK-NEXT: j{{g?}}h +; CHECK: br %r14 + %val = load i8 *%ptr + %cond = icmp slt i8 %val, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; ...and an alternative form. +define double @f4(double %a, double %b, i8 *%ptr) { +; CHECK: f4: +; CHECK: cli 0(%r2), 127 +; CHECK-NEXT: j{{g?}}h +; CHECK: br %r14 + %val = load i8 *%ptr + %cond = icmp sle i8 %val, -1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check tests for non-negative bytes. +define double @f5(double %a, double %b, i8 *%ptr) { +; CHECK: f5: +; CHECK: cli 0(%r2), 128 +; CHECK-NEXT: j{{g?}}l +; CHECK: br %r14 + %val = load i8 *%ptr + %cond = icmp sge i8 %val, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; ...and an alternative form. +define double @f6(double %a, double %b, i8 *%ptr) { +; CHECK: f6: +; CHECK: cli 0(%r2), 128 +; CHECK-NEXT: j{{g?}}l +; CHECK: br %r14 + %val = load i8 *%ptr + %cond = icmp sgt i8 %val, -1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check equality comparisons at the low end of the signed 8-bit range. +define double @f7(double %a, double %b, i8 *%ptr) { +; CHECK: f7: +; CHECK: cli 0(%r2), 128 +; CHECK-NEXT: j{{g?}}e +; CHECK: br %r14 + %val = load i8 *%ptr + %cond = icmp eq i8 %val, -128 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check equality comparisons at the low end of the unsigned 8-bit range. 
+define double @f8(double %a, double %b, i8 *%ptr) { +; CHECK: f8: +; CHECK: cli 0(%r2), 0 +; CHECK-NEXT: j{{g?}}e +; CHECK: br %r14 + %val = load i8 *%ptr + %cond = icmp eq i8 %val, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check equality comparisons at the high end of the signed 8-bit range. +define double @f9(double %a, double %b, i8 *%ptr) { +; CHECK: f9: +; CHECK: cli 0(%r2), 127 +; CHECK-NEXT: j{{g?}}e +; CHECK: br %r14 + %val = load i8 *%ptr + %cond = icmp eq i8 %val, 127 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check equality comparisons at the high end of the unsigned 8-bit range. +define double @f10(double %a, double %b, i8 *%ptr) { +; CHECK: f10: +; CHECK: cli 0(%r2), 255 +; CHECK-NEXT: j{{g?}}e +; CHECK: br %r14 + %val = load i8 *%ptr + %cond = icmp eq i8 %val, 255 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the CLI range. +define double @f11(double %a, double %b, i8 *%src) { +; CHECK: f11: +; CHECK: cli 4095(%r2), 127 +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 4095 + %val = load i8 *%ptr + %cond = icmp ult i8 %val, 127 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next byte up, which should use CLIY instead of CLI. +define double @f12(double %a, double %b, i8 *%src) { +; CHECK: f12: +; CHECK: cliy 4096(%r2), 127 +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 4096 + %val = load i8 *%ptr + %cond = icmp ult i8 %val, 127 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the CLIY range. +define double @f13(double %a, double %b, i8 *%src) { +; CHECK: f13: +; CHECK: cliy 524287(%r2), 127 +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 524287 + %val = load i8 *%ptr + %cond = icmp ult i8 %val, 127 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next byte up, which needs separate address logic. 
+; Other sequences besides this one would be OK. +define double @f14(double %a, double %b, i8 *%src) { +; CHECK: f14: +; CHECK: agfi %r2, 524288 +; CHECK: cli 0(%r2), 127 +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 524288 + %val = load i8 *%ptr + %cond = icmp ult i8 %val, 127 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the negative CLIY range. +define double @f15(double %a, double %b, i8 *%src) { +; CHECK: f15: +; CHECK: cliy -1(%r2), 127 +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 -1 + %val = load i8 *%ptr + %cond = icmp ult i8 %val, 127 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the low end of the CLIY range. +define double @f16(double %a, double %b, i8 *%src) { +; CHECK: f16: +; CHECK: cliy -524288(%r2), 127 +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 -524288 + %val = load i8 *%ptr + %cond = icmp ult i8 %val, 127 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next byte down, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define double @f17(double %a, double %b, i8 *%src) { +; CHECK: f17: +; CHECK: agfi %r2, -524289 +; CHECK: cli 0(%r2), 127 +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 -524289 + %val = load i8 *%ptr + %cond = icmp ult i8 %val, 127 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check that CLI does not allow an index +define double @f18(double %a, double %b, i64 %base, i64 %index) { +; CHECK: f18: +; CHECK: agr %r2, %r3 +; CHECK: cli 4095(%r2), 127 +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 4095 + %ptr = inttoptr i64 %add2 to i8 * + %val = load i8 *%ptr + %cond = icmp ult i8 %val, 127 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check that CLIY does not allow an index +define double @f19(double %a, double %b, i64 %base, i64 %index) { +; CHECK: f19: +; CHECK: agr %r2, %r3 +; CHECK: cliy 4096(%r2), 127 +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i8 * + %val = load i8 *%ptr + %cond = icmp ult i8 %val, 127 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-16.ll b/test/CodeGen/SystemZ/int-cmp-16.ll new file mode 100644 index 0000000..8af854e --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-16.ll @@ -0,0 +1,133 @@ +; Test 32-bit equality comparisons that are really between a memory byte +; and a constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the low end of the 8-bit unsigned range, with zero extension. +define double @f1(double %a, double %b, i8 *%ptr) { +; CHECK: f1: +; CHECK: cli 0(%r2), 0 +; CHECK-NEXT: j{{g?}}e +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i32 + %cond = icmp eq i32 %ext, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the 8-bit unsigned range, with zero extension. 
+define double @f2(double %a, double %b, i8 *%ptr) { +; CHECK: f2: +; CHECK: cli 0(%r2), 255 +; CHECK-NEXT: j{{g?}}e +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i32 + %cond = icmp eq i32 %ext, 255 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, with zero extension. The condition is always false. +define double @f3(double %a, double %b, i8 *%ptr) { +; CHECK: f3: +; CHECK-NOT: cli +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i32 + %cond = icmp eq i32 %ext, 256 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check comparisons with -1, with zero extension. +; This condition is also always false. +define double @f4(double %a, double %b, i8 *%ptr) { +; CHECK: f4: +; CHECK-NOT: cli +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i32 + %cond = icmp eq i32 %ext, -1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check comparisons with 0, using sign extension. +define double @f5(double %a, double %b, i8 *%ptr) { +; CHECK: f5: +; CHECK: cli 0(%r2), 0 +; CHECK-NEXT: j{{g?}}e +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i32 + %cond = icmp eq i32 %ext, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the signed 8-bit range, using sign extension. +define double @f6(double %a, double %b, i8 *%ptr) { +; CHECK: f6: +; CHECK: cli 0(%r2), 127 +; CHECK-NEXT: j{{g?}}e +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i32 + %cond = icmp eq i32 %ext, 127 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, using sign extension. +; The condition is always false. 
+define double @f7(double %a, double %b, i8 *%ptr) { +; CHECK: f7: +; CHECK-NOT: cli +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i32 + %cond = icmp eq i32 %ext, 128 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check comparisons with -1, using sign extension. +define double @f8(double %a, double %b, i8 *%ptr) { +; CHECK: f8: +; CHECK: cli 0(%r2), 255 +; CHECK-NEXT: j{{g?}}e +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i32 + %cond = icmp eq i32 %ext, -1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the low end of the signed 8-bit range, using sign extension. +define double @f9(double %a, double %b, i8 *%ptr) { +; CHECK: f9: +; CHECK: cli 0(%r2), 128 +; CHECK-NEXT: j{{g?}}e +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i32 + %cond = icmp eq i32 %ext, -128 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value down, using sign extension. +; The condition is always false. +define double @f10(double %a, double %b, i8 *%ptr) { +; CHECK: f10: +; CHECK-NOT: cli +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i32 + %cond = icmp eq i32 %ext, -129 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-17.ll b/test/CodeGen/SystemZ/int-cmp-17.ll new file mode 100644 index 0000000..d4d5e98 --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-17.ll @@ -0,0 +1,133 @@ +; Test 32-bit inequality comparisons that are really between a memory byte +; and a constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the low end of the 8-bit unsigned range, with zero extension. 
+define double @f1(double %a, double %b, i8 *%ptr) { +; CHECK: f1: +; CHECK: cli 0(%r2), 0 +; CHECK-NEXT: j{{g?}}lh +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i32 + %cond = icmp ne i32 %ext, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the 8-bit unsigned range, with zero extension. +define double @f2(double %a, double %b, i8 *%ptr) { +; CHECK: f2: +; CHECK: cli 0(%r2), 255 +; CHECK-NEXT: j{{g?}}lh +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i32 + %cond = icmp ne i32 %ext, 255 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, with zero extension. The condition is always true. +define double @f3(double %a, double %b, i8 *%ptr) { +; CHECK: f3: +; CHECK-NOT: cli +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i32 + %cond = icmp ne i32 %ext, 256 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check comparisons with -1, with zero extension. +; This condition is also always true. +define double @f4(double %a, double %b, i8 *%ptr) { +; CHECK: f4: +; CHECK-NOT: cli +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i32 + %cond = icmp ne i32 %ext, -1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check comparisons with 0, using sign extension. +define double @f5(double %a, double %b, i8 *%ptr) { +; CHECK: f5: +; CHECK: cli 0(%r2), 0 +; CHECK-NEXT: j{{g?}}lh +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i32 + %cond = icmp ne i32 %ext, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the signed 8-bit range, using sign extension. 
+define double @f6(double %a, double %b, i8 *%ptr) { +; CHECK: f6: +; CHECK: cli 0(%r2), 127 +; CHECK-NEXT: j{{g?}}lh +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i32 + %cond = icmp ne i32 %ext, 127 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, using sign extension. +; The condition is always true. +define double @f7(double %a, double %b, i8 *%ptr) { +; CHECK: f7: +; CHECK-NOT: cli +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i32 + %cond = icmp ne i32 %ext, 128 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check comparisons with -1, using sign extension. +define double @f8(double %a, double %b, i8 *%ptr) { +; CHECK: f8: +; CHECK: cli 0(%r2), 255 +; CHECK-NEXT: j{{g?}}lh +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i32 + %cond = icmp ne i32 %ext, -1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the low end of the signed 8-bit range, using sign extension. +define double @f9(double %a, double %b, i8 *%ptr) { +; CHECK: f9: +; CHECK: cli 0(%r2), 128 +; CHECK-NEXT: j{{g?}}lh +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i32 + %cond = icmp ne i32 %ext, -128 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value down, using sign extension. +; The condition is always true. +define double @f10(double %a, double %b, i8 *%ptr) { +; CHECK: f10: +; CHECK-NOT: cli +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i32 + %cond = icmp ne i32 %ext, -129 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-18.ll b/test/CodeGen/SystemZ/int-cmp-18.ll new file mode 100644 index 0000000..9822dc2 --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-18.ll @@ -0,0 +1,133 @@ +; Test 64-bit equality comparisons that are really between a memory byte +; and a constant. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the low end of the 8-bit unsigned range, with zero extension. +define double @f1(double %a, double %b, i8 *%ptr) { +; CHECK: f1: +; CHECK: cli 0(%r2), 0 +; CHECK-NEXT: j{{g?}}e +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i64 + %cond = icmp eq i64 %ext, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the 8-bit unsigned range, with zero extension. +define double @f2(double %a, double %b, i8 *%ptr) { +; CHECK: f2: +; CHECK: cli 0(%r2), 255 +; CHECK-NEXT: j{{g?}}e +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i64 + %cond = icmp eq i64 %ext, 255 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, with zero extension. The condition is always false. +define double @f3(double %a, double %b, i8 *%ptr) { +; CHECK: f3: +; CHECK-NOT: cli +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i64 + %cond = icmp eq i64 %ext, 256 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check comparisons with -1, with zero extension. +; This condition is also always false. +define double @f4(double %a, double %b, i8 *%ptr) { +; CHECK: f4: +; CHECK-NOT: cli +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i64 + %cond = icmp eq i64 %ext, -1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check comparisons with 0, using sign extension. +define double @f5(double %a, double %b, i8 *%ptr) { +; CHECK: f5: +; CHECK: cli 0(%r2), 0 +; CHECK-NEXT: j{{g?}}e +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i64 + %cond = icmp eq i64 %ext, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the signed 8-bit range, using sign extension. 
+define double @f6(double %a, double %b, i8 *%ptr) { +; CHECK: f6: +; CHECK: cli 0(%r2), 127 +; CHECK-NEXT: j{{g?}}e +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i64 + %cond = icmp eq i64 %ext, 127 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, using sign extension. +; The condition is always false. +define double @f7(double %a, double %b, i8 *%ptr) { +; CHECK: f7: +; CHECK-NOT: cli +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i64 + %cond = icmp eq i64 %ext, 128 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check comparisons with -1, using sign extension. +define double @f8(double %a, double %b, i8 *%ptr) { +; CHECK: f8: +; CHECK: cli 0(%r2), 255 +; CHECK-NEXT: j{{g?}}e +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i64 + %cond = icmp eq i64 %ext, -1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the low end of the signed 8-bit range, using sign extension. +define double @f9(double %a, double %b, i8 *%ptr) { +; CHECK: f9: +; CHECK: cli 0(%r2), 128 +; CHECK-NEXT: j{{g?}}e +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i64 + %cond = icmp eq i64 %ext, -128 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value down, using sign extension. +; The condition is always false. +define double @f10(double %a, double %b, i8 *%ptr) { +; CHECK: f10: +; CHECK-NOT: cli +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i64 + %cond = icmp eq i64 %ext, -129 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-19.ll b/test/CodeGen/SystemZ/int-cmp-19.ll new file mode 100644 index 0000000..7d29dbc --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-19.ll @@ -0,0 +1,133 @@ +; Test 64-bit inequality comparisons that are really between a memory byte +; and a constant. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the low end of the 8-bit unsigned range, with zero extension. +define double @f1(double %a, double %b, i8 *%ptr) { +; CHECK: f1: +; CHECK: cli 0(%r2), 0 +; CHECK-NEXT: j{{g?}}lh +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i64 + %cond = icmp ne i64 %ext, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the 8-bit unsigned range, with zero extension. +define double @f2(double %a, double %b, i8 *%ptr) { +; CHECK: f2: +; CHECK: cli 0(%r2), 255 +; CHECK-NEXT: j{{g?}}lh +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i64 + %cond = icmp ne i64 %ext, 255 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, with zero extension. The condition is always true. +define double @f3(double %a, double %b, i8 *%ptr) { +; CHECK: f3: +; CHECK-NOT: cli +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i64 + %cond = icmp ne i64 %ext, 256 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check comparisons with -1, with zero extension. +; This condition is also always true. +define double @f4(double %a, double %b, i8 *%ptr) { +; CHECK: f4: +; CHECK-NOT: cli +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i64 + %cond = icmp ne i64 %ext, -1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check comparisons with 0, using sign extension. +define double @f5(double %a, double %b, i8 *%ptr) { +; CHECK: f5: +; CHECK: cli 0(%r2), 0 +; CHECK-NEXT: j{{g?}}lh +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i64 + %cond = icmp ne i64 %ext, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the signed 8-bit range, using sign extension. 
+define double @f6(double %a, double %b, i8 *%ptr) { +; CHECK: f6: +; CHECK: cli 0(%r2), 127 +; CHECK-NEXT: j{{g?}}lh +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i64 + %cond = icmp ne i64 %ext, 127 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, using sign extension. +; The condition is always true. +define double @f7(double %a, double %b, i8 *%ptr) { +; CHECK: f7: +; CHECK-NOT: cli +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i64 + %cond = icmp ne i64 %ext, 128 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check comparisons with -1, using sign extension. +define double @f8(double %a, double %b, i8 *%ptr) { +; CHECK: f8: +; CHECK: cli 0(%r2), 255 +; CHECK-NEXT: j{{g?}}lh +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i64 + %cond = icmp ne i64 %ext, -1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the low end of the signed 8-bit range, using sign extension. +define double @f9(double %a, double %b, i8 *%ptr) { +; CHECK: f9: +; CHECK: cli 0(%r2), 128 +; CHECK-NEXT: j{{g?}}lh +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i64 + %cond = icmp ne i64 %ext, -128 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value down, using sign extension. +; The condition is always true. +define double @f10(double %a, double %b, i8 *%ptr) { +; CHECK: f10: +; CHECK-NOT: cli +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i64 + %cond = icmp ne i64 %ext, -129 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-20.ll b/test/CodeGen/SystemZ/int-cmp-20.ll new file mode 100644 index 0000000..8fffbc8 --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-20.ll @@ -0,0 +1,220 @@ +; Test 32-bit ordered comparisons that are really between a memory byte +; and a constant. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check unsigned comparison near the low end of the CLI range, using zero +; extension. +define double @f1(double %a, double %b, i8 *%ptr) { +; CHECK: f1: +; CHECK: cli 0(%r2), 1 +; CHECK-NEXT: j{{g?}}h +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i32 + %cond = icmp ugt i32 %ext, 1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check unsigned comparison near the low end of the CLI range, using sign +; extension. +define double @f2(double %a, double %b, i8 *%ptr) { +; CHECK: f2: +; CHECK: cli 0(%r2), 1 +; CHECK-NEXT: j{{g?}}h +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i32 + %cond = icmp ugt i32 %ext, 1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check unsigned comparison near the high end of the CLI range, using zero +; extension. +define double @f3(double %a, double %b, i8 *%ptr) { +; CHECK: f3: +; CHECK: cli 0(%r2), 254 +; CHECK-NEXT: j{{g?}}l +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i32 + %cond = icmp ult i32 %ext, 254 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check unsigned comparison near the high end of the CLI range, using sign +; extension. +define double @f4(double %a, double %b, i8 *%ptr) { +; CHECK: f4: +; CHECK: cli 0(%r2), 254 +; CHECK-NEXT: j{{g?}}l +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i32 + %cond = icmp ult i32 %ext, -2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check unsigned comparison above the high end of the CLI range, using zero +; extension. The condition is always true. 
+define double @f5(double %a, double %b, i8 *%ptr) { +; CHECK: f5: +; CHECK-NOT: cli +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i32 + %cond = icmp ult i32 %ext, 256 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; When using unsigned comparison with sign extension, equality with values +; in the range [128, MAX-128] (i.e. up to -129) is impossible, and ordered +; comparisons with those values are effectively sign tests. Since such comparisons are +; unlikely to occur in practice, we don't bother optimizing the second case, +; and simply ignore CLI for this range. First check the low end of the range. +define double @f6(double %a, double %b, i8 *%ptr) { +; CHECK: f6: +; CHECK-NOT: cli +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i32 + %cond = icmp ult i32 %ext, 128 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; ...and then the high end. +define double @f7(double %a, double %b, i8 *%ptr) { +; CHECK: f7: +; CHECK-NOT: cli +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i32 + %cond = icmp ult i32 %ext, -129 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check signed comparison near the low end of the CLI range, using zero +; extension. This is equivalent to unsigned comparison. +define double @f8(double %a, double %b, i8 *%ptr) { +; CHECK: f8: +; CHECK: cli 0(%r2), 1 +; CHECK-NEXT: j{{g?}}h +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i32 + %cond = icmp sgt i32 %ext, 1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check signed comparison near the low end of the CLI range, using sign +; extension. This cannot use CLI. 
+define double @f9(double %a, double %b, i8 *%ptr) { +; CHECK: f9: +; CHECK-NOT: cli +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i32 + %cond = icmp sgt i32 %ext, 1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check signed comparison near the high end of the CLI range, using zero +; extension. This is equivalent to unsigned comparison. +define double @f10(double %a, double %b, i8 *%ptr) { +; CHECK: f10: +; CHECK: cli 0(%r2), 254 +; CHECK-NEXT: j{{g?}}l +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i32 + %cond = icmp slt i32 %ext, 254 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check signed comparison near the high end of the CLI range, using sign +; extension. This cannot use CLI. +define double @f11(double %a, double %b, i8 *%ptr) { +; CHECK: f11: +; CHECK-NOT: cli +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i32 + %cond = icmp slt i32 %ext, -2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check signed comparison above the high end of the CLI range, using zero +; extension. The condition is always true. +define double @f12(double %a, double %b, i8 *%ptr) { +; CHECK: f12: +; CHECK-NOT: cli +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i32 + %cond = icmp slt i32 %ext, 256 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check tests for nonnegative values. 
+define double @f13(double %a, double %b, i8 *%ptr) { +; CHECK: f13: +; CHECK: cli 0(%r2), 128 +; CHECK-NEXT: j{{g?}}l +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i32 + %cond = icmp sge i32 %ext, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; ...and another form +define double @f14(double %a, double %b, i8 *%ptr) { +; CHECK: f14: +; CHECK: cli 0(%r2), 128 +; CHECK-NEXT: j{{g?}}l +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i32 + %cond = icmp sgt i32 %ext, -1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check tests for negative values. +define double @f15(double %a, double %b, i8 *%ptr) { +; CHECK: f15: +; CHECK: cli 0(%r2), 127 +; CHECK-NEXT: j{{g?}}h +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i32 + %cond = icmp slt i32 %ext, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; ...and another form +define double @f16(double %a, double %b, i8 *%ptr) { +; CHECK: f16: +; CHECK: cli 0(%r2), 127 +; CHECK-NEXT: j{{g?}}h +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i32 + %cond = icmp sle i32 %ext, -1 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-21.ll b/test/CodeGen/SystemZ/int-cmp-21.ll new file mode 100644 index 0000000..43447b8 --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-21.ll @@ -0,0 +1,220 @@ +; Test 64-bit ordered comparisons that are really between a memory byte +; and a constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check unsigned comparison near the low end of the CLI range, using zero +; extension. 
+define double @f1(double %a, double %b, i8 *%ptr) { +; CHECK: f1: +; CHECK: cli 0(%r2), 1 +; CHECK-NEXT: j{{g?}}h +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i64 + %cond = icmp ugt i64 %ext, 1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check unsigned comparison near the low end of the CLI range, using sign +; extension. +define double @f2(double %a, double %b, i8 *%ptr) { +; CHECK: f2: +; CHECK: cli 0(%r2), 1 +; CHECK-NEXT: j{{g?}}h +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i64 + %cond = icmp ugt i64 %ext, 1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check unsigned comparison near the high end of the CLI range, using zero +; extension. +define double @f3(double %a, double %b, i8 *%ptr) { +; CHECK: f3: +; CHECK: cli 0(%r2), 254 +; CHECK-NEXT: j{{g?}}l +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i64 + %cond = icmp ult i64 %ext, 254 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check unsigned comparison near the high end of the CLI range, using sign +; extension. +define double @f4(double %a, double %b, i8 *%ptr) { +; CHECK: f4: +; CHECK: cli 0(%r2), 254 +; CHECK-NEXT: j{{g?}}l +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i64 + %cond = icmp ult i64 %ext, -2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check unsigned comparison above the high end of the CLI range, using zero +; extension. The condition is always true. +define double @f5(double %a, double %b, i8 *%ptr) { +; CHECK: f5: +; CHECK-NOT: cli +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i64 + %cond = icmp ult i64 %ext, 256 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; When using unsigned comparison with sign extension, equality with values +; in the range [128, MAX-128] (i.e. up to -129) is impossible, and ordered +; comparisons with those values are effectively sign tests. 
Since such comparisons are +; unlikely to occur in practice, we don't bother optimizing the second case, +; and simply ignore CLI for this range. First check the low end of the range. +define double @f6(double %a, double %b, i8 *%ptr) { +; CHECK: f6: +; CHECK-NOT: cli +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i64 + %cond = icmp ult i64 %ext, 128 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; ...and then the high end. +define double @f7(double %a, double %b, i8 *%ptr) { +; CHECK: f7: +; CHECK-NOT: cli +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i64 + %cond = icmp ult i64 %ext, -129 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check signed comparison near the low end of the CLI range, using zero +; extension. This is equivalent to unsigned comparison. +define double @f8(double %a, double %b, i8 *%ptr) { +; CHECK: f8: +; CHECK: cli 0(%r2), 1 +; CHECK-NEXT: j{{g?}}h +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i64 + %cond = icmp sgt i64 %ext, 1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check signed comparison near the low end of the CLI range, using sign +; extension. This cannot use CLI. +define double @f9(double %a, double %b, i8 *%ptr) { +; CHECK: f9: +; CHECK-NOT: cli +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i64 + %cond = icmp sgt i64 %ext, 1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check signed comparison near the high end of the CLI range, using zero +; extension. This is equivalent to unsigned comparison. 
+define double @f10(double %a, double %b, i8 *%ptr) { +; CHECK: f10: +; CHECK: cli 0(%r2), 254 +; CHECK-NEXT: j{{g?}}l +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i64 + %cond = icmp slt i64 %ext, 254 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check signed comparison near the high end of the CLI range, using sign +; extension. This cannot use CLI. +define double @f11(double %a, double %b, i8 *%ptr) { +; CHECK: f11: +; CHECK-NOT: cli +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i64 + %cond = icmp slt i64 %ext, -2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check signed comparison above the high end of the CLI range, using zero +; extension. The condition is always true. +define double @f12(double %a, double %b, i8 *%ptr) { +; CHECK: f12: +; CHECK-NOT: cli +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i64 + %cond = icmp slt i64 %ext, 256 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check tests for nonnegative values. +define double @f13(double %a, double %b, i8 *%ptr) { +; CHECK: f13: +; CHECK: cli 0(%r2), 128 +; CHECK-NEXT: j{{g?}}l +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i64 + %cond = icmp sge i64 %ext, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; ...and another form +define double @f14(double %a, double %b, i8 *%ptr) { +; CHECK: f14: +; CHECK: cli 0(%r2), 128 +; CHECK-NEXT: j{{g?}}l +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i64 + %cond = icmp sgt i64 %ext, -1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check tests for negative values. 
+define double @f15(double %a, double %b, i8 *%ptr) { +; CHECK: f15: +; CHECK: cli 0(%r2), 127 +; CHECK-NEXT: j{{g?}}h +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i64 + %cond = icmp slt i64 %ext, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; ...and another form +define double @f16(double %a, double %b, i8 *%ptr) { +; CHECK: f16: +; CHECK: cli 0(%r2), 127 +; CHECK-NEXT: j{{g?}}h +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i64 + %cond = icmp sle i64 %ext, -1 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-22.ll b/test/CodeGen/SystemZ/int-cmp-22.ll new file mode 100644 index 0000000..513d4be --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-22.ll @@ -0,0 +1,128 @@ +; Test 16-bit signed ordered comparisons between memory and a constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check comparisons with 0. +define double @f1(double %a, double %b, i16 *%ptr) { +; CHECK: f1: +; CHECK: chhsi 0(%r2), 0 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i16 *%ptr + %cond = icmp slt i16 %val, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check comparisons with 1. +define double @f2(double %a, double %b, i16 *%ptr) { +; CHECK: f2: +; CHECK: chhsi 0(%r2), 1 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i16 *%ptr + %cond = icmp slt i16 %val, 1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check a value near the high end of the signed 16-bit range. +define double @f3(double %a, double %b, i16 *%ptr) { +; CHECK: f3: +; CHECK: chhsi 0(%r2), 32766 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i16 *%ptr + %cond = icmp slt i16 %val, 32766 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check comparisons with -1. 
+define double @f4(double %a, double %b, i16 *%ptr) { +; CHECK: f4: +; CHECK: chhsi 0(%r2), -1 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i16 *%ptr + %cond = icmp slt i16 %val, -1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check a value near the low end of the 16-bit signed range. +define double @f5(double %a, double %b, i16 *%ptr) { +; CHECK: f5: +; CHECK: chhsi 0(%r2), -32766 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i16 *%ptr + %cond = icmp slt i16 %val, -32766 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the CHHSI range. +define double @f6(double %a, double %b, i16 %i1, i16 *%base) { +; CHECK: f6: +; CHECK: chhsi 4094(%r3), 0 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i16 *%base, i64 2047 + %val = load i16 *%ptr + %cond = icmp slt i16 %val, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next halfword up, which needs separate address logic. +define double @f7(double %a, double %b, i16 *%base) { +; CHECK: f7: +; CHECK: aghi %r2, 4096 +; CHECK: chhsi 0(%r2), 0 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i16 *%base, i64 2048 + %val = load i16 *%ptr + %cond = icmp slt i16 %val, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check negative offsets, which also need separate address logic. +define double @f8(double %a, double %b, i16 *%base) { +; CHECK: f8: +; CHECK: aghi %r2, -2 +; CHECK: chhsi 0(%r2), 0 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i16 *%base, i64 -1 + %val = load i16 *%ptr + %cond = icmp slt i16 %val, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check that CHHSI does not allow indices. 
+define double @f9(double %a, double %b, i64 %base, i64 %index) { +; CHECK: f9: +; CHECK: agr {{%r2, %r3|%r3, %r2}} +; CHECK: chhsi 0({{%r[23]}}), 0 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %add = add i64 %base, %index + %ptr = inttoptr i64 %add to i16 * + %val = load i16 *%ptr + %cond = icmp slt i16 %val, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-23.ll b/test/CodeGen/SystemZ/int-cmp-23.ll new file mode 100644 index 0000000..40e1331 --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-23.ll @@ -0,0 +1,89 @@ +; Test 16-bit unsigned comparisons between memory and a constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check a value near the low end of the unsigned 16-bit range. +define double @f1(double %a, double %b, i16 *%ptr) { +; CHECK: f1: +; CHECK: clhhsi 0(%r2), 1 +; CHECK-NEXT: j{{g?}}h +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i16 *%ptr + %cond = icmp ugt i16 %val, 1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check a value near the high end of the unsigned 16-bit range. +define double @f2(double %a, double %b, i16 *%ptr) { +; CHECK: f2: +; CHECK: clhhsi 0(%r2), 65534 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i16 *%ptr + %cond = icmp ult i16 %val, 65534 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the CLHHSI range. 
+define double @f3(double %a, double %b, i16 %i1, i16 *%base) { +; CHECK: f3: +; CHECK: clhhsi 4094(%r3), 1 +; CHECK-NEXT: j{{g?}}h +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i16 *%base, i64 2047 + %val = load i16 *%ptr + %cond = icmp ugt i16 %val, 1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next halfword up, which needs separate address logic. +define double @f4(double %a, double %b, i16 *%base) { +; CHECK: f4: +; CHECK: aghi %r2, 4096 +; CHECK: clhhsi 0(%r2), 1 +; CHECK-NEXT: j{{g?}}h +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i16 *%base, i64 2048 + %val = load i16 *%ptr + %cond = icmp ugt i16 %val, 1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check negative offsets, which also need separate address logic. +define double @f5(double %a, double %b, i16 *%base) { +; CHECK: f5: +; CHECK: aghi %r2, -2 +; CHECK: clhhsi 0(%r2), 1 +; CHECK-NEXT: j{{g?}}h +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i16 *%base, i64 -1 + %val = load i16 *%ptr + %cond = icmp ugt i16 %val, 1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check that CLHHSI does not allow indices. +define double @f6(double %a, double %b, i64 %base, i64 %index) { +; CHECK: f6: +; CHECK: agr {{%r2, %r3|%r3, %r2}} +; CHECK: clhhsi 0({{%r[23]}}), 1 +; CHECK-NEXT: j{{g?}}h +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %add = add i64 %base, %index + %ptr = inttoptr i64 %add to i16 * + %val = load i16 *%ptr + %cond = icmp ugt i16 %val, 1 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-24.ll b/test/CodeGen/SystemZ/int-cmp-24.ll new file mode 100644 index 0000000..46186cd --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-24.ll @@ -0,0 +1,55 @@ +; Test 16-bit equality comparisons between memory and a constant. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the low end of the unsigned 16-bit range. +define double @f1(double %a, double %b, i16 *%ptr) { +; CHECK: f1: +; CHECK: clhhsi 0(%r2), 0 +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i16 *%ptr + %cond = icmp eq i16 %val, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the unsigned 16-bit range. +define double @f2(double %a, double %b, i16 *%ptr) { +; CHECK: f2: +; CHECK: clhhsi 0(%r2), 65535 +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i16 *%ptr + %cond = icmp eq i16 %val, 65535 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the low end of the signed 16-bit range. +define double @f3(double %a, double %b, i16 *%ptr) { +; CHECK: f3: +; CHECK: clhhsi 0(%r2), 32768 +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i16 *%ptr + %cond = icmp eq i16 %val, -32768 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the signed 16-bit range. +define double @f4(double %a, double %b, i16 *%ptr) { +; CHECK: f4: +; CHECK: clhhsi 0(%r2), 32767 +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i16 *%ptr + %cond = icmp eq i16 %val, 32767 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-25.ll b/test/CodeGen/SystemZ/int-cmp-25.ll new file mode 100644 index 0000000..a3a223f --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-25.ll @@ -0,0 +1,55 @@ +; Test 16-bit inequality comparisons between memory and a constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the low end of the unsigned 16-bit range. 
+define double @f1(double %a, double %b, i16 *%ptr) { +; CHECK: f1: +; CHECK: clhhsi 0(%r2), 0 +; CHECK-NEXT: j{{g?}}lh +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i16 *%ptr + %cond = icmp ne i16 %val, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the unsigned 16-bit range. +define double @f2(double %a, double %b, i16 *%ptr) { +; CHECK: f2: +; CHECK: clhhsi 0(%r2), 65535 +; CHECK-NEXT: j{{g?}}lh +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i16 *%ptr + %cond = icmp ne i16 %val, 65535 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the low end of the signed 16-bit range. +define double @f3(double %a, double %b, i16 *%ptr) { +; CHECK: f3: +; CHECK: clhhsi 0(%r2), 32768 +; CHECK-NEXT: j{{g?}}lh +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i16 *%ptr + %cond = icmp ne i16 %val, -32768 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the signed 16-bit range. +define double @f4(double %a, double %b, i16 *%ptr) { +; CHECK: f4: +; CHECK: clhhsi 0(%r2), 32767 +; CHECK-NEXT: j{{g?}}lh +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i16 *%ptr + %cond = icmp ne i16 %val, 32767 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-26.ll b/test/CodeGen/SystemZ/int-cmp-26.ll new file mode 100644 index 0000000..31330b2 --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-26.ll @@ -0,0 +1,133 @@ +; Test 32-bit equality comparisons that are really between a memory halfword +; and a constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the low end of the 16-bit unsigned range, with zero extension. 
+define double @f1(double %a, double %b, i16 *%ptr) { +; CHECK: f1: +; CHECK: clhhsi 0(%r2), 0 +; CHECK-NEXT: j{{g?}}e +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = zext i16 %val to i32 + %cond = icmp eq i32 %ext, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the 16-bit unsigned range, with zero extension. +define double @f2(double %a, double %b, i16 *%ptr) { +; CHECK: f2: +; CHECK: clhhsi 0(%r2), 65535 +; CHECK-NEXT: j{{g?}}e +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = zext i16 %val to i32 + %cond = icmp eq i32 %ext, 65535 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, with zero extension. The condition is always false. +define double @f3(double %a, double %b, i16 *%ptr) { +; CHECK: f3: +; CHECK-NOT: clhhsi +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = zext i16 %val to i32 + %cond = icmp eq i32 %ext, 65536 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check comparisons with -1, with zero extension. +; This condition is also always false. +define double @f4(double %a, double %b, i16 *%ptr) { +; CHECK: f4: +; CHECK-NOT: clhhsi +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = zext i16 %val to i32 + %cond = icmp eq i32 %ext, -1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check comparisons with 0, using sign extension. +define double @f5(double %a, double %b, i16 *%ptr) { +; CHECK: f5: +; CHECK: clhhsi 0(%r2), 0 +; CHECK-NEXT: j{{g?}}e +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i32 + %cond = icmp eq i32 %ext, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the signed 16-bit range, using sign extension. 
+define double @f6(double %a, double %b, i16 *%ptr) { +; CHECK: f6: +; CHECK: clhhsi 0(%r2), 32767 +; CHECK-NEXT: j{{g?}}e +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i32 + %cond = icmp eq i32 %ext, 32767 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, using sign extension. +; The condition is always false. +define double @f7(double %a, double %b, i16 *%ptr) { +; CHECK: f7: +; CHECK-NOT: clhhsi +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i32 + %cond = icmp eq i32 %ext, 32768 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check comparisons with -1, using sign extension. +define double @f8(double %a, double %b, i16 *%ptr) { +; CHECK: f8: +; CHECK: clhhsi 0(%r2), 65535 +; CHECK-NEXT: j{{g?}}e +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i32 + %cond = icmp eq i32 %ext, -1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the low end of the signed 16-bit range, using sign extension. +define double @f9(double %a, double %b, i16 *%ptr) { +; CHECK: f9: +; CHECK: clhhsi 0(%r2), 32768 +; CHECK-NEXT: j{{g?}}e +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i32 + %cond = icmp eq i32 %ext, -32768 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value down, using sign extension. +; The condition is always false. 
+define double @f10(double %a, double %b, i16 *%ptr) { +; CHECK: f10: +; CHECK-NOT: clhhsi +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i32 + %cond = icmp eq i32 %ext, -32769 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-27.ll b/test/CodeGen/SystemZ/int-cmp-27.ll new file mode 100644 index 0000000..7cbea3d --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-27.ll @@ -0,0 +1,133 @@ +; Test 32-bit inequality comparisons that are really between a memory halfword +; and a constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the low end of the 16-bit unsigned range, with zero extension. +define double @f1(double %a, double %b, i16 *%ptr) { +; CHECK: f1: +; CHECK: clhhsi 0(%r2), 0 +; CHECK-NEXT: j{{g?}}lh +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = zext i16 %val to i32 + %cond = icmp ne i32 %ext, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the 16-bit unsigned range, with zero extension. +define double @f2(double %a, double %b, i16 *%ptr) { +; CHECK: f2: +; CHECK: clhhsi 0(%r2), 65535 +; CHECK-NEXT: j{{g?}}lh +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = zext i16 %val to i32 + %cond = icmp ne i32 %ext, 65535 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, with zero extension. The condition is always true. +define double @f3(double %a, double %b, i16 *%ptr) { +; CHECK: f3: +; CHECK-NOT: clhhsi +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = zext i16 %val to i32 + %cond = icmp ne i32 %ext, 65536 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check comparisons with -1, with zero extension. +; This condition is also always true. 
+define double @f4(double %a, double %b, i16 *%ptr) { +; CHECK: f4: +; CHECK-NOT: clhhsi +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = zext i16 %val to i32 + %cond = icmp ne i32 %ext, -1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check comparisons with 0, using sign extension. +define double @f5(double %a, double %b, i16 *%ptr) { +; CHECK: f5: +; CHECK: clhhsi 0(%r2), 0 +; CHECK-NEXT: j{{g?}}lh +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i32 + %cond = icmp ne i32 %ext, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the signed 16-bit range, using sign extension. +define double @f6(double %a, double %b, i16 *%ptr) { +; CHECK: f6: +; CHECK: clhhsi 0(%r2), 32767 +; CHECK-NEXT: j{{g?}}lh +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i32 + %cond = icmp ne i32 %ext, 32767 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, using sign extension. +; The condition is always true. +define double @f7(double %a, double %b, i16 *%ptr) { +; CHECK: f7: +; CHECK-NOT: clhhsi +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i32 + %cond = icmp ne i32 %ext, 32768 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check comparisons with -1, using sign extension. +define double @f8(double %a, double %b, i16 *%ptr) { +; CHECK: f8: +; CHECK: clhhsi 0(%r2), 65535 +; CHECK-NEXT: j{{g?}}lh +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i32 + %cond = icmp ne i32 %ext, -1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the low end of the signed 16-bit range, using sign extension. 
+define double @f9(double %a, double %b, i16 *%ptr) { +; CHECK: f9: +; CHECK: clhhsi 0(%r2), 32768 +; CHECK-NEXT: j{{g?}}lh +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i32 + %cond = icmp ne i32 %ext, -32768 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value down, using sign extension. +; The condition is always true. +define double @f10(double %a, double %b, i16 *%ptr) { +; CHECK: f10: +; CHECK-NOT: clhhsi +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i32 + %cond = icmp ne i32 %ext, -32769 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-28.ll b/test/CodeGen/SystemZ/int-cmp-28.ll new file mode 100644 index 0000000..629eb4f --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-28.ll @@ -0,0 +1,133 @@ +; Test 64-bit equality comparisons that are really between a memory halfword +; and a constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the low end of the 16-bit unsigned range, with zero extension. +define double @f1(double %a, double %b, i16 *%ptr) { +; CHECK: f1: +; CHECK: clhhsi 0(%r2), 0 +; CHECK-NEXT: j{{g?}}e +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = zext i16 %val to i64 + %cond = icmp eq i64 %ext, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the 16-bit unsigned range, with zero extension. +define double @f2(double %a, double %b, i16 *%ptr) { +; CHECK: f2: +; CHECK: clhhsi 0(%r2), 65535 +; CHECK-NEXT: j{{g?}}e +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = zext i16 %val to i64 + %cond = icmp eq i64 %ext, 65535 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, with zero extension. The condition is always false. 
+define double @f3(double %a, double %b, i16 *%ptr) { +; CHECK: f3: +; CHECK-NOT: clhhsi +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = zext i16 %val to i64 + %cond = icmp eq i64 %ext, 65536 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check comparisons with -1, with zero extension. +; This condition is also always false. +define double @f4(double %a, double %b, i16 *%ptr) { +; CHECK: f4: +; CHECK-NOT: clhhsi +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = zext i16 %val to i64 + %cond = icmp eq i64 %ext, -1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check comparisons with 0, using sign extension. +define double @f5(double %a, double %b, i16 *%ptr) { +; CHECK: f5: +; CHECK: clhhsi 0(%r2), 0 +; CHECK-NEXT: j{{g?}}e +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i64 + %cond = icmp eq i64 %ext, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the signed 16-bit range, using sign extension. +define double @f6(double %a, double %b, i16 *%ptr) { +; CHECK: f6: +; CHECK: clhhsi 0(%r2), 32767 +; CHECK-NEXT: j{{g?}}e +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i64 + %cond = icmp eq i64 %ext, 32767 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, using sign extension. +; The condition is always false. +define double @f7(double %a, double %b, i16 *%ptr) { +; CHECK: f7: +; CHECK-NOT: clhhsi +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i64 + %cond = icmp eq i64 %ext, 32768 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check comparisons with -1, using sign extension. 
+define double @f8(double %a, double %b, i16 *%ptr) { +; CHECK: f8: +; CHECK: clhhsi 0(%r2), 65535 +; CHECK-NEXT: j{{g?}}e +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i64 + %cond = icmp eq i64 %ext, -1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the low end of the signed 16-bit range, using sign extension. +define double @f9(double %a, double %b, i16 *%ptr) { +; CHECK: f9: +; CHECK: clhhsi 0(%r2), 32768 +; CHECK-NEXT: j{{g?}}e +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i64 + %cond = icmp eq i64 %ext, -32768 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value down, using sign extension. +; The condition is always false. +define double @f10(double %a, double %b, i16 *%ptr) { +; CHECK: f10: +; CHECK-NOT: clhhsi +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i64 + %cond = icmp eq i64 %ext, -32769 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-29.ll b/test/CodeGen/SystemZ/int-cmp-29.ll new file mode 100644 index 0000000..de41dd7 --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-29.ll @@ -0,0 +1,133 @@ +; Test 64-bit inequality comparisons that are really between a memory halfword +; and a constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the low end of the 16-bit unsigned range, with zero extension. +define double @f1(double %a, double %b, i16 *%ptr) { +; CHECK: f1: +; CHECK: clhhsi 0(%r2), 0 +; CHECK-NEXT: j{{g?}}lh +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = zext i16 %val to i64 + %cond = icmp ne i64 %ext, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the 16-bit unsigned range, with zero extension. 
+define double @f2(double %a, double %b, i16 *%ptr) { +; CHECK: f2: +; CHECK: clhhsi 0(%r2), 65535 +; CHECK-NEXT: j{{g?}}lh +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = zext i16 %val to i64 + %cond = icmp ne i64 %ext, 65535 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, with zero extension. The condition is always true. +define double @f3(double %a, double %b, i16 *%ptr) { +; CHECK: f3: +; CHECK-NOT: clhhsi +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = zext i16 %val to i64 + %cond = icmp ne i64 %ext, 65536 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check comparisons with -1, with zero extension. +; This condition is also always true. +define double @f4(double %a, double %b, i16 *%ptr) { +; CHECK: f4: +; CHECK-NOT: clhhsi +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = zext i16 %val to i64 + %cond = icmp ne i64 %ext, -1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check comparisons with 0, using sign extension. +define double @f5(double %a, double %b, i16 *%ptr) { +; CHECK: f5: +; CHECK: clhhsi 0(%r2), 0 +; CHECK-NEXT: j{{g?}}lh +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i64 + %cond = icmp ne i64 %ext, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the signed 16-bit range, using sign extension. +define double @f6(double %a, double %b, i16 *%ptr) { +; CHECK: f6: +; CHECK: clhhsi 0(%r2), 32767 +; CHECK-NEXT: j{{g?}}lh +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i64 + %cond = icmp ne i64 %ext, 32767 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, using sign extension. +; The condition is always true. 
+define double @f7(double %a, double %b, i16 *%ptr) { +; CHECK: f7: +; CHECK-NOT: clhhsi +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i64 + %cond = icmp ne i64 %ext, 32768 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check comparisons with -1, using sign extension. +define double @f8(double %a, double %b, i16 *%ptr) { +; CHECK: f8: +; CHECK: clhhsi 0(%r2), 65535 +; CHECK-NEXT: j{{g?}}lh +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i64 + %cond = icmp ne i64 %ext, -1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the low end of the signed 16-bit range, using sign extension. +define double @f9(double %a, double %b, i16 *%ptr) { +; CHECK: f9: +; CHECK: clhhsi 0(%r2), 32768 +; CHECK-NEXT: j{{g?}}lh +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i64 + %cond = icmp ne i64 %ext, -32768 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value down, using sign extension. +; The condition is always true. +define double @f10(double %a, double %b, i16 *%ptr) { +; CHECK: f10: +; CHECK-NOT: clhhsi +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i64 + %cond = icmp ne i64 %ext, -32769 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-30.ll b/test/CodeGen/SystemZ/int-cmp-30.ll new file mode 100644 index 0000000..713ad8e --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-30.ll @@ -0,0 +1,225 @@ +; Test 32-bit ordered comparisons that are really between a memory halfword +; and a constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check unsigned comparison near the low end of the CLHHSI range, using zero +; extension. 
+define double @f1(double %a, double %b, i16 *%ptr) { +; CHECK: f1: +; CHECK: clhhsi 0(%r2), 1 +; CHECK-NEXT: j{{g?}}h +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = zext i16 %val to i32 + %cond = icmp ugt i32 %ext, 1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check unsigned comparison near the low end of the CLHHSI range, using sign +; extension. +define double @f2(double %a, double %b, i16 *%ptr) { +; CHECK: f2: +; CHECK: clhhsi 0(%r2), 1 +; CHECK-NEXT: j{{g?}}h +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i32 + %cond = icmp ugt i32 %ext, 1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check unsigned comparison near the high end of the CLHHSI range, using zero +; extension. +define double @f3(double %a, double %b, i16 *%ptr) { +; CHECK: f3: +; CHECK: clhhsi 0(%r2), 65534 +; CHECK-NEXT: j{{g?}}l +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = zext i16 %val to i32 + %cond = icmp ult i32 %ext, 65534 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check unsigned comparison near the high end of the CLHHSI range, using sign +; extension. +define double @f4(double %a, double %b, i16 *%ptr) { +; CHECK: f4: +; CHECK: clhhsi 0(%r2), 65534 +; CHECK-NEXT: j{{g?}}l +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i32 + %cond = icmp ult i32 %ext, -2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check unsigned comparison above the high end of the CLHHSI range, using zero +; extension. The condition is always true. 
+define double @f5(double %a, double %b, i16 *%ptr) { +; CHECK: f5: +; CHECK-NOT: clhhsi +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = zext i16 %val to i32 + %cond = icmp ult i32 %ext, 65536 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; When using unsigned comparison with sign extension, equality with values +; in the range [32768, MAX-32768] is impossible, and ordered comparisons with +; those values are effectively sign tests. Since such comparisons are +; unlikely to occur in practice, we don't bother optimizing the second case, +; and simply ignore CLHHSI for this range. First check the low end of the +; range. +define double @f6(double %a, double %b, i16 *%ptr) { +; CHECK: f6: +; CHECK-NOT: clhhsi +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i32 + %cond = icmp ult i32 %ext, 32768 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; ...and then the high end. +define double @f7(double %a, double %b, i16 *%ptr) { +; CHECK: f7: +; CHECK-NOT: clhhsi +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i32 + %cond = icmp ult i32 %ext, -32769 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check signed comparison near the low end of the CLHHSI range, using zero +; extension. This is equivalent to unsigned comparison. +define double @f8(double %a, double %b, i16 *%ptr) { +; CHECK: f8: +; CHECK: clhhsi 0(%r2), 1 +; CHECK-NEXT: j{{g?}}h +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = zext i16 %val to i32 + %cond = icmp sgt i32 %ext, 1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check signed comparison near the low end of the CLHHSI range, using sign +; extension. This should use CHHSI instead. 
+define double @f9(double %a, double %b, i16 *%ptr) { +; CHECK: f9: +; CHECK: chhsi 0(%r2), 1 +; CHECK-NEXT: j{{g?}}h +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i32 + %cond = icmp sgt i32 %ext, 1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check signed comparison near the high end of the CLHHSI range, using zero +; extension. This is equivalent to unsigned comparison. +define double @f10(double %a, double %b, i16 *%ptr) { +; CHECK: f10: +; CHECK: clhhsi 0(%r2), 65534 +; CHECK-NEXT: j{{g?}}l +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = zext i16 %val to i32 + %cond = icmp slt i32 %ext, 65534 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check signed comparison near the high end of the CLHHSI range, using sign +; extension. This should use CHHSI instead. +define double @f11(double %a, double %b, i16 *%ptr) { +; CHECK: f11: +; CHECK: chhsi 0(%r2), -2 +; CHECK-NEXT: j{{g?}}l +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i32 + %cond = icmp slt i32 %ext, -2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check signed comparison above the high end of the CLHHSI range, using zero +; extension. The condition is always true. +define double @f12(double %a, double %b, i16 *%ptr) { +; CHECK: f12: +; CHECK-NOT: clhhsi +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = zext i16 %val to i32 + %cond = icmp slt i32 %ext, 65536 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check signed comparison near the high end of the CHHSI range, using sign +; extension. 
+define double @f13(double %a, double %b, i16 *%ptr) { +; CHECK: f13: +; CHECK: chhsi 0(%r2), 32766 +; CHECK-NEXT: j{{g?}}l +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i32 + %cond = icmp slt i32 %ext, 32766 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check signed comparison above the high end of the CHHSI range, using sign +; extension. This condition is always true. +define double @f14(double %a, double %b, i16 *%ptr) { +; CHECK: f14: +; CHECK-NOT: chhsi +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i32 + %cond = icmp slt i32 %ext, 32768 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check signed comparison near the low end of the CHHSI range, using sign +; extension. +define double @f15(double %a, double %b, i16 *%ptr) { +; CHECK: f15: +; CHECK: chhsi 0(%r2), -32767 +; CHECK-NEXT: j{{g?}}h +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i32 + %cond = icmp sgt i32 %ext, -32767 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check signed comparison below the low end of the CHHSI range, using sign +; extension. This condition is always true. +define double @f16(double %a, double %b, i16 *%ptr) { +; CHECK: f16: +; CHECK-NOT: chhsi +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i32 + %cond = icmp sgt i32 %ext, -32769 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-31.ll b/test/CodeGen/SystemZ/int-cmp-31.ll new file mode 100644 index 0000000..cabe9b8 --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-31.ll @@ -0,0 +1,225 @@ +; Test 64-bit ordered comparisons that are really between a memory halfword +; and a constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check unsigned comparison near the low end of the CLHHSI range, using zero +; extension. 
+define double @f1(double %a, double %b, i16 *%ptr) { +; CHECK: f1: +; CHECK: clhhsi 0(%r2), 1 +; CHECK-NEXT: j{{g?}}h +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = zext i16 %val to i64 + %cond = icmp ugt i64 %ext, 1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check unsigned comparison near the low end of the CLHHSI range, using sign +; extension. +define double @f2(double %a, double %b, i16 *%ptr) { +; CHECK: f2: +; CHECK: clhhsi 0(%r2), 1 +; CHECK-NEXT: j{{g?}}h +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i64 + %cond = icmp ugt i64 %ext, 1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check unsigned comparison near the high end of the CLHHSI range, using zero +; extension. +define double @f3(double %a, double %b, i16 *%ptr) { +; CHECK: f3: +; CHECK: clhhsi 0(%r2), 65534 +; CHECK-NEXT: j{{g?}}l +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = zext i16 %val to i64 + %cond = icmp ult i64 %ext, 65534 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check unsigned comparison near the high end of the CLHHSI range, using sign +; extension. +define double @f4(double %a, double %b, i16 *%ptr) { +; CHECK: f4: +; CHECK: clhhsi 0(%r2), 65534 +; CHECK-NEXT: j{{g?}}l +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i64 + %cond = icmp ult i64 %ext, -2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check unsigned comparison above the high end of the CLHHSI range, using zero +; extension. The condition is always true. 
+define double @f5(double %a, double %b, i16 *%ptr) { +; CHECK: f5: +; CHECK-NOT: clhhsi +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = zext i16 %val to i64 + %cond = icmp ult i64 %ext, 65536 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; When using unsigned comparison with sign extension, equality with values +; in the range [32768, MAX-32768] is impossible, and ordered comparisons with +; those values are effectively sign tests. Since such comparisons are +; unlikely to occur in practice, we don't bother optimizing the second case, +; and simply ignore CLHHSI for this range. First check the low end of the +; range. +define double @f6(double %a, double %b, i16 *%ptr) { +; CHECK: f6: +; CHECK-NOT: clhhsi +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i64 + %cond = icmp ult i64 %ext, 32768 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; ...and then the high end. +define double @f7(double %a, double %b, i16 *%ptr) { +; CHECK: f7: +; CHECK-NOT: clhhsi +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i64 + %cond = icmp ult i64 %ext, -32769 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check signed comparison near the low end of the CLHHSI range, using zero +; extension. This is equivalent to unsigned comparison. +define double @f8(double %a, double %b, i16 *%ptr) { +; CHECK: f8: +; CHECK: clhhsi 0(%r2), 1 +; CHECK-NEXT: j{{g?}}h +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = zext i16 %val to i64 + %cond = icmp sgt i64 %ext, 1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check signed comparison near the low end of the CLHHSI range, using sign +; extension. This should use CHHSI instead. 
+define double @f9(double %a, double %b, i16 *%ptr) { +; CHECK: f9: +; CHECK: chhsi 0(%r2), 1 +; CHECK-NEXT: j{{g?}}h +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i64 + %cond = icmp sgt i64 %ext, 1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check signed comparison near the high end of the CLHHSI range, using zero +; extension. This is equivalent to unsigned comparison. +define double @f10(double %a, double %b, i16 *%ptr) { +; CHECK: f10: +; CHECK: clhhsi 0(%r2), 65534 +; CHECK-NEXT: j{{g?}}l +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = zext i16 %val to i64 + %cond = icmp slt i64 %ext, 65534 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check signed comparison near the high end of the CLHHSI range, using sign +; extension. This should use CHHSI instead. +define double @f11(double %a, double %b, i16 *%ptr) { +; CHECK: f11: +; CHECK: chhsi 0(%r2), -2 +; CHECK-NEXT: j{{g?}}l +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i64 + %cond = icmp slt i64 %ext, -2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check signed comparison above the high end of the CLHHSI range, using zero +; extension. The condition is always true. +define double @f12(double %a, double %b, i16 *%ptr) { +; CHECK: f12: +; CHECK-NOT: clhhsi +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = zext i16 %val to i64 + %cond = icmp slt i64 %ext, 65536 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check signed comparison near the high end of the CHHSI range, using sign +; extension. 
+define double @f13(double %a, double %b, i16 *%ptr) { +; CHECK: f13: +; CHECK: chhsi 0(%r2), 32766 +; CHECK-NEXT: j{{g?}}l +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i64 + %cond = icmp slt i64 %ext, 32766 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check signed comparison above the high end of the CHHSI range, using sign +; extension. This condition is always true. +define double @f14(double %a, double %b, i16 *%ptr) { +; CHECK: f14: +; CHECK-NOT: chhsi +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i64 + %cond = icmp slt i64 %ext, 32768 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check signed comparison near the low end of the CHHSI range, using sign +; extension. +define double @f15(double %a, double %b, i16 *%ptr) { +; CHECK: f15: +; CHECK: chhsi 0(%r2), -32767 +; CHECK-NEXT: j{{g?}}h +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i64 + %cond = icmp sgt i64 %ext, -32767 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check signed comparison below the low end of the CHHSI range, using sign +; extension. This condition is always true. +define double @f16(double %a, double %b, i16 *%ptr) { +; CHECK: f16: +; CHECK-NOT: chhsi +; CHECK: br %r14 + %val = load i16 *%ptr + %ext = sext i16 %val to i64 + %cond = icmp sgt i64 %ext, -32769 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-32.ll b/test/CodeGen/SystemZ/int-cmp-32.ll new file mode 100644 index 0000000..4bdeebb --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-32.ll @@ -0,0 +1,237 @@ +; Test 32-bit signed comparisons between memory and a constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check ordered comparisons with 0. 
+define double @f1(double %a, double %b, i32 *%ptr) { +; CHECK: f1: +; CHECK: chsi 0(%r2), 0 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i32 *%ptr + %cond = icmp slt i32 %val, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check ordered comparisons with 1. +define double @f2(double %a, double %b, i32 *%ptr) { +; CHECK: f2: +; CHECK: chsi 0(%r2), 1 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i32 *%ptr + %cond = icmp slt i32 %val, 1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check ordered comparisons with the high end of the signed 16-bit range. +define double @f3(double %a, double %b, i32 *%ptr) { +; CHECK: f3: +; CHECK: chsi 0(%r2), 32767 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i32 *%ptr + %cond = icmp slt i32 %val, 32767 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, which can't use CHSI. +define double @f4(double %a, double %b, i32 *%ptr) { +; CHECK: f4: +; CHECK-NOT: chsi +; CHECK: br %r14 + %val = load i32 *%ptr + %cond = icmp slt i32 %val, 32768 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check ordered comparisons with -1. +define double @f5(double %a, double %b, i32 *%ptr) { +; CHECK: f5: +; CHECK: chsi 0(%r2), -1 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i32 *%ptr + %cond = icmp slt i32 %val, -1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check ordered comparisons with the low end of the 16-bit signed range. +define double @f6(double %a, double %b, i32 *%ptr) { +; CHECK: f6: +; CHECK: chsi 0(%r2), -32768 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i32 *%ptr + %cond = icmp slt i32 %val, -32768 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value down, which can't use CHSI. 
+define double @f7(double %a, double %b, i32 *%ptr) { +; CHECK: f7: +; CHECK-NOT: chsi +; CHECK: br %r14 + %val = load i32 *%ptr + %cond = icmp slt i32 %val, -32769 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check equality comparisons with 0. +define double @f8(double %a, double %b, i32 *%ptr) { +; CHECK: f8: +; CHECK: chsi 0(%r2), 0 +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i32 *%ptr + %cond = icmp eq i32 %val, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check equality comparisons with 1. +define double @f9(double %a, double %b, i32 *%ptr) { +; CHECK: f9: +; CHECK: chsi 0(%r2), 1 +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i32 *%ptr + %cond = icmp eq i32 %val, 1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check equality comparisons with the high end of the signed 16-bit range. +define double @f10(double %a, double %b, i32 *%ptr) { +; CHECK: f10: +; CHECK: chsi 0(%r2), 32767 +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i32 *%ptr + %cond = icmp eq i32 %val, 32767 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, which can't use CHSI. +define double @f11(double %a, double %b, i32 *%ptr) { +; CHECK: f11: +; CHECK-NOT: chsi +; CHECK: br %r14 + %val = load i32 *%ptr + %cond = icmp eq i32 %val, 32768 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check equality comparisons with -1. +define double @f12(double %a, double %b, i32 *%ptr) { +; CHECK: f12: +; CHECK: chsi 0(%r2), -1 +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i32 *%ptr + %cond = icmp eq i32 %val, -1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check equality comparisons with the low end of the 16-bit signed range. 
+define double @f13(double %a, double %b, i32 *%ptr) { +; CHECK: f13: +; CHECK: chsi 0(%r2), -32768 +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i32 *%ptr + %cond = icmp eq i32 %val, -32768 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value down, which should be treated as a positive value. +define double @f14(double %a, double %b, i32 *%ptr) { +; CHECK: f14: +; CHECK-NOT: chsi +; CHECK: br %r14 + %val = load i32 *%ptr + %cond = icmp eq i32 %val, -32769 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the CHSI range. +define double @f15(double %a, double %b, i32 %i1, i32 *%base) { +; CHECK: f15: +; CHECK: chsi 4092(%r3), 0 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 1023 + %val = load i32 *%ptr + %cond = icmp slt i32 %val, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next word up, which needs separate address logic, +define double @f16(double %a, double %b, i32 *%base) { +; CHECK: f16: +; CHECK: aghi %r2, 4096 +; CHECK: chsi 0(%r2), 0 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 1024 + %val = load i32 *%ptr + %cond = icmp slt i32 %val, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check negative offsets, which also need separate address logic. +define double @f17(double %a, double %b, i32 *%base) { +; CHECK: f17: +; CHECK: aghi %r2, -4 +; CHECK: chsi 0(%r2), 0 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 -1 + %val = load i32 *%ptr + %cond = icmp slt i32 %val, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check that CHSI does not allow indices. 
+define double @f18(double %a, double %b, i64 %base, i64 %index) { +; CHECK: f18: +; CHECK: agr {{%r2, %r3|%r3, %r2}} +; CHECK: chsi 0({{%r[23]}}), 0 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %add = add i64 %base, %index + %ptr = inttoptr i64 %add to i32 * + %val = load i32 *%ptr + %cond = icmp slt i32 %val, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-33.ll b/test/CodeGen/SystemZ/int-cmp-33.ll new file mode 100644 index 0000000..0144806 --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-33.ll @@ -0,0 +1,139 @@ +; Test 32-bit unsigned comparisons between memory and a constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check ordered comparisons with a constant near the low end of the unsigned +; 16-bit range. +define double @f1(double %a, double %b, i32 *%ptr) { +; CHECK: f1: +; CHECK: clfhsi 0(%r2), 1 +; CHECK-NEXT: j{{g?}}h +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i32 *%ptr + %cond = icmp ugt i32 %val, 1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check ordered comparisons with the high end of the unsigned 16-bit range. +define double @f2(double %a, double %b, i32 *%ptr) { +; CHECK: f2: +; CHECK: clfhsi 0(%r2), 65535 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i32 *%ptr + %cond = icmp ult i32 %val, 65535 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, which can't use CLFHSI. +define double @f3(double %a, double %b, i32 *%ptr) { +; CHECK: f3: +; CHECK-NOT: clfhsi +; CHECK: br %r14 + %val = load i32 *%ptr + %cond = icmp ult i32 %val, 65536 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check equality comparisons with 32768, the lowest value for which +; we prefer CLFHSI to CHSI. 
+define double @f4(double %a, double %b, i32 *%ptr) { +; CHECK: f4: +; CHECK: clfhsi 0(%r2), 32768 +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i32 *%ptr + %cond = icmp eq i32 %val, 32768 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check equality comparisons with the high end of the unsigned 16-bit range. +define double @f5(double %a, double %b, i32 *%ptr) { +; CHECK: f5: +; CHECK: clfhsi 0(%r2), 65535 +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i32 *%ptr + %cond = icmp eq i32 %val, 65535 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, which can't use CLFHSI. +define double @f6(double %a, double %b, i32 *%ptr) { +; CHECK: f6: +; CHECK-NOT: clfhsi +; CHECK: br %r14 + %val = load i32 *%ptr + %cond = icmp eq i32 %val, 65536 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the CLFHSI range. +define double @f7(double %a, double %b, i32 %i1, i32 *%base) { +; CHECK: f7: +; CHECK: clfhsi 4092(%r3), 1 +; CHECK-NEXT: j{{g?}}h +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 1023 + %val = load i32 *%ptr + %cond = icmp ugt i32 %val, 1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next word up, which needs separate address logic, +define double @f8(double %a, double %b, i32 *%base) { +; CHECK: f8: +; CHECK: aghi %r2, 4096 +; CHECK: clfhsi 0(%r2), 1 +; CHECK-NEXT: j{{g?}}h +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 1024 + %val = load i32 *%ptr + %cond = icmp ugt i32 %val, 1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check negative offsets, which also need separate address logic. 
+define double @f9(double %a, double %b, i32 *%base) { +; CHECK: f9: +; CHECK: aghi %r2, -4 +; CHECK: clfhsi 0(%r2), 1 +; CHECK-NEXT: j{{g?}}h +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 -1 + %val = load i32 *%ptr + %cond = icmp ugt i32 %val, 1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check that CLFHSI does not allow indices. +define double @f10(double %a, double %b, i64 %base, i64 %index) { +; CHECK: f10: +; CHECK: agr {{%r2, %r3|%r3, %r2}} +; CHECK: clfhsi 0({{%r[23]}}), 1 +; CHECK-NEXT: j{{g?}}h +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %add = add i64 %base, %index + %ptr = inttoptr i64 %add to i32 * + %val = load i32 *%ptr + %cond = icmp ugt i32 %val, 1 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-34.ll b/test/CodeGen/SystemZ/int-cmp-34.ll new file mode 100644 index 0000000..b10bd4e --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-34.ll @@ -0,0 +1,237 @@ +; Test 64-bit signed comparisons between memory and a constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check ordered comparisons with 0. +define double @f1(double %a, double %b, i64 *%ptr) { +; CHECK: f1: +; CHECK: cghsi 0(%r2), 0 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i64 *%ptr + %cond = icmp slt i64 %val, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check ordered comparisons with 1. +define double @f2(double %a, double %b, i64 *%ptr) { +; CHECK: f2: +; CHECK: cghsi 0(%r2), 1 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i64 *%ptr + %cond = icmp slt i64 %val, 1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check ordered comparisons with the high end of the signed 16-bit range. 
+define double @f3(double %a, double %b, i64 *%ptr) { +; CHECK: f3: +; CHECK: cghsi 0(%r2), 32767 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i64 *%ptr + %cond = icmp slt i64 %val, 32767 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, which can't use CGHSI. +define double @f4(double %a, double %b, i64 *%ptr) { +; CHECK: f4: +; CHECK-NOT: cghsi +; CHECK: br %r14 + %val = load i64 *%ptr + %cond = icmp slt i64 %val, 32768 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check ordered comparisons with -1. +define double @f5(double %a, double %b, i64 *%ptr) { +; CHECK: f5: +; CHECK: cghsi 0(%r2), -1 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i64 *%ptr + %cond = icmp slt i64 %val, -1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check ordered comparisons with the low end of the 16-bit signed range. +define double @f6(double %a, double %b, i64 *%ptr) { +; CHECK: f6: +; CHECK: cghsi 0(%r2), -32768 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i64 *%ptr + %cond = icmp slt i64 %val, -32768 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value down, which should be treated as a positive value. +define double @f7(double %a, double %b, i64 *%ptr) { +; CHECK: f7: +; CHECK-NOT: cghsi +; CHECK: br %r14 + %val = load i64 *%ptr + %cond = icmp slt i64 %val, -32769 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check equality comparisons with 0. +define double @f8(double %a, double %b, i64 *%ptr) { +; CHECK: f8: +; CHECK: cghsi 0(%r2), 0 +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i64 *%ptr + %cond = icmp eq i64 %val, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check equality comparisons with 1. 
+define double @f9(double %a, double %b, i64 *%ptr) { +; CHECK: f9: +; CHECK: cghsi 0(%r2), 1 +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i64 *%ptr + %cond = icmp eq i64 %val, 1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check equality comparisons with the high end of the signed 16-bit range. +define double @f10(double %a, double %b, i64 *%ptr) { +; CHECK: f10: +; CHECK: cghsi 0(%r2), 32767 +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i64 *%ptr + %cond = icmp eq i64 %val, 32767 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, which can't use CGHSI. +define double @f11(double %a, double %b, i64 *%ptr) { +; CHECK: f11: +; CHECK-NOT: cghsi +; CHECK: br %r14 + %val = load i64 *%ptr + %cond = icmp eq i64 %val, 32768 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check equality comparisons with -1. +define double @f12(double %a, double %b, i64 *%ptr) { +; CHECK: f12: +; CHECK: cghsi 0(%r2), -1 +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i64 *%ptr + %cond = icmp eq i64 %val, -1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check equality comparisons with the low end of the 16-bit signed range. +define double @f13(double %a, double %b, i64 *%ptr) { +; CHECK: f13: +; CHECK: cghsi 0(%r2), -32768 +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i64 *%ptr + %cond = icmp eq i64 %val, -32768 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value down, which should be treated as a positive value. +define double @f14(double %a, double %b, i64 *%ptr) { +; CHECK: f14: +; CHECK-NOT: cghsi +; CHECK: br %r14 + %val = load i64 *%ptr + %cond = icmp eq i64 %val, -32769 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the CGHSI range. 
+define double @f15(double %a, double %b, i64 %i1, i64 *%base) { +; CHECK: f15: +; CHECK: cghsi 4088(%r3), 0 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 511 + %val = load i64 *%ptr + %cond = icmp slt i64 %val, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next doubleword up, which needs separate address logic. +define double @f16(double %a, double %b, i64 *%base) { +; CHECK: f16: +; CHECK: aghi %r2, 4096 +; CHECK: cghsi 0(%r2), 0 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 512 + %val = load i64 *%ptr + %cond = icmp slt i64 %val, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check negative offsets, which also need separate address logic. +define double @f17(double %a, double %b, i64 *%base) { +; CHECK: f17: +; CHECK: aghi %r2, -8 +; CHECK: cghsi 0(%r2), 0 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 -1 + %val = load i64 *%ptr + %cond = icmp slt i64 %val, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check that CGHSI does not allow indices. +define double @f18(double %a, double %b, i64 %base, i64 %index) { +; CHECK: f18: +; CHECK: agr {{%r2, %r3|%r3, %r2}} +; CHECK: cghsi 0({{%r[23]}}), 0 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %add = add i64 %base, %index + %ptr = inttoptr i64 %add to i64 * + %val = load i64 *%ptr + %cond = icmp slt i64 %val, 0 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-35.ll b/test/CodeGen/SystemZ/int-cmp-35.ll new file mode 100644 index 0000000..9934906 --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-35.ll @@ -0,0 +1,139 @@ +; Test 64-bit unsigned comparisons between memory and a constant. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check ordered comparisons with a constant near the low end of the unsigned +; 16-bit range. +define double @f1(double %a, double %b, i64 *%ptr) { +; CHECK: f1: +; CHECK: clghsi 0(%r2), 2 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i64 *%ptr + %cond = icmp ult i64 %val, 2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check ordered comparisons with the high end of the unsigned 16-bit range. +define double @f2(double %a, double %b, i64 *%ptr) { +; CHECK: f2: +; CHECK: clghsi 0(%r2), 65535 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i64 *%ptr + %cond = icmp ult i64 %val, 65535 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, which can't use CLGHSI. +define double @f3(double %a, double %b, i64 *%ptr) { +; CHECK: f3: +; CHECK-NOT: clghsi +; CHECK: br %r14 + %val = load i64 *%ptr + %cond = icmp ult i64 %val, 65536 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check equality comparisons with 32768, the lowest value for which +; we prefer CLGHSI to CGHSI. +define double @f4(double %a, double %b, i64 *%ptr) { +; CHECK: f4: +; CHECK: clghsi 0(%r2), 32768 +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i64 *%ptr + %cond = icmp eq i64 %val, 32768 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check equality comparisons with the high end of the unsigned 16-bit range. +define double @f5(double %a, double %b, i64 *%ptr) { +; CHECK: f5: +; CHECK: clghsi 0(%r2), 65535 +; CHECK-NEXT: j{{g?}}e +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %val = load i64 *%ptr + %cond = icmp eq i64 %val, 65535 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, which can't use CLGHSI. 
+define double @f6(double %a, double %b, i64 *%ptr) { +; CHECK: f6: +; CHECK-NOT: clghsi +; CHECK: br %r14 + %val = load i64 *%ptr + %cond = icmp eq i64 %val, 65536 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the CLGHSI range. +define double @f7(double %a, double %b, i64 %i1, i64 *%base) { +; CHECK: f7: +; CHECK: clghsi 4088(%r3), 2 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 511 + %val = load i64 *%ptr + %cond = icmp ult i64 %val, 2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next doubleword up, which needs separate address logic. +define double @f8(double %a, double %b, i64 *%base) { +; CHECK: f8: +; CHECK: aghi %r2, 4096 +; CHECK: clghsi 0(%r2), 2 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 512 + %val = load i64 *%ptr + %cond = icmp ult i64 %val, 2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check negative offsets, which also need separate address logic. +define double @f9(double %a, double %b, i64 *%base) { +; CHECK: f9: +; CHECK: aghi %r2, -8 +; CHECK: clghsi 0(%r2), 2 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 -1 + %val = load i64 *%ptr + %cond = icmp ult i64 %val, 2 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check that CLGHSI does not allow indices. 
+define double @f10(double %a, double %b, i64 %base, i64 %index) { +; CHECK: f10: +; CHECK: agr {{%r2, %r3|%r3, %r2}} +; CHECK: clghsi 0({{%r[23]}}), 2 +; CHECK-NEXT: j{{g?}}l +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %add = add i64 %base, %index + %ptr = inttoptr i64 %add to i64 * + %val = load i64 *%ptr + %cond = icmp ult i64 %val, 2 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-36.ll b/test/CodeGen/SystemZ/int-cmp-36.ll new file mode 100644 index 0000000..0813594 --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-36.ll @@ -0,0 +1,81 @@ +; Test 32-bit comparisons in which the second operand is sign-extended +; from a PC-relative i16. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +@g = global i16 1 + +; Check signed comparison. +define i32 @f1(i32 %src1) { +; CHECK: f1: +; CHECK: chrl %r2, g +; CHECK-NEXT: j{{g?}}l +; CHECK: br %r14 +entry: + %val = load i16 *@g + %src2 = sext i16 %val to i32 + %cond = icmp slt i32 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i32 %src1, %src1 + br label %exit +exit: + %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ] + ret i32 %res +} + +; Check unsigned comparison, which cannot use CHRL. +define i32 @f2(i32 %src1) { +; CHECK: f2: +; CHECK-NOT: chrl +; CHECK: br %r14 +entry: + %val = load i16 *@g + %src2 = sext i16 %val to i32 + %cond = icmp ult i32 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i32 %src1, %src1 + br label %exit +exit: + %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ] + ret i32 %res +} + +; Check equality. 
+define i32 @f3(i32 %src1) { +; CHECK: f3: +; CHECK: chrl %r2, g +; CHECK-NEXT: j{{g?}}e +; CHECK: br %r14 +entry: + %val = load i16 *@g + %src2 = sext i16 %val to i32 + %cond = icmp eq i32 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i32 %src1, %src1 + br label %exit +exit: + %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ] + ret i32 %res +} + +; Check inequality. +define i32 @f4(i32 %src1) { +; CHECK: f4: +; CHECK: chrl %r2, g +; CHECK-NEXT: j{{g?}}lh +; CHECK: br %r14 +entry: + %val = load i16 *@g + %src2 = sext i16 %val to i32 + %cond = icmp ne i32 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i32 %src1, %src1 + br label %exit +exit: + %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ] + ret i32 %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-37.ll b/test/CodeGen/SystemZ/int-cmp-37.ll new file mode 100644 index 0000000..aebd1f6 --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-37.ll @@ -0,0 +1,81 @@ +; Test 32-bit comparisons in which the second operand is zero-extended +; from a PC-relative i16. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +@g = global i16 1 + +; Check unsigned comparison. +define i32 @f1(i32 %src1) { +; CHECK: f1: +; CHECK: clhrl %r2, g +; CHECK-NEXT: j{{g?}}l +; CHECK: br %r14 +entry: + %val = load i16 *@g + %src2 = zext i16 %val to i32 + %cond = icmp ult i32 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i32 %src1, %src1 + br label %exit +exit: + %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ] + ret i32 %res +} + +; Check signed comparison. +define i32 @f2(i32 %src1) { +; CHECK: f2: +; CHECK-NOT: clhrl +; CHECK: br %r14 +entry: + %val = load i16 *@g + %src2 = zext i16 %val to i32 + %cond = icmp slt i32 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i32 %src1, %src1 + br label %exit +exit: + %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ] + ret i32 %res +} + +; Check equality. 
+define i32 @f3(i32 %src1) { +; CHECK: f3: +; CHECK: clhrl %r2, g +; CHECK-NEXT: j{{g?}}e +; CHECK: br %r14 +entry: + %val = load i16 *@g + %src2 = zext i16 %val to i32 + %cond = icmp eq i32 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i32 %src1, %src1 + br label %exit +exit: + %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ] + ret i32 %res +} + +; Check inequality. +define i32 @f4(i32 %src1) { +; CHECK: f4: +; CHECK: clhrl %r2, g +; CHECK-NEXT: j{{g?}}lh +; CHECK: br %r14 +entry: + %val = load i16 *@g + %src2 = zext i16 %val to i32 + %cond = icmp ne i32 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i32 %src1, %src1 + br label %exit +exit: + %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ] + ret i32 %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-38.ll b/test/CodeGen/SystemZ/int-cmp-38.ll new file mode 100644 index 0000000..3470730 --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-38.ll @@ -0,0 +1,78 @@ +; Test 32-bit comparisons in which the second operand is a PC-relative +; variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +@g = global i32 1 + +; Check signed comparisons. +define i32 @f1(i32 %src1) { +; CHECK: f1: +; CHECK: crl %r2, g +; CHECK-NEXT: j{{g?}}l +; CHECK: br %r14 +entry: + %src2 = load i32 *@g + %cond = icmp slt i32 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i32 %src1, %src1 + br label %exit +exit: + %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ] + ret i32 %res +} + +; Check unsigned comparisons. +define i32 @f2(i32 %src1) { +; CHECK: f2: +; CHECK: clrl %r2, g +; CHECK-NEXT: j{{g?}}l +; CHECK: br %r14 +entry: + %src2 = load i32 *@g + %cond = icmp ult i32 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i32 %src1, %src1 + br label %exit +exit: + %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ] + ret i32 %res +} + +; Check equality, which can use CRL or CLRL. 
+define i32 @f3(i32 %src1) { +; CHECK: f3: +; CHECK: c{{l?}}rl %r2, g +; CHECK-NEXT: j{{g?}}e +; CHECK: br %r14 +entry: + %src2 = load i32 *@g + %cond = icmp eq i32 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i32 %src1, %src1 + br label %exit +exit: + %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ] + ret i32 %res +} + +; ...likewise inequality. +define i32 @f4(i32 %src1) { +; CHECK: f4: +; CHECK: c{{l?}}rl %r2, g +; CHECK-NEXT: j{{g?}}lh +; CHECK: br %r14 +entry: + %src2 = load i32 *@g + %cond = icmp ne i32 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i32 %src1, %src1 + br label %exit +exit: + %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ] + ret i32 %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-39.ll b/test/CodeGen/SystemZ/int-cmp-39.ll new file mode 100644 index 0000000..1129dce --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-39.ll @@ -0,0 +1,81 @@ +; Test 64-bit comparisons in which the second operand is sign-extended +; from a PC-relative i16. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +@g = global i16 1 + +; Check signed comparison. +define i64 @f1(i64 %src1) { +; CHECK: f1: +; CHECK: cghrl %r2, g +; CHECK-NEXT: j{{g?}}l +; CHECK: br %r14 +entry: + %val = load i16 *@g + %src2 = sext i16 %val to i64 + %cond = icmp slt i64 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i64 %src1, %src1 + br label %exit +exit: + %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] + ret i64 %res +} + +; Check unsigned comparison, which cannot use CGHRL. +define i64 @f2(i64 %src1) { +; CHECK: f2: +; CHECK-NOT: cghrl +; CHECK: br %r14 +entry: + %val = load i16 *@g + %src2 = sext i16 %val to i64 + %cond = icmp ult i64 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i64 %src1, %src1 + br label %exit +exit: + %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] + ret i64 %res +} + +; Check equality. 
+define i64 @f3(i64 %src1) { +; CHECK: f3: +; CHECK: cghrl %r2, g +; CHECK-NEXT: j{{g?}}e +; CHECK: br %r14 +entry: + %val = load i16 *@g + %src2 = sext i16 %val to i64 + %cond = icmp eq i64 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i64 %src1, %src1 + br label %exit +exit: + %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] + ret i64 %res +} + +; Check inequality. +define i64 @f4(i64 %src1) { +; CHECK: f4: +; CHECK: cghrl %r2, g +; CHECK-NEXT: j{{g?}}lh +; CHECK: br %r14 +entry: + %val = load i16 *@g + %src2 = sext i16 %val to i64 + %cond = icmp ne i64 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i64 %src1, %src1 + br label %exit +exit: + %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-40.ll b/test/CodeGen/SystemZ/int-cmp-40.ll new file mode 100644 index 0000000..8d9fd9a --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-40.ll @@ -0,0 +1,81 @@ +; Test 64-bit comparisons in which the second operand is zero-extended +; from a PC-relative i16. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +@g = global i16 1 + +; Check unsigned comparison. +define i64 @f1(i64 %src1) { +; CHECK: f1: +; CHECK: clghrl %r2, g +; CHECK-NEXT: j{{g?}}l +; CHECK: br %r14 +entry: + %val = load i16 *@g + %src2 = zext i16 %val to i64 + %cond = icmp ult i64 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i64 %src1, %src1 + br label %exit +exit: + %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] + ret i64 %res +} + +; Check signed comparison. +define i64 @f2(i64 %src1) { +; CHECK: f2: +; CHECK-NOT: clghrl +; CHECK: br %r14 +entry: + %val = load i16 *@g + %src2 = zext i16 %val to i64 + %cond = icmp slt i64 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i64 %src1, %src1 + br label %exit +exit: + %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] + ret i64 %res +} + +; Check equality. 
+define i64 @f3(i64 %src1) { +; CHECK: f3: +; CHECK: clghrl %r2, g +; CHECK-NEXT: j{{g?}}e +; CHECK: br %r14 +entry: + %val = load i16 *@g + %src2 = zext i16 %val to i64 + %cond = icmp eq i64 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i64 %src1, %src1 + br label %exit +exit: + %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] + ret i64 %res +} + +; Check inequality. +define i64 @f4(i64 %src1) { +; CHECK: f4: +; CHECK: clghrl %r2, g +; CHECK-NEXT: j{{g?}}lh +; CHECK: br %r14 +entry: + %val = load i16 *@g + %src2 = zext i16 %val to i64 + %cond = icmp ne i64 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i64 %src1, %src1 + br label %exit +exit: + %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-41.ll b/test/CodeGen/SystemZ/int-cmp-41.ll new file mode 100644 index 0000000..0808bff --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-41.ll @@ -0,0 +1,81 @@ +; Test 64-bit comparisons in which the second operand is sign-extended +; from a PC-relative i32. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +@g = global i32 1 + +; Check signed comparison. +define i64 @f1(i64 %src1) { +; CHECK: f1: +; CHECK: cgfrl %r2, g +; CHECK-NEXT: j{{g?}}l +; CHECK: br %r14 +entry: + %val = load i32 *@g + %src2 = sext i32 %val to i64 + %cond = icmp slt i64 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i64 %src1, %src1 + br label %exit +exit: + %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] + ret i64 %res +} + +; Check unsigned comparison, which cannot use CGFRL. +define i64 @f2(i64 %src1) { +; CHECK: f2: +; CHECK-NOT: cgfrl +; CHECK: br %r14 +entry: + %val = load i32 *@g + %src2 = sext i32 %val to i64 + %cond = icmp ult i64 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i64 %src1, %src1 + br label %exit +exit: + %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] + ret i64 %res +} + +; Check equality. 
+define i64 @f3(i64 %src1) { +; CHECK: f3: +; CHECK: cgfrl %r2, g +; CHECK-NEXT: j{{g?}}e +; CHECK: br %r14 +entry: + %val = load i32 *@g + %src2 = sext i32 %val to i64 + %cond = icmp eq i64 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i64 %src1, %src1 + br label %exit +exit: + %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] + ret i64 %res +} + +; Check inequality. +define i64 @f4(i64 %src1) { +; CHECK: f4: +; CHECK: cgfrl %r2, g +; CHECK-NEXT: j{{g?}}lh +; CHECK: br %r14 +entry: + %val = load i32 *@g + %src2 = sext i32 %val to i64 + %cond = icmp ne i64 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i64 %src1, %src1 + br label %exit +exit: + %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-42.ll b/test/CodeGen/SystemZ/int-cmp-42.ll new file mode 100644 index 0000000..5c67581 --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-42.ll @@ -0,0 +1,81 @@ +; Test 64-bit comparisons in which the second operand is zero-extended +; from a PC-relative i32. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +@g = global i32 1 + +; Check unsigned comparison. +define i64 @f1(i64 %src1) { +; CHECK: f1: +; CHECK: clgfrl %r2, g +; CHECK-NEXT: j{{g?}}l +; CHECK: br %r14 +entry: + %val = load i32 *@g + %src2 = zext i32 %val to i64 + %cond = icmp ult i64 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i64 %src1, %src1 + br label %exit +exit: + %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] + ret i64 %res +} + +; Check signed comparison. +define i64 @f2(i64 %src1) { +; CHECK: f2: +; CHECK-NOT: clgfrl +; CHECK: br %r14 +entry: + %val = load i32 *@g + %src2 = zext i32 %val to i64 + %cond = icmp slt i64 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i64 %src1, %src1 + br label %exit +exit: + %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] + ret i64 %res +} + +; Check equality. 
+define i64 @f3(i64 %src1) { +; CHECK: f3: +; CHECK: clgfrl %r2, g +; CHECK-NEXT: j{{g?}}e +; CHECK: br %r14 +entry: + %val = load i32 *@g + %src2 = zext i32 %val to i64 + %cond = icmp eq i64 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i64 %src1, %src1 + br label %exit +exit: + %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] + ret i64 %res +} + +; Check inequality. +define i64 @f4(i64 %src1) { +; CHECK: f4: +; CHECK: clgfrl %r2, g +; CHECK-NEXT: j{{g?}}lh +; CHECK: br %r14 +entry: + %val = load i32 *@g + %src2 = zext i32 %val to i64 + %cond = icmp ne i64 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i64 %src1, %src1 + br label %exit +exit: + %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-43.ll b/test/CodeGen/SystemZ/int-cmp-43.ll new file mode 100644 index 0000000..f387293 --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-43.ll @@ -0,0 +1,78 @@ +; Test 64-bit comparisons in which the second operand is a PC-relative +; variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +@g = global i64 1 + +; Check signed comparisons. +define i64 @f1(i64 %src1) { +; CHECK: f1: +; CHECK: cgrl %r2, g +; CHECK-NEXT: j{{g?}}l +; CHECK: br %r14 +entry: + %src2 = load i64 *@g + %cond = icmp slt i64 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i64 %src1, %src1 + br label %exit +exit: + %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] + ret i64 %res +} + +; Check unsigned comparisons. +define i64 @f2(i64 %src1) { +; CHECK: f2: +; CHECK: clgrl %r2, g +; CHECK-NEXT: j{{g?}}l +; CHECK: br %r14 +entry: + %src2 = load i64 *@g + %cond = icmp ult i64 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i64 %src1, %src1 + br label %exit +exit: + %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] + ret i64 %res +} + +; Check equality, which can use CGRL or CLGRL. 
+define i64 @f3(i64 %src1) { +; CHECK: f3: +; CHECK: c{{l?}}grl %r2, g +; CHECK-NEXT: j{{g?}}e +; CHECK: br %r14 +entry: + %src2 = load i64 *@g + %cond = icmp eq i64 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i64 %src1, %src1 + br label %exit +exit: + %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] + ret i64 %res +} + +; ...likewise inequality. +define i64 @f4(i64 %src1) { +; CHECK: f4: +; CHECK: c{{l?}}grl %r2, g +; CHECK-NEXT: j{{g?}}lh +; CHECK: br %r14 +entry: + %src2 = load i64 *@g + %cond = icmp ne i64 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i64 %src1, %src1 + br label %exit +exit: + %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/int-const-01.ll b/test/CodeGen/SystemZ/int-const-01.ll new file mode 100644 index 0000000..a580154 --- /dev/null +++ b/test/CodeGen/SystemZ/int-const-01.ll @@ -0,0 +1,91 @@ +; Test loading of 32-bit constants. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check 0. +define i32 @f1() { +; CHECK: f1: +; CHECK: lhi %r2, 0 +; CHECK: br %r14 + ret i32 0 +} + +; Check the high end of the LHI range. +define i32 @f2() { +; CHECK: f2: +; CHECK: lhi %r2, 32767 +; CHECK: br %r14 + ret i32 32767 +} + +; Check the next value up, which must use LLILL instead. +define i32 @f3() { +; CHECK: f3: +; CHECK: llill %r2, 32768 +; CHECK: br %r14 + ret i32 32768 +} + +; Check the high end of the LLILL range. +define i32 @f4() { +; CHECK: f4: +; CHECK: llill %r2, 65535 +; CHECK: br %r14 + ret i32 65535 +} + +; Check the first useful LLILH value, which is the next one up. +define i32 @f5() { +; CHECK: f5: +; CHECK: llilh %r2, 1 +; CHECK: br %r14 + ret i32 65536 +} + +; Check the first useful IILF value, which is the next one up again. +define i32 @f6() { +; CHECK: f6: +; CHECK: iilf %r2, 65537 +; CHECK: br %r14 + ret i32 65537 +} + +; Check the high end of the LLILH range. 
+define i32 @f7() { +; CHECK: f7: +; CHECK: llilh %r2, 65535 +; CHECK: br %r14 + ret i32 -65536 +} + +; Check the next value up, which must use IILF. +define i32 @f8() { +; CHECK: f8: +; CHECK: iilf %r2, 4294901761 +; CHECK: br %r14 + ret i32 -65535 +} + +; Check the highest useful IILF value, 0xffff7fff +define i32 @f9() { +; CHECK: f9: +; CHECK: iilf %r2, 4294934527 +; CHECK: br %r14 + ret i32 -32769 +} + +; Check the next value up, which should use LHI. +define i32 @f10() { +; CHECK: f10: +; CHECK: lhi %r2, -32768 +; CHECK: br %r14 + ret i32 -32768 +} + +; Check -1. +define i32 @f11() { +; CHECK: f11: +; CHECK: lhi %r2, -1 +; CHECK: br %r14 + ret i32 -1 +} diff --git a/test/CodeGen/SystemZ/int-const-02.ll b/test/CodeGen/SystemZ/int-const-02.ll new file mode 100644 index 0000000..b345e3f --- /dev/null +++ b/test/CodeGen/SystemZ/int-const-02.ll @@ -0,0 +1,251 @@ +; Test loading of 64-bit constants. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check 0. +define i64 @f1() { +; CHECK: f1: +; CHECK: lghi %r2, 0 +; CHECK-NEXT: br %r14 + ret i64 0 +} + +; Check the high end of the LGHI range. +define i64 @f2() { +; CHECK: f2: +; CHECK: lghi %r2, 32767 +; CHECK-NEXT: br %r14 + ret i64 32767 +} + +; Check the next value up, which must use LLILL instead. +define i64 @f3() { +; CHECK: f3: +; CHECK: llill %r2, 32768 +; CHECK-NEXT: br %r14 + ret i64 32768 +} + +; Check the high end of the LLILL range. +define i64 @f4() { +; CHECK: f4: +; CHECK: llill %r2, 65535 +; CHECK-NEXT: br %r14 + ret i64 65535 +} + +; Check the first useful LLILH value, which is the next one up. +define i64 @f5() { +; CHECK: f5: +; CHECK: llilh %r2, 1 +; CHECK-NEXT: br %r14 + ret i64 65536 +} + +; Check the first useful LGFI value, which is the next one up again. +define i64 @f6() { +; CHECK: f6: +; CHECK: lgfi %r2, 65537 +; CHECK-NEXT: br %r14 + ret i64 65537 +} + +; Check the high end of the LGFI range. 
+define i64 @f7() { +; CHECK: f7: +; CHECK: lgfi %r2, 2147483647 +; CHECK-NEXT: br %r14 + ret i64 2147483647 +} + +; Check the next value up, which should use LLILH instead. +define i64 @f8() { +; CHECK: f8: +; CHECK: llilh %r2, 32768 +; CHECK-NEXT: br %r14 + ret i64 2147483648 +} + +; Check the next value up again, which should use LLILF. +define i64 @f9() { +; CHECK: f9: +; CHECK: llilf %r2, 2147483649 +; CHECK-NEXT: br %r14 + ret i64 2147483649 +} + +; Check the high end of the LLILH range. +define i64 @f10() { +; CHECK: f10: +; CHECK: llilh %r2, 65535 +; CHECK-NEXT: br %r14 + ret i64 4294901760 +} + +; Check the next value up, which must use LLILF. +define i64 @f11() { +; CHECK: f11: +; CHECK: llilf %r2, 4294901761 +; CHECK-NEXT: br %r14 + ret i64 4294901761 +} + +; Check the high end of the LLILF range. +define i64 @f12() { +; CHECK: f12: +; CHECK: llilf %r2, 4294967295 +; CHECK-NEXT: br %r14 + ret i64 4294967295 +} + +; Check the lowest useful LLIHL value, which is the next one up. +define i64 @f13() { +; CHECK: f13: +; CHECK: llihl %r2, 1 +; CHECK-NEXT: br %r14 + ret i64 4294967296 +} + +; Check the next value up, which must use a combination of two instructions. +define i64 @f14() { +; CHECK: f14: +; CHECK: llihl %r2, 1 +; CHECK-NEXT: oill %r2, 1 +; CHECK-NEXT: br %r14 + ret i64 4294967297 +} + +; Check the high end of the OILL range. +define i64 @f15() { +; CHECK: f15: +; CHECK: llihl %r2, 1 +; CHECK-NEXT: oill %r2, 65535 +; CHECK-NEXT: br %r14 + ret i64 4295032831 +} + +; Check the next value up, which should use OILH instead. +define i64 @f16() { +; CHECK: f16: +; CHECK: llihl %r2, 1 +; CHECK-NEXT: oilh %r2, 1 +; CHECK-NEXT: br %r14 + ret i64 4295032832 +} + +; Check the next value up again, which should use OILF. +define i64 @f17() { +; CHECK: f17: +; CHECK: llihl %r2, 1 +; CHECK-NEXT: oilf %r2, 65537 +; CHECK-NEXT: br %r14 + ret i64 4295032833 +} + +; Check the high end of the OILH range. 
+define i64 @f18() { +; CHECK: f18: +; CHECK: llihl %r2, 1 +; CHECK-NEXT: oilh %r2, 65535 +; CHECK-NEXT: br %r14 + ret i64 8589869056 +} + +; Check the high end of the OILF range. +define i64 @f19() { +; CHECK: f19: +; CHECK: llihl %r2, 1 +; CHECK-NEXT: oilf %r2, 4294967295 +; CHECK-NEXT: br %r14 + ret i64 8589934591 +} + +; Check the high end of the LLIHL range. +define i64 @f20() { +; CHECK: f20: +; CHECK: llihl %r2, 65535 +; CHECK-NEXT: br %r14 + ret i64 281470681743360 +} + +; Check the lowest useful LLIHH value, which is 1<<32 greater than the above. +define i64 @f21() { +; CHECK: f21: +; CHECK: llihh %r2, 1 +; CHECK-NEXT: br %r14 + ret i64 281474976710656 +} + +; Check the lowest useful LLIHF value, which is 1<<32 greater again. +define i64 @f22() { +; CHECK: f22: +; CHECK: llihf %r2, 65537 +; CHECK-NEXT: br %r14 + ret i64 281479271677952 +} + +; Check the highest end of the LLIHH range. +define i64 @f23() { +; CHECK: f23: +; CHECK: llihh %r2, 65535 +; CHECK-NEXT: br %r14 + ret i64 -281474976710656 +} + +; Check the next value up, which must use OILL too. +define i64 @f24() { +; CHECK: f24: +; CHECK: llihh %r2, 65535 +; CHECK-NEXT: oill %r2, 1 +; CHECK-NEXT: br %r14 + ret i64 -281474976710655 +} + +; Check the high end of the LLIHF range. +define i64 @f25() { +; CHECK: f25: +; CHECK: llihf %r2, 4294967295 +; CHECK-NEXT: br %r14 + ret i64 -4294967296 +} + +; Check -1. +define i64 @f26() { +; CHECK: f26: +; CHECK: lghi %r2, -1 +; CHECK-NEXT: br %r14 + ret i64 -1 +} + +; Check the low end of the LGHI range. +define i64 @f27() { +; CHECK: f27: +; CHECK: lghi %r2, -32768 +; CHECK-NEXT: br %r14 + ret i64 -32768 +} + +; Check the next value down, which must use LGFI instead. +define i64 @f28() { +; CHECK: f28: +; CHECK: lgfi %r2, -32769 +; CHECK-NEXT: br %r14 + ret i64 -32769 +} + +; Check the low end of the LGFI range. 
+define i64 @f29() { +; CHECK: f29: +; CHECK: lgfi %r2, -2147483648 +; CHECK-NEXT: br %r14 + ret i64 -2147483648 +} + +; Check the next value down, which needs a two-instruction sequence. +define i64 @f30() { +; CHECK: f30: +; CHECK: llihf %r2, 4294967295 +; CHECK-NEXT: oilf %r2, 2147483647 +; CHECK-NEXT: br %r14 + ret i64 -2147483649 +} diff --git a/test/CodeGen/SystemZ/int-const-03.ll b/test/CodeGen/SystemZ/int-const-03.ll new file mode 100644 index 0000000..807b7e4 --- /dev/null +++ b/test/CodeGen/SystemZ/int-const-03.ll @@ -0,0 +1,166 @@ +; Test moves of integers to byte memory locations. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the low end of the unsigned range. +define void @f1(i8 *%ptr) { +; CHECK: f1: +; CHECK: mvi 0(%r2), 0 +; CHECK: br %r14 + store i8 0, i8 *%ptr + ret void +} + +; Check the high end of the signed range. +define void @f2(i8 *%ptr) { +; CHECK: f2: +; CHECK: mvi 0(%r2), 127 +; CHECK: br %r14 + store i8 127, i8 *%ptr + ret void +} + +; Check the next value up. +define void @f3(i8 *%ptr) { +; CHECK: f3: +; CHECK: mvi 0(%r2), 128 +; CHECK: br %r14 + store i8 -128, i8 *%ptr + ret void +} + +; Check the high end of the unsigned range. +define void @f4(i8 *%ptr) { +; CHECK: f4: +; CHECK: mvi 0(%r2), 255 +; CHECK: br %r14 + store i8 255, i8 *%ptr + ret void +} + +; Check -1. +define void @f5(i8 *%ptr) { +; CHECK: f5: +; CHECK: mvi 0(%r2), 255 +; CHECK: br %r14 + store i8 -1, i8 *%ptr + ret void +} + +; Check the low end of the signed range. +define void @f6(i8 *%ptr) { +; CHECK: f6: +; CHECK: mvi 0(%r2), 128 +; CHECK: br %r14 + store i8 -128, i8 *%ptr + ret void +} + +; Check the next value down. +define void @f7(i8 *%ptr) { +; CHECK: f7: +; CHECK: mvi 0(%r2), 127 +; CHECK: br %r14 + store i8 -129, i8 *%ptr + ret void +} + +; Check the high end of the MVI range. 
+define void @f8(i8 *%src) { +; CHECK: f8: +; CHECK: mvi 4095(%r2), 42 +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 4095 + store i8 42, i8 *%ptr + ret void +} + +; Check the next byte up, which should use MVIY instead of MVI. +define void @f9(i8 *%src) { +; CHECK: f9: +; CHECK: mviy 4096(%r2), 42 +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 4096 + store i8 42, i8 *%ptr + ret void +} + +; Check the high end of the MVIY range. +define void @f10(i8 *%src) { +; CHECK: f10: +; CHECK: mviy 524287(%r2), 42 +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 524287 + store i8 42, i8 *%ptr + ret void +} + +; Check the next byte up, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f11(i8 *%src) { +; CHECK: f11: +; CHECK: agfi %r2, 524288 +; CHECK: mvi 0(%r2), 42 +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 524288 + store i8 42, i8 *%ptr + ret void +} + +; Check the high end of the negative MVIY range. +define void @f12(i8 *%src) { +; CHECK: f12: +; CHECK: mviy -1(%r2), 42 +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 -1 + store i8 42, i8 *%ptr + ret void +} + +; Check the low end of the MVIY range. +define void @f13(i8 *%src) { +; CHECK: f13: +; CHECK: mviy -524288(%r2), 42 +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 -524288 + store i8 42, i8 *%ptr + ret void +} + +; Check the next byte down, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define void @f14(i8 *%src) { +; CHECK: f14: +; CHECK: agfi %r2, -524289 +; CHECK: mvi 0(%r2), 42 +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 -524289 + store i8 42, i8 *%ptr + ret void +} + +; Check that MVI does not allow an index +define void @f15(i64 %src, i64 %index) { +; CHECK: f15: +; CHECK: agr %r2, %r3 +; CHECK: mvi 4095(%r2), 42 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4095 + %ptr = inttoptr i64 %add2 to i8 * + store i8 42, i8 *%ptr + ret void +} + +; Check that MVIY does not allow an index +define void @f16(i64 %src, i64 %index) { +; CHECK: f16: +; CHECK: agr %r2, %r3 +; CHECK: mviy 4096(%r2), 42 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i8 * + store i8 42, i8 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/int-const-04.ll b/test/CodeGen/SystemZ/int-const-04.ll new file mode 100644 index 0000000..41c7306 --- /dev/null +++ b/test/CodeGen/SystemZ/int-const-04.ll @@ -0,0 +1,111 @@ +; Test moves of integers to 2-byte memory locations. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the low end of the unsigned range. +define void @f1(i16 *%ptr) { +; CHECK: f1: +; CHECK: mvhhi 0(%r2), 0 +; CHECK: br %r14 + store i16 0, i16 *%ptr + ret void +} + +; Check the high end of the signed range. +define void @f2(i16 *%ptr) { +; CHECK: f2: +; CHECK: mvhhi 0(%r2), 32767 +; CHECK: br %r14 + store i16 32767, i16 *%ptr + ret void +} + +; Check the next value up. +define void @f3(i16 *%ptr) { +; CHECK: f3: +; CHECK: mvhhi 0(%r2), -32768 +; CHECK: br %r14 + store i16 -32768, i16 *%ptr + ret void +} + +; Check the high end of the unsigned range. +define void @f4(i16 *%ptr) { +; CHECK: f4: +; CHECK: mvhhi 0(%r2), -1 +; CHECK: br %r14 + store i16 65535, i16 *%ptr + ret void +} + +; Check -1. 
+define void @f5(i16 *%ptr) { +; CHECK: f5: +; CHECK: mvhhi 0(%r2), -1 +; CHECK: br %r14 + store i16 -1, i16 *%ptr + ret void +} + +; Check the low end of the signed range. +define void @f6(i16 *%ptr) { +; CHECK: f6: +; CHECK: mvhhi 0(%r2), -32768 +; CHECK: br %r14 + store i16 -32768, i16 *%ptr + ret void +} + +; Check the next value down. +define void @f7(i16 *%ptr) { +; CHECK: f7: +; CHECK: mvhhi 0(%r2), 32767 +; CHECK: br %r14 + store i16 -32769, i16 *%ptr + ret void +} + +; Check the high end of the MVHHI range. +define void @f8(i16 *%a) { +; CHECK: f8: +; CHECK: mvhhi 4094(%r2), 42 +; CHECK: br %r14 + %ptr = getelementptr i16 *%a, i64 2047 + store i16 42, i16 *%ptr + ret void +} + +; Check the next halfword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f9(i16 *%a) { +; CHECK: f9: +; CHECK: aghi %r2, 4096 +; CHECK: mvhhi 0(%r2), 42 +; CHECK: br %r14 + %ptr = getelementptr i16 *%a, i64 2048 + store i16 42, i16 *%ptr + ret void +} + +; Check negative displacements, which also need separate address logic. +define void @f10(i16 *%a) { +; CHECK: f10: +; CHECK: aghi %r2, -2 +; CHECK: mvhhi 0(%r2), 42 +; CHECK: br %r14 + %ptr = getelementptr i16 *%a, i64 -1 + store i16 42, i16 *%ptr + ret void +} + +; Check that MVHHI does not allow an index +define void @f11(i64 %src, i64 %index) { +; CHECK: f11: +; CHECK: agr %r2, %r3 +; CHECK: mvhhi 0(%r2), 42 +; CHECK: br %r14 + %add = add i64 %src, %index + %ptr = inttoptr i64 %add to i16 * + store i16 42, i16 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/int-const-05.ll b/test/CodeGen/SystemZ/int-const-05.ll new file mode 100644 index 0000000..b85fd6b --- /dev/null +++ b/test/CodeGen/SystemZ/int-const-05.ll @@ -0,0 +1,102 @@ +; Test moves of integers to 4-byte memory locations. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check moves of zero. 
+define void @f1(i32 *%a) { +; CHECK: f1: +; CHECK: mvhi 0(%r2), 0 +; CHECK: br %r14 + store i32 0, i32 *%a + ret void +} + +; Check the high end of the signed 16-bit range. +define void @f2(i32 *%a) { +; CHECK: f2: +; CHECK: mvhi 0(%r2), 32767 +; CHECK: br %r14 + store i32 32767, i32 *%a + ret void +} + +; Check the next value up, which can't use MVHI. +define void @f3(i32 *%a) { +; CHECK: f3: +; CHECK-NOT: mvhi +; CHECK: br %r14 + store i32 32768, i32 *%a + ret void +} + +; Check moves of -1. +define void @f4(i32 *%a) { +; CHECK: f4: +; CHECK: mvhi 0(%r2), -1 +; CHECK: br %r14 + store i32 -1, i32 *%a + ret void +} + +; Check the low end of the MVHI range. +define void @f5(i32 *%a) { +; CHECK: f5: +; CHECK: mvhi 0(%r2), -32768 +; CHECK: br %r14 + store i32 -32768, i32 *%a + ret void +} + +; Check the next value down, which can't use MVHI. +define void @f6(i32 *%a) { +; CHECK: f6: +; CHECK-NOT: mvhi +; CHECK: br %r14 + store i32 -32769, i32 *%a + ret void +} + +; Check the high end of the MVHI range. +define void @f7(i32 *%a) { +; CHECK: f7: +; CHECK: mvhi 4092(%r2), 42 +; CHECK: br %r14 + %ptr = getelementptr i32 *%a, i64 1023 + store i32 42, i32 *%ptr + ret void +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f8(i32 *%a) { +; CHECK: f8: +; CHECK: aghi %r2, 4096 +; CHECK: mvhi 0(%r2), 42 +; CHECK: br %r14 + %ptr = getelementptr i32 *%a, i64 1024 + store i32 42, i32 *%ptr + ret void +} + +; Check negative displacements, which also need separate address logic. 
+define void @f9(i32 *%a) { +; CHECK: f9: +; CHECK: aghi %r2, -4 +; CHECK: mvhi 0(%r2), 42 +; CHECK: br %r14 + %ptr = getelementptr i32 *%a, i64 -1 + store i32 42, i32 *%ptr + ret void +} + +; Check that MVHI does not allow an index +define void @f10(i64 %src, i64 %index) { +; CHECK: f10: +; CHECK: agr %r2, %r3 +; CHECK: mvhi 0(%r2), 42 +; CHECK: br %r14 + %add = add i64 %src, %index + %ptr = inttoptr i64 %add to i32 * + store i32 42, i32 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/int-const-06.ll b/test/CodeGen/SystemZ/int-const-06.ll new file mode 100644 index 0000000..9f14347 --- /dev/null +++ b/test/CodeGen/SystemZ/int-const-06.ll @@ -0,0 +1,102 @@ +; Test moves of integers to 8-byte memory locations. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check moves of zero. +define void @f1(i64 *%a) { +; CHECK: f1: +; CHECK: mvghi 0(%r2), 0 +; CHECK: br %r14 + store i64 0, i64 *%a + ret void +} + +; Check the high end of the signed 16-bit range. +define void @f2(i64 *%a) { +; CHECK: f2: +; CHECK: mvghi 0(%r2), 32767 +; CHECK: br %r14 + store i64 32767, i64 *%a + ret void +} + +; Check the next value up, which can't use MVGHI. +define void @f3(i64 *%a) { +; CHECK: f3: +; CHECK-NOT: mvghi +; CHECK: br %r14 + store i64 32768, i64 *%a + ret void +} + +; Check moves of -1. +define void @f4(i64 *%a) { +; CHECK: f4: +; CHECK: mvghi 0(%r2), -1 +; CHECK: br %r14 + store i64 -1, i64 *%a + ret void +} + +; Check the low end of the MVGHI range. +define void @f5(i64 *%a) { +; CHECK: f5: +; CHECK: mvghi 0(%r2), -32768 +; CHECK: br %r14 + store i64 -32768, i64 *%a + ret void +} + +; Check the next value down, which can't use MVGHI. +define void @f6(i64 *%a) { +; CHECK: f6: +; CHECK-NOT: mvghi +; CHECK: br %r14 + store i64 -32769, i64 *%a + ret void +} + +; Check the high end of the MVGHI range. 
+define void @f7(i64 *%a) { +; CHECK: f7: +; CHECK: mvghi 4088(%r2), 42 +; CHECK: br %r14 + %ptr = getelementptr i64 *%a, i64 511 + store i64 42, i64 *%ptr + ret void +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f8(i64 *%a) { +; CHECK: f8: +; CHECK: aghi %r2, 4096 +; CHECK: mvghi 0(%r2), 42 +; CHECK: br %r14 + %ptr = getelementptr i64 *%a, i64 512 + store i64 42, i64 *%ptr + ret void +} + +; Check negative displacements, which also need separate address logic. +define void @f9(i64 *%a) { +; CHECK: f9: +; CHECK: aghi %r2, -8 +; CHECK: mvghi 0(%r2), 42 +; CHECK: br %r14 + %ptr = getelementptr i64 *%a, i64 -1 + store i64 42, i64 *%ptr + ret void +} + +; Check that MVGHI does not allow an index +define void @f10(i64 %src, i64 %index) { +; CHECK: f10: +; CHECK: agr %r2, %r3 +; CHECK: mvghi 0(%r2), 42 +; CHECK: br %r14 + %add = add i64 %src, %index + %ptr = inttoptr i64 %add to i64 * + store i64 42, i64 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/int-conv-01.ll b/test/CodeGen/SystemZ/int-conv-01.ll new file mode 100644 index 0000000..643ac6a --- /dev/null +++ b/test/CodeGen/SystemZ/int-conv-01.ll @@ -0,0 +1,105 @@ +; Test sign extensions from a byte to an i32. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test register extension, starting with an i32. +define i32 @f1(i32 %a) { +; CHECK: f1: +; CHECK: lbr %r2, %r2 +; CHECK: br %r14 + %byte = trunc i32 %a to i8 + %ext = sext i8 %byte to i32 + ret i32 %ext +} + +; ...and again with an i64. +define i32 @f2(i64 %a) { +; CHECK: f2: +; CHECK: lbr %r2, %r2 +; CHECK: br %r14 + %byte = trunc i64 %a to i8 + %ext = sext i8 %byte to i32 + ret i32 %ext +} + +; Check LB with no displacement. +define i32 @f3(i8 *%src) { +; CHECK: f3: +; CHECK: lb %r2, 0(%r2) +; CHECK: br %r14 + %byte = load i8 *%src + %ext = sext i8 %byte to i32 + ret i32 %ext +} + +; Check the high end of the LB range. 
+define i32 @f4(i8 *%src) { +; CHECK: f4: +; CHECK: lb %r2, 524287(%r2) +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 524287 + %byte = load i8 *%ptr + %ext = sext i8 %byte to i32 + ret i32 %ext +} + +; Check the next byte up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i32 @f5(i8 *%src) { +; CHECK: f5: +; CHECK: agfi %r2, 524288 +; CHECK: lb %r2, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 524288 + %byte = load i8 *%ptr + %ext = sext i8 %byte to i32 + ret i32 %ext +} + +; Check the high end of the negative LB range. +define i32 @f6(i8 *%src) { +; CHECK: f6: +; CHECK: lb %r2, -1(%r2) +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 -1 + %byte = load i8 *%ptr + %ext = sext i8 %byte to i32 + ret i32 %ext +} + +; Check the low end of the LB range. +define i32 @f7(i8 *%src) { +; CHECK: f7: +; CHECK: lb %r2, -524288(%r2) +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 -524288 + %byte = load i8 *%ptr + %ext = sext i8 %byte to i32 + ret i32 %ext +} + +; Check the next byte down, which needs separate address logic. +; Other sequences besides this one would be OK. +define i32 @f8(i8 *%src) { +; CHECK: f8: +; CHECK: agfi %r2, -524289 +; CHECK: lb %r2, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 -524289 + %byte = load i8 *%ptr + %ext = sext i8 %byte to i32 + ret i32 %ext +} + +; Check that LB allows an index +define i32 @f9(i64 %src, i64 %index) { +; CHECK: f9: +; CHECK: lb %r2, 524287(%r3,%r2) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524287 + %ptr = inttoptr i64 %add2 to i8 * + %byte = load i8 *%ptr + %ext = sext i8 %byte to i32 + ret i32 %ext +} diff --git a/test/CodeGen/SystemZ/int-conv-02.ll b/test/CodeGen/SystemZ/int-conv-02.ll new file mode 100644 index 0000000..86144d3 --- /dev/null +++ b/test/CodeGen/SystemZ/int-conv-02.ll @@ -0,0 +1,114 @@ +; Test zero extensions from a byte to an i32. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test register extension, starting with an i32. +define i32 @f1(i32 %a) { +; CHECK: f1: +; CHECK: llcr %r2, %r2 +; CHECK: br %r14 + %byte = trunc i32 %a to i8 + %ext = zext i8 %byte to i32 + ret i32 %ext +} + +; ...and again with an i64. +define i32 @f2(i64 %a) { +; CHECK: f2: +; CHECK: llcr %r2, %r2 +; CHECK: br %r14 + %byte = trunc i64 %a to i8 + %ext = zext i8 %byte to i32 + ret i32 %ext +} + +; Check ANDs that are equivalent to zero extension. +define i32 @f3(i32 %a) { +; CHECK: f3: +; CHECK: llcr %r2, %r2 +; CHECK: br %r14 + %ext = and i32 %a, 255 + ret i32 %ext +} + +; Check LLC with no displacement. +define i32 @f4(i8 *%src) { +; CHECK: f4: +; CHECK: llc %r2, 0(%r2) +; CHECK: br %r14 + %byte = load i8 *%src + %ext = zext i8 %byte to i32 + ret i32 %ext +} + +; Check the high end of the LLC range. +define i32 @f5(i8 *%src) { +; CHECK: f5: +; CHECK: llc %r2, 524287(%r2) +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 524287 + %byte = load i8 *%ptr + %ext = zext i8 %byte to i32 + ret i32 %ext +} + +; Check the next byte up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i32 @f6(i8 *%src) { +; CHECK: f6: +; CHECK: agfi %r2, 524288 +; CHECK: llc %r2, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 524288 + %byte = load i8 *%ptr + %ext = zext i8 %byte to i32 + ret i32 %ext +} + +; Check the high end of the negative LLC range. +define i32 @f7(i8 *%src) { +; CHECK: f7: +; CHECK: llc %r2, -1(%r2) +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 -1 + %byte = load i8 *%ptr + %ext = zext i8 %byte to i32 + ret i32 %ext +} + +; Check the low end of the LLC range. +define i32 @f8(i8 *%src) { +; CHECK: f8: +; CHECK: llc %r2, -524288(%r2) +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 -524288 + %byte = load i8 *%ptr + %ext = zext i8 %byte to i32 + ret i32 %ext +} + +; Check the next byte down, which needs separate address logic. 
+; Other sequences besides this one would be OK. +define i32 @f9(i8 *%src) { +; CHECK: f9: +; CHECK: agfi %r2, -524289 +; CHECK: llc %r2, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 -524289 + %byte = load i8 *%ptr + %ext = zext i8 %byte to i32 + ret i32 %ext +} + +; Check that LLC allows an index +define i32 @f10(i64 %src, i64 %index) { +; CHECK: f10: +; CHECK: llc %r2, 524287(%r3,%r2) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524287 + %ptr = inttoptr i64 %add2 to i8 * + %byte = load i8 *%ptr + %ext = zext i8 %byte to i32 + ret i32 %ext +} diff --git a/test/CodeGen/SystemZ/int-conv-03.ll b/test/CodeGen/SystemZ/int-conv-03.ll new file mode 100644 index 0000000..73b8dbb --- /dev/null +++ b/test/CodeGen/SystemZ/int-conv-03.ll @@ -0,0 +1,105 @@ +; Test sign extensions from a byte to an i64. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test register extension, starting with an i32. +define i64 @f1(i32 %a) { +; CHECK: f1: +; CHECK: lgbr %r2, %r2 +; CHECK: br %r14 + %byte = trunc i32 %a to i8 + %ext = sext i8 %byte to i64 + ret i64 %ext +} + +; ...and again with an i64. +define i64 @f2(i64 %a) { +; CHECK: f2: +; CHECK: lgbr %r2, %r2 +; CHECK: br %r14 + %byte = trunc i64 %a to i8 + %ext = sext i8 %byte to i64 + ret i64 %ext +} + +; Check LGB with no displacement. +define i64 @f3(i8 *%src) { +; CHECK: f3: +; CHECK: lgb %r2, 0(%r2) +; CHECK: br %r14 + %byte = load i8 *%src + %ext = sext i8 %byte to i64 + ret i64 %ext +} + +; Check the high end of the LGB range. +define i64 @f4(i8 *%src) { +; CHECK: f4: +; CHECK: lgb %r2, 524287(%r2) +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 524287 + %byte = load i8 *%ptr + %ext = sext i8 %byte to i64 + ret i64 %ext +} + +; Check the next byte up, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define i64 @f5(i8 *%src) { +; CHECK: f5: +; CHECK: agfi %r2, 524288 +; CHECK: lgb %r2, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 524288 + %byte = load i8 *%ptr + %ext = sext i8 %byte to i64 + ret i64 %ext +} + +; Check the high end of the negative LGB range. +define i64 @f6(i8 *%src) { +; CHECK: f6: +; CHECK: lgb %r2, -1(%r2) +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 -1 + %byte = load i8 *%ptr + %ext = sext i8 %byte to i64 + ret i64 %ext +} + +; Check the low end of the LGB range. +define i64 @f7(i8 *%src) { +; CHECK: f7: +; CHECK: lgb %r2, -524288(%r2) +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 -524288 + %byte = load i8 *%ptr + %ext = sext i8 %byte to i64 + ret i64 %ext +} + +; Check the next byte down, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f8(i8 *%src) { +; CHECK: f8: +; CHECK: agfi %r2, -524289 +; CHECK: lgb %r2, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 -524289 + %byte = load i8 *%ptr + %ext = sext i8 %byte to i64 + ret i64 %ext +} + +; Check that LGB allows an index +define i64 @f9(i64 %src, i64 %index) { +; CHECK: f9: +; CHECK: lgb %r2, 524287(%r3,%r2) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524287 + %ptr = inttoptr i64 %add2 to i8 * + %byte = load i8 *%ptr + %ext = sext i8 %byte to i64 + ret i64 %ext +} diff --git a/test/CodeGen/SystemZ/int-conv-04.ll b/test/CodeGen/SystemZ/int-conv-04.ll new file mode 100644 index 0000000..4cec524 --- /dev/null +++ b/test/CodeGen/SystemZ/int-conv-04.ll @@ -0,0 +1,114 @@ +; Test zero extensions from a byte to an i64. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test register extension, starting with an i32. +define i64 @f1(i32 %a) { +; CHECK: f1: +; CHECK: llgcr %r2, %r2 +; CHECK: br %r14 + %byte = trunc i32 %a to i8 + %ext = zext i8 %byte to i64 + ret i64 %ext +} + +; ...and again with an i64. 
+define i64 @f2(i64 %a) { +; CHECK: f2: +; CHECK: llgcr %r2, %r2 +; CHECK: br %r14 + %byte = trunc i64 %a to i8 + %ext = zext i8 %byte to i64 + ret i64 %ext +} + +; Check ANDs that are equivalent to zero extension. +define i64 @f3(i64 %a) { +; CHECK: f3: +; CHECK: llgcr %r2, %r2 +; CHECK: br %r14 + %ext = and i64 %a, 255 + ret i64 %ext +} + +; Check LLGC with no displacement. +define i64 @f4(i8 *%src) { +; CHECK: f4: +; CHECK: llgc %r2, 0(%r2) +; CHECK: br %r14 + %byte = load i8 *%src + %ext = zext i8 %byte to i64 + ret i64 %ext +} + +; Check the high end of the LLGC range. +define i64 @f5(i8 *%src) { +; CHECK: f5: +; CHECK: llgc %r2, 524287(%r2) +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 524287 + %byte = load i8 *%ptr + %ext = zext i8 %byte to i64 + ret i64 %ext +} + +; Check the next byte up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f6(i8 *%src) { +; CHECK: f6: +; CHECK: agfi %r2, 524288 +; CHECK: llgc %r2, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 524288 + %byte = load i8 *%ptr + %ext = zext i8 %byte to i64 + ret i64 %ext +} + +; Check the high end of the negative LLGC range. +define i64 @f7(i8 *%src) { +; CHECK: f7: +; CHECK: llgc %r2, -1(%r2) +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 -1 + %byte = load i8 *%ptr + %ext = zext i8 %byte to i64 + ret i64 %ext +} + +; Check the low end of the LLGC range. +define i64 @f8(i8 *%src) { +; CHECK: f8: +; CHECK: llgc %r2, -524288(%r2) +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 -524288 + %byte = load i8 *%ptr + %ext = zext i8 %byte to i64 + ret i64 %ext +} + +; Check the next byte down, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define i64 @f9(i8 *%src) { +; CHECK: f9: +; CHECK: agfi %r2, -524289 +; CHECK: llgc %r2, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 -524289 + %byte = load i8 *%ptr + %ext = zext i8 %byte to i64 + ret i64 %ext +} + +; Check that LLGC allows an index +define i64 @f10(i64 %src, i64 %index) { +; CHECK: f10: +; CHECK: llgc %r2, 524287(%r3,%r2) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524287 + %ptr = inttoptr i64 %add2 to i8 * + %byte = load i8 *%ptr + %ext = zext i8 %byte to i64 + ret i64 %ext +} diff --git a/test/CodeGen/SystemZ/int-conv-05.ll b/test/CodeGen/SystemZ/int-conv-05.ll new file mode 100644 index 0000000..5358f7d --- /dev/null +++ b/test/CodeGen/SystemZ/int-conv-05.ll @@ -0,0 +1,140 @@ +; Test sign extensions from a halfword to an i32. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test register extension, starting with an i32. +define i32 @f1(i32 %a) { +; CHECK: f1: +; CHECK: lhr %r2, %r2 +; CHECK: br %r14 + %half = trunc i32 %a to i16 + %ext = sext i16 %half to i32 + ret i32 %ext +} + +; ...and again with an i64. +define i32 @f2(i64 %a) { +; CHECK: f2: +; CHECK: lhr %r2, %r2 +; CHECK: br %r14 + %half = trunc i64 %a to i16 + %ext = sext i16 %half to i32 + ret i32 %ext +} + +; Check the low end of the LH range. +define i32 @f3(i16 *%src) { +; CHECK: f3: +; CHECK: lh %r2, 0(%r2) +; CHECK: br %r14 + %half = load i16 *%src + %ext = sext i16 %half to i32 + ret i32 %ext +} + +; Check the high end of the LH range. +define i32 @f4(i16 *%src) { +; CHECK: f4: +; CHECK: lh %r2, 4094(%r2) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 2047 + %half = load i16 *%ptr + %ext = sext i16 %half to i32 + ret i32 %ext +} + +; Check the next halfword up, which needs LHY rather than LH. 
+define i32 @f5(i16 *%src) { +; CHECK: f5: +; CHECK: lhy %r2, 4096(%r2) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 2048 + %half = load i16 *%ptr + %ext = sext i16 %half to i32 + ret i32 %ext +} + +; Check the high end of the LHY range. +define i32 @f6(i16 *%src) { +; CHECK: f6: +; CHECK: lhy %r2, 524286(%r2) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 262143 + %half = load i16 *%ptr + %ext = sext i16 %half to i32 + ret i32 %ext +} + +; Check the next halfword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i32 @f7(i16 *%src) { +; CHECK: f7: +; CHECK: agfi %r2, 524288 +; CHECK: lh %r2, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 262144 + %half = load i16 *%ptr + %ext = sext i16 %half to i32 + ret i32 %ext +} + +; Check the high end of the negative LHY range. +define i32 @f8(i16 *%src) { +; CHECK: f8: +; CHECK: lhy %r2, -2(%r2) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 -1 + %half = load i16 *%ptr + %ext = sext i16 %half to i32 + ret i32 %ext +} + +; Check the low end of the LHY range. +define i32 @f9(i16 *%src) { +; CHECK: f9: +; CHECK: lhy %r2, -524288(%r2) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 -262144 + %half = load i16 *%ptr + %ext = sext i16 %half to i32 + ret i32 %ext +} + +; Check the next halfword down, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define i32 @f10(i16 *%src) { +; CHECK: f10: +; CHECK: agfi %r2, -524290 +; CHECK: lh %r2, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 -262145 + %half = load i16 *%ptr + %ext = sext i16 %half to i32 + ret i32 %ext +} + +; Check that LH allows an index +define i32 @f11(i64 %src, i64 %index) { +; CHECK: f11: +; CHECK: lh %r2, 4094(%r3,%r2) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4094 + %ptr = inttoptr i64 %add2 to i16 * + %half = load i16 *%ptr + %ext = sext i16 %half to i32 + ret i32 %ext +} + +; Check that LHY allows an index +define i32 @f12(i64 %src, i64 %index) { +; CHECK: f12: +; CHECK: lhy %r2, 4096(%r3,%r2) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i16 * + %half = load i16 *%ptr + %ext = sext i16 %half to i32 + ret i32 %ext +} diff --git a/test/CodeGen/SystemZ/int-conv-06.ll b/test/CodeGen/SystemZ/int-conv-06.ll new file mode 100644 index 0000000..64af612 --- /dev/null +++ b/test/CodeGen/SystemZ/int-conv-06.ll @@ -0,0 +1,114 @@ +; Test zero extensions from a halfword to an i32. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test register extension, starting with an i32. +define i32 @f1(i32 %a) { +; CHECK: f1: +; CHECK: llhr %r2, %r2 +; CHECK: br %r14 + %half = trunc i32 %a to i16 + %ext = zext i16 %half to i32 + ret i32 %ext +} + +; ...and again with an i64. +define i32 @f2(i64 %a) { +; CHECK: f2: +; CHECK: llhr %r2, %r2 +; CHECK: br %r14 + %half = trunc i64 %a to i16 + %ext = zext i16 %half to i32 + ret i32 %ext +} + +; Check ANDs that are equivalent to zero extension. +define i32 @f3(i32 %a) { +; CHECK: f3: +; CHECK: llhr %r2, %r2 +; CHECK: br %r14 + %ext = and i32 %a, 65535 + ret i32 %ext +} + +; Check LLH with no displacement. 
+define i32 @f4(i16 *%src) { +; CHECK: f4: +; CHECK: llh %r2, 0(%r2) +; CHECK: br %r14 + %half = load i16 *%src + %ext = zext i16 %half to i32 + ret i32 %ext +} + +; Check the high end of the LLH range. +define i32 @f5(i16 *%src) { +; CHECK: f5: +; CHECK: llh %r2, 524286(%r2) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 262143 + %half = load i16 *%ptr + %ext = zext i16 %half to i32 + ret i32 %ext +} + +; Check the next halfword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i32 @f6(i16 *%src) { +; CHECK: f6: +; CHECK: agfi %r2, 524288 +; CHECK: llh %r2, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 262144 + %half = load i16 *%ptr + %ext = zext i16 %half to i32 + ret i32 %ext +} + +; Check the high end of the negative LLH range. +define i32 @f7(i16 *%src) { +; CHECK: f7: +; CHECK: llh %r2, -2(%r2) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 -1 + %half = load i16 *%ptr + %ext = zext i16 %half to i32 + ret i32 %ext +} + +; Check the low end of the LLH range. +define i32 @f8(i16 *%src) { +; CHECK: f8: +; CHECK: llh %r2, -524288(%r2) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 -262144 + %half = load i16 *%ptr + %ext = zext i16 %half to i32 + ret i32 %ext +} + +; Check the next halfword down, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define i32 @f9(i16 *%src) { +; CHECK: f9: +; CHECK: agfi %r2, -524290 +; CHECK: llh %r2, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 -262145 + %half = load i16 *%ptr + %ext = zext i16 %half to i32 + ret i32 %ext +} + +; Check that LLH allows an index +define i32 @f10(i64 %src, i64 %index) { +; CHECK: f10: +; CHECK: llh %r2, 524287(%r3,%r2) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524287 + %ptr = inttoptr i64 %add2 to i16 * + %half = load i16 *%ptr + %ext = zext i16 %half to i32 + ret i32 %ext +} diff --git a/test/CodeGen/SystemZ/int-conv-07.ll b/test/CodeGen/SystemZ/int-conv-07.ll new file mode 100644 index 0000000..041caa2 --- /dev/null +++ b/test/CodeGen/SystemZ/int-conv-07.ll @@ -0,0 +1,105 @@ +; Test sign extensions from a halfword to an i64. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test register extension, starting with an i64. +define i64 @f1(i64 %a) { +; CHECK: f1: +; CHECK: lghr %r2, %r2 +; CHECK: br %r14 + %half = trunc i64 %a to i16 + %ext = sext i16 %half to i64 + ret i64 %ext +} + +; ...and again with an i32. +define i64 @f2(i32 %a) { +; CHECK: f2: +; CHECK: lghr %r2, %r2 +; CHECK: br %r14 + %half = trunc i32 %a to i16 + %ext = sext i16 %half to i64 + ret i64 %ext +} + +; Check LGH with no displacement. +define i64 @f3(i16 *%src) { +; CHECK: f3: +; CHECK: lgh %r2, 0(%r2) +; CHECK: br %r14 + %half = load i16 *%src + %ext = sext i16 %half to i64 + ret i64 %ext +} + +; Check the high end of the LGH range. +define i64 @f4(i16 *%src) { +; CHECK: f4: +; CHECK: lgh %r2, 524286(%r2) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 262143 + %half = load i16 *%ptr + %ext = sext i16 %half to i64 + ret i64 %ext +} + +; Check the next halfword up, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define i64 @f5(i16 *%src) { +; CHECK: f5: +; CHECK: agfi %r2, 524288 +; CHECK: lgh %r2, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 262144 + %half = load i16 *%ptr + %ext = sext i16 %half to i64 + ret i64 %ext +} + +; Check the high end of the negative LGH range. +define i64 @f6(i16 *%src) { +; CHECK: f6: +; CHECK: lgh %r2, -2(%r2) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 -1 + %half = load i16 *%ptr + %ext = sext i16 %half to i64 + ret i64 %ext +} + +; Check the low end of the LGH range. +define i64 @f7(i16 *%src) { +; CHECK: f7: +; CHECK: lgh %r2, -524288(%r2) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 -262144 + %half = load i16 *%ptr + %ext = sext i16 %half to i64 + ret i64 %ext +} + +; Check the next halfword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f8(i16 *%src) { +; CHECK: f8: +; CHECK: agfi %r2, -524290 +; CHECK: lgh %r2, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 -262145 + %half = load i16 *%ptr + %ext = sext i16 %half to i64 + ret i64 %ext +} + +; Check that LGH allows an index. +define i64 @f9(i64 %src, i64 %index) { +; CHECK: f9: +; CHECK: lgh %r2, 524287(%r3,%r2) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524287 + %ptr = inttoptr i64 %add2 to i16 * + %half = load i16 *%ptr + %ext = sext i16 %half to i64 + ret i64 %ext +} diff --git a/test/CodeGen/SystemZ/int-conv-08.ll b/test/CodeGen/SystemZ/int-conv-08.ll new file mode 100644 index 0000000..3d7f966 --- /dev/null +++ b/test/CodeGen/SystemZ/int-conv-08.ll @@ -0,0 +1,114 @@ +; Test zero extensions from a halfword to an i64. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test register extension, starting with an i32. +define i64 @f1(i32 %a) { +; CHECK: f1: +; CHECK: llghr %r2, %r2 +; CHECK: br %r14 + %half = trunc i32 %a to i16 + %ext = zext i16 %half to i64 + ret i64 %ext +} + +; ...and again with an i64. 
+define i64 @f2(i64 %a) { +; CHECK: f2: +; CHECK: llghr %r2, %r2 +; CHECK: br %r14 + %half = trunc i64 %a to i16 + %ext = zext i16 %half to i64 + ret i64 %ext +} + +; Check ANDs that are equivalent to zero extension. +define i64 @f3(i64 %a) { +; CHECK: f3: +; CHECK: llghr %r2, %r2 +; CHECK: br %r14 + %ext = and i64 %a, 65535 + ret i64 %ext +} + +; Check LLGH with no displacement. +define i64 @f4(i16 *%src) { +; CHECK: f4: +; CHECK: llgh %r2, 0(%r2) +; CHECK: br %r14 + %half = load i16 *%src + %ext = zext i16 %half to i64 + ret i64 %ext +} + +; Check the high end of the LLGH range. +define i64 @f5(i16 *%src) { +; CHECK: f5: +; CHECK: llgh %r2, 524286(%r2) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 262143 + %half = load i16 *%ptr + %ext = zext i16 %half to i64 + ret i64 %ext +} + +; Check the next halfword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f6(i16 *%src) { +; CHECK: f6: +; CHECK: agfi %r2, 524288 +; CHECK: llgh %r2, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 262144 + %half = load i16 *%ptr + %ext = zext i16 %half to i64 + ret i64 %ext +} + +; Check the high end of the negative LLGH range. +define i64 @f7(i16 *%src) { +; CHECK: f7: +; CHECK: llgh %r2, -2(%r2) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 -1 + %half = load i16 *%ptr + %ext = zext i16 %half to i64 + ret i64 %ext +} + +; Check the low end of the LLGH range. +define i64 @f8(i16 *%src) { +; CHECK: f8: +; CHECK: llgh %r2, -524288(%r2) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 -262144 + %half = load i16 *%ptr + %ext = zext i16 %half to i64 + ret i64 %ext +} + +; Check the next halfword down, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define i64 @f9(i16 *%src) { +; CHECK: f9: +; CHECK: agfi %r2, -524290 +; CHECK: llgh %r2, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 -262145 + %half = load i16 *%ptr + %ext = zext i16 %half to i64 + ret i64 %ext +} + +; Check that LLGH allows an index +define i64 @f10(i64 %src, i64 %index) { +; CHECK: f10: +; CHECK: llgh %r2, 524287(%r3,%r2) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524287 + %ptr = inttoptr i64 %add2 to i16 * + %half = load i16 *%ptr + %ext = zext i16 %half to i64 + ret i64 %ext +} diff --git a/test/CodeGen/SystemZ/int-conv-09.ll b/test/CodeGen/SystemZ/int-conv-09.ll new file mode 100644 index 0000000..6e93886 --- /dev/null +++ b/test/CodeGen/SystemZ/int-conv-09.ll @@ -0,0 +1,104 @@ +; Test sign extensions from an i32 to an i64. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test register extension, starting with an i32. +define i64 @f1(i32 %a) { +; CHECK: f1: +; CHECK: lgfr %r2, %r2 +; CHECK: br %r14 + %ext = sext i32 %a to i64 + ret i64 %ext +} + +; ...and again with an i64. +define i64 @f2(i64 %a) { +; CHECK: f2: +; CHECK: lgfr %r2, %r2 +; CHECK: br %r14 + %word = trunc i64 %a to i32 + %ext = sext i32 %word to i64 + ret i64 %ext +} + +; Check LGF with no displacement. +define i64 @f3(i32 *%src) { +; CHECK: f3: +; CHECK: lgf %r2, 0(%r2) +; CHECK: br %r14 + %word = load i32 *%src + %ext = sext i32 %word to i64 + ret i64 %ext +} + +; Check the high end of the LGF range. +define i64 @f4(i32 *%src) { +; CHECK: f4: +; CHECK: lgf %r2, 524284(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131071 + %word = load i32 *%ptr + %ext = sext i32 %word to i64 + ret i64 %ext +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define i64 @f5(i32 *%src) { +; CHECK: f5: +; CHECK: agfi %r2, 524288 +; CHECK: lgf %r2, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131072 + %word = load i32 *%ptr + %ext = sext i32 %word to i64 + ret i64 %ext +} + +; Check the high end of the negative LGF range. +define i64 @f6(i32 *%src) { +; CHECK: f6: +; CHECK: lgf %r2, -4(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -1 + %word = load i32 *%ptr + %ext = sext i32 %word to i64 + ret i64 %ext +} + +; Check the low end of the LGF range. +define i64 @f7(i32 *%src) { +; CHECK: f7: +; CHECK: lgf %r2, -524288(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131072 + %word = load i32 *%ptr + %ext = sext i32 %word to i64 + ret i64 %ext +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f8(i32 *%src) { +; CHECK: f8: +; CHECK: agfi %r2, -524292 +; CHECK: lgf %r2, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131073 + %word = load i32 *%ptr + %ext = sext i32 %word to i64 + ret i64 %ext +} + +; Check that LGF allows an index. +define i64 @f9(i64 %src, i64 %index) { +; CHECK: f9: +; CHECK: lgf %r2, 524287(%r3,%r2) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524287 + %ptr = inttoptr i64 %add2 to i32 * + %word = load i32 *%ptr + %ext = sext i32 %word to i64 + ret i64 %ext +} diff --git a/test/CodeGen/SystemZ/int-conv-10.ll b/test/CodeGen/SystemZ/int-conv-10.ll new file mode 100644 index 0000000..918bc1d --- /dev/null +++ b/test/CodeGen/SystemZ/int-conv-10.ll @@ -0,0 +1,113 @@ +; Test zero extensions from an i32 to an i64. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test register extension, starting with an i32. +define i64 @f1(i32 %a) { +; CHECK: f1: +; CHECK: llgfr %r2, %r2 +; CHECK: br %r14 + %ext = zext i32 %a to i64 + ret i64 %ext +} + +; ...and again with an i64. 
+define i64 @f2(i64 %a) { +; CHECK: f2: +; CHECK: llgfr %r2, %r2 +; CHECK: br %r14 + %word = trunc i64 %a to i32 + %ext = zext i32 %word to i64 + ret i64 %ext +} + +; Check ANDs that are equivalent to zero extension. +define i64 @f3(i64 %a) { +; CHECK: f3: +; CHECK: llgfr %r2, %r2 +; CHECK: br %r14 + %ext = and i64 %a, 4294967295 + ret i64 %ext +} + +; Check LLGF with no displacement. +define i64 @f4(i32 *%src) { +; CHECK: f4: +; CHECK: llgf %r2, 0(%r2) +; CHECK: br %r14 + %word = load i32 *%src + %ext = zext i32 %word to i64 + ret i64 %ext +} + +; Check the high end of the LLGF range. +define i64 @f5(i32 *%src) { +; CHECK: f5: +; CHECK: llgf %r2, 524284(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131071 + %word = load i32 *%ptr + %ext = zext i32 %word to i64 + ret i64 %ext +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f6(i32 *%src) { +; CHECK: f6: +; CHECK: agfi %r2, 524288 +; CHECK: llgf %r2, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131072 + %word = load i32 *%ptr + %ext = zext i32 %word to i64 + ret i64 %ext +} + +; Check the high end of the negative LLGF range. +define i64 @f7(i32 *%src) { +; CHECK: f7: +; CHECK: llgf %r2, -4(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -1 + %word = load i32 *%ptr + %ext = zext i32 %word to i64 + ret i64 %ext +} + +; Check the low end of the LLGF range. +define i64 @f8(i32 *%src) { +; CHECK: f8: +; CHECK: llgf %r2, -524288(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131072 + %word = load i32 *%ptr + %ext = zext i32 %word to i64 + ret i64 %ext +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define i64 @f9(i32 *%src) { +; CHECK: f9: +; CHECK: agfi %r2, -524292 +; CHECK: llgf %r2, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131073 + %word = load i32 *%ptr + %ext = zext i32 %word to i64 + ret i64 %ext +} + +; Check that LLGF allows an index. +define i64 @f10(i64 %src, i64 %index) { +; CHECK: f10: +; CHECK: llgf %r2, 524287(%r3,%r2) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524287 + %ptr = inttoptr i64 %add2 to i32 * + %word = load i32 *%ptr + %ext = zext i32 %word to i64 + ret i64 %ext +} diff --git a/test/CodeGen/SystemZ/int-div-01.ll b/test/CodeGen/SystemZ/int-div-01.ll new file mode 100644 index 0000000..492ece9 --- /dev/null +++ b/test/CodeGen/SystemZ/int-div-01.ll @@ -0,0 +1,190 @@ +; Test 32-bit signed division and remainder. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test register division. The result is in the second of the two registers. +define void @f1(i32 *%dest, i32 %a, i32 %b) { +; CHECK: f1: +; CHECK: lgfr %r1, %r3 +; CHECK: dsgfr %r0, %r4 +; CHECK: st %r1, 0(%r2) +; CHECK: br %r14 + %div = sdiv i32 %a, %b + store i32 %div, i32 *%dest + ret void +} + +; Test register remainder. The result is in the first of the two registers. +define void @f2(i32 *%dest, i32 %a, i32 %b) { +; CHECK: f2: +; CHECK: lgfr %r1, %r3 +; CHECK: dsgfr %r0, %r4 +; CHECK: st %r0, 0(%r2) +; CHECK: br %r14 + %rem = srem i32 %a, %b + store i32 %rem, i32 *%dest + ret void +} + +; Test that division and remainder use a single instruction. +define i32 @f3(i32 %dummy, i32 %a, i32 %b) { +; CHECK: f3: +; CHECK-NOT: %r2 +; CHECK: lgfr %r3, %r3 +; CHECK-NOT: %r2 +; CHECK: dsgfr %r2, %r4 +; CHECK-NOT: dsgfr +; CHECK: or %r2, %r3 +; CHECK: br %r14 + %div = sdiv i32 %a, %b + %rem = srem i32 %a, %b + %or = or i32 %rem, %div + ret i32 %or +} + +; Check that the sign extension of the dividend is elided when the argument +; is already sign-extended. 
+define i32 @f4(i32 %dummy, i32 signext %a, i32 %b) { +; CHECK: f4: +; CHECK-NOT: {{%r[234]}} +; CHECK: dsgfr %r2, %r4 +; CHECK-NOT: dsgfr +; CHECK: or %r2, %r3 +; CHECK: br %r14 + %div = sdiv i32 %a, %b + %rem = srem i32 %a, %b + %or = or i32 %rem, %div + ret i32 %or +} + +; Test that memory dividends are loaded using sign extension (LGF). +define i32 @f5(i32 %dummy, i32 *%src, i32 %b) { +; CHECK: f5: +; CHECK-NOT: %r2 +; CHECK: lgf %r3, 0(%r3) +; CHECK-NOT: %r2 +; CHECK: dsgfr %r2, %r4 +; CHECK-NOT: dsgfr +; CHECK: or %r2, %r3 +; CHECK: br %r14 + %a = load i32 *%src + %div = sdiv i32 %a, %b + %rem = srem i32 %a, %b + %or = or i32 %rem, %div + ret i32 %or +} + +; Test memory division with no displacement. +define void @f6(i32 *%dest, i32 %a, i32 *%src) { +; CHECK: f6: +; CHECK: lgfr %r1, %r3 +; CHECK: dsgf %r0, 0(%r4) +; CHECK: st %r1, 0(%r2) +; CHECK: br %r14 + %b = load i32 *%src + %div = sdiv i32 %a, %b + store i32 %div, i32 *%dest + ret void +} + +; Test memory remainder with no displacement. +define void @f7(i32 *%dest, i32 %a, i32 *%src) { +; CHECK: f7: +; CHECK: lgfr %r1, %r3 +; CHECK: dsgf %r0, 0(%r4) +; CHECK: st %r0, 0(%r2) +; CHECK: br %r14 + %b = load i32 *%src + %rem = srem i32 %a, %b + store i32 %rem, i32 *%dest + ret void +} + +; Test both memory division and memory remainder. +define i32 @f8(i32 %dummy, i32 %a, i32 *%src) { +; CHECK: f8: +; CHECK-NOT: %r2 +; CHECK: lgfr %r3, %r3 +; CHECK-NOT: %r2 +; CHECK: dsgf %r2, 0(%r4) +; CHECK-NOT: {{dsgf|dsgfr}} +; CHECK: or %r2, %r3 +; CHECK: br %r14 + %b = load i32 *%src + %div = sdiv i32 %a, %b + %rem = srem i32 %a, %b + %or = or i32 %rem, %div + ret i32 %or +} + +; Check the high end of the DSGF range. +define i32 @f9(i32 %dummy, i32 %a, i32 *%src) { +; CHECK: f9: +; CHECK: dsgf %r2, 524284(%r4) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131071 + %b = load i32 *%ptr + %rem = srem i32 %a, %b + ret i32 %rem +} + +; Check the next word up, which needs separate address logic. 
+; Other sequences besides this one would be OK. +define i32 @f10(i32 %dummy, i32 %a, i32 *%src) { +; CHECK: f10: +; CHECK: agfi %r4, 524288 +; CHECK: dsgf %r2, 0(%r4) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131072 + %b = load i32 *%ptr + %rem = srem i32 %a, %b + ret i32 %rem +} + +; Check the high end of the negative aligned DSGF range. +define i32 @f11(i32 %dummy, i32 %a, i32 *%src) { +; CHECK: f11: +; CHECK: dsgf %r2, -4(%r4) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -1 + %b = load i32 *%ptr + %rem = srem i32 %a, %b + ret i32 %rem +} + +; Check the low end of the DSGF range. +define i32 @f12(i32 %dummy, i32 %a, i32 *%src) { +; CHECK: f12: +; CHECK: dsgf %r2, -524288(%r4) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131072 + %b = load i32 *%ptr + %rem = srem i32 %a, %b + ret i32 %rem +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define i32 @f13(i32 %dummy, i32 %a, i32 *%src) { +; CHECK: f13: +; CHECK: agfi %r4, -524292 +; CHECK: dsgf %r2, 0(%r4) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131073 + %b = load i32 *%ptr + %rem = srem i32 %a, %b + ret i32 %rem +} + +; Check that DSGF allows an index. +define i32 @f14(i32 %dummy, i32 %a, i64 %src, i64 %index) { +; CHECK: f14: +; CHECK: dsgf %r2, 524287(%r5,%r4) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524287 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32 *%ptr + %rem = srem i32 %a, %b + ret i32 %rem +} diff --git a/test/CodeGen/SystemZ/int-div-02.ll b/test/CodeGen/SystemZ/int-div-02.ll new file mode 100644 index 0000000..7954384 --- /dev/null +++ b/test/CodeGen/SystemZ/int-div-02.ll @@ -0,0 +1,166 @@ +; Test 32-bit unsigned division and remainder. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test register division. The result is in the second of the two registers. 
+define void @f1(i32 %dummy, i32 %a, i32 %b, i32 *%dest) { +; CHECK: f1: +; CHECK-NOT: %r3 +; CHECK: {{llill|lhi}} %r2, 0 +; CHECK-NOT: %r3 +; CHECK: dlr %r2, %r4 +; CHECK: st %r3, 0(%r5) +; CHECK: br %r14 + %div = udiv i32 %a, %b + store i32 %div, i32 *%dest + ret void +} + +; Test register remainder. The result is in the first of the two registers. +define void @f2(i32 %dummy, i32 %a, i32 %b, i32 *%dest) { +; CHECK: f2: +; CHECK-NOT: %r3 +; CHECK: {{llill|lhi}} %r2, 0 +; CHECK-NOT: %r3 +; CHECK: dlr %r2, %r4 +; CHECK: st %r2, 0(%r5) +; CHECK: br %r14 + %rem = urem i32 %a, %b + store i32 %rem, i32 *%dest + ret void +} + +; Test that division and remainder use a single instruction. +define i32 @f3(i32 %dummy1, i32 %a, i32 %b) { +; CHECK: f3: +; CHECK-NOT: %r3 +; CHECK: {{llill|lhi}} %r2, 0 +; CHECK-NOT: %r3 +; CHECK: dlr %r2, %r4 +; CHECK-NOT: dlr +; CHECK: or %r2, %r3 +; CHECK: br %r14 + %div = udiv i32 %a, %b + %rem = urem i32 %a, %b + %or = or i32 %rem, %div + ret i32 %or +} + +; Test memory division with no displacement. +define void @f4(i32 %dummy, i32 %a, i32 *%src, i32 *%dest) { +; CHECK: f4: +; CHECK-NOT: %r3 +; CHECK: {{llill|lhi}} %r2, 0 +; CHECK-NOT: %r3 +; CHECK: dl %r2, 0(%r4) +; CHECK: st %r3, 0(%r5) +; CHECK: br %r14 + %b = load i32 *%src + %div = udiv i32 %a, %b + store i32 %div, i32 *%dest + ret void +} + +; Test memory remainder with no displacement. +define void @f5(i32 %dummy, i32 %a, i32 *%src, i32 *%dest) { +; CHECK: f5: +; CHECK-NOT: %r3 +; CHECK: {{llill|lhi}} %r2, 0 +; CHECK-NOT: %r3 +; CHECK: dl %r2, 0(%r4) +; CHECK: st %r2, 0(%r5) +; CHECK: br %r14 + %b = load i32 *%src + %rem = urem i32 %a, %b + store i32 %rem, i32 *%dest + ret void +} + +; Test both memory division and memory remainder. 
+define i32 @f6(i32 %dummy, i32 %a, i32 *%src) { +; CHECK: f6: +; CHECK-NOT: %r3 +; CHECK: {{llill|lhi}} %r2, 0 +; CHECK-NOT: %r3 +; CHECK: dl %r2, 0(%r4) +; CHECK-NOT: {{dl|dlr}} +; CHECK: or %r2, %r3 +; CHECK: br %r14 + %b = load i32 *%src + %div = udiv i32 %a, %b + %rem = urem i32 %a, %b + %or = or i32 %rem, %div + ret i32 %or +} + +; Check the high end of the DL range. +define i32 @f7(i32 %dummy, i32 %a, i32 *%src) { +; CHECK: f7: +; CHECK: dl %r2, 524284(%r4) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131071 + %b = load i32 *%ptr + %rem = urem i32 %a, %b + ret i32 %rem +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i32 @f8(i32 %dummy, i32 %a, i32 *%src) { +; CHECK: f8: +; CHECK: agfi %r4, 524288 +; CHECK: dl %r2, 0(%r4) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131072 + %b = load i32 *%ptr + %rem = urem i32 %a, %b + ret i32 %rem +} + +; Check the high end of the negative aligned DL range. +define i32 @f9(i32 %dummy, i32 %a, i32 *%src) { +; CHECK: f9: +; CHECK: dl %r2, -4(%r4) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -1 + %b = load i32 *%ptr + %rem = urem i32 %a, %b + ret i32 %rem +} + +; Check the low end of the DL range. +define i32 @f10(i32 %dummy, i32 %a, i32 *%src) { +; CHECK: f10: +; CHECK: dl %r2, -524288(%r4) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131072 + %b = load i32 *%ptr + %rem = urem i32 %a, %b + ret i32 %rem +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define i32 @f11(i32 %dummy, i32 %a, i32 *%src) { +; CHECK: f11: +; CHECK: agfi %r4, -524292 +; CHECK: dl %r2, 0(%r4) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131073 + %b = load i32 *%ptr + %rem = urem i32 %a, %b + ret i32 %rem +} + +; Check that DL allows an index. 
+define i32 @f12(i32 %dummy, i32 %a, i64 %src, i64 %index) { +; CHECK: f12: +; CHECK: dl %r2, 524287(%r5,%r4) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524287 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32 *%ptr + %rem = urem i32 %a, %b + ret i32 %rem +} diff --git a/test/CodeGen/SystemZ/int-div-03.ll b/test/CodeGen/SystemZ/int-div-03.ll new file mode 100644 index 0000000..b950f2b --- /dev/null +++ b/test/CodeGen/SystemZ/int-div-03.ll @@ -0,0 +1,189 @@ +; Test 64-bit signed division and remainder when the divisor is +; a sign-extended i32. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test register division. The result is in the second of the two registers. +define void @f1(i64 %dummy, i64 %a, i32 %b, i64 *%dest) { +; CHECK: f1: +; CHECK-NOT: {{%r[234]}} +; CHECK: dsgfr %r2, %r4 +; CHECK: stg %r3, 0(%r5) +; CHECK: br %r14 + %bext = sext i32 %b to i64 + %div = sdiv i64 %a, %bext + store i64 %div, i64 *%dest + ret void +} + +; Test register remainder. The result is in the first of the two registers. +define void @f2(i64 %dummy, i64 %a, i32 %b, i64 *%dest) { +; CHECK: f2: +; CHECK-NOT: {{%r[234]}} +; CHECK: dsgfr %r2, %r4 +; CHECK: stg %r2, 0(%r5) +; CHECK: br %r14 + %bext = sext i32 %b to i64 + %rem = srem i64 %a, %bext + store i64 %rem, i64 *%dest + ret void +} + +; Test that division and remainder use a single instruction. +define i64 @f3(i64 %dummy, i64 %a, i32 %b) { +; CHECK: f3: +; CHECK-NOT: {{%r[234]}} +; CHECK: dsgfr %r2, %r4 +; CHECK: ogr %r2, %r3 +; CHECK: br %r14 + %bext = sext i32 %b to i64 + %div = sdiv i64 %a, %bext + %rem = srem i64 %a, %bext + %or = or i64 %rem, %div + ret i64 %or +} + +; Test register division when the divisor is zero rather than sign extended. 
+; We can't use dsgfr here +define void @f4(i64 %dummy, i64 %a, i32 %b, i64 *%dest) { +; CHECK: f4: +; CHECK-NOT: dsgfr +; CHECK: br %r14 + %bext = zext i32 %b to i64 + %div = sdiv i64 %a, %bext + store i64 %div, i64 *%dest + ret void +} + +; ...likewise remainder. +define void @f5(i64 %dummy, i64 %a, i32 %b, i64 *%dest) { +; CHECK: f5: +; CHECK-NOT: dsgfr +; CHECK: br %r14 + %bext = zext i32 %b to i64 + %rem = srem i64 %a, %bext + store i64 %rem, i64 *%dest + ret void +} + +; Test memory division with no displacement. +define void @f6(i64 %dummy, i64 %a, i32 *%src, i64 *%dest) { +; CHECK: f6: +; CHECK-NOT: {{%r[234]}} +; CHECK: dsgf %r2, 0(%r4) +; CHECK: stg %r3, 0(%r5) +; CHECK: br %r14 + %b = load i32 *%src + %bext = sext i32 %b to i64 + %div = sdiv i64 %a, %bext + store i64 %div, i64 *%dest + ret void +} + +; Test memory remainder with no displacement. +define void @f7(i64 %dummy, i64 %a, i32 *%src, i64 *%dest) { +; CHECK: f7: +; CHECK-NOT: {{%r[234]}} +; CHECK: dsgf %r2, 0(%r4) +; CHECK: stg %r2, 0(%r5) +; CHECK: br %r14 + %b = load i32 *%src + %bext = sext i32 %b to i64 + %rem = srem i64 %a, %bext + store i64 %rem, i64 *%dest + ret void +} + +; Test both memory division and memory remainder. +define i64 @f8(i64 %dummy, i64 %a, i32 *%src) { +; CHECK: f8: +; CHECK-NOT: {{%r[234]}} +; CHECK: dsgf %r2, 0(%r4) +; CHECK-NOT: {{dsgf|dsgfr}} +; CHECK: ogr %r2, %r3 +; CHECK: br %r14 + %b = load i32 *%src + %bext = sext i32 %b to i64 + %div = sdiv i64 %a, %bext + %rem = srem i64 %a, %bext + %or = or i64 %rem, %div + ret i64 %or +} + +; Check the high end of the DSGF range. +define i64 @f9(i64 %dummy, i64 %a, i32 *%src) { +; CHECK: f9: +; CHECK: dsgf %r2, 524284(%r4) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131071 + %b = load i32 *%ptr + %bext = sext i32 %b to i64 + %rem = srem i64 %a, %bext + ret i64 %rem +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define i64 @f10(i64 %dummy, i64 %a, i32 *%src) { +; CHECK: f10: +; CHECK: agfi %r4, 524288 +; CHECK: dsgf %r2, 0(%r4) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131072 + %b = load i32 *%ptr + %bext = sext i32 %b to i64 + %rem = srem i64 %a, %bext + ret i64 %rem +} + +; Check the high end of the negative aligned DSGF range. +define i64 @f11(i64 %dummy, i64 %a, i32 *%src) { +; CHECK: f11: +; CHECK: dsgf %r2, -4(%r4) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -1 + %b = load i32 *%ptr + %bext = sext i32 %b to i64 + %rem = srem i64 %a, %bext + ret i64 %rem +} + +; Check the low end of the DSGF range. +define i64 @f12(i64 %dummy, i64 %a, i32 *%src) { +; CHECK: f12: +; CHECK: dsgf %r2, -524288(%r4) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131072 + %b = load i32 *%ptr + %bext = sext i32 %b to i64 + %rem = srem i64 %a, %bext + ret i64 %rem +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f13(i64 %dummy, i64 %a, i32 *%src) { +; CHECK: f13: +; CHECK: agfi %r4, -524292 +; CHECK: dsgf %r2, 0(%r4) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131073 + %b = load i32 *%ptr + %bext = sext i32 %b to i64 + %rem = srem i64 %a, %bext + ret i64 %rem +} + +; Check that DSGF allows an index. +define i64 @f14(i64 %dummy, i64 %a, i64 %src, i64 %index) { +; CHECK: f14: +; CHECK: dsgf %r2, 524287(%r5,%r4) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524287 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32 *%ptr + %bext = sext i32 %b to i64 + %rem = srem i64 %a, %bext + ret i64 %rem +} diff --git a/test/CodeGen/SystemZ/int-div-04.ll b/test/CodeGen/SystemZ/int-div-04.ll new file mode 100644 index 0000000..3f72be9 --- /dev/null +++ b/test/CodeGen/SystemZ/int-div-04.ll @@ -0,0 +1,154 @@ +; Test 64-bit signed division and remainder. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test register division. 
The result is in the second of the two registers. +define void @f1(i64 %dummy, i64 %a, i64 %b, i64 *%dest) { +; CHECK: f1: +; CHECK-NOT: {{%r[234]}} +; CHECK: dsgr %r2, %r4 +; CHECK: stg %r3, 0(%r5) +; CHECK: br %r14 + %div = sdiv i64 %a, %b + store i64 %div, i64 *%dest + ret void +} + +; Test register remainder. The result is in the first of the two registers. +define void @f2(i64 %dummy, i64 %a, i64 %b, i64 *%dest) { +; CHECK: f2: +; CHECK-NOT: {{%r[234]}} +; CHECK: dsgr %r2, %r4 +; CHECK: stg %r2, 0(%r5) +; CHECK: br %r14 + %rem = srem i64 %a, %b + store i64 %rem, i64 *%dest + ret void +} + +; Test that division and remainder use a single instruction. +define i64 @f3(i64 %dummy1, i64 %a, i64 %b) { +; CHECK: f3: +; CHECK-NOT: {{%r[234]}} +; CHECK: dsgr %r2, %r4 +; CHECK-NOT: dsgr +; CHECK: ogr %r2, %r3 +; CHECK: br %r14 + %div = sdiv i64 %a, %b + %rem = srem i64 %a, %b + %or = or i64 %rem, %div + ret i64 %or +} + +; Test memory division with no displacement. +define void @f4(i64 %dummy, i64 %a, i64 *%src, i64 *%dest) { +; CHECK: f4: +; CHECK-NOT: {{%r[234]}} +; CHECK: dsg %r2, 0(%r4) +; CHECK: stg %r3, 0(%r5) +; CHECK: br %r14 + %b = load i64 *%src + %div = sdiv i64 %a, %b + store i64 %div, i64 *%dest + ret void +} + +; Test memory remainder with no displacement. +define void @f5(i64 %dummy, i64 %a, i64 *%src, i64 *%dest) { +; CHECK: f5: +; CHECK-NOT: {{%r[234]}} +; CHECK: dsg %r2, 0(%r4) +; CHECK: stg %r2, 0(%r5) +; CHECK: br %r14 + %b = load i64 *%src + %rem = srem i64 %a, %b + store i64 %rem, i64 *%dest + ret void +} + +; Test both memory division and memory remainder. +define i64 @f6(i64 %dummy, i64 %a, i64 *%src) { +; CHECK: f6: +; CHECK-NOT: {{%r[234]}} +; CHECK: dsg %r2, 0(%r4) +; CHECK-NOT: {{dsg|dsgr}} +; CHECK: ogr %r2, %r3 +; CHECK: br %r14 + %b = load i64 *%src + %div = sdiv i64 %a, %b + %rem = srem i64 %a, %b + %or = or i64 %rem, %div + ret i64 %or +} + +; Check the high end of the DSG range. 
+define i64 @f7(i64 %dummy, i64 %a, i64 *%src) { +; CHECK: f7: +; CHECK: dsg %r2, 524280(%r4) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65535 + %b = load i64 *%ptr + %rem = srem i64 %a, %b + ret i64 %rem +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f8(i64 %dummy, i64 %a, i64 *%src) { +; CHECK: f8: +; CHECK: agfi %r4, 524288 +; CHECK: dsg %r2, 0(%r4) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65536 + %b = load i64 *%ptr + %rem = srem i64 %a, %b + ret i64 %rem +} + +; Check the high end of the negative aligned DSG range. +define i64 @f9(i64 %dummy, i64 %a, i64 *%src) { +; CHECK: f9: +; CHECK: dsg %r2, -8(%r4) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -1 + %b = load i64 *%ptr + %rem = srem i64 %a, %b + ret i64 %rem +} + +; Check the low end of the DSG range. +define i64 @f10(i64 %dummy, i64 %a, i64 *%src) { +; CHECK: f10: +; CHECK: dsg %r2, -524288(%r4) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65536 + %b = load i64 *%ptr + %rem = srem i64 %a, %b + ret i64 %rem +} + +; Check the next doubleword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f11(i64 %dummy, i64 %a, i64 *%src) { +; CHECK: f11: +; CHECK: agfi %r4, -524296 +; CHECK: dsg %r2, 0(%r4) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65537 + %b = load i64 *%ptr + %rem = srem i64 %a, %b + ret i64 %rem +} + +; Check that DSG allows an index. 
+define i64 @f12(i64 %dummy, i64 %a, i64 %src, i64 %index) { +; CHECK: f12: +; CHECK: dsg %r2, 524287(%r5,%r4) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524287 + %ptr = inttoptr i64 %add2 to i64 * + %b = load i64 *%ptr + %rem = srem i64 %a, %b + ret i64 %rem +} diff --git a/test/CodeGen/SystemZ/int-div-05.ll b/test/CodeGen/SystemZ/int-div-05.ll new file mode 100644 index 0000000..04f622b --- /dev/null +++ b/test/CodeGen/SystemZ/int-div-05.ll @@ -0,0 +1,166 @@ +; Test 64-bit unsigned division and remainder. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test register division. The result is in the second of the two registers. +define void @f1(i64 %dummy, i64 %a, i64 %b, i64 *%dest) { +; CHECK: f1: +; CHECK-NOT: %r3 +; CHECK: {{llill|lghi}} %r2, 0 +; CHECK-NOT: %r3 +; CHECK: dlgr %r2, %r4 +; CHECK: stg %r3, 0(%r5) +; CHECK: br %r14 + %div = udiv i64 %a, %b + store i64 %div, i64 *%dest + ret void +} + +; Test register remainder. The result is in the first of the two registers. +define void @f2(i64 %dummy, i64 %a, i64 %b, i64 *%dest) { +; CHECK: f2: +; CHECK-NOT: %r3 +; CHECK: {{llill|lghi}} %r2, 0 +; CHECK-NOT: %r3 +; CHECK: dlgr %r2, %r4 +; CHECK: stg %r2, 0(%r5) +; CHECK: br %r14 + %rem = urem i64 %a, %b + store i64 %rem, i64 *%dest + ret void +} + +; Test that division and remainder use a single instruction. +define i64 @f3(i64 %dummy1, i64 %a, i64 %b) { +; CHECK: f3: +; CHECK-NOT: %r3 +; CHECK: {{llill|lghi}} %r2, 0 +; CHECK-NOT: %r3 +; CHECK: dlgr %r2, %r4 +; CHECK-NOT: dlgr +; CHECK: ogr %r2, %r3 +; CHECK: br %r14 + %div = udiv i64 %a, %b + %rem = urem i64 %a, %b + %or = or i64 %rem, %div + ret i64 %or +} + +; Test memory division with no displacement. 
+define void @f4(i64 %dummy, i64 %a, i64 *%src, i64 *%dest) { +; CHECK: f4: +; CHECK-NOT: %r3 +; CHECK: {{llill|lghi}} %r2, 0 +; CHECK-NOT: %r3 +; CHECK: dlg %r2, 0(%r4) +; CHECK: stg %r3, 0(%r5) +; CHECK: br %r14 + %b = load i64 *%src + %div = udiv i64 %a, %b + store i64 %div, i64 *%dest + ret void +} + +; Test memory remainder with no displacement. +define void @f5(i64 %dummy, i64 %a, i64 *%src, i64 *%dest) { +; CHECK: f5: +; CHECK-NOT: %r3 +; CHECK: {{llill|lghi}} %r2, 0 +; CHECK-NOT: %r3 +; CHECK: dlg %r2, 0(%r4) +; CHECK: stg %r2, 0(%r5) +; CHECK: br %r14 + %b = load i64 *%src + %rem = urem i64 %a, %b + store i64 %rem, i64 *%dest + ret void +} + +; Test both memory division and memory remainder. +define i64 @f6(i64 %dummy, i64 %a, i64 *%src) { +; CHECK: f6: +; CHECK-NOT: %r3 +; CHECK: {{llill|lghi}} %r2, 0 +; CHECK-NOT: %r3 +; CHECK: dlg %r2, 0(%r4) +; CHECK-NOT: {{dlg|dlgr}} +; CHECK: ogr %r2, %r3 +; CHECK: br %r14 + %b = load i64 *%src + %div = udiv i64 %a, %b + %rem = urem i64 %a, %b + %or = or i64 %rem, %div + ret i64 %or +} + +; Check the high end of the DLG range. +define i64 @f7(i64 %dummy, i64 %a, i64 *%src) { +; CHECK: f7: +; CHECK: dlg %r2, 524280(%r4) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65535 + %b = load i64 *%ptr + %rem = urem i64 %a, %b + ret i64 %rem +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f8(i64 %dummy, i64 %a, i64 *%src) { +; CHECK: f8: +; CHECK: agfi %r4, 524288 +; CHECK: dlg %r2, 0(%r4) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65536 + %b = load i64 *%ptr + %rem = urem i64 %a, %b + ret i64 %rem +} + +; Check the high end of the negative aligned DLG range. +define i64 @f9(i64 %dummy, i64 %a, i64 *%src) { +; CHECK: f9: +; CHECK: dlg %r2, -8(%r4) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -1 + %b = load i64 *%ptr + %rem = urem i64 %a, %b + ret i64 %rem +} + +; Check the low end of the DLG range. 
+define i64 @f10(i64 %dummy, i64 %a, i64 *%src) { +; CHECK: f10: +; CHECK: dlg %r2, -524288(%r4) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65536 + %b = load i64 *%ptr + %rem = urem i64 %a, %b + ret i64 %rem +} + +; Check the next doubleword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f11(i64 %dummy, i64 %a, i64 *%src) { +; CHECK: f11: +; CHECK: agfi %r4, -524296 +; CHECK: dlg %r2, 0(%r4) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65537 + %b = load i64 *%ptr + %rem = urem i64 %a, %b + ret i64 %rem +} + +; Check that DLG allows an index. +define i64 @f12(i64 %dummy, i64 %a, i64 %src, i64 %index) { +; CHECK: f12: +; CHECK: dlg %r2, 524287(%r5,%r4) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524287 + %ptr = inttoptr i64 %add2 to i64 * + %b = load i64 *%ptr + %rem = urem i64 %a, %b + ret i64 %rem +} diff --git a/test/CodeGen/SystemZ/int-move-01.ll b/test/CodeGen/SystemZ/int-move-01.ll new file mode 100644 index 0000000..ae890ad --- /dev/null +++ b/test/CodeGen/SystemZ/int-move-01.ll @@ -0,0 +1,35 @@ +; Test moves between GPRs. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test 8-bit moves, which should get promoted to i32. +define i8 @f1(i8 %a, i8 %b) { +; CHECK: f1: +; CHECK: lr %r2, %r3 +; CHECK: br %r14 + ret i8 %b +} + +; Test 16-bit moves, which again should get promoted to i32. +define i16 @f2(i16 %a, i16 %b) { +; CHECK: f2: +; CHECK: lr %r2, %r3 +; CHECK: br %r14 + ret i16 %b +} + +; Test 32-bit moves. +define i32 @f3(i32 %a, i32 %b) { +; CHECK: f3: +; CHECK: lr %r2, %r3 +; CHECK: br %r14 + ret i32 %b +} + +; Test 64-bit moves. 
+define i64 @f4(i64 %a, i64 %b) { +; CHECK: f4: +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + ret i64 %b +} diff --git a/test/CodeGen/SystemZ/int-move-02.ll b/test/CodeGen/SystemZ/int-move-02.ll new file mode 100644 index 0000000..467e22d --- /dev/null +++ b/test/CodeGen/SystemZ/int-move-02.ll @@ -0,0 +1,110 @@ +; Test 32-bit GPR loads. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the low end of the L range. +define i32 @f1(i32 *%src) { +; CHECK: f1: +; CHECK: l %r2, 0(%r2) +; CHECK: br %r14 + %val = load i32 *%src + ret i32 %val +} + +; Check the high end of the aligned L range. +define i32 @f2(i32 *%src) { +; CHECK: f2: +; CHECK: l %r2, 4092(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 1023 + %val = load i32 *%ptr + ret i32 %val +} + +; Check the next word up, which should use LY instead of L. +define i32 @f3(i32 *%src) { +; CHECK: f3: +; CHECK: ly %r2, 4096(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 1024 + %val = load i32 *%ptr + ret i32 %val +} + +; Check the high end of the aligned LY range. +define i32 @f4(i32 *%src) { +; CHECK: f4: +; CHECK: ly %r2, 524284(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131071 + %val = load i32 *%ptr + ret i32 %val +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i32 @f5(i32 *%src) { +; CHECK: f5: +; CHECK: agfi %r2, 524288 +; CHECK: l %r2, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131072 + %val = load i32 *%ptr + ret i32 %val +} + +; Check the high end of the negative aligned LY range. +define i32 @f6(i32 *%src) { +; CHECK: f6: +; CHECK: ly %r2, -4(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -1 + %val = load i32 *%ptr + ret i32 %val +} + +; Check the low end of the LY range. 
+define i32 @f7(i32 *%src) { +; CHECK: f7: +; CHECK: ly %r2, -524288(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131072 + %val = load i32 *%ptr + ret i32 %val +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define i32 @f8(i32 *%src) { +; CHECK: f8: +; CHECK: agfi %r2, -524292 +; CHECK: l %r2, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131073 + %val = load i32 *%ptr + ret i32 %val +} + +; Check that L allows an index. +define i32 @f9(i64 %src, i64 %index) { +; CHECK: f9: +; CHECK: l %r2, 4095({{%r3,%r2|%r2,%r3}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4095 + %ptr = inttoptr i64 %add2 to i32 * + %val = load i32 *%ptr + ret i32 %val +} + +; Check that LY allows an index. +define i32 @f10(i64 %src, i64 %index) { +; CHECK: f10: +; CHECK: ly %r2, 4096({{%r3,%r2|%r2,%r3}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i32 * + %val = load i32 *%ptr + ret i32 %val +} diff --git a/test/CodeGen/SystemZ/int-move-03.ll b/test/CodeGen/SystemZ/int-move-03.ll new file mode 100644 index 0000000..97c70a2 --- /dev/null +++ b/test/CodeGen/SystemZ/int-move-03.ll @@ -0,0 +1,78 @@ +; Test 64-bit GPR loads. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check LG with no displacement. +define i64 @f1(i64 *%src) { +; CHECK: f1: +; CHECK: lg %r2, 0(%r2) +; CHECK: br %r14 + %val = load i64 *%src + ret i64 %val +} + +; Check the high end of the aligned LG range. +define i64 @f2(i64 *%src) { +; CHECK: f2: +; CHECK: lg %r2, 524280(%r2) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65535 + %val = load i64 *%ptr + ret i64 %val +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define i64 @f3(i64 *%src) { +; CHECK: f3: +; CHECK: agfi %r2, 524288 +; CHECK: lg %r2, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65536 + %val = load i64 *%ptr + ret i64 %val +} + +; Check the high end of the negative aligned LG range. +define i64 @f4(i64 *%src) { +; CHECK: f4: +; CHECK: lg %r2, -8(%r2) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -1 + %val = load i64 *%ptr + ret i64 %val +} + +; Check the low end of the LG range. +define i64 @f5(i64 *%src) { +; CHECK: f5: +; CHECK: lg %r2, -524288(%r2) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65536 + %val = load i64 *%ptr + ret i64 %val +} + +; Check the next doubleword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f6(i64 *%src) { +; CHECK: f6: +; CHECK: agfi %r2, -524296 +; CHECK: lg %r2, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65537 + %val = load i64 *%ptr + ret i64 %val +} + +; Check that LG allows an index. +define i64 @f7(i64 %src, i64 %index) { +; CHECK: f7: +; CHECK: lg %r2, 524287({{%r3,%r2|%r2,%r3}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524287 + %ptr = inttoptr i64 %add2 to i64 * + %val = load i64 *%ptr + ret i64 %val +} diff --git a/test/CodeGen/SystemZ/int-move-04.ll b/test/CodeGen/SystemZ/int-move-04.ll new file mode 100644 index 0000000..9736657 --- /dev/null +++ b/test/CodeGen/SystemZ/int-move-04.ll @@ -0,0 +1,130 @@ +; Test 8-bit GPR stores. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test an i8 store, which should get converted into an i32 truncation. +define void @f1(i8 *%dst, i8 %val) { +; CHECK: f1: +; CHECK: stc %r3, 0(%r2) +; CHECK: br %r14 + store i8 %val, i8 *%dst + ret void +} + +; Test an i32 truncating store. 
+define void @f2(i8 *%dst, i32 %val) { +; CHECK: f2: +; CHECK: stc %r3, 0(%r2) +; CHECK: br %r14 + %trunc = trunc i32 %val to i8 + store i8 %trunc, i8 *%dst + ret void +} + +; Test an i64 truncating store. +define void @f3(i8 *%dst, i64 %val) { +; CHECK: f3: +; CHECK: stc %r3, 0(%r2) +; CHECK: br %r14 + %trunc = trunc i64 %val to i8 + store i8 %trunc, i8 *%dst + ret void +} + +; Check the high end of the STC range. +define void @f4(i8 *%dst, i8 %val) { +; CHECK: f4: +; CHECK: stc %r3, 4095(%r2) +; CHECK: br %r14 + %ptr = getelementptr i8 *%dst, i64 4095 + store i8 %val, i8 *%ptr + ret void +} + +; Check the next byte up, which should use STCY instead of STC. +define void @f5(i8 *%dst, i8 %val) { +; CHECK: f5: +; CHECK: stcy %r3, 4096(%r2) +; CHECK: br %r14 + %ptr = getelementptr i8 *%dst, i64 4096 + store i8 %val, i8 *%ptr + ret void +} + +; Check the high end of the STCY range. +define void @f6(i8 *%dst, i8 %val) { +; CHECK: f6: +; CHECK: stcy %r3, 524287(%r2) +; CHECK: br %r14 + %ptr = getelementptr i8 *%dst, i64 524287 + store i8 %val, i8 *%ptr + ret void +} + +; Check the next byte up, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f7(i8 *%dst, i8 %val) { +; CHECK: f7: +; CHECK: agfi %r2, 524288 +; CHECK: stc %r3, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i8 *%dst, i64 524288 + store i8 %val, i8 *%ptr + ret void +} + +; Check the high end of the negative STCY range. +define void @f8(i8 *%dst, i8 %val) { +; CHECK: f8: +; CHECK: stcy %r3, -1(%r2) +; CHECK: br %r14 + %ptr = getelementptr i8 *%dst, i64 -1 + store i8 %val, i8 *%ptr + ret void +} + +; Check the low end of the STCY range. +define void @f9(i8 *%dst, i8 %val) { +; CHECK: f9: +; CHECK: stcy %r3, -524288(%r2) +; CHECK: br %r14 + %ptr = getelementptr i8 *%dst, i64 -524288 + store i8 %val, i8 *%ptr + ret void +} + +; Check the next byte down, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define void @f10(i8 *%dst, i8 %val) { +; CHECK: f10: +; CHECK: agfi %r2, -524289 +; CHECK: stc %r3, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i8 *%dst, i64 -524289 + store i8 %val, i8 *%ptr + ret void +} + +; Check that STC allows an index. +define void @f11(i64 %dst, i64 %index, i8 %val) { +; CHECK: f11: +; CHECK: stc %r4, 4095(%r3,%r2) +; CHECK: br %r14 + %add1 = add i64 %dst, %index + %add2 = add i64 %add1, 4095 + %ptr = inttoptr i64 %add2 to i8 * + store i8 %val, i8 *%ptr + ret void +} + +; Check that STCY allows an index. +define void @f12(i64 %dst, i64 %index, i8 %val) { +; CHECK: f12: +; CHECK: stcy %r4, 4096(%r3,%r2) +; CHECK: br %r14 + %add1 = add i64 %dst, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i8 * + store i8 %val, i8 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/int-move-05.ll b/test/CodeGen/SystemZ/int-move-05.ll new file mode 100644 index 0000000..f61477e --- /dev/null +++ b/test/CodeGen/SystemZ/int-move-05.ll @@ -0,0 +1,130 @@ +; Test 16-bit GPR stores. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test an i16 store, which should get converted into an i32 truncation. +define void @f1(i16 *%dst, i16 %val) { +; CHECK: f1: +; CHECK: sth %r3, 0(%r2) +; CHECK: br %r14 + store i16 %val, i16 *%dst + ret void +} + +; Test an i32 truncating store. +define void @f2(i16 *%dst, i32 %val) { +; CHECK: f2: +; CHECK: sth %r3, 0(%r2) +; CHECK: br %r14 + %trunc = trunc i32 %val to i16 + store i16 %trunc, i16 *%dst + ret void +} + +; Test an i64 truncating store. +define void @f3(i16 *%dst, i64 %val) { +; CHECK: f3: +; CHECK: sth %r3, 0(%r2) +; CHECK: br %r14 + %trunc = trunc i64 %val to i16 + store i16 %trunc, i16 *%dst + ret void +} + +; Check the high end of the STH range. 
+define void @f4(i16 *%dst, i16 %val) { +; CHECK: f4: +; CHECK: sth %r3, 4094(%r2) +; CHECK: br %r14 + %ptr = getelementptr i16 *%dst, i64 2047 + store i16 %val, i16 *%ptr + ret void +} + +; Check the next halfword up, which should use STHY instead of STH. +define void @f5(i16 *%dst, i16 %val) { +; CHECK: f5: +; CHECK: sthy %r3, 4096(%r2) +; CHECK: br %r14 + %ptr = getelementptr i16 *%dst, i64 2048 + store i16 %val, i16 *%ptr + ret void +} + +; Check the high end of the aligned STHY range. +define void @f6(i16 *%dst, i16 %val) { +; CHECK: f6: +; CHECK: sthy %r3, 524286(%r2) +; CHECK: br %r14 + %ptr = getelementptr i16 *%dst, i64 262143 + store i16 %val, i16 *%ptr + ret void +} + +; Check the next halfword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f7(i16 *%dst, i16 %val) { +; CHECK: f7: +; CHECK: agfi %r2, 524288 +; CHECK: sth %r3, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i16 *%dst, i64 262144 + store i16 %val, i16 *%ptr + ret void +} + +; Check the high end of the negative aligned STHY range. +define void @f8(i16 *%dst, i16 %val) { +; CHECK: f8: +; CHECK: sthy %r3, -2(%r2) +; CHECK: br %r14 + %ptr = getelementptr i16 *%dst, i64 -1 + store i16 %val, i16 *%ptr + ret void +} + +; Check the low end of the STHY range. +define void @f9(i16 *%dst, i16 %val) { +; CHECK: f9: +; CHECK: sthy %r3, -524288(%r2) +; CHECK: br %r14 + %ptr = getelementptr i16 *%dst, i64 -262144 + store i16 %val, i16 *%ptr + ret void +} + +; Check the next halfword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f10(i16 *%dst, i16 %val) { +; CHECK: f10: +; CHECK: agfi %r2, -524290 +; CHECK: sth %r3, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i16 *%dst, i64 -262145 + store i16 %val, i16 *%ptr + ret void +} + +; Check that STH allows an index. 
+define void @f11(i64 %dst, i64 %index, i16 %val) { +; CHECK: f11: +; CHECK: sth %r4, 4094({{%r3,%r2|%r2,%r3}}) +; CHECK: br %r14 + %add1 = add i64 %dst, %index + %add2 = add i64 %add1, 4094 + %ptr = inttoptr i64 %add2 to i16 * + store i16 %val, i16 *%ptr + ret void +} + +; Check that STHY allows an index. +define void @f12(i64 %dst, i64 %index, i16 %val) { +; CHECK: f12: +; CHECK: sthy %r4, 4096({{%r3,%r2|%r2,%r3}}) +; CHECK: br %r14 + %add1 = add i64 %dst, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i16 * + store i16 %val, i16 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/int-move-06.ll b/test/CodeGen/SystemZ/int-move-06.ll new file mode 100644 index 0000000..5b35a32 --- /dev/null +++ b/test/CodeGen/SystemZ/int-move-06.ll @@ -0,0 +1,120 @@ +; Test 32-bit GPR stores. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test an i32 store. +define void @f1(i32 *%dst, i32 %val) { +; CHECK: f1: +; CHECK: st %r3, 0(%r2) +; CHECK: br %r14 + store i32 %val, i32 *%dst + ret void +} + +; Test a truncating i64 store. +define void @f2(i32 *%dst, i64 %val) { +; CHECK: f2: +; CHECK: st %r3, 0(%r2) +; CHECK: br %r14 + %word = trunc i64 %val to i32 + store i32 %word, i32 *%dst + ret void +} + +; Check the high end of the aligned ST range. +define void @f3(i32 *%dst, i32 %val) { +; CHECK: f3: +; CHECK: st %r3, 4092(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%dst, i64 1023 + store i32 %val, i32 *%ptr + ret void +} + +; Check the next word up, which should use STY instead of ST. +define void @f4(i32 *%dst, i32 %val) { +; CHECK: f4: +; CHECK: sty %r3, 4096(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%dst, i64 1024 + store i32 %val, i32 *%ptr + ret void +} + +; Check the high end of the aligned STY range. +define void @f5(i32 *%dst, i32 %val) { +; CHECK: f5: +; CHECK: sty %r3, 524284(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%dst, i64 131071 + store i32 %val, i32 *%ptr + ret void +} + +; Check the next word up, which needs separate address logic. 
+; Other sequences besides this one would be OK. +define void @f6(i32 *%dst, i32 %val) { +; CHECK: f6: +; CHECK: agfi %r2, 524288 +; CHECK: st %r3, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%dst, i64 131072 + store i32 %val, i32 *%ptr + ret void +} + +; Check the high end of the negative aligned STY range. +define void @f7(i32 *%dst, i32 %val) { +; CHECK: f7: +; CHECK: sty %r3, -4(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%dst, i64 -1 + store i32 %val, i32 *%ptr + ret void +} + +; Check the low end of the STY range. +define void @f8(i32 *%dst, i32 %val) { +; CHECK: f8: +; CHECK: sty %r3, -524288(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%dst, i64 -131072 + store i32 %val, i32 *%ptr + ret void +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f9(i32 *%dst, i32 %val) { +; CHECK: f9: +; CHECK: agfi %r2, -524292 +; CHECK: st %r3, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%dst, i64 -131073 + store i32 %val, i32 *%ptr + ret void +} + +; Check that ST allows an index. +define void @f10(i64 %dst, i64 %index, i32 %val) { +; CHECK: f10: +; CHECK: st %r4, 4095(%r3,%r2) +; CHECK: br %r14 + %add1 = add i64 %dst, %index + %add2 = add i64 %add1, 4095 + %ptr = inttoptr i64 %add2 to i32 * + store i32 %val, i32 *%ptr + ret void +} + +; Check that STY allows an index. +define void @f11(i64 %dst, i64 %index, i32 %val) { +; CHECK: f11: +; CHECK: sty %r4, 4096(%r3,%r2) +; CHECK: br %r14 + %add1 = add i64 %dst, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i32 * + store i32 %val, i32 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/int-move-07.ll b/test/CodeGen/SystemZ/int-move-07.ll new file mode 100644 index 0000000..ab21ab0 --- /dev/null +++ b/test/CodeGen/SystemZ/int-move-07.ll @@ -0,0 +1,78 @@ +; Test 64-bit GPR stores. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check STG with no displacement. 
+define void @f1(i64 *%dst, i64 %val) { +; CHECK: f1: +; CHECK: stg %r3, 0(%r2) +; CHECK: br %r14 + store i64 %val, i64 *%dst + ret void +} + +; Check the high end of the aligned STG range. +define void @f2(i64 *%dst, i64 %val) { +; CHECK: f2: +; CHECK: stg %r3, 524280(%r2) +; CHECK: br %r14 + %ptr = getelementptr i64 *%dst, i64 65535 + store i64 %val, i64 *%ptr + ret void +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f3(i64 *%dst, i64 %val) { +; CHECK: f3: +; CHECK: agfi %r2, 524288 +; CHECK: stg %r3, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i64 *%dst, i64 65536 + store i64 %val, i64 *%ptr + ret void +} + +; Check the high end of the negative aligned STG range. +define void @f4(i64 *%dst, i64 %val) { +; CHECK: f4: +; CHECK: stg %r3, -8(%r2) +; CHECK: br %r14 + %ptr = getelementptr i64 *%dst, i64 -1 + store i64 %val, i64 *%ptr + ret void +} + +; Check the low end of the STG range. +define void @f5(i64 *%dst, i64 %val) { +; CHECK: f5: +; CHECK: stg %r3, -524288(%r2) +; CHECK: br %r14 + %ptr = getelementptr i64 *%dst, i64 -65536 + store i64 %val, i64 *%ptr + ret void +} + +; Check the next doubleword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f6(i64 *%dst, i64 %val) { +; CHECK: f6: +; CHECK: agfi %r2, -524296 +; CHECK: stg %r3, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i64 *%dst, i64 -65537 + store i64 %val, i64 *%ptr + ret void +} + +; Check that STG allows an index. 
+define void @f7(i64 %dst, i64 %index, i64 %val) { +; CHECK: f7: +; CHECK: stg %r4, 524287({{%r3,%r2|%r2,%r3}}) +; CHECK: br %r14 + %add1 = add i64 %dst, %index + %add2 = add i64 %add1, 524287 + %ptr = inttoptr i64 %add2 to i64 * + store i64 %val, i64 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/int-move-08.ll b/test/CodeGen/SystemZ/int-move-08.ll new file mode 100644 index 0000000..5640fec --- /dev/null +++ b/test/CodeGen/SystemZ/int-move-08.ll @@ -0,0 +1,49 @@ +; Test 32-bit GPR accesses to a PC-relative location. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +@gsrc16 = global i16 1 +@gsrc32 = global i32 1 +@gdst16 = global i16 2 +@gdst32 = global i32 2 + +; Check sign-extending loads from i16. +define i32 @f1() { +; CHECK: f1: +; CHECK: lhrl %r2, gsrc16 +; CHECK: br %r14 + %val = load i16 *@gsrc16 + %ext = sext i16 %val to i32 + ret i32 %ext +} + +; Check zero-extending loads from i16. +define i32 @f2() { +; CHECK: f2: +; CHECK: llhrl %r2, gsrc16 +; CHECK: br %r14 + %val = load i16 *@gsrc16 + %ext = zext i16 %val to i32 + ret i32 %ext +} + +; Check truncating 16-bit stores. +define void @f3(i32 %val) { +; CHECK: f3: +; CHECK: sthrl %r2, gdst16 +; CHECK: br %r14 + %half = trunc i32 %val to i16 + store i16 %half, i16 *@gdst16 + ret void +} + +; Check plain loads and stores. +define void @f4() { +; CHECK: f4: +; CHECK: lrl %r0, gsrc32 +; CHECK: strl %r0, gdst32 +; CHECK: br %r14 + %val = load i32 *@gsrc32 + store i32 %val, i32 *@gdst32 + ret void +} diff --git a/test/CodeGen/SystemZ/int-move-09.ll b/test/CodeGen/SystemZ/int-move-09.ll new file mode 100644 index 0000000..a7a8c82 --- /dev/null +++ b/test/CodeGen/SystemZ/int-move-09.ll @@ -0,0 +1,81 @@ +; Test 64-bit GPR accesses to a PC-relative location. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +@gsrc16 = global i16 1 +@gsrc32 = global i32 1 +@gsrc64 = global i64 1 +@gdst16 = global i16 2 +@gdst32 = global i32 2 +@gdst64 = global i64 2 + +; Check sign-extending loads from i16. +define i64 @f1() { +; CHECK: f1: +; CHECK: lghrl %r2, gsrc16 +; CHECK: br %r14 + %val = load i16 *@gsrc16 + %ext = sext i16 %val to i64 + ret i64 %ext +} + +; Check zero-extending loads from i16. +define i64 @f2() { +; CHECK: f2: +; CHECK: llghrl %r2, gsrc16 +; CHECK: br %r14 + %val = load i16 *@gsrc16 + %ext = zext i16 %val to i64 + ret i64 %ext +} + +; Check sign-extending loads from i32. +define i64 @f3() { +; CHECK: f3: +; CHECK: lgfrl %r2, gsrc32 +; CHECK: br %r14 + %val = load i32 *@gsrc32 + %ext = sext i32 %val to i64 + ret i64 %ext +} + +; Check zero-extending loads from i32. +define i64 @f4() { +; CHECK: f4: +; CHECK: llgfrl %r2, gsrc32 +; CHECK: br %r14 + %val = load i32 *@gsrc32 + %ext = zext i32 %val to i64 + ret i64 %ext +} + +; Check truncating 16-bit stores. +define void @f5(i64 %val) { +; CHECK: f5: +; CHECK: sthrl %r2, gdst16 +; CHECK: br %r14 + %half = trunc i64 %val to i16 + store i16 %half, i16 *@gdst16 + ret void +} + +; Check truncating 32-bit stores. +define void @f6(i64 %val) { +; CHECK: f6: +; CHECK: strl %r2, gdst32 +; CHECK: br %r14 + %word = trunc i64 %val to i32 + store i32 %word, i32 *@gdst32 + ret void +} + +; Check plain loads and stores. +define void @f7() { +; CHECK: f7: +; CHECK: lgrl %r0, gsrc64 +; CHECK: stgrl %r0, gdst64 +; CHECK: br %r14 + %val = load i64 *@gsrc64 + store i64 %val, i64 *@gdst64 + ret void +} diff --git a/test/CodeGen/SystemZ/int-mul-01.ll b/test/CodeGen/SystemZ/int-mul-01.ll new file mode 100644 index 0000000..e1246e2 --- /dev/null +++ b/test/CodeGen/SystemZ/int-mul-01.ll @@ -0,0 +1,131 @@ +; Test 32-bit multiplication in which the second operand is a sign-extended +; i16 memory value. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the low end of the MH range. +define i32 @f1(i32 %lhs, i16 *%src) { +; CHECK: f1: +; CHECK: mh %r2, 0(%r3) +; CHECK: br %r14 + %half = load i16 *%src + %rhs = sext i16 %half to i32 + %res = mul i32 %lhs, %rhs + ret i32 %res +} + +; Check the high end of the aligned MH range. +define i32 @f2(i32 %lhs, i16 *%src) { +; CHECK: f2: +; CHECK: mh %r2, 4094(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 2047 + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %res = mul i32 %lhs, %rhs + ret i32 %res +} + +; Check the next halfword up, which should use MHY instead of MH. +define i32 @f3(i32 %lhs, i16 *%src) { +; CHECK: f3: +; CHECK: mhy %r2, 4096(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 2048 + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %res = mul i32 %lhs, %rhs + ret i32 %res +} + +; Check the high end of the aligned MHY range. +define i32 @f4(i32 %lhs, i16 *%src) { +; CHECK: f4: +; CHECK: mhy %r2, 524286(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 262143 + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %res = mul i32 %lhs, %rhs + ret i32 %res +} + +; Check the next halfword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i32 @f5(i32 %lhs, i16 *%src) { +; CHECK: f5: +; CHECK: agfi %r3, 524288 +; CHECK: mh %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 262144 + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %res = mul i32 %lhs, %rhs + ret i32 %res +} + +; Check the high end of the negative aligned MHY range. +define i32 @f6(i32 %lhs, i16 *%src) { +; CHECK: f6: +; CHECK: mhy %r2, -2(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 -1 + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %res = mul i32 %lhs, %rhs + ret i32 %res +} + +; Check the low end of the MHY range. 
+define i32 @f7(i32 %lhs, i16 *%src) { +; CHECK: f7: +; CHECK: mhy %r2, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 -262144 + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %res = mul i32 %lhs, %rhs + ret i32 %res +} + +; Check the next halfword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define i32 @f8(i32 %lhs, i16 *%src) { +; CHECK: f8: +; CHECK: agfi %r3, -524290 +; CHECK: mh %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i16 *%src, i64 -262145 + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %res = mul i32 %lhs, %rhs + ret i32 %res +} + +; Check that MH allows an index. +define i32 @f9(i32 %lhs, i64 %src, i64 %index) { +; CHECK: f9: +; CHECK: mh %r2, 4094({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4094 + %ptr = inttoptr i64 %add2 to i16 * + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %res = mul i32 %lhs, %rhs + ret i32 %res +} + +; Check that MHY allows an index. +define i32 @f10(i32 %lhs, i64 %src, i64 %index) { +; CHECK: f10: +; CHECK: mhy %r2, 4096({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i16 * + %half = load i16 *%ptr + %rhs = sext i16 %half to i32 + %res = mul i32 %lhs, %rhs + ret i32 %res +} diff --git a/test/CodeGen/SystemZ/int-mul-02.ll b/test/CodeGen/SystemZ/int-mul-02.ll new file mode 100644 index 0000000..d39c4dd --- /dev/null +++ b/test/CodeGen/SystemZ/int-mul-02.ll @@ -0,0 +1,129 @@ +; Test 32-bit multiplication in which the second operand is variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check MSR. +define i32 @f1(i32 %a, i32 %b) { +; CHECK: f1: +; CHECK: msr %r2, %r3 +; CHECK: br %r14 + %mul = mul i32 %a, %b + ret i32 %mul +} + +; Check the low end of the MS range. 
+define i32 @f2(i32 %a, i32 *%src) { +; CHECK: f2: +; CHECK: ms %r2, 0(%r3) +; CHECK: br %r14 + %b = load i32 *%src + %mul = mul i32 %a, %b + ret i32 %mul +} + +; Check the high end of the aligned MS range. +define i32 @f3(i32 %a, i32 *%src) { +; CHECK: f3: +; CHECK: ms %r2, 4092(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 1023 + %b = load i32 *%ptr + %mul = mul i32 %a, %b + ret i32 %mul +} + +; Check the next word up, which should use MSY instead of MS. +define i32 @f4(i32 %a, i32 *%src) { +; CHECK: f4: +; CHECK: msy %r2, 4096(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 1024 + %b = load i32 *%ptr + %mul = mul i32 %a, %b + ret i32 %mul +} + +; Check the high end of the aligned MSY range. +define i32 @f5(i32 %a, i32 *%src) { +; CHECK: f5: +; CHECK: msy %r2, 524284(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131071 + %b = load i32 *%ptr + %mul = mul i32 %a, %b + ret i32 %mul +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i32 @f6(i32 %a, i32 *%src) { +; CHECK: f6: +; CHECK: agfi %r3, 524288 +; CHECK: ms %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131072 + %b = load i32 *%ptr + %mul = mul i32 %a, %b + ret i32 %mul +} + +; Check the high end of the negative aligned MSY range. +define i32 @f7(i32 %a, i32 *%src) { +; CHECK: f7: +; CHECK: msy %r2, -4(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -1 + %b = load i32 *%ptr + %mul = mul i32 %a, %b + ret i32 %mul +} + +; Check the low end of the MSY range. +define i32 @f8(i32 %a, i32 *%src) { +; CHECK: f8: +; CHECK: msy %r2, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131072 + %b = load i32 *%ptr + %mul = mul i32 %a, %b + ret i32 %mul +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define i32 @f9(i32 %a, i32 *%src) { +; CHECK: f9: +; CHECK: agfi %r3, -524292 +; CHECK: ms %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131073 + %b = load i32 *%ptr + %mul = mul i32 %a, %b + ret i32 %mul +} + +; Check that MS allows an index. +define i32 @f10(i32 %a, i64 %src, i64 %index) { +; CHECK: f10: +; CHECK: ms %r2, 4092({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4092 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32 *%ptr + %mul = mul i32 %a, %b + ret i32 %mul +} + +; Check that MSY allows an index. +define i32 @f11(i32 %a, i64 %src, i64 %index) { +; CHECK: f11: +; CHECK: msy %r2, 4096({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32 *%ptr + %mul = mul i32 %a, %b + ret i32 %mul +} diff --git a/test/CodeGen/SystemZ/int-mul-03.ll b/test/CodeGen/SystemZ/int-mul-03.ll new file mode 100644 index 0000000..ab4ef9e --- /dev/null +++ b/test/CodeGen/SystemZ/int-mul-03.ll @@ -0,0 +1,102 @@ +; Test multiplications between an i64 and a sign-extended i32. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check MSGFR. +define i64 @f1(i64 %a, i32 %b) { +; CHECK: f1: +; CHECK: msgfr %r2, %r3 +; CHECK: br %r14 + %bext = sext i32 %b to i64 + %mul = mul i64 %a, %bext + ret i64 %mul +} + +; Check MSGF with no displacement. +define i64 @f2(i64 %a, i32 *%src) { +; CHECK: f2: +; CHECK: msgf %r2, 0(%r3) +; CHECK: br %r14 + %b = load i32 *%src + %bext = sext i32 %b to i64 + %mul = mul i64 %a, %bext + ret i64 %mul +} + +; Check the high end of the aligned MSGF range. +define i64 @f3(i64 %a, i32 *%src) { +; CHECK: f3: +; CHECK: msgf %r2, 524284(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131071 + %b = load i32 *%ptr + %bext = sext i32 %b to i64 + %mul = mul i64 %a, %bext + ret i64 %mul +} + +; Check the next word up, which needs separate address logic. 
+; Other sequences besides this one would be OK. +define i64 @f4(i64 %a, i32 *%src) { +; CHECK: f4: +; CHECK: agfi %r3, 524288 +; CHECK: msgf %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131072 + %b = load i32 *%ptr + %bext = sext i32 %b to i64 + %mul = mul i64 %a, %bext + ret i64 %mul +} + +; Check the high end of the negative aligned MSGF range. +define i64 @f5(i64 %a, i32 *%src) { +; CHECK: f5: +; CHECK: msgf %r2, -4(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -1 + %b = load i32 *%ptr + %bext = sext i32 %b to i64 + %mul = mul i64 %a, %bext + ret i64 %mul +} + +; Check the low end of the MSGF range. +define i64 @f6(i64 %a, i32 *%src) { +; CHECK: f6: +; CHECK: msgf %r2, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131072 + %b = load i32 *%ptr + %bext = sext i32 %b to i64 + %mul = mul i64 %a, %bext + ret i64 %mul +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f7(i64 %a, i32 *%src) { +; CHECK: f7: +; CHECK: agfi %r3, -524292 +; CHECK: msgf %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131073 + %b = load i32 *%ptr + %bext = sext i32 %b to i64 + %mul = mul i64 %a, %bext + ret i64 %mul +} + +; Check that MSGF allows an index. +define i64 @f8(i64 %a, i64 %src, i64 %index) { +; CHECK: f8: +; CHECK: msgf %r2, 524284({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524284 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32 *%ptr + %bext = sext i32 %b to i64 + %mul = mul i64 %a, %bext + ret i64 %mul +} diff --git a/test/CodeGen/SystemZ/int-mul-04.ll b/test/CodeGen/SystemZ/int-mul-04.ll new file mode 100644 index 0000000..94c2639 --- /dev/null +++ b/test/CodeGen/SystemZ/int-mul-04.ll @@ -0,0 +1,94 @@ +; Test 64-bit multiplication in which the second operand is variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check MSGR. 
+define i64 @f1(i64 %a, i64 %b) { +; CHECK: f1: +; CHECK: msgr %r2, %r3 +; CHECK: br %r14 + %mul = mul i64 %a, %b + ret i64 %mul +} + +; Check MSG with no displacement. +define i64 @f2(i64 %a, i64 *%src) { +; CHECK: f2: +; CHECK: msg %r2, 0(%r3) +; CHECK: br %r14 + %b = load i64 *%src + %mul = mul i64 %a, %b + ret i64 %mul +} + +; Check the high end of the aligned MSG range. +define i64 @f3(i64 %a, i64 *%src) { +; CHECK: f3: +; CHECK: msg %r2, 524280(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65535 + %b = load i64 *%ptr + %mul = mul i64 %a, %b + ret i64 %mul +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f4(i64 %a, i64 *%src) { +; CHECK: f4: +; CHECK: agfi %r3, 524288 +; CHECK: msg %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65536 + %b = load i64 *%ptr + %mul = mul i64 %a, %b + ret i64 %mul +} + +; Check the high end of the negative aligned MSG range. +define i64 @f5(i64 %a, i64 *%src) { +; CHECK: f5: +; CHECK: msg %r2, -8(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -1 + %b = load i64 *%ptr + %mul = mul i64 %a, %b + ret i64 %mul +} + +; Check the low end of the MSG range. +define i64 @f6(i64 %a, i64 *%src) { +; CHECK: f6: +; CHECK: msg %r2, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65536 + %b = load i64 *%ptr + %mul = mul i64 %a, %b + ret i64 %mul +} + +; Check the next doubleword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f7(i64 %a, i64 *%src) { +; CHECK: f7: +; CHECK: agfi %r3, -524296 +; CHECK: msg %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65537 + %b = load i64 *%ptr + %mul = mul i64 %a, %b + ret i64 %mul +} + +; Check that MSG allows an index. 
+define i64 @f8(i64 %a, i64 %src, i64 %index) { +; CHECK: f8: +; CHECK: msg %r2, 524280({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524280 + %ptr = inttoptr i64 %add2 to i64 * + %b = load i64 *%ptr + %mul = mul i64 %a, %b + ret i64 %mul +} diff --git a/test/CodeGen/SystemZ/int-mul-05.ll b/test/CodeGen/SystemZ/int-mul-05.ll new file mode 100644 index 0000000..5e4031b --- /dev/null +++ b/test/CodeGen/SystemZ/int-mul-05.ll @@ -0,0 +1,159 @@ +; Test 32-bit multiplication in which the second operand is constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check multiplication by 2, which should use shifts. +define i32 @f1(i32 %a, i32 *%dest) { +; CHECK: f1: +; CHECK: sll %r2, 1 +; CHECK: br %r14 + %mul = mul i32 %a, 2 + ret i32 %mul +} + +; Check multiplication by 3. +define i32 @f2(i32 %a, i32 *%dest) { +; CHECK: f2: +; CHECK: mhi %r2, 3 +; CHECK: br %r14 + %mul = mul i32 %a, 3 + ret i32 %mul +} + +; Check the high end of the MHI range. +define i32 @f3(i32 %a, i32 *%dest) { +; CHECK: f3: +; CHECK: mhi %r2, 32767 +; CHECK: br %r14 + %mul = mul i32 %a, 32767 + ret i32 %mul +} + +; Check the next value up, which should use shifts. +define i32 @f4(i32 %a, i32 *%dest) { +; CHECK: f4: +; CHECK: sll %r2, 15 +; CHECK: br %r14 + %mul = mul i32 %a, 32768 + ret i32 %mul +} + +; Check the next value up again, which can use MSFI. +define i32 @f5(i32 %a, i32 *%dest) { +; CHECK: f5: +; CHECK: msfi %r2, 32769 +; CHECK: br %r14 + %mul = mul i32 %a, 32769 + ret i32 %mul +} + +; Check the high end of the MSFI range. +define i32 @f6(i32 %a, i32 *%dest) { +; CHECK: f6: +; CHECK: msfi %r2, 2147483647 +; CHECK: br %r14 + %mul = mul i32 %a, 2147483647 + ret i32 %mul +} + +; Check the next value up, which should use shifts. 
+define i32 @f7(i32 %a, i32 *%dest) { +; CHECK: f7: +; CHECK: sll %r2, 31 +; CHECK: br %r14 + %mul = mul i32 %a, 2147483648 + ret i32 %mul +} + +; Check the next value up again, which is treated as a negative value. +define i32 @f8(i32 %a, i32 *%dest) { +; CHECK: f8: +; CHECK: msfi %r2, -2147483647 +; CHECK: br %r14 + %mul = mul i32 %a, 2147483649 + ret i32 %mul +} + +; Check multiplication by -1, which is a negation. +define i32 @f9(i32 %a, i32 *%dest) { +; CHECK: f9: +; CHECK: lcr %r2, %r2 +; CHECK: br %r14 + %mul = mul i32 %a, -1 + ret i32 %mul +} + +; Check multiplication by -2, which should use shifts. +define i32 @f10(i32 %a, i32 *%dest) { +; CHECK: f10: +; CHECK: sll %r2, 1 +; CHECK: lcr %r2, %r2 +; CHECK: br %r14 + %mul = mul i32 %a, -2 + ret i32 %mul +} + +; Check multiplication by -3. +define i32 @f11(i32 %a, i32 *%dest) { +; CHECK: f11: +; CHECK: mhi %r2, -3 +; CHECK: br %r14 + %mul = mul i32 %a, -3 + ret i32 %mul +} + +; Check the lowest useful MHI value. +define i32 @f12(i32 %a, i32 *%dest) { +; CHECK: f12: +; CHECK: mhi %r2, -32767 +; CHECK: br %r14 + %mul = mul i32 %a, -32767 + ret i32 %mul +} + +; Check the next value down, which should use shifts. +define i32 @f13(i32 %a, i32 *%dest) { +; CHECK: f13: +; CHECK: sll %r2, 15 +; CHECK: lcr %r2, %r2 +; CHECK: br %r14 + %mul = mul i32 %a, -32768 + ret i32 %mul +} + +; Check the next value down again, which can use MSFI. +define i32 @f14(i32 %a, i32 *%dest) { +; CHECK: f14: +; CHECK: msfi %r2, -32769 +; CHECK: br %r14 + %mul = mul i32 %a, -32769 + ret i32 %mul +} + +; Check the lowest useful MSFI value. +define i32 @f15(i32 %a, i32 *%dest) { +; CHECK: f15: +; CHECK: msfi %r2, -2147483647 +; CHECK: br %r14 + %mul = mul i32 %a, -2147483647 + ret i32 %mul +} + +; Check the next value down, which should use shifts. 
+define i32 @f16(i32 %a, i32 *%dest) { +; CHECK: f16: +; CHECK: sll %r2, 31 +; CHECK-NOT: lcr +; CHECK: br %r14 + %mul = mul i32 %a, -2147483648 + ret i32 %mul +} + +; Check the next value down again, which is treated as a positive value. +define i32 @f17(i32 %a, i32 *%dest) { +; CHECK: f17: +; CHECK: msfi %r2, 2147483647 +; CHECK: br %r14 + %mul = mul i32 %a, -2147483649 + ret i32 %mul +} diff --git a/test/CodeGen/SystemZ/int-mul-06.ll b/test/CodeGen/SystemZ/int-mul-06.ll new file mode 100644 index 0000000..a354605 --- /dev/null +++ b/test/CodeGen/SystemZ/int-mul-06.ll @@ -0,0 +1,159 @@ +; Test 64-bit multiplication in which the second operand is constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check multiplication by 2, which should use shifts. +define i64 @f1(i64 %a, i64 *%dest) { +; CHECK: f1: +; CHECK: sllg %r2, %r2, 1 +; CHECK: br %r14 + %mul = mul i64 %a, 2 + ret i64 %mul +} + +; Check multiplication by 3. +define i64 @f2(i64 %a, i64 *%dest) { +; CHECK: f2: +; CHECK: mghi %r2, 3 +; CHECK: br %r14 + %mul = mul i64 %a, 3 + ret i64 %mul +} + +; Check the high end of the MGHI range. +define i64 @f3(i64 %a, i64 *%dest) { +; CHECK: f3: +; CHECK: mghi %r2, 32767 +; CHECK: br %r14 + %mul = mul i64 %a, 32767 + ret i64 %mul +} + +; Check the next value up, which should use shifts. +define i64 @f4(i64 %a, i64 *%dest) { +; CHECK: f4: +; CHECK: sllg %r2, %r2, 15 +; CHECK: br %r14 + %mul = mul i64 %a, 32768 + ret i64 %mul +} + +; Check the next value up again, which can use MSGFI. +define i64 @f5(i64 %a, i64 *%dest) { +; CHECK: f5: +; CHECK: msgfi %r2, 32769 +; CHECK: br %r14 + %mul = mul i64 %a, 32769 + ret i64 %mul +} + +; Check the high end of the MSGFI range. +define i64 @f6(i64 %a, i64 *%dest) { +; CHECK: f6: +; CHECK: msgfi %r2, 2147483647 +; CHECK: br %r14 + %mul = mul i64 %a, 2147483647 + ret i64 %mul +} + +; Check the next value up, which should use shifts. 
+define i64 @f7(i64 %a, i64 *%dest) { +; CHECK: f7: +; CHECK: sllg %r2, %r2, 31 +; CHECK: br %r14 + %mul = mul i64 %a, 2147483648 + ret i64 %mul +} + +; Check the next value up again, which cannot use a constant multiplication. +define i64 @f8(i64 %a, i64 *%dest) { +; CHECK: f8: +; CHECK-NOT: msgfi +; CHECK: br %r14 + %mul = mul i64 %a, 2147483649 + ret i64 %mul +} + +; Check multiplication by -1, which is a negation. +define i64 @f9(i64 %a, i64 *%dest) { +; CHECK: f9: +; CHECK: lcgr {{%r[0-5]}}, %r2 +; CHECK: br %r14 + %mul = mul i64 %a, -1 + ret i64 %mul +} + +; Check multiplication by -2, which should use shifts. +define i64 @f10(i64 %a, i64 *%dest) { +; CHECK: f10: +; CHECK: sllg [[SHIFTED:%r[0-5]]], %r2, 1 +; CHECK: lcgr %r2, [[SHIFTED]] +; CHECK: br %r14 + %mul = mul i64 %a, -2 + ret i64 %mul +} + +; Check multiplication by -3. +define i64 @f11(i64 %a, i64 *%dest) { +; CHECK: f11: +; CHECK: mghi %r2, -3 +; CHECK: br %r14 + %mul = mul i64 %a, -3 + ret i64 %mul +} + +; Check the lowest useful MGHI value. +define i64 @f12(i64 %a, i64 *%dest) { +; CHECK: f12: +; CHECK: mghi %r2, -32767 +; CHECK: br %r14 + %mul = mul i64 %a, -32767 + ret i64 %mul +} + +; Check the next value down, which should use shifts. +define i64 @f13(i64 %a, i64 *%dest) { +; CHECK: f13: +; CHECK: sllg [[SHIFTED:%r[0-5]]], %r2, 15 +; CHECK: lcgr %r2, [[SHIFTED]] +; CHECK: br %r14 + %mul = mul i64 %a, -32768 + ret i64 %mul +} + +; Check the next value down again, which can use MSGFI. +define i64 @f14(i64 %a, i64 *%dest) { +; CHECK: f14: +; CHECK: msgfi %r2, -32769 +; CHECK: br %r14 + %mul = mul i64 %a, -32769 + ret i64 %mul +} + +; Check the lowest useful MSGFI value. +define i64 @f15(i64 %a, i64 *%dest) { +; CHECK: f15: +; CHECK: msgfi %r2, -2147483647 +; CHECK: br %r14 + %mul = mul i64 %a, -2147483647 + ret i64 %mul +} + +; Check the next value down, which should use shifts. 
+define i64 @f16(i64 %a, i64 *%dest) { +; CHECK: f16: +; CHECK: sllg [[SHIFTED:%r[0-5]]], %r2, 31 +; CHECK: lcgr %r2, [[SHIFTED]] +; CHECK: br %r14 + %mul = mul i64 %a, -2147483648 + ret i64 %mul +} + +; Check the next value down again, which cannot use constant multiplication +define i64 @f17(i64 %a, i64 *%dest) { +; CHECK: f17: +; CHECK-NOT: msgfi +; CHECK: br %r14 + %mul = mul i64 %a, -2147483649 + ret i64 %mul +} diff --git a/test/CodeGen/SystemZ/int-mul-07.ll b/test/CodeGen/SystemZ/int-mul-07.ll new file mode 100644 index 0000000..2459cc3 --- /dev/null +++ b/test/CodeGen/SystemZ/int-mul-07.ll @@ -0,0 +1,64 @@ +; Test high-part i32->i64 multiplications. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; We don't provide *MUL_LOHI or MULH* for the patterns in this file, +; but they should at least still work. + +; Check zero-extended multiplication in which only the high part is used. +define i32 @f1(i32 %a, i32 %b) { +; CHECK: f1: +; CHECK: msgr +; CHECK: br %r14 + %ax = zext i32 %a to i64 + %bx = zext i32 %b to i64 + %mulx = mul i64 %ax, %bx + %highx = lshr i64 %mulx, 32 + %high = trunc i64 %highx to i32 + ret i32 %high +} + +; Check sign-extended multiplication in which only the high part is used. +define i32 @f2(i32 %a, i32 %b) { +; CHECK: f2: +; CHECK: msgfr +; CHECK: br %r14 + %ax = sext i32 %a to i64 + %bx = sext i32 %b to i64 + %mulx = mul i64 %ax, %bx + %highx = lshr i64 %mulx, 32 + %high = trunc i64 %highx to i32 + ret i32 %high +} + +; Check zero-extended multiplication in which the result is split into +; high and low halves. +define i32 @f3(i32 %a, i32 %b) { +; CHECK: f3: +; CHECK: msgr +; CHECK: br %r14 + %ax = zext i32 %a to i64 + %bx = zext i32 %b to i64 + %mulx = mul i64 %ax, %bx + %highx = lshr i64 %mulx, 32 + %high = trunc i64 %highx to i32 + %low = trunc i64 %mulx to i32 + %or = or i32 %high, %low + ret i32 %or +} + +; Check sign-extended multiplication in which the result is split into +; high and low halves. 
+define i32 @f4(i32 %a, i32 %b) { +; CHECK: f4: +; CHECK: msgfr +; CHECK: br %r14 + %ax = sext i32 %a to i64 + %bx = sext i32 %b to i64 + %mulx = mul i64 %ax, %bx + %highx = lshr i64 %mulx, 32 + %high = trunc i64 %highx to i32 + %low = trunc i64 %mulx to i32 + %or = or i32 %high, %low + ret i32 %or +} diff --git a/test/CodeGen/SystemZ/int-mul-08.ll b/test/CodeGen/SystemZ/int-mul-08.ll new file mode 100644 index 0000000..09ebe7a --- /dev/null +++ b/test/CodeGen/SystemZ/int-mul-08.ll @@ -0,0 +1,188 @@ +; Test high-part i64->i128 multiplications. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check zero-extended multiplication in which only the high part is used. +define i64 @f1(i64 %dummy, i64 %a, i64 %b) { +; CHECK: f1: +; CHECK-NOT: {{%r[234]}} +; CHECK: mlgr %r2, %r4 +; CHECK: br %r14 + %ax = zext i64 %a to i128 + %bx = zext i64 %b to i128 + %mulx = mul i128 %ax, %bx + %highx = lshr i128 %mulx, 64 + %high = trunc i128 %highx to i64 + ret i64 %high +} + +; Check sign-extended multiplication in which only the high part is used. +; This needs a rather convoluted sequence. +define i64 @f2(i64 %dummy, i64 %a, i64 %b) { +; CHECK: f2: +; CHECK: mlgr +; CHECK: agr +; CHECK: agr +; CHECK: br %r14 + %ax = sext i64 %a to i128 + %bx = sext i64 %b to i128 + %mulx = mul i128 %ax, %bx + %highx = lshr i128 %mulx, 64 + %high = trunc i128 %highx to i64 + ret i64 %high +} + +; Check zero-extended multiplication in which only part of the high half +; is used. +define i64 @f3(i64 %dummy, i64 %a, i64 %b) { +; CHECK: f3: +; CHECK-NOT: {{%r[234]}} +; CHECK: mlgr %r2, %r4 +; CHECK: srlg %r2, %r2, 3 +; CHECK: br %r14 + %ax = zext i64 %a to i128 + %bx = zext i64 %b to i128 + %mulx = mul i128 %ax, %bx + %highx = lshr i128 %mulx, 67 + %high = trunc i128 %highx to i64 + ret i64 %high +} + +; Check zero-extended multiplication in which the result is split into +; high and low halves. 
+define i64 @f4(i64 %dummy, i64 %a, i64 %b) { +; CHECK: f4: +; CHECK-NOT: {{%r[234]}} +; CHECK: mlgr %r2, %r4 +; CHECK: ogr %r2, %r3 +; CHECK: br %r14 + %ax = zext i64 %a to i128 + %bx = zext i64 %b to i128 + %mulx = mul i128 %ax, %bx + %highx = lshr i128 %mulx, 64 + %high = trunc i128 %highx to i64 + %low = trunc i128 %mulx to i64 + %or = or i64 %high, %low + ret i64 %or +} + +; Check division by a constant, which should use multiplication instead. +define i64 @f5(i64 %dummy, i64 %a) { +; CHECK: f5: +; CHECK: mlgr %r2, +; CHECK: srlg %r2, %r2, +; CHECK: br %r14 + %res = udiv i64 %a, 1234 + ret i64 %res +} + +; Check MLG with no displacement. +define i64 @f6(i64 %dummy, i64 %a, i64 *%src) { +; CHECK: f6: +; CHECK-NOT: {{%r[234]}} +; CHECK: mlg %r2, 0(%r4) +; CHECK: br %r14 + %b = load i64 *%src + %ax = zext i64 %a to i128 + %bx = zext i64 %b to i128 + %mulx = mul i128 %ax, %bx + %highx = lshr i128 %mulx, 64 + %high = trunc i128 %highx to i64 + ret i64 %high +} + +; Check the high end of the aligned MLG range. +define i64 @f7(i64 %dummy, i64 %a, i64 *%src) { +; CHECK: f7: +; CHECK: mlg %r2, 524280(%r4) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65535 + %b = load i64 *%ptr + %ax = zext i64 %a to i128 + %bx = zext i64 %b to i128 + %mulx = mul i128 %ax, %bx + %highx = lshr i128 %mulx, 64 + %high = trunc i128 %highx to i64 + ret i64 %high +} + +; Check the next doubleword up, which requires separate address logic. +; Other sequences besides this one would be OK. +define i64 @f8(i64 %dummy, i64 %a, i64 *%src) { +; CHECK: f8: +; CHECK: agfi %r4, 524288 +; CHECK: mlg %r2, 0(%r4) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65536 + %b = load i64 *%ptr + %ax = zext i64 %a to i128 + %bx = zext i64 %b to i128 + %mulx = mul i128 %ax, %bx + %highx = lshr i128 %mulx, 64 + %high = trunc i128 %highx to i64 + ret i64 %high +} + +; Check the high end of the negative aligned MLG range. 
+define i64 @f9(i64 %dummy, i64 %a, i64 *%src) { +; CHECK: f9: +; CHECK: mlg %r2, -8(%r4) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -1 + %b = load i64 *%ptr + %ax = zext i64 %a to i128 + %bx = zext i64 %b to i128 + %mulx = mul i128 %ax, %bx + %highx = lshr i128 %mulx, 64 + %high = trunc i128 %highx to i64 + ret i64 %high +} + +; Check the low end of the MLG range. +define i64 @f10(i64 %dummy, i64 %a, i64 *%src) { +; CHECK: f10: +; CHECK: mlg %r2, -524288(%r4) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65536 + %b = load i64 *%ptr + %ax = zext i64 %a to i128 + %bx = zext i64 %b to i128 + %mulx = mul i128 %ax, %bx + %highx = lshr i128 %mulx, 64 + %high = trunc i128 %highx to i64 + ret i64 %high +} + +; Check the next doubleword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f11(i64 *%dest, i64 %a, i64 *%src) { +; CHECK: f11: +; CHECK: agfi %r4, -524296 +; CHECK: mlg %r2, 0(%r4) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65537 + %b = load i64 *%ptr + %ax = zext i64 %a to i128 + %bx = zext i64 %b to i128 + %mulx = mul i128 %ax, %bx + %highx = lshr i128 %mulx, 64 + %high = trunc i128 %highx to i64 + ret i64 %high +} + +; Check that MLG allows an index. +define i64 @f12(i64 *%dest, i64 %a, i64 %src, i64 %index) { +; CHECK: f12: +; CHECK: mlg %r2, 524287(%r5,%r4) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524287 + %ptr = inttoptr i64 %add2 to i64 * + %b = load i64 *%ptr + %ax = zext i64 %a to i128 + %bx = zext i64 %b to i128 + %mulx = mul i128 %ax, %bx + %highx = lshr i128 %mulx, 64 + %high = trunc i128 %highx to i64 + ret i64 %high +} diff --git a/test/CodeGen/SystemZ/int-neg-01.ll b/test/CodeGen/SystemZ/int-neg-01.ll new file mode 100644 index 0000000..6114f4e --- /dev/null +++ b/test/CodeGen/SystemZ/int-neg-01.ll @@ -0,0 +1,42 @@ +; Test integer negation. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test i32->i32 negation. 
+define i32 @f1(i32 %val) { +; CHECK: f1: +; CHECK: lcr %r2, %r2 +; CHECK: br %r14 + %neg = sub i32 0, %val + ret i32 %neg +} + +; Test i32->i64 negation. +define i64 @f2(i32 %val) { +; CHECK: f2: +; CHECK: lcgfr %r2, %r2 +; CHECK: br %r14 + %ext = sext i32 %val to i64 + %neg = sub i64 0, %ext + ret i64 %neg +} + +; Test i32->i64 negation that uses an "in-register" form of sign extension. +define i64 @f3(i64 %val) { +; CHECK: f3: +; CHECK: lcgfr %r2, %r2 +; CHECK: br %r14 + %trunc = trunc i64 %val to i32 + %ext = sext i32 %trunc to i64 + %neg = sub i64 0, %ext + ret i64 %neg +} + +; Test i64 negation. +define i64 @f4(i64 %val) { +; CHECK: f4: +; CHECK: lcgr %r2, %r2 +; CHECK: br %r14 + %neg = sub i64 0, %val + ret i64 %neg +} diff --git a/test/CodeGen/SystemZ/int-sub-01.ll b/test/CodeGen/SystemZ/int-sub-01.ll new file mode 100644 index 0000000..9a73814 --- /dev/null +++ b/test/CodeGen/SystemZ/int-sub-01.ll @@ -0,0 +1,129 @@ +; Test 32-bit subtraction. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check SR. +define i32 @f1(i32 %a, i32 %b) { +; CHECK: f1: +; CHECK: sr %r2, %r3 +; CHECK: br %r14 + %sub = sub i32 %a, %b + ret i32 %sub +} + +; Check the low end of the S range. +define i32 @f2(i32 %a, i32 *%src) { +; CHECK: f2: +; CHECK: s %r2, 0(%r3) +; CHECK: br %r14 + %b = load i32 *%src + %sub = sub i32 %a, %b + ret i32 %sub +} + +; Check the high end of the aligned S range. +define i32 @f3(i32 %a, i32 *%src) { +; CHECK: f3: +; CHECK: s %r2, 4092(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 1023 + %b = load i32 *%ptr + %sub = sub i32 %a, %b + ret i32 %sub +} + +; Check the next word up, which should use SY instead of S. +define i32 @f4(i32 %a, i32 *%src) { +; CHECK: f4: +; CHECK: sy %r2, 4096(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 1024 + %b = load i32 *%ptr + %sub = sub i32 %a, %b + ret i32 %sub +} + +; Check the high end of the aligned SY range. 
+define i32 @f5(i32 %a, i32 *%src) { +; CHECK: f5: +; CHECK: sy %r2, 524284(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131071 + %b = load i32 *%ptr + %sub = sub i32 %a, %b + ret i32 %sub +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i32 @f6(i32 %a, i32 *%src) { +; CHECK: f6: +; CHECK: agfi %r3, 524288 +; CHECK: s %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131072 + %b = load i32 *%ptr + %sub = sub i32 %a, %b + ret i32 %sub +} + +; Check the high end of the negative aligned SY range. +define i32 @f7(i32 %a, i32 *%src) { +; CHECK: f7: +; CHECK: sy %r2, -4(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -1 + %b = load i32 *%ptr + %sub = sub i32 %a, %b + ret i32 %sub +} + +; Check the low end of the SY range. +define i32 @f8(i32 %a, i32 *%src) { +; CHECK: f8: +; CHECK: sy %r2, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131072 + %b = load i32 *%ptr + %sub = sub i32 %a, %b + ret i32 %sub +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define i32 @f9(i32 %a, i32 *%src) { +; CHECK: f9: +; CHECK: agfi %r3, -524292 +; CHECK: s %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131073 + %b = load i32 *%ptr + %sub = sub i32 %a, %b + ret i32 %sub +} + +; Check that S allows an index. +define i32 @f10(i32 %a, i64 %src, i64 %index) { +; CHECK: f10: +; CHECK: s %r2, 4092({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4092 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32 *%ptr + %sub = sub i32 %a, %b + ret i32 %sub +} + +; Check that SY allows an index. 
+define i32 @f11(i32 %a, i64 %src, i64 %index) { +; CHECK: f11: +; CHECK: sy %r2, 4096({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32 *%ptr + %sub = sub i32 %a, %b + ret i32 %sub +} diff --git a/test/CodeGen/SystemZ/int-sub-02.ll b/test/CodeGen/SystemZ/int-sub-02.ll new file mode 100644 index 0000000..5150a96 --- /dev/null +++ b/test/CodeGen/SystemZ/int-sub-02.ll @@ -0,0 +1,102 @@ +; Test subtractions of a sign-extended i32 from an i64. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check SGFR. +define i64 @f1(i64 %a, i32 %b) { +; CHECK: f1: +; CHECK: sgfr %r2, %r3 +; CHECK: br %r14 + %bext = sext i32 %b to i64 + %sub = sub i64 %a, %bext + ret i64 %sub +} + +; Check SGF with no displacement. +define i64 @f2(i64 %a, i32 *%src) { +; CHECK: f2: +; CHECK: sgf %r2, 0(%r3) +; CHECK: br %r14 + %b = load i32 *%src + %bext = sext i32 %b to i64 + %sub = sub i64 %a, %bext + ret i64 %sub +} + +; Check the high end of the aligned SGF range. +define i64 @f3(i64 %a, i32 *%src) { +; CHECK: f3: +; CHECK: sgf %r2, 524284(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131071 + %b = load i32 *%ptr + %bext = sext i32 %b to i64 + %sub = sub i64 %a, %bext + ret i64 %sub +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f4(i64 %a, i32 *%src) { +; CHECK: f4: +; CHECK: agfi %r3, 524288 +; CHECK: sgf %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131072 + %b = load i32 *%ptr + %bext = sext i32 %b to i64 + %sub = sub i64 %a, %bext + ret i64 %sub +} + +; Check the high end of the negative aligned SGF range. 
+define i64 @f5(i64 %a, i32 *%src) { +; CHECK: f5: +; CHECK: sgf %r2, -4(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -1 + %b = load i32 *%ptr + %bext = sext i32 %b to i64 + %sub = sub i64 %a, %bext + ret i64 %sub +} + +; Check the low end of the SGF range. +define i64 @f6(i64 %a, i32 *%src) { +; CHECK: f6: +; CHECK: sgf %r2, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131072 + %b = load i32 *%ptr + %bext = sext i32 %b to i64 + %sub = sub i64 %a, %bext + ret i64 %sub +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f7(i64 %a, i32 *%src) { +; CHECK: f7: +; CHECK: agfi %r3, -524292 +; CHECK: sgf %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131073 + %b = load i32 *%ptr + %bext = sext i32 %b to i64 + %sub = sub i64 %a, %bext + ret i64 %sub +} + +; Check that SGF allows an index. +define i64 @f8(i64 %a, i64 %src, i64 %index) { +; CHECK: f8: +; CHECK: sgf %r2, 524284({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524284 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32 *%ptr + %bext = sext i32 %b to i64 + %sub = sub i64 %a, %bext + ret i64 %sub +} diff --git a/test/CodeGen/SystemZ/int-sub-03.ll b/test/CodeGen/SystemZ/int-sub-03.ll new file mode 100644 index 0000000..73571b3 --- /dev/null +++ b/test/CodeGen/SystemZ/int-sub-03.ll @@ -0,0 +1,102 @@ +; Test subtractions of a zero-extended i32 from an i64. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check SLGFR. +define i64 @f1(i64 %a, i32 %b) { +; CHECK: f1: +; CHECK: slgfr %r2, %r3 +; CHECK: br %r14 + %bext = zext i32 %b to i64 + %sub = sub i64 %a, %bext + ret i64 %sub +} + +; Check SLGF with no displacement. 
+define i64 @f2(i64 %a, i32 *%src) { +; CHECK: f2: +; CHECK: slgf %r2, 0(%r3) +; CHECK: br %r14 + %b = load i32 *%src + %bext = zext i32 %b to i64 + %sub = sub i64 %a, %bext + ret i64 %sub +} + +; Check the high end of the aligned SLGF range. +define i64 @f3(i64 %a, i32 *%src) { +; CHECK: f3: +; CHECK: slgf %r2, 524284(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131071 + %b = load i32 *%ptr + %bext = zext i32 %b to i64 + %sub = sub i64 %a, %bext + ret i64 %sub +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f4(i64 %a, i32 *%src) { +; CHECK: f4: +; CHECK: agfi %r3, 524288 +; CHECK: slgf %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131072 + %b = load i32 *%ptr + %bext = zext i32 %b to i64 + %sub = sub i64 %a, %bext + ret i64 %sub +} + +; Check the high end of the negative aligned SLGF range. +define i64 @f5(i64 %a, i32 *%src) { +; CHECK: f5: +; CHECK: slgf %r2, -4(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -1 + %b = load i32 *%ptr + %bext = zext i32 %b to i64 + %sub = sub i64 %a, %bext + ret i64 %sub +} + +; Check the low end of the SLGF range. +define i64 @f6(i64 %a, i32 *%src) { +; CHECK: f6: +; CHECK: slgf %r2, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131072 + %b = load i32 *%ptr + %bext = zext i32 %b to i64 + %sub = sub i64 %a, %bext + ret i64 %sub +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f7(i64 %a, i32 *%src) { +; CHECK: f7: +; CHECK: agfi %r3, -524292 +; CHECK: slgf %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131073 + %b = load i32 *%ptr + %bext = zext i32 %b to i64 + %sub = sub i64 %a, %bext + ret i64 %sub +} + +; Check that SLGF allows an index. 
+define i64 @f8(i64 %a, i64 %src, i64 %index) { +; CHECK: f8: +; CHECK: slgf %r2, 524284({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524284 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32 *%ptr + %bext = zext i32 %b to i64 + %sub = sub i64 %a, %bext + ret i64 %sub +} diff --git a/test/CodeGen/SystemZ/int-sub-04.ll b/test/CodeGen/SystemZ/int-sub-04.ll new file mode 100644 index 0000000..545d342 --- /dev/null +++ b/test/CodeGen/SystemZ/int-sub-04.ll @@ -0,0 +1,94 @@ +; Test 64-bit subtraction in which the second operand is variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check SGR. +define i64 @f1(i64 %a, i64 %b) { +; CHECK: f1: +; CHECK: sgr %r2, %r3 +; CHECK: br %r14 + %sub = sub i64 %a, %b + ret i64 %sub +} + +; Check SG with no displacement. +define i64 @f2(i64 %a, i64 *%src) { +; CHECK: f2: +; CHECK: sg %r2, 0(%r3) +; CHECK: br %r14 + %b = load i64 *%src + %sub = sub i64 %a, %b + ret i64 %sub +} + +; Check the high end of the aligned SG range. +define i64 @f3(i64 %a, i64 *%src) { +; CHECK: f3: +; CHECK: sg %r2, 524280(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65535 + %b = load i64 *%ptr + %sub = sub i64 %a, %b + ret i64 %sub +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f4(i64 %a, i64 *%src) { +; CHECK: f4: +; CHECK: agfi %r3, 524288 +; CHECK: sg %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65536 + %b = load i64 *%ptr + %sub = sub i64 %a, %b + ret i64 %sub +} + +; Check the high end of the negative aligned SG range. +define i64 @f5(i64 %a, i64 *%src) { +; CHECK: f5: +; CHECK: sg %r2, -8(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -1 + %b = load i64 *%ptr + %sub = sub i64 %a, %b + ret i64 %sub +} + +; Check the low end of the SG range. 
+define i64 @f6(i64 %a, i64 *%src) { +; CHECK: f6: +; CHECK: sg %r2, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65536 + %b = load i64 *%ptr + %sub = sub i64 %a, %b + ret i64 %sub +} + +; Check the next doubleword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f7(i64 %a, i64 *%src) { +; CHECK: f7: +; CHECK: agfi %r3, -524296 +; CHECK: sg %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65537 + %b = load i64 *%ptr + %sub = sub i64 %a, %b + ret i64 %sub +} + +; Check that SG allows an index. +define i64 @f8(i64 %a, i64 %src, i64 %index) { +; CHECK: f8: +; CHECK: sg %r2, 524280({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524280 + %ptr = inttoptr i64 %add2 to i64 * + %b = load i64 *%ptr + %sub = sub i64 %a, %b + ret i64 %sub +} diff --git a/test/CodeGen/SystemZ/int-sub-05.ll b/test/CodeGen/SystemZ/int-sub-05.ll new file mode 100644 index 0000000..1475b24 --- /dev/null +++ b/test/CodeGen/SystemZ/int-sub-05.ll @@ -0,0 +1,118 @@ +; Test 128-bit subtraction in which the second operand is variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test register subtraction. +define void @f1(i128 *%ptr, i64 %high, i64 %low) { +; CHECK: f1: +; CHECK: slgr {{%r[0-5]}}, %r4 +; CHECK: slbgr {{%r[0-5]}}, %r3 +; CHECK: br %r14 + %a = load i128 *%ptr + %highx = zext i64 %high to i128 + %lowx = zext i64 %low to i128 + %bhigh = shl i128 %highx, 64 + %b = or i128 %bhigh, %lowx + %sub = sub i128 %a, %b + store i128 %sub, i128 *%ptr + ret void +} + +; Test memory subtraction with no offset. 
+define void @f2(i64 %addr) { +; CHECK: f2: +; CHECK: slg {{%r[0-5]}}, 8(%r2) +; CHECK: slbg {{%r[0-5]}}, 0(%r2) +; CHECK: br %r14 + %bptr = inttoptr i64 %addr to i128 * + %aptr = getelementptr i128 *%bptr, i64 -8 + %a = load i128 *%aptr + %b = load i128 *%bptr + %sub = sub i128 %a, %b + store i128 %sub, i128 *%aptr + ret void +} + +; Test the highest aligned offset that is in range of both SLG and SLBG. +define void @f3(i64 %base) { +; CHECK: f3: +; CHECK: slg {{%r[0-5]}}, 524280(%r2) +; CHECK: slbg {{%r[0-5]}}, 524272(%r2) +; CHECK: br %r14 + %addr = add i64 %base, 524272 + %bptr = inttoptr i64 %addr to i128 * + %aptr = getelementptr i128 *%bptr, i64 -8 + %a = load i128 *%aptr + %b = load i128 *%bptr + %sub = sub i128 %a, %b + store i128 %sub, i128 *%aptr + ret void +} + +; Test the next doubleword up, which requires separate address logic for SLG. +define void @f4(i64 %base) { +; CHECK: f4: +; CHECK: lgr [[BASE:%r[1-5]]], %r2 +; CHECK: agfi [[BASE]], 524288 +; CHECK: slg {{%r[0-5]}}, 0([[BASE]]) +; CHECK: slbg {{%r[0-5]}}, 524280(%r2) +; CHECK: br %r14 + %addr = add i64 %base, 524280 + %bptr = inttoptr i64 %addr to i128 * + %aptr = getelementptr i128 *%bptr, i64 -8 + %a = load i128 *%aptr + %b = load i128 *%bptr + %sub = sub i128 %a, %b + store i128 %sub, i128 *%aptr + ret void +} + +; Test the next doubleword after that, which requires separate logic for +; both instructions. It would be better to create an anchor at 524288 +; that both instructions can use, but that isn't implemented yet. +define void @f5(i64 %base) { +; CHECK: f5: +; CHECK: slg {{%r[0-5]}}, 0({{%r[1-5]}}) +; CHECK: slbg {{%r[0-5]}}, 0({{%r[1-5]}}) +; CHECK: br %r14 + %addr = add i64 %base, 524288 + %bptr = inttoptr i64 %addr to i128 * + %aptr = getelementptr i128 *%bptr, i64 -8 + %a = load i128 *%aptr + %b = load i128 *%bptr + %sub = sub i128 %a, %b + store i128 %sub, i128 *%aptr + ret void +} + +; Test the lowest displacement that is in range of both SLG and SLBG. 
+define void @f6(i64 %base) { +; CHECK: f6: +; CHECK: slg {{%r[0-5]}}, -524280(%r2) +; CHECK: slbg {{%r[0-5]}}, -524288(%r2) +; CHECK: br %r14 + %addr = add i64 %base, -524288 + %bptr = inttoptr i64 %addr to i128 * + %aptr = getelementptr i128 *%bptr, i64 -8 + %a = load i128 *%aptr + %b = load i128 *%bptr + %sub = sub i128 %a, %b + store i128 %sub, i128 *%aptr + ret void +} + +; Test the next doubleword down, which is out of range of the SLBG. +define void @f7(i64 %base) { +; CHECK: f7: +; CHECK: slg {{%r[0-5]}}, -524288(%r2) +; CHECK: slbg {{%r[0-5]}}, 0({{%r[1-5]}}) +; CHECK: br %r14 + %addr = add i64 %base, -524296 + %bptr = inttoptr i64 %addr to i128 * + %aptr = getelementptr i128 *%bptr, i64 -8 + %a = load i128 *%aptr + %b = load i128 *%bptr + %sub = sub i128 %a, %b + store i128 %sub, i128 *%aptr + ret void +} diff --git a/test/CodeGen/SystemZ/int-sub-06.ll b/test/CodeGen/SystemZ/int-sub-06.ll new file mode 100644 index 0000000..0e04d51 --- /dev/null +++ b/test/CodeGen/SystemZ/int-sub-06.ll @@ -0,0 +1,165 @@ +; Test 128-bit subtraction in which the second operand is a zero-extended i32. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check register subtractions. The XOR ensures that we don't instead zero-extend +; %b into a register and use memory subtraction. +define void @f1(i128 *%aptr, i32 %b) { +; CHECK: f1: +; CHECK: slgfr {{%r[0-5]}}, %r3 +; CHECK: slbgr +; CHECK: br %r14 + %a = load i128 *%aptr + %xor = xor i128 %a, 127 + %bext = zext i32 %b to i128 + %sub = sub i128 %xor, %bext + store i128 %sub, i128 *%aptr + ret void +} + +; Like f1, but using an "in-register" extension. 
+define void @f2(i128 *%aptr, i64 %b) { +; CHECK: f2: +; CHECK: slgfr {{%r[0-5]}}, %r3 +; CHECK: slbgr +; CHECK: br %r14 + %a = load i128 *%aptr + %xor = xor i128 %a, 127 + %trunc = trunc i64 %b to i32 + %bext = zext i32 %trunc to i128 + %sub = sub i128 %xor, %bext + store i128 %sub, i128 *%aptr + ret void +} + +; Test register subtraction in cases where the second operand is zero extended +; from i64 rather than i32, but is later masked to i32 range. +define void @f3(i128 *%aptr, i64 %b) { +; CHECK: f3: +; CHECK: slgfr {{%r[0-5]}}, %r3 +; CHECK: slbgr +; CHECK: br %r14 + %a = load i128 *%aptr + %xor = xor i128 %a, 127 + %bext = zext i64 %b to i128 + %and = and i128 %bext, 4294967295 + %sub = sub i128 %xor, %and + store i128 %sub, i128 *%aptr + ret void +} + +; Test SLGF with no offset. +define void @f4(i128 *%aptr, i32 *%bsrc) { +; CHECK: f4: +; CHECK: slgf {{%r[0-5]}}, 0(%r3) +; CHECK: slbgr +; CHECK: br %r14 + %a = load i128 *%aptr + %xor = xor i128 %a, 127 + %b = load i32 *%bsrc + %bext = zext i32 %b to i128 + %sub = sub i128 %xor, %bext + store i128 %sub, i128 *%aptr + ret void +} + +; Check the high end of the SLGF range. +define void @f5(i128 *%aptr, i32 *%bsrc) { +; CHECK: f5: +; CHECK: slgf {{%r[0-5]}}, 524284(%r3) +; CHECK: slbgr +; CHECK: br %r14 + %a = load i128 *%aptr + %xor = xor i128 %a, 127 + %ptr = getelementptr i32 *%bsrc, i64 131071 + %b = load i32 *%ptr + %bext = zext i32 %b to i128 + %sub = sub i128 %xor, %bext + store i128 %sub, i128 *%aptr + ret void +} + +; Check the next word up, which must use separate address logic. +; Other sequences besides this one would be OK. 
+define void @f6(i128 *%aptr, i32 *%bsrc) { +; CHECK: f6: +; CHECK: agfi %r3, 524288 +; CHECK: slgf {{%r[0-5]}}, 0(%r3) +; CHECK: slbgr +; CHECK: br %r14 + %a = load i128 *%aptr + %xor = xor i128 %a, 127 + %ptr = getelementptr i32 *%bsrc, i64 131072 + %b = load i32 *%ptr + %bext = zext i32 %b to i128 + %sub = sub i128 %xor, %bext + store i128 %sub, i128 *%aptr + ret void +} + +; Check the high end of the negative aligned SLGF range. +define void @f7(i128 *%aptr, i32 *%bsrc) { +; CHECK: f7: +; CHECK: slgf {{%r[0-5]}}, -4(%r3) +; CHECK: slbgr +; CHECK: br %r14 + %a = load i128 *%aptr + %xor = xor i128 %a, 127 + %ptr = getelementptr i32 *%bsrc, i128 -1 + %b = load i32 *%ptr + %bext = zext i32 %b to i128 + %sub = sub i128 %xor, %bext + store i128 %sub, i128 *%aptr + ret void +} + +; Check the low end of the SLGF range. +define void @f8(i128 *%aptr, i32 *%bsrc) { +; CHECK: f8: +; CHECK: slgf {{%r[0-5]}}, -524288(%r3) +; CHECK: slbgr +; CHECK: br %r14 + %a = load i128 *%aptr + %xor = xor i128 %a, 127 + %ptr = getelementptr i32 *%bsrc, i128 -131072 + %b = load i32 *%ptr + %bext = zext i32 %b to i128 + %sub = sub i128 %xor, %bext + store i128 %sub, i128 *%aptr + ret void +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f9(i128 *%aptr, i32 *%bsrc) { +; CHECK: f9: +; CHECK: agfi %r3, -524292 +; CHECK: slgf {{%r[0-5]}}, 0(%r3) +; CHECK: slbgr +; CHECK: br %r14 + %a = load i128 *%aptr + %xor = xor i128 %a, 127 + %ptr = getelementptr i32 *%bsrc, i128 -131073 + %b = load i32 *%ptr + %bext = zext i32 %b to i128 + %sub = sub i128 %xor, %bext + store i128 %sub, i128 *%aptr + ret void +} + +; Check that SLGF allows an index. 
+define void @f10(i128 *%aptr, i64 %src, i64 %index) { +; CHECK: f10: +; CHECK: slgf {{%r[0-5]}}, 524284({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %a = load i128 *%aptr + %xor = xor i128 %a, 127 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524284 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32 *%ptr + %bext = zext i32 %b to i128 + %sub = sub i128 %xor, %bext + store i128 %sub, i128 *%aptr + ret void +} diff --git a/test/CodeGen/SystemZ/la-01.ll b/test/CodeGen/SystemZ/la-01.ll new file mode 100644 index 0000000..b43e3f8 --- /dev/null +++ b/test/CodeGen/SystemZ/la-01.ll @@ -0,0 +1,80 @@ +; Test loads of symbolic addresses when generating small-model non-PIC. +; All addresses can be treated as PC-relative. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +@e4 = external global i32 +@d4 = global i32 1 +@e2 = external global i32, align 2 +@d2 = global i32 1, align 2 +@e1 = external global i32, align 1 +@d1 = global i32 1, align 1 + +declare void @ef() +define void @df() { + ret void +} + +; Test a load of a fully-aligned external variable. +define i32 *@f1() { +; CHECK: f1: +; CHECK: larl %r2, e4 +; CHECK-NEXT: br %r14 + ret i32 *@e4 +} + +; Test a load of a fully-aligned local variable. +define i32 *@f2() { +; CHECK: f2: +; CHECK: larl %r2, d4 +; CHECK-NEXT: br %r14 + ret i32 *@d4 +} + +; Test a load of a 2-byte-aligned external variable. +define i32 *@f3() { +; CHECK: f3: +; CHECK: larl %r2, e2 +; CHECK-NEXT: br %r14 + ret i32 *@e2 +} + +; Test a load of a 2-byte-aligned local variable. +define i32 *@f4() { +; CHECK: f4: +; CHECK: larl %r2, d2 +; CHECK-NEXT: br %r14 + ret i32 *@d2 +} + +; Test a load of an unaligned external variable, which must go via the GOT. +define i32 *@f5() { +; CHECK: f5: +; CHECK: lgrl %r2, e1@GOT +; CHECK-NEXT: br %r14 + ret i32 *@e1 +} + +; Test a load of an unaligned local variable, which must go via the GOT. 
+define i32 *@f6() { +; CHECK: f6: +; CHECK: lgrl %r2, d1@GOT +; CHECK-NEXT: br %r14 + ret i32 *@d1 +} + +; Test a load of an external function. +define void() *@f7() { +; CHECK: f7: +; CHECK: larl %r2, ef +; CHECK-NEXT: br %r14 + ret void() *@ef +} + +; Test a load of a local function. +define void() *@f8() { +; CHECK: f8: +; CHECK: larl %r2, df +; CHECK-NEXT: br %r14 + ret void() *@df +} diff --git a/test/CodeGen/SystemZ/la-02.ll b/test/CodeGen/SystemZ/la-02.ll new file mode 100644 index 0000000..4c5374a --- /dev/null +++ b/test/CodeGen/SystemZ/la-02.ll @@ -0,0 +1,87 @@ +; Test loads of symbolic addresses when generating medium- and +; large-model non-PIC. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -code-model=medium | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -code-model=large | FileCheck %s + +@ev = external global i32 +@dv = global i32 0 +@pv = protected global i32 0 +@hv = hidden global i32 0 + +declare void @ef() +define void @df() { + ret void +} +define protected void @pf() { + ret void +} +define hidden void @hf() { + ret void +} + +; Test loads of external variables. There is no guarantee that the +; variable will be in range of LARL. +define i32 *@f1() { +; CHECK: f1: +; CHECK: lgrl %r2, ev@GOT +; CHECK: br %r14 + ret i32 *@ev +} + +; ...likewise locally-defined normal-visibility variables. +define i32 *@f2() { +; CHECK: f2: +; CHECK: lgrl %r2, dv@GOT +; CHECK: br %r14 + ret i32 *@dv +} + +; ...likewise protected variables. +define i32 *@f3() { +; CHECK: f3: +; CHECK: lgrl %r2, pv@GOT +; CHECK: br %r14 + ret i32 *@pv +} + +; ...likewise hidden variables. +define i32 *@f4() { +; CHECK: f4: +; CHECK: lgrl %r2, hv@GOT +; CHECK: br %r14 + ret i32 *@hv +} + +; Check loads of external functions. This could use LARL, but we don't have +; code to detect that yet. +define void() *@f5() { +; CHECK: f5: +; CHECK: lgrl %r2, ef@GOT +; CHECK: br %r14 + ret void() *@ef +} + +; ...likewise locally-defined normal-visibility functions. 
+define void() *@f6() { +; CHECK: f6: +; CHECK: lgrl %r2, df@GOT +; CHECK: br %r14 + ret void() *@df +} + +; ...likewise protected functions. +define void() *@f7() { +; CHECK: f7: +; CHECK: lgrl %r2, pf@GOT +; CHECK: br %r14 + ret void() *@pf +} + +; ...likewise hidden functions. +define void() *@f8() { +; CHECK: f8: +; CHECK: lgrl %r2, hf@GOT +; CHECK: br %r14 + ret void() *@hf +} diff --git a/test/CodeGen/SystemZ/la-03.ll b/test/CodeGen/SystemZ/la-03.ll new file mode 100644 index 0000000..9449b2b --- /dev/null +++ b/test/CodeGen/SystemZ/la-03.ll @@ -0,0 +1,85 @@ +; Test loads of symbolic addresses in PIC code. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -relocation-model=pic | FileCheck %s + +@ev = external global i32 +@dv = global i32 0 +@pv = protected global i32 0 +@hv = hidden global i32 0 + +declare void @ef() +define void @df() { + ret void +} +define protected void @pf() { + ret void +} +define hidden void @hf() { + ret void +} + +; Test loads of external variables, which must go via the GOT. +define i32 *@f1() { +; CHECK: f1: +; CHECK: lgrl %r2, ev@GOT +; CHECK: br %r14 + ret i32 *@ev +} + +; Check loads of locally-defined normal-visibility variables, which might +; be overridden. The load must go via the GOT. +define i32 *@f2() { +; CHECK: f2: +; CHECK: lgrl %r2, dv@GOT +; CHECK: br %r14 + ret i32 *@dv +} + +; Check loads of protected variables, which in the small code model +; must be in range of LARL. +define i32 *@f3() { +; CHECK: f3: +; CHECK: larl %r2, pv +; CHECK: br %r14 + ret i32 *@pv +} + +; ...likewise hidden variables. +define i32 *@f4() { +; CHECK: f4: +; CHECK: larl %r2, hv +; CHECK: br %r14 + ret i32 *@hv +} + +; Like f1, but for functions. +define void() *@f5() { +; CHECK: f5: +; CHECK: lgrl %r2, ef@GOT +; CHECK: br %r14 + ret void() *@ef +} + +; Like f2, but for functions. +define void() *@f6() { +; CHECK: f6: +; CHECK: lgrl %r2, df@GOT +; CHECK: br %r14 + ret void() *@df +} + +; Like f3, but for functions. 
+define void() *@f7() { +; CHECK: f7: +; CHECK: larl %r2, pf +; CHECK: br %r14 + ret void() *@pf +} + +; Like f4, but for functions. +define void() *@f8() { +; CHECK: f8: +; CHECK: larl %r2, hf +; CHECK: br %r14 + ret void() *@hf +} diff --git a/test/CodeGen/SystemZ/la-04.ll b/test/CodeGen/SystemZ/la-04.ll new file mode 100644 index 0000000..4c36364 --- /dev/null +++ b/test/CodeGen/SystemZ/la-04.ll @@ -0,0 +1,18 @@ +; Test blockaddress. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Do some arbitrary work and return the address of the following label. +define i8 *@f1(i8 *%addr) { +; CHECK: f1: +; CHECK: mvi 0(%r2), 1 +; CHECK: [[LABEL:\.L.*]]: +; CHECK: larl %r2, [[LABEL]] +; CHECK: br %r14 +entry: + store i8 1, i8 *%addr + br label %b.lab + +b.lab: + ret i8 *blockaddress(@f1, %b.lab) +} diff --git a/test/CodeGen/SystemZ/lit.local.cfg b/test/CodeGen/SystemZ/lit.local.cfg new file mode 100644 index 0000000..79528d1 --- /dev/null +++ b/test/CodeGen/SystemZ/lit.local.cfg @@ -0,0 +1,6 @@ +config.suffixes = ['.ll', '.c', '.cpp'] + +targets = set(config.root.targets_to_build.split()) +if not 'SystemZ' in targets: + config.unsupported = True + diff --git a/test/CodeGen/SystemZ/or-01.ll b/test/CodeGen/SystemZ/or-01.ll new file mode 100644 index 0000000..20c9312 --- /dev/null +++ b/test/CodeGen/SystemZ/or-01.ll @@ -0,0 +1,129 @@ +; Test 32-bit ORs in which the second operand is variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check OR. +define i32 @f1(i32 %a, i32 %b) { +; CHECK: f1: +; CHECK: or %r2, %r3 +; CHECK: br %r14 + %or = or i32 %a, %b + ret i32 %or +} + +; Check the low end of the O range. +define i32 @f2(i32 %a, i32 *%src) { +; CHECK: f2: +; CHECK: o %r2, 0(%r3) +; CHECK: br %r14 + %b = load i32 *%src + %or = or i32 %a, %b + ret i32 %or +} + +; Check the high end of the aligned O range. 
+define i32 @f3(i32 %a, i32 *%src) { +; CHECK: f3: +; CHECK: o %r2, 4092(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 1023 + %b = load i32 *%ptr + %or = or i32 %a, %b + ret i32 %or +} + +; Check the next word up, which should use OY instead of O. +define i32 @f4(i32 %a, i32 *%src) { +; CHECK: f4: +; CHECK: oy %r2, 4096(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 1024 + %b = load i32 *%ptr + %or = or i32 %a, %b + ret i32 %or +} + +; Check the high end of the aligned OY range. +define i32 @f5(i32 %a, i32 *%src) { +; CHECK: f5: +; CHECK: oy %r2, 524284(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131071 + %b = load i32 *%ptr + %or = or i32 %a, %b + ret i32 %or +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i32 @f6(i32 %a, i32 *%src) { +; CHECK: f6: +; CHECK: agfi %r3, 524288 +; CHECK: o %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131072 + %b = load i32 *%ptr + %or = or i32 %a, %b + ret i32 %or +} + +; Check the high end of the negative aligned OY range. +define i32 @f7(i32 %a, i32 *%src) { +; CHECK: f7: +; CHECK: oy %r2, -4(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -1 + %b = load i32 *%ptr + %or = or i32 %a, %b + ret i32 %or +} + +; Check the low end of the OY range. +define i32 @f8(i32 %a, i32 *%src) { +; CHECK: f8: +; CHECK: oy %r2, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131072 + %b = load i32 *%ptr + %or = or i32 %a, %b + ret i32 %or +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define i32 @f9(i32 %a, i32 *%src) { +; CHECK: f9: +; CHECK: agfi %r3, -524292 +; CHECK: o %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131073 + %b = load i32 *%ptr + %or = or i32 %a, %b + ret i32 %or +} + +; Check that O allows an index. 
+define i32 @f10(i32 %a, i64 %src, i64 %index) { +; CHECK: f10: +; CHECK: o %r2, 4092({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4092 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32 *%ptr + %or = or i32 %a, %b + ret i32 %or +} + +; Check that OY allows an index. +define i32 @f11(i32 %a, i64 %src, i64 %index) { +; CHECK: f11: +; CHECK: oy %r2, 4096({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32 *%ptr + %or = or i32 %a, %b + ret i32 %or +} diff --git a/test/CodeGen/SystemZ/or-02.ll b/test/CodeGen/SystemZ/or-02.ll new file mode 100644 index 0000000..377a3e6 --- /dev/null +++ b/test/CodeGen/SystemZ/or-02.ll @@ -0,0 +1,66 @@ +; Test 32-bit ORs in which the second operand is constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the lowest useful OILL value. +define i32 @f1(i32 %a) { +; CHECK: f1: +; CHECK: oill %r2, 1 +; CHECK: br %r14 + %or = or i32 %a, 1 + ret i32 %or +} + +; Check the high end of the OILL range. +define i32 @f2(i32 %a) { +; CHECK: f2: +; CHECK: oill %r2, 65535 +; CHECK: br %r14 + %or = or i32 %a, 65535 + ret i32 %or +} + +; Check the lowest useful OILH value, which is the next value up. +define i32 @f3(i32 %a) { +; CHECK: f3: +; CHECK: oilh %r2, 1 +; CHECK: br %r14 + %or = or i32 %a, 65536 + ret i32 %or +} + +; Check the lowest useful OILF value, which is the next value up again. +define i32 @f4(i32 %a) { +; CHECK: f4: +; CHECK: oilf %r2, 65537 +; CHECK: br %r14 + %or = or i32 %a, 65537 + ret i32 %or +} + +; Check the high end of the OILH range. +define i32 @f5(i32 %a) { +; CHECK: f5: +; CHECK: oilh %r2, 65535 +; CHECK: br %r14 + %or = or i32 %a, -65536 + ret i32 %or +} + +; Check the next value up, which must use OILF instead. 
+define i32 @f6(i32 %a) { +; CHECK: f6: +; CHECK: oilf %r2, 4294901761 +; CHECK: br %r14 + %or = or i32 %a, -65535 + ret i32 %or +} + +; Check the highest useful OILF value. +define i32 @f7(i32 %a) { +; CHECK: f7: +; CHECK: oilf %r2, 4294967294 +; CHECK: br %r14 + %or = or i32 %a, -2 + ret i32 %or +} diff --git a/test/CodeGen/SystemZ/or-03.ll b/test/CodeGen/SystemZ/or-03.ll new file mode 100644 index 0000000..16f84f1 --- /dev/null +++ b/test/CodeGen/SystemZ/or-03.ll @@ -0,0 +1,94 @@ +; Test 64-bit ORs in which the second operand is variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check OGR. +define i64 @f1(i64 %a, i64 %b) { +; CHECK: f1: +; CHECK: ogr %r2, %r3 +; CHECK: br %r14 + %or = or i64 %a, %b + ret i64 %or +} + +; Check OG with no displacement. +define i64 @f2(i64 %a, i64 *%src) { +; CHECK: f2: +; CHECK: og %r2, 0(%r3) +; CHECK: br %r14 + %b = load i64 *%src + %or = or i64 %a, %b + ret i64 %or +} + +; Check the high end of the aligned OG range. +define i64 @f3(i64 %a, i64 *%src) { +; CHECK: f3: +; CHECK: og %r2, 524280(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65535 + %b = load i64 *%ptr + %or = or i64 %a, %b + ret i64 %or +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f4(i64 %a, i64 *%src) { +; CHECK: f4: +; CHECK: agfi %r3, 524288 +; CHECK: og %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65536 + %b = load i64 *%ptr + %or = or i64 %a, %b + ret i64 %or +} + +; Check the high end of the negative aligned OG range. +define i64 @f5(i64 %a, i64 *%src) { +; CHECK: f5: +; CHECK: og %r2, -8(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -1 + %b = load i64 *%ptr + %or = or i64 %a, %b + ret i64 %or +} + +; Check the low end of the OG range. 
+define i64 @f6(i64 %a, i64 *%src) { +; CHECK: f6: +; CHECK: og %r2, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65536 + %b = load i64 *%ptr + %or = or i64 %a, %b + ret i64 %or +} + +; Check the next doubleword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f7(i64 %a, i64 *%src) { +; CHECK: f7: +; CHECK: agfi %r3, -524296 +; CHECK: og %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65537 + %b = load i64 *%ptr + %or = or i64 %a, %b + ret i64 %or +} + +; Check that OG allows an index. +define i64 @f8(i64 %a, i64 %src, i64 %index) { +; CHECK: f8: +; CHECK: og %r2, 524280({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524280 + %ptr = inttoptr i64 %add2 to i64 * + %b = load i64 *%ptr + %or = or i64 %a, %b + ret i64 %or +} diff --git a/test/CodeGen/SystemZ/or-04.ll b/test/CodeGen/SystemZ/or-04.ll new file mode 100644 index 0000000..a827842 --- /dev/null +++ b/test/CodeGen/SystemZ/or-04.ll @@ -0,0 +1,182 @@ +; Test 64-bit ORs in which the second operand is constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the lowest useful OILL value. +define i64 @f1(i64 %a) { +; CHECK: f1: +; CHECK: oill %r2, 1 +; CHECK: br %r14 + %or = or i64 %a, 1 + ret i64 %or +} + +; Check the high end of the OILL range. +define i64 @f2(i64 %a) { +; CHECK: f2: +; CHECK: oill %r2, 65535 +; CHECK: br %r14 + %or = or i64 %a, 65535 + ret i64 %or +} + +; Check the lowest useful OILH value, which is the next value up. +define i64 @f3(i64 %a) { +; CHECK: f3: +; CHECK: oilh %r2, 1 +; CHECK: br %r14 + %or = or i64 %a, 65536 + ret i64 %or +} + +; Check the lowest useful OILF value, which is the next value up again. +define i64 @f4(i64 %a) { +; CHECK: f4: +; CHECK: oilf %r2, 4294901759 +; CHECK: br %r14 + %or = or i64 %a, 4294901759 + ret i64 %or +} + +; Check the high end of the OILH range. 
+define i64 @f5(i64 %a) { +; CHECK: f5: +; CHECK: oilh %r2, 65535 +; CHECK: br %r14 + %or = or i64 %a, 4294901760 + ret i64 %or +} + +; Check the high end of the OILF range. +define i64 @f6(i64 %a) { +; CHECK: f6: +; CHECK: oilf %r2, 4294967295 +; CHECK: br %r14 + %or = or i64 %a, 4294967295 + ret i64 %or +} + +; Check the lowest useful OIHL value, which is the next value up. +define i64 @f7(i64 %a) { +; CHECK: f7: +; CHECK: oihl %r2, 1 +; CHECK: br %r14 + %or = or i64 %a, 4294967296 + ret i64 %or +} + +; Check the next value up again, which must use two ORs. +define i64 @f8(i64 %a) { +; CHECK: f8: +; CHECK: oihl %r2, 1 +; CHECK: oill %r2, 1 +; CHECK: br %r14 + %or = or i64 %a, 4294967297 + ret i64 %or +} + +; Check the high end of the OILL range. +define i64 @f9(i64 %a) { +; CHECK: f9: +; CHECK: oihl %r2, 1 +; CHECK: oill %r2, 65535 +; CHECK: br %r14 + %or = or i64 %a, 4295032831 + ret i64 %or +} + +; Check the next value up, which must use OILH +define i64 @f10(i64 %a) { +; CHECK: f10: +; CHECK: oihl %r2, 1 +; CHECK: oilh %r2, 1 +; CHECK: br %r14 + %or = or i64 %a, 4295032832 + ret i64 %or +} + +; Check the next value up again, which must use OILF +define i64 @f11(i64 %a) { +; CHECK: f11: +; CHECK: oihl %r2, 1 +; CHECK: oilf %r2, 65537 +; CHECK: br %r14 + %or = or i64 %a, 4295032833 + ret i64 %or +} + +; Check the high end of the OIHL range. +define i64 @f12(i64 %a) { +; CHECK: f12: +; CHECK: oihl %r2, 65535 +; CHECK: br %r14 + %or = or i64 %a, 281470681743360 + ret i64 %or +} + +; Check a combination of the high end of the OIHL range and the high end +; of the OILF range. +define i64 @f13(i64 %a) { +; CHECK: f13: +; CHECK: oihl %r2, 65535 +; CHECK: oilf %r2, 4294967295 +; CHECK: br %r14 + %or = or i64 %a, 281474976710655 + ret i64 %or +} + +; Check the lowest useful OIHH value. +define i64 @f14(i64 %a) { +; CHECK: f14: +; CHECK: oihh %r2, 1 +; CHECK: br %r14 + %or = or i64 %a, 281474976710656 + ret i64 %or +} + +; Check the next value up, which needs two ORs. 
+define i64 @f15(i64 %a) { +; CHECK: f15: +; CHECK: oihh %r2, 1 +; CHECK: oill %r2, 1 +; CHECK: br %r14 + %or = or i64 %a, 281474976710657 + ret i64 %or +} + +; Check the lowest useful OIHF value. +define i64 @f16(i64 %a) { +; CHECK: f16: +; CHECK: oihf %r2, 65537 +; CHECK: br %r14 + %or = or i64 %a, 281479271677952 + ret i64 %or +} + +; Check the high end of the OIHH range. +define i64 @f17(i64 %a) { +; CHECK: f17: +; CHECK: oihh %r2, 65535 +; CHECK: br %r14 + %or = or i64 %a, 18446462598732840960 + ret i64 %or +} + +; Check the high end of the OIHF range. +define i64 @f18(i64 %a) { +; CHECK: f18: +; CHECK: oihf %r2, 4294967295 +; CHECK: br %r14 + %or = or i64 %a, -4294967296 + ret i64 %or +} + +; Check the highest useful OR value. +define i64 @f19(i64 %a) { +; CHECK: f19: +; CHECK: oihf %r2, 4294967295 +; CHECK: oilf %r2, 4294967294 +; CHECK: br %r14 + %or = or i64 %a, -2 + ret i64 %or +} diff --git a/test/CodeGen/SystemZ/or-05.ll b/test/CodeGen/SystemZ/or-05.ll new file mode 100644 index 0000000..9b6c10d --- /dev/null +++ b/test/CodeGen/SystemZ/or-05.ll @@ -0,0 +1,165 @@ +; Test ORs of a constant into a byte of memory. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the lowest useful constant, expressed as a signed integer. +define void @f1(i8 *%ptr) { +; CHECK: f1: +; CHECK: oi 0(%r2), 1 +; CHECK: br %r14 + %val = load i8 *%ptr + %or = or i8 %val, -255 + store i8 %or, i8 *%ptr + ret void +} + +; Check the highest useful constant, expressed as a signed integer. +define void @f2(i8 *%ptr) { +; CHECK: f2: +; CHECK: oi 0(%r2), 254 +; CHECK: br %r14 + %val = load i8 *%ptr + %or = or i8 %val, -2 + store i8 %or, i8 *%ptr + ret void +} + +; Check the lowest useful constant, expressed as an unsigned integer. +define void @f3(i8 *%ptr) { +; CHECK: f3: +; CHECK: oi 0(%r2), 1 +; CHECK: br %r14 + %val = load i8 *%ptr + %or = or i8 %val, 1 + store i8 %or, i8 *%ptr + ret void +} + +; Check the highest useful constant, expressed as an unsigned integer. 
+define void @f4(i8 *%ptr) { +; CHECK: f4: +; CHECK: oi 0(%r2), 254 +; CHECK: br %r14 + %val = load i8 *%ptr + %or = or i8 %val, 254 + store i8 %or, i8 *%ptr + ret void +} + +; Check the high end of the OI range. +define void @f5(i8 *%src) { +; CHECK: f5: +; CHECK: oi 4095(%r2), 127 +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 4095 + %val = load i8 *%ptr + %or = or i8 %val, 127 + store i8 %or, i8 *%ptr + ret void +} + +; Check the next byte up, which should use OIY instead of OI. +define void @f6(i8 *%src) { +; CHECK: f6: +; CHECK: oiy 4096(%r2), 127 +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 4096 + %val = load i8 *%ptr + %or = or i8 %val, 127 + store i8 %or, i8 *%ptr + ret void +} + +; Check the high end of the OIY range. +define void @f7(i8 *%src) { +; CHECK: f7: +; CHECK: oiy 524287(%r2), 127 +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 524287 + %val = load i8 *%ptr + %or = or i8 %val, 127 + store i8 %or, i8 *%ptr + ret void +} + +; Check the next byte up, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f8(i8 *%src) { +; CHECK: f8: +; CHECK: agfi %r2, 524288 +; CHECK: oi 0(%r2), 127 +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 524288 + %val = load i8 *%ptr + %or = or i8 %val, 127 + store i8 %or, i8 *%ptr + ret void +} + +; Check the high end of the negative OIY range. +define void @f9(i8 *%src) { +; CHECK: f9: +; CHECK: oiy -1(%r2), 127 +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 -1 + %val = load i8 *%ptr + %or = or i8 %val, 127 + store i8 %or, i8 *%ptr + ret void +} + +; Check the low end of the OIY range. +define void @f10(i8 *%src) { +; CHECK: f10: +; CHECK: oiy -524288(%r2), 127 +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 -524288 + %val = load i8 *%ptr + %or = or i8 %val, 127 + store i8 %or, i8 *%ptr + ret void +} + +; Check the next byte down, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define void @f11(i8 *%src) { +; CHECK: f11: +; CHECK: agfi %r2, -524289 +; CHECK: oi 0(%r2), 127 +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 -524289 + %val = load i8 *%ptr + %or = or i8 %val, 127 + store i8 %or, i8 *%ptr + ret void +} + +; Check that OI does not allow an index +define void @f12(i64 %src, i64 %index) { +; CHECK: f12: +; CHECK: agr %r2, %r3 +; CHECK: oi 4095(%r2), 127 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4095 + %ptr = inttoptr i64 %add2 to i8 * + %val = load i8 *%ptr + %or = or i8 %val, 127 + store i8 %or, i8 *%ptr + ret void +} + +; Check that OIY does not allow an index +define void @f13(i64 %src, i64 %index) { +; CHECK: f13: +; CHECK: agr %r2, %r3 +; CHECK: oiy 4096(%r2), 127 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i8 * + %val = load i8 *%ptr + %or = or i8 %val, 127 + store i8 %or, i8 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/or-06.ll b/test/CodeGen/SystemZ/or-06.ll new file mode 100644 index 0000000..a24a18a --- /dev/null +++ b/test/CodeGen/SystemZ/or-06.ll @@ -0,0 +1,108 @@ +; Test that we can use OI for byte operations that are expressed as i32 +; or i64 operations. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Zero extension to 32 bits, negative constant. +define void @f1(i8 *%ptr) { +; CHECK: f1: +; CHECK: oi 0(%r2), 254 +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i32 + %or = or i32 %ext, -2 + %trunc = trunc i32 %or to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; Zero extension to 64 bits, negative constant. +define void @f2(i8 *%ptr) { +; CHECK: f2: +; CHECK: oi 0(%r2), 254 +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i64 + %or = or i64 %ext, -2 + %trunc = trunc i64 %or to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; Zero extension to 32 bits, positive constant. 
+define void @f3(i8 *%ptr) { +; CHECK: f3: +; CHECK: oi 0(%r2), 254 +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i32 + %or = or i32 %ext, 254 + %trunc = trunc i32 %or to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; Zero extension to 64 bits, positive constant. +define void @f4(i8 *%ptr) { +; CHECK: f4: +; CHECK: oi 0(%r2), 254 +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i64 + %or = or i64 %ext, 254 + %trunc = trunc i64 %or to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; Sign extension to 32 bits, negative constant. +define void @f5(i8 *%ptr) { +; CHECK: f5: +; CHECK: oi 0(%r2), 254 +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i32 + %or = or i32 %ext, -2 + %trunc = trunc i32 %or to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; Sign extension to 64 bits, negative constant. +define void @f6(i8 *%ptr) { +; CHECK: f6: +; CHECK: oi 0(%r2), 254 +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i64 + %or = or i64 %ext, -2 + %trunc = trunc i64 %or to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; Sign extension to 32 bits, positive constant. +define void @f7(i8 *%ptr) { +; CHECK: f7: +; CHECK: oi 0(%r2), 254 +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i32 + %or = or i32 %ext, 254 + %trunc = trunc i32 %or to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; Sign extension to 64 bits, positive constant. +define void @f8(i8 *%ptr) { +; CHECK: f8: +; CHECK: oi 0(%r2), 254 +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i64 + %or = or i64 %ext, 254 + %trunc = trunc i64 %or to i8 + store i8 %trunc, i8 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/shift-01.ll b/test/CodeGen/SystemZ/shift-01.ll new file mode 100644 index 0000000..e5a459a --- /dev/null +++ b/test/CodeGen/SystemZ/shift-01.ll @@ -0,0 +1,114 @@ +; Test 32-bit shifts left. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the low end of the SLL range. 
+define i32 @f1(i32 %a) { +; CHECK: f1: +; CHECK: sll %r2, 1 +; CHECK: br %r14 + %shift = shl i32 %a, 1 + ret i32 %shift +} + +; Check the high end of the defined SLL range. +define i32 @f2(i32 %a) { +; CHECK: f2: +; CHECK: sll %r2, 31 +; CHECK: br %r14 + %shift = shl i32 %a, 31 + ret i32 %shift +} + +; We don't generate shifts by out-of-range values. +define i32 @f3(i32 %a) { +; CHECK: f3: +; CHECK-NOT: sll %r2, 32 +; CHECK: br %r14 + %shift = shl i32 %a, 32 + ret i32 %shift +} + +; Make sure that we don't generate negative shift amounts. +define i32 @f4(i32 %a, i32 %amt) { +; CHECK: f4: +; CHECK-NOT: sll %r2, -1{{.*}} +; CHECK: br %r14 + %sub = sub i32 %amt, 1 + %shift = shl i32 %a, %sub + ret i32 %shift +} + +; Check variable shifts. +define i32 @f5(i32 %a, i32 %amt) { +; CHECK: f5: +; CHECK: sll %r2, 0(%r3) +; CHECK: br %r14 + %shift = shl i32 %a, %amt + ret i32 %shift +} + +; Check shift amounts that have a constant term. +define i32 @f6(i32 %a, i32 %amt) { +; CHECK: f6: +; CHECK: sll %r2, 10(%r3) +; CHECK: br %r14 + %add = add i32 %amt, 10 + %shift = shl i32 %a, %add + ret i32 %shift +} + +; ...and again with a truncated 64-bit shift amount. +define i32 @f7(i32 %a, i64 %amt) { +; CHECK: f7: +; CHECK: sll %r2, 10(%r3) +; CHECK: br %r14 + %add = add i64 %amt, 10 + %trunc = trunc i64 %add to i32 + %shift = shl i32 %a, %trunc + ret i32 %shift +} + +; Check shift amounts that have the largest in-range constant term. We could +; mask the amount instead. +define i32 @f8(i32 %a, i32 %amt) { +; CHECK: f8: +; CHECK: sll %r2, 4095(%r3) +; CHECK: br %r14 + %add = add i32 %amt, 4095 + %shift = shl i32 %a, %add + ret i32 %shift +} + +; Check the next value up. Again, we could mask the amount instead. +define i32 @f9(i32 %a, i32 %amt) { +; CHECK: f9: +; CHECK: ahi %r3, 4096 +; CHECK: sll %r2, 0(%r3) +; CHECK: br %r14 + %add = add i32 %amt, 4096 + %shift = shl i32 %a, %add + ret i32 %shift +} + +; Check that we don't try to generate "indexed" shifts. 
+define i32 @f10(i32 %a, i32 %b, i32 %c) { +; CHECK: f10: +; CHECK: ar {{%r3, %r4|%r4, %r3}} +; CHECK: sll %r2, 0({{%r[34]}}) +; CHECK: br %r14 + %add = add i32 %b, %c + %shift = shl i32 %a, %add + ret i32 %shift +} + +; Check that the shift amount uses an address register. It cannot be in %r0. +define i32 @f11(i32 %a, i32 *%ptr) { +; CHECK: f11: +; CHECK: l %r1, 0(%r3) +; CHECK: sll %r2, 0(%r1) +; CHECK: br %r14 + %amt = load i32 *%ptr + %shift = shl i32 %a, %amt + ret i32 %shift +} diff --git a/test/CodeGen/SystemZ/shift-02.ll b/test/CodeGen/SystemZ/shift-02.ll new file mode 100644 index 0000000..38093a8 --- /dev/null +++ b/test/CodeGen/SystemZ/shift-02.ll @@ -0,0 +1,114 @@ +; Test 32-bit logical shifts right. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the low end of the SRL range. +define i32 @f1(i32 %a) { +; CHECK: f1: +; CHECK: srl %r2, 1 +; CHECK: br %r14 + %shift = lshr i32 %a, 1 + ret i32 %shift +} + +; Check the high end of the defined SRL range. +define i32 @f2(i32 %a) { +; CHECK: f2: +; CHECK: srl %r2, 31 +; CHECK: br %r14 + %shift = lshr i32 %a, 31 + ret i32 %shift +} + +; We don't generate shifts by out-of-range values. +define i32 @f3(i32 %a) { +; CHECK: f3: +; CHECK-NOT: srl %r2, 32 +; CHECK: br %r14 + %shift = lshr i32 %a, 32 + ret i32 %shift +} + +; Make sure that we don't generate negative shift amounts. +define i32 @f4(i32 %a, i32 %amt) { +; CHECK: f4: +; CHECK-NOT: srl %r2, -1{{.*}} +; CHECK: br %r14 + %sub = sub i32 %amt, 1 + %shift = lshr i32 %a, %sub + ret i32 %shift +} + +; Check variable shifts. +define i32 @f5(i32 %a, i32 %amt) { +; CHECK: f5: +; CHECK: srl %r2, 0(%r3) +; CHECK: br %r14 + %shift = lshr i32 %a, %amt + ret i32 %shift +} + +; Check shift amounts that have a constant term. +define i32 @f6(i32 %a, i32 %amt) { +; CHECK: f6: +; CHECK: srl %r2, 10(%r3) +; CHECK: br %r14 + %add = add i32 %amt, 10 + %shift = lshr i32 %a, %add + ret i32 %shift +} + +; ...and again with a truncated 64-bit shift amount. 
+define i32 @f7(i32 %a, i64 %amt) { +; CHECK: f7: +; CHECK: srl %r2, 10(%r3) +; CHECK: br %r14 + %add = add i64 %amt, 10 + %trunc = trunc i64 %add to i32 + %shift = lshr i32 %a, %trunc + ret i32 %shift +} + +; Check shift amounts that have the largest in-range constant term. We could +; mask the amount instead. +define i32 @f8(i32 %a, i32 %amt) { +; CHECK: f8: +; CHECK: srl %r2, 4095(%r3) +; CHECK: br %r14 + %add = add i32 %amt, 4095 + %shift = lshr i32 %a, %add + ret i32 %shift +} + +; Check the next value up. Again, we could mask the amount instead. +define i32 @f9(i32 %a, i32 %amt) { +; CHECK: f9: +; CHECK: ahi %r3, 4096 +; CHECK: srl %r2, 0(%r3) +; CHECK: br %r14 + %add = add i32 %amt, 4096 + %shift = lshr i32 %a, %add + ret i32 %shift +} + +; Check that we don't try to generate "indexed" shifts. +define i32 @f10(i32 %a, i32 %b, i32 %c) { +; CHECK: f10: +; CHECK: ar {{%r3, %r4|%r4, %r3}} +; CHECK: srl %r2, 0({{%r[34]}}) +; CHECK: br %r14 + %add = add i32 %b, %c + %shift = lshr i32 %a, %add + ret i32 %shift +} + +; Check that the shift amount uses an address register. It cannot be in %r0. +define i32 @f11(i32 %a, i32 *%ptr) { +; CHECK: f11: +; CHECK: l %r1, 0(%r3) +; CHECK: srl %r2, 0(%r1) +; CHECK: br %r14 + %amt = load i32 *%ptr + %shift = lshr i32 %a, %amt + ret i32 %shift +} diff --git a/test/CodeGen/SystemZ/shift-03.ll b/test/CodeGen/SystemZ/shift-03.ll new file mode 100644 index 0000000..ca510f3 --- /dev/null +++ b/test/CodeGen/SystemZ/shift-03.ll @@ -0,0 +1,114 @@ +; Test 32-bit arithmetic shifts right. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the low end of the SRA range. +define i32 @f1(i32 %a) { +; CHECK: f1: +; CHECK: sra %r2, 1 +; CHECK: br %r14 + %shift = ashr i32 %a, 1 + ret i32 %shift +} + +; Check the high end of the defined SRA range. +define i32 @f2(i32 %a) { +; CHECK: f2: +; CHECK: sra %r2, 31 +; CHECK: br %r14 + %shift = ashr i32 %a, 31 + ret i32 %shift +} + +; We don't generate shifts by out-of-range values. 
+define i32 @f3(i32 %a) { +; CHECK: f3: +; CHECK-NOT: sra %r2, 32 +; CHECK: br %r14 + %shift = ashr i32 %a, 32 + ret i32 %shift +} + +; Make sure that we don't generate negative shift amounts. +define i32 @f4(i32 %a, i32 %amt) { +; CHECK: f4: +; CHECK-NOT: sra %r2, -1{{.*}} +; CHECK: br %r14 + %sub = sub i32 %amt, 1 + %shift = ashr i32 %a, %sub + ret i32 %shift +} + +; Check variable shifts. +define i32 @f5(i32 %a, i32 %amt) { +; CHECK: f5: +; CHECK: sra %r2, 0(%r3) +; CHECK: br %r14 + %shift = ashr i32 %a, %amt + ret i32 %shift +} + +; Check shift amounts that have a constant term. +define i32 @f6(i32 %a, i32 %amt) { +; CHECK: f6: +; CHECK: sra %r2, 10(%r3) +; CHECK: br %r14 + %add = add i32 %amt, 10 + %shift = ashr i32 %a, %add + ret i32 %shift +} + +; ...and again with a truncated 64-bit shift amount. +define i32 @f7(i32 %a, i64 %amt) { +; CHECK: f7: +; CHECK: sra %r2, 10(%r3) +; CHECK: br %r14 + %add = add i64 %amt, 10 + %trunc = trunc i64 %add to i32 + %shift = ashr i32 %a, %trunc + ret i32 %shift +} + +; Check shift amounts that have the largest in-range constant term. We could +; mask the amount instead. +define i32 @f8(i32 %a, i32 %amt) { +; CHECK: f8: +; CHECK: sra %r2, 4095(%r3) +; CHECK: br %r14 + %add = add i32 %amt, 4095 + %shift = ashr i32 %a, %add + ret i32 %shift +} + +; Check the next value up. Again, we could mask the amount instead. +define i32 @f9(i32 %a, i32 %amt) { +; CHECK: f9: +; CHECK: ahi %r3, 4096 +; CHECK: sra %r2, 0(%r3) +; CHECK: br %r14 + %add = add i32 %amt, 4096 + %shift = ashr i32 %a, %add + ret i32 %shift +} + +; Check that we don't try to generate "indexed" shifts. +define i32 @f10(i32 %a, i32 %b, i32 %c) { +; CHECK: f10: +; CHECK: ar {{%r3, %r4|%r4, %r3}} +; CHECK: sra %r2, 0({{%r[34]}}) +; CHECK: br %r14 + %add = add i32 %b, %c + %shift = ashr i32 %a, %add + ret i32 %shift +} + +; Check that the shift amount uses an address register. It cannot be in %r0. 
+define i32 @f11(i32 %a, i32 *%ptr) { +; CHECK: f11: +; CHECK: l %r1, 0(%r3) +; CHECK: sra %r2, 0(%r1) +; CHECK: br %r14 + %amt = load i32 *%ptr + %shift = ashr i32 %a, %amt + ret i32 %shift +} diff --git a/test/CodeGen/SystemZ/shift-04.ll b/test/CodeGen/SystemZ/shift-04.ll new file mode 100644 index 0000000..0146a86 --- /dev/null +++ b/test/CodeGen/SystemZ/shift-04.ll @@ -0,0 +1,189 @@ +; Test 32-bit rotates left. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the low end of the RLL range. +define i32 @f1(i32 %a) { +; CHECK: f1: +; CHECK: rll %r2, %r2, 1 +; CHECK: br %r14 + %parta = shl i32 %a, 1 + %partb = lshr i32 %a, 31 + %or = or i32 %parta, %partb + ret i32 %or +} + +; Check the high end of the defined RLL range. +define i32 @f2(i32 %a) { +; CHECK: f2: +; CHECK: rll %r2, %r2, 31 +; CHECK: br %r14 + %parta = shl i32 %a, 31 + %partb = lshr i32 %a, 1 + %or = or i32 %parta, %partb + ret i32 %or +} + +; We don't generate shifts by out-of-range values. +define i32 @f3(i32 %a) { +; CHECK: f3: +; CHECK-NOT: rll +; CHECK: br %r14 + %parta = shl i32 %a, 32 + %partb = lshr i32 %a, 0 + %or = or i32 %parta, %partb + ret i32 %or +} + +; Check variable shifts. +define i32 @f4(i32 %a, i32 %amt) { +; CHECK: f4: +; CHECK: rll %r2, %r2, 0(%r3) +; CHECK: br %r14 + %amtb = sub i32 32, %amt + %parta = shl i32 %a, %amt + %partb = lshr i32 %a, %amtb + %or = or i32 %parta, %partb + ret i32 %or +} + +; Check shift amounts that have a constant term. +define i32 @f5(i32 %a, i32 %amt) { +; CHECK: f5: +; CHECK: rll %r2, %r2, 10(%r3) +; CHECK: br %r14 + %add = add i32 %amt, 10 + %sub = sub i32 32, %add + %parta = shl i32 %a, %add + %partb = lshr i32 %a, %sub + %or = or i32 %parta, %partb + ret i32 %or +} + +; ...and again with a truncated 64-bit shift amount. 
+define i32 @f6(i32 %a, i64 %amt) { +; CHECK: f6: +; CHECK: rll %r2, %r2, 10(%r3) +; CHECK: br %r14 + %add = add i64 %amt, 10 + %addtrunc = trunc i64 %add to i32 + %sub = sub i32 32, %addtrunc + %parta = shl i32 %a, %addtrunc + %partb = lshr i32 %a, %sub + %or = or i32 %parta, %partb + ret i32 %or +} + +; ...and again with a different truncation representation. +define i32 @f7(i32 %a, i64 %amt) { +; CHECK: f7: +; CHECK: rll %r2, %r2, 10(%r3) +; CHECK: br %r14 + %add = add i64 %amt, 10 + %sub = sub i64 32, %add + %addtrunc = trunc i64 %add to i32 + %subtrunc = trunc i64 %sub to i32 + %parta = shl i32 %a, %addtrunc + %partb = lshr i32 %a, %subtrunc + %or = or i32 %parta, %partb + ret i32 %or +} + +; Check shift amounts that have the largest in-range constant term. We could +; mask the amount instead. +define i32 @f8(i32 %a, i32 %amt) { +; CHECK: f8: +; CHECK: rll %r2, %r2, 524287(%r3) +; CHECK: br %r14 + %add = add i32 %amt, 524287 + %sub = sub i32 32, %add + %parta = shl i32 %a, %add + %partb = lshr i32 %a, %sub + %or = or i32 %parta, %partb + ret i32 %or +} + +; Check the next value up, which without masking must use a separate +; addition. +define i32 @f9(i32 %a, i32 %amt) { +; CHECK: f9: +; CHECK: afi %r3, 524288 +; CHECK: rll %r2, %r2, 0(%r3) +; CHECK: br %r14 + %add = add i32 %amt, 524288 + %sub = sub i32 32, %add + %parta = shl i32 %a, %add + %partb = lshr i32 %a, %sub + %or = or i32 %parta, %partb + ret i32 %or +} + +; Check cases where 1 is subtracted from the shift amount. +define i32 @f10(i32 %a, i32 %amt) { +; CHECK: f10: +; CHECK: rll %r2, %r2, -1(%r3) +; CHECK: br %r14 + %suba = sub i32 %amt, 1 + %subb = sub i32 32, %suba + %parta = shl i32 %a, %suba + %partb = lshr i32 %a, %subb + %or = or i32 %parta, %partb + ret i32 %or +} + +; Check the lowest value that can be subtracted from the shift amount. +; Again, we could mask the shift amount instead. 
+define i32 @f11(i32 %a, i32 %amt) { +; CHECK: f11: +; CHECK: rll %r2, %r2, -524288(%r3) +; CHECK: br %r14 + %suba = sub i32 %amt, 524288 + %subb = sub i32 32, %suba + %parta = shl i32 %a, %suba + %partb = lshr i32 %a, %subb + %or = or i32 %parta, %partb + ret i32 %or +} + +; Check the next value down, which without masking must use a separate +; addition. +define i32 @f12(i32 %a, i32 %amt) { +; CHECK: f12: +; CHECK: afi %r3, -524289 +; CHECK: rll %r2, %r2, 0(%r3) +; CHECK: br %r14 + %suba = sub i32 %amt, 524289 + %subb = sub i32 32, %suba + %parta = shl i32 %a, %suba + %partb = lshr i32 %a, %subb + %or = or i32 %parta, %partb + ret i32 %or +} + +; Check that we don't try to generate "indexed" shifts. +define i32 @f13(i32 %a, i32 %b, i32 %c) { +; CHECK: f13: +; CHECK: ar {{%r3, %r4|%r4, %r3}} +; CHECK: rll %r2, %r2, 0({{%r[34]}}) +; CHECK: br %r14 + %add = add i32 %b, %c + %sub = sub i32 32, %add + %parta = shl i32 %a, %add + %partb = lshr i32 %a, %sub + %or = or i32 %parta, %partb + ret i32 %or +} + +; Check that the shift amount uses an address register. It cannot be in %r0. +define i32 @f14(i32 %a, i32 *%ptr) { +; CHECK: f14: +; CHECK: l %r1, 0(%r3) +; CHECK: rll %r2, %r2, 0(%r1) +; CHECK: br %r14 + %amt = load i32 *%ptr + %amtb = sub i32 32, %amt + %parta = shl i32 %a, %amt + %partb = lshr i32 %a, %amtb + %or = or i32 %parta, %partb + ret i32 %or +} diff --git a/test/CodeGen/SystemZ/shift-05.ll b/test/CodeGen/SystemZ/shift-05.ll new file mode 100644 index 0000000..8c0ca93 --- /dev/null +++ b/test/CodeGen/SystemZ/shift-05.ll @@ -0,0 +1,149 @@ +; Test 64-bit shifts left. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the low end of the SLLG range. +define i64 @f1(i64 %a) { +; CHECK: f1: +; CHECK: sllg %r2, %r2, 1 +; CHECK: br %r14 + %shift = shl i64 %a, 1 + ret i64 %shift +} + +; Check the high end of the defined SLLG range. 
+define i64 @f2(i64 %a) { +; CHECK: f2: +; CHECK: sllg %r2, %r2, 63 +; CHECK: br %r14 + %shift = shl i64 %a, 63 + ret i64 %shift +} + +; We don't generate shifts by out-of-range values. +define i64 @f3(i64 %a) { +; CHECK: f3: +; CHECK-NOT: sllg +; CHECK: br %r14 + %shift = shl i64 %a, 64 + ret i64 %shift +} + +; Check variable shifts. +define i64 @f4(i64 %a, i64 %amt) { +; CHECK: f4: +; CHECK: sllg %r2, %r2, 0(%r3) +; CHECK: br %r14 + %shift = shl i64 %a, %amt + ret i64 %shift +} + +; Check shift amounts that have a constant term. +define i64 @f5(i64 %a, i64 %amt) { +; CHECK: f5: +; CHECK: sllg %r2, %r2, 10(%r3) +; CHECK: br %r14 + %add = add i64 %amt, 10 + %shift = shl i64 %a, %add + ret i64 %shift +} + +; ...and again with a sign-extended 32-bit shift amount. +define i64 @f6(i64 %a, i32 %amt) { +; CHECK: f6: +; CHECK: sllg %r2, %r2, 10(%r3) +; CHECK: br %r14 + %add = add i32 %amt, 10 + %addext = sext i32 %add to i64 + %shift = shl i64 %a, %addext + ret i64 %shift +} + +; ...and now with a zero-extended 32-bit shift amount. +define i64 @f7(i64 %a, i32 %amt) { +; CHECK: f7: +; CHECK: sllg %r2, %r2, 10(%r3) +; CHECK: br %r14 + %add = add i32 %amt, 10 + %addext = zext i32 %add to i64 + %shift = shl i64 %a, %addext + ret i64 %shift +} + +; Check shift amounts that have the largest in-range constant term. We could +; mask the amount instead. +define i64 @f8(i64 %a, i64 %amt) { +; CHECK: f8: +; CHECK: sllg %r2, %r2, 524287(%r3) +; CHECK: br %r14 + %add = add i64 %amt, 524287 + %shift = shl i64 %a, %add + ret i64 %shift +} + +; Check the next value up, which without masking must use a separate +; addition. +define i64 @f9(i64 %a, i64 %amt) { +; CHECK: f9: +; CHECK: a{{g?}}fi %r3, 524288 +; CHECK: sllg %r2, %r2, 0(%r3) +; CHECK: br %r14 + %add = add i64 %amt, 524288 + %shift = shl i64 %a, %add + ret i64 %shift +} + +; Check cases where 1 is subtracted from the shift amount. 
+define i64 @f10(i64 %a, i64 %amt) { +; CHECK: f10: +; CHECK: sllg %r2, %r2, -1(%r3) +; CHECK: br %r14 + %sub = sub i64 %amt, 1 + %shift = shl i64 %a, %sub + ret i64 %shift +} + +; Check the lowest value that can be subtracted from the shift amount. +; Again, we could mask the shift amount instead. +define i64 @f11(i64 %a, i64 %amt) { +; CHECK: f11: +; CHECK: sllg %r2, %r2, -524288(%r3) +; CHECK: br %r14 + %sub = sub i64 %amt, 524288 + %shift = shl i64 %a, %sub + ret i64 %shift +} + +; Check the next value down, which without masking must use a separate +; addition. +define i64 @f12(i64 %a, i64 %amt) { +; CHECK: f12: +; CHECK: a{{g?}}fi %r3, -524289 +; CHECK: sllg %r2, %r2, 0(%r3) +; CHECK: br %r14 + %sub = sub i64 %amt, 524289 + %shift = shl i64 %a, %sub + ret i64 %shift +} + +; Check that we don't try to generate "indexed" shifts. +define i64 @f13(i64 %a, i64 %b, i64 %c) { +; CHECK: f13: +; CHECK: a{{g?}}r {{%r3, %r4|%r4, %r3}} +; CHECK: sllg %r2, %r2, 0({{%r[34]}}) +; CHECK: br %r14 + %add = add i64 %b, %c + %shift = shl i64 %a, %add + ret i64 %shift +} + +; Check that the shift amount uses an address register. It cannot be in %r0. +define i64 @f14(i64 %a, i64 *%ptr) { +; CHECK: f14: +; CHECK: l %r1, 4(%r3) +; CHECK: sllg %r2, %r2, 0(%r1) +; CHECK: br %r14 + %amt = load i64 *%ptr + %shift = shl i64 %a, %amt + ret i64 %shift +} diff --git a/test/CodeGen/SystemZ/shift-06.ll b/test/CodeGen/SystemZ/shift-06.ll new file mode 100644 index 0000000..5f600b4 --- /dev/null +++ b/test/CodeGen/SystemZ/shift-06.ll @@ -0,0 +1,149 @@ +; Test 64-bit logical shifts right. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the low end of the SRLG range. +define i64 @f1(i64 %a) { +; CHECK: f1: +; CHECK: srlg %r2, %r2, 1 +; CHECK: br %r14 + %shift = lshr i64 %a, 1 + ret i64 %shift +} + +; Check the high end of the defined SRLG range. 
+define i64 @f2(i64 %a) { +; CHECK: f2: +; CHECK: srlg %r2, %r2, 63 +; CHECK: br %r14 + %shift = lshr i64 %a, 63 + ret i64 %shift +} + +; We don't generate shifts by out-of-range values. +define i64 @f3(i64 %a) { +; CHECK: f3: +; CHECK-NOT: srlg +; CHECK: br %r14 + %shift = lshr i64 %a, 64 + ret i64 %shift +} + +; Check variable shifts. +define i64 @f4(i64 %a, i64 %amt) { +; CHECK: f4: +; CHECK: srlg %r2, %r2, 0(%r3) +; CHECK: br %r14 + %shift = lshr i64 %a, %amt + ret i64 %shift +} + +; Check shift amounts that have a constant term. +define i64 @f5(i64 %a, i64 %amt) { +; CHECK: f5: +; CHECK: srlg %r2, %r2, 10(%r3) +; CHECK: br %r14 + %add = add i64 %amt, 10 + %shift = lshr i64 %a, %add + ret i64 %shift +} + +; ...and again with a sign-extended 32-bit shift amount. +define i64 @f6(i64 %a, i32 %amt) { +; CHECK: f6: +; CHECK: srlg %r2, %r2, 10(%r3) +; CHECK: br %r14 + %add = add i32 %amt, 10 + %addext = sext i32 %add to i64 + %shift = lshr i64 %a, %addext + ret i64 %shift +} + +; ...and now with a zero-extended 32-bit shift amount. +define i64 @f7(i64 %a, i32 %amt) { +; CHECK: f7: +; CHECK: srlg %r2, %r2, 10(%r3) +; CHECK: br %r14 + %add = add i32 %amt, 10 + %addext = zext i32 %add to i64 + %shift = lshr i64 %a, %addext + ret i64 %shift +} + +; Check shift amounts that have the largest in-range constant term. We could +; mask the amount instead. +define i64 @f8(i64 %a, i64 %amt) { +; CHECK: f8: +; CHECK: srlg %r2, %r2, 524287(%r3) +; CHECK: br %r14 + %add = add i64 %amt, 524287 + %shift = lshr i64 %a, %add + ret i64 %shift +} + +; Check the next value up, which without masking must use a separate +; addition. +define i64 @f9(i64 %a, i64 %amt) { +; CHECK: f9: +; CHECK: a{{g?}}fi %r3, 524288 +; CHECK: srlg %r2, %r2, 0(%r3) +; CHECK: br %r14 + %add = add i64 %amt, 524288 + %shift = lshr i64 %a, %add + ret i64 %shift +} + +; Check cases where 1 is subtracted from the shift amount. 
+define i64 @f10(i64 %a, i64 %amt) { +; CHECK: f10: +; CHECK: srlg %r2, %r2, -1(%r3) +; CHECK: br %r14 + %sub = sub i64 %amt, 1 + %shift = lshr i64 %a, %sub + ret i64 %shift +} + +; Check the lowest value that can be subtracted from the shift amount. +; Again, we could mask the shift amount instead. +define i64 @f11(i64 %a, i64 %amt) { +; CHECK: f11: +; CHECK: srlg %r2, %r2, -524288(%r3) +; CHECK: br %r14 + %sub = sub i64 %amt, 524288 + %shift = lshr i64 %a, %sub + ret i64 %shift +} + +; Check the next value down, which without masking must use a separate +; addition. +define i64 @f12(i64 %a, i64 %amt) { +; CHECK: f12: +; CHECK: a{{g?}}fi %r3, -524289 +; CHECK: srlg %r2, %r2, 0(%r3) +; CHECK: br %r14 + %sub = sub i64 %amt, 524289 + %shift = lshr i64 %a, %sub + ret i64 %shift +} + +; Check that we don't try to generate "indexed" shifts. +define i64 @f13(i64 %a, i64 %b, i64 %c) { +; CHECK: f13: +; CHECK: a{{g?}}r {{%r3, %r4|%r4, %r3}} +; CHECK: srlg %r2, %r2, 0({{%r[34]}}) +; CHECK: br %r14 + %add = add i64 %b, %c + %shift = lshr i64 %a, %add + ret i64 %shift +} + +; Check that the shift amount uses an address register. It cannot be in %r0. +define i64 @f14(i64 %a, i64 *%ptr) { +; CHECK: f14: +; CHECK: l %r1, 4(%r3) +; CHECK: srlg %r2, %r2, 0(%r1) +; CHECK: br %r14 + %amt = load i64 *%ptr + %shift = lshr i64 %a, %amt + ret i64 %shift +} diff --git a/test/CodeGen/SystemZ/shift-07.ll b/test/CodeGen/SystemZ/shift-07.ll new file mode 100644 index 0000000..ef583e8 --- /dev/null +++ b/test/CodeGen/SystemZ/shift-07.ll @@ -0,0 +1,149 @@ +; Test 64-bit arithmetic shifts right. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the low end of the SRAG range. +define i64 @f1(i64 %a) { +; CHECK: f1: +; CHECK: srag %r2, %r2, 1 +; CHECK: br %r14 + %shift = ashr i64 %a, 1 + ret i64 %shift +} + +; Check the high end of the defined SRAG range. 
+define i64 @f2(i64 %a) { +; CHECK: f2: +; CHECK: srag %r2, %r2, 63 +; CHECK: br %r14 + %shift = ashr i64 %a, 63 + ret i64 %shift +} + +; We don't generate shifts by out-of-range values. +define i64 @f3(i64 %a) { +; CHECK: f3: +; CHECK-NOT: srag +; CHECK: br %r14 + %shift = ashr i64 %a, 64 + ret i64 %shift +} + +; Check variable shifts. +define i64 @f4(i64 %a, i64 %amt) { +; CHECK: f4: +; CHECK: srag %r2, %r2, 0(%r3) +; CHECK: br %r14 + %shift = ashr i64 %a, %amt + ret i64 %shift +} + +; Check shift amounts that have a constant term. +define i64 @f5(i64 %a, i64 %amt) { +; CHECK: f5: +; CHECK: srag %r2, %r2, 10(%r3) +; CHECK: br %r14 + %add = add i64 %amt, 10 + %shift = ashr i64 %a, %add + ret i64 %shift +} + +; ...and again with a sign-extended 32-bit shift amount. +define i64 @f6(i64 %a, i32 %amt) { +; CHECK: f6: +; CHECK: srag %r2, %r2, 10(%r3) +; CHECK: br %r14 + %add = add i32 %amt, 10 + %addext = sext i32 %add to i64 + %shift = ashr i64 %a, %addext + ret i64 %shift +} + +; ...and now with a zero-extended 32-bit shift amount. +define i64 @f7(i64 %a, i32 %amt) { +; CHECK: f7: +; CHECK: srag %r2, %r2, 10(%r3) +; CHECK: br %r14 + %add = add i32 %amt, 10 + %addext = zext i32 %add to i64 + %shift = ashr i64 %a, %addext + ret i64 %shift +} + +; Check shift amounts that have the largest in-range constant term. We could +; mask the amount instead. +define i64 @f8(i64 %a, i64 %amt) { +; CHECK: f8: +; CHECK: srag %r2, %r2, 524287(%r3) +; CHECK: br %r14 + %add = add i64 %amt, 524287 + %shift = ashr i64 %a, %add + ret i64 %shift +} + +; Check the next value up, which without masking must use a separate +; addition. +define i64 @f9(i64 %a, i64 %amt) { +; CHECK: f9: +; CHECK: a{{g?}}fi %r3, 524288 +; CHECK: srag %r2, %r2, 0(%r3) +; CHECK: br %r14 + %add = add i64 %amt, 524288 + %shift = ashr i64 %a, %add + ret i64 %shift +} + +; Check cases where 1 is subtracted from the shift amount. 
+define i64 @f10(i64 %a, i64 %amt) { +; CHECK: f10: +; CHECK: srag %r2, %r2, -1(%r3) +; CHECK: br %r14 + %sub = sub i64 %amt, 1 + %shift = ashr i64 %a, %sub + ret i64 %shift +} + +; Check the lowest value that can be subtracted from the shift amount. +; Again, we could mask the shift amount instead. +define i64 @f11(i64 %a, i64 %amt) { +; CHECK: f11: +; CHECK: srag %r2, %r2, -524288(%r3) +; CHECK: br %r14 + %sub = sub i64 %amt, 524288 + %shift = ashr i64 %a, %sub + ret i64 %shift +} + +; Check the next value down, which without masking must use a separate +; addition. +define i64 @f12(i64 %a, i64 %amt) { +; CHECK: f12: +; CHECK: a{{g?}}fi %r3, -524289 +; CHECK: srag %r2, %r2, 0(%r3) +; CHECK: br %r14 + %sub = sub i64 %amt, 524289 + %shift = ashr i64 %a, %sub + ret i64 %shift +} + +; Check that we don't try to generate "indexed" shifts. +define i64 @f13(i64 %a, i64 %b, i64 %c) { +; CHECK: f13: +; CHECK: a{{g?}}r {{%r3, %r4|%r4, %r3}} +; CHECK: srag %r2, %r2, 0({{%r[34]}}) +; CHECK: br %r14 + %add = add i64 %b, %c + %shift = ashr i64 %a, %add + ret i64 %shift +} + +; Check that the shift amount uses an address register. It cannot be in %r0. +define i64 @f14(i64 %a, i64 *%ptr) { +; CHECK: f14: +; CHECK: l %r1, 4(%r3) +; CHECK: srag %r2, %r2, 0(%r1) +; CHECK: br %r14 + %amt = load i64 *%ptr + %shift = ashr i64 %a, %amt + ret i64 %shift +} diff --git a/test/CodeGen/SystemZ/shift-08.ll b/test/CodeGen/SystemZ/shift-08.ll new file mode 100644 index 0000000..0688a06 --- /dev/null +++ b/test/CodeGen/SystemZ/shift-08.ll @@ -0,0 +1,190 @@ +; Test 64-bit rotates left. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the low end of the RLLG range. +define i64 @f1(i64 %a) { +; CHECK: f1: +; CHECK: rllg %r2, %r2, 1 +; CHECK: br %r14 + %parta = shl i64 %a, 1 + %partb = lshr i64 %a, 63 + %or = or i64 %parta, %partb + ret i64 %or +} + +; Check the high end of the defined RLLG range. 
+define i64 @f2(i64 %a) { +; CHECK: f2: +; CHECK: rllg %r2, %r2, 63 +; CHECK: br %r14 + %parta = shl i64 %a, 63 + %partb = lshr i64 %a, 1 + %or = or i64 %parta, %partb + ret i64 %or +} + +; We don't generate shifts by out-of-range values. +define i64 @f3(i64 %a) { +; CHECK: f3: +; CHECK-NOT: rllg +; CHECK: br %r14 + %parta = shl i64 %a, 64 + %partb = lshr i64 %a, 0 + %or = or i64 %parta, %partb + ret i64 %or +} + +; Check variable shifts. +define i64 @f4(i64 %a, i64 %amt) { +; CHECK: f4: +; CHECK: rllg %r2, %r2, 0(%r3) +; CHECK: br %r14 + %amtb = sub i64 64, %amt + %parta = shl i64 %a, %amt + %partb = lshr i64 %a, %amtb + %or = or i64 %parta, %partb + ret i64 %or +} + +; Check shift amounts that have a constant term. +define i64 @f5(i64 %a, i64 %amt) { +; CHECK: f5: +; CHECK: rllg %r2, %r2, 10(%r3) +; CHECK: br %r14 + %add = add i64 %amt, 10 + %sub = sub i64 64, %add + %parta = shl i64 %a, %add + %partb = lshr i64 %a, %sub + %or = or i64 %parta, %partb + ret i64 %or +} + +; ...and again with a sign-extended 32-bit shift amount. +define i64 @f6(i64 %a, i32 %amt) { +; CHECK: f6: +; CHECK: rllg %r2, %r2, 10(%r3) +; CHECK: br %r14 + %add = add i32 %amt, 10 + %sub = sub i32 64, %add + %addext = sext i32 %add to i64 + %subext = sext i32 %sub to i64 + %parta = shl i64 %a, %addext + %partb = lshr i64 %a, %subext + %or = or i64 %parta, %partb + ret i64 %or +} + +; ...and now with a zero-extended 32-bit shift amount. +define i64 @f7(i64 %a, i32 %amt) { +; CHECK: f7: +; CHECK: rllg %r2, %r2, 10(%r3) +; CHECK: br %r14 + %add = add i32 %amt, 10 + %sub = sub i32 64, %add + %addext = zext i32 %add to i64 + %subext = zext i32 %sub to i64 + %parta = shl i64 %a, %addext + %partb = lshr i64 %a, %subext + %or = or i64 %parta, %partb + ret i64 %or +} + +; Check shift amounts that have the largest in-range constant term. We could +; mask the amount instead. 
+define i64 @f8(i64 %a, i64 %amt) { +; CHECK: f8: +; CHECK: rllg %r2, %r2, 524287(%r3) +; CHECK: br %r14 + %add = add i64 %amt, 524287 + %sub = sub i64 64, %add + %parta = shl i64 %a, %add + %partb = lshr i64 %a, %sub + %or = or i64 %parta, %partb + ret i64 %or +} + +; Check the next value up, which without masking must use a separate +; addition. +define i64 @f9(i64 %a, i64 %amt) { +; CHECK: f9: +; CHECK: a{{g?}}fi %r3, 524288 +; CHECK: rllg %r2, %r2, 0(%r3) +; CHECK: br %r14 + %add = add i64 %amt, 524288 + %sub = sub i64 64, %add + %parta = shl i64 %a, %add + %partb = lshr i64 %a, %sub + %or = or i64 %parta, %partb + ret i64 %or +} + +; Check cases where 1 is subtracted from the shift amount. +define i64 @f10(i64 %a, i64 %amt) { +; CHECK: f10: +; CHECK: rllg %r2, %r2, -1(%r3) +; CHECK: br %r14 + %suba = sub i64 %amt, 1 + %subb = sub i64 64, %suba + %parta = shl i64 %a, %suba + %partb = lshr i64 %a, %subb + %or = or i64 %parta, %partb + ret i64 %or +} + +; Check the lowest value that can be subtracted from the shift amount. +; Again, we could mask the shift amount instead. +define i64 @f11(i64 %a, i64 %amt) { +; CHECK: f11: +; CHECK: rllg %r2, %r2, -524288(%r3) +; CHECK: br %r14 + %suba = sub i64 %amt, 524288 + %subb = sub i64 64, %suba + %parta = shl i64 %a, %suba + %partb = lshr i64 %a, %subb + %or = or i64 %parta, %partb + ret i64 %or +} + +; Check the next value down, which without masking must use a separate +; addition. +define i64 @f12(i64 %a, i64 %amt) { +; CHECK: f12: +; CHECK: a{{g?}}fi %r3, -524289 +; CHECK: rllg %r2, %r2, 0(%r3) +; CHECK: br %r14 + %suba = sub i64 %amt, 524289 + %subb = sub i64 64, %suba + %parta = shl i64 %a, %suba + %partb = lshr i64 %a, %subb + %or = or i64 %parta, %partb + ret i64 %or +} + +; Check that we don't try to generate "indexed" shifts. 
+define i64 @f13(i64 %a, i64 %b, i64 %c) { +; CHECK: f13: +; CHECK: a{{g?}}r {{%r3, %r4|%r4, %r3}} +; CHECK: rllg %r2, %r2, 0({{%r[34]}}) +; CHECK: br %r14 + %add = add i64 %b, %c + %sub = sub i64 64, %add + %parta = shl i64 %a, %add + %partb = lshr i64 %a, %sub + %or = or i64 %parta, %partb + ret i64 %or +} + +; Check that the shift amount uses an address register. It cannot be in %r0. +define i64 @f14(i64 %a, i64 *%ptr) { +; CHECK: f14: +; CHECK: l %r1, 4(%r3) +; CHECK: rllg %r2, %r2, 0(%r1) +; CHECK: br %r14 + %amt = load i64 *%ptr + %amtb = sub i64 64, %amt + %parta = shl i64 %a, %amt + %partb = lshr i64 %a, %amtb + %or = or i64 %parta, %partb + ret i64 %or +} diff --git a/test/CodeGen/SystemZ/tls-01.ll b/test/CodeGen/SystemZ/tls-01.ll new file mode 100644 index 0000000..49037ad --- /dev/null +++ b/test/CodeGen/SystemZ/tls-01.ll @@ -0,0 +1,22 @@ +; Test initial-exec TLS accesses. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-MAIN +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-CP + +@x = thread_local global i32 0 + +; The offset must be loaded from the constant pool. It doesn't really +; matter whether we use LARL/AG or LGRL/AGR for the last part. +define i32 *@foo() { +; CHECK-CP: .LCP{{.*}}: +; CHECK-CP: .quad x@NTPOFF +; +; CHECK-MAIN: foo: +; CHECK-MAIN: ear [[HIGH:%r[0-5]]], %a0 +; CHECK-MAIN: sllg %r2, [[HIGH]], 32 +; CHECK-MAIN: ear %r2, %a1 +; CHECK-MAIN: larl %r1, .LCP{{.*}} +; CHECK-MAIN: ag %r2, 0(%r1) +; CHECK-MAIN: br %r14 + ret i32 *@x +} diff --git a/test/CodeGen/SystemZ/xor-01.ll b/test/CodeGen/SystemZ/xor-01.ll new file mode 100644 index 0000000..30bdbe7 --- /dev/null +++ b/test/CodeGen/SystemZ/xor-01.ll @@ -0,0 +1,129 @@ +; Test 32-bit XORs in which the second operand is variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check XR. 
+define i32 @f1(i32 %a, i32 %b) { +; CHECK: f1: +; CHECK: xr %r2, %r3 +; CHECK: br %r14 + %xor = xor i32 %a, %b + ret i32 %xor +} + +; Check the low end of the X range. +define i32 @f2(i32 %a, i32 *%src) { +; CHECK: f2: +; CHECK: x %r2, 0(%r3) +; CHECK: br %r14 + %b = load i32 *%src + %xor = xor i32 %a, %b + ret i32 %xor +} + +; Check the high end of the aligned X range. +define i32 @f3(i32 %a, i32 *%src) { +; CHECK: f3: +; CHECK: x %r2, 4092(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 1023 + %b = load i32 *%ptr + %xor = xor i32 %a, %b + ret i32 %xor +} + +; Check the next word up, which should use XY instead of X. +define i32 @f4(i32 %a, i32 *%src) { +; CHECK: f4: +; CHECK: xy %r2, 4096(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 1024 + %b = load i32 *%ptr + %xor = xor i32 %a, %b + ret i32 %xor +} + +; Check the high end of the aligned XY range. +define i32 @f5(i32 %a, i32 *%src) { +; CHECK: f5: +; CHECK: xy %r2, 524284(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131071 + %b = load i32 *%ptr + %xor = xor i32 %a, %b + ret i32 %xor +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i32 @f6(i32 %a, i32 *%src) { +; CHECK: f6: +; CHECK: agfi %r3, 524288 +; CHECK: x %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 131072 + %b = load i32 *%ptr + %xor = xor i32 %a, %b + ret i32 %xor +} + +; Check the high end of the negative aligned XY range. +define i32 @f7(i32 %a, i32 *%src) { +; CHECK: f7: +; CHECK: xy %r2, -4(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -1 + %b = load i32 *%ptr + %xor = xor i32 %a, %b + ret i32 %xor +} + +; Check the low end of the XY range. 
+define i32 @f8(i32 %a, i32 *%src) { +; CHECK: f8: +; CHECK: xy %r2, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131072 + %b = load i32 *%ptr + %xor = xor i32 %a, %b + ret i32 %xor +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define i32 @f9(i32 %a, i32 *%src) { +; CHECK: f9: +; CHECK: agfi %r3, -524292 +; CHECK: x %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i64 -131073 + %b = load i32 *%ptr + %xor = xor i32 %a, %b + ret i32 %xor +} + +; Check that X allows an index. +define i32 @f10(i32 %a, i64 %src, i64 %index) { +; CHECK: f10: +; CHECK: x %r2, 4092({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4092 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32 *%ptr + %xor = xor i32 %a, %b + ret i32 %xor +} + +; Check that XY allows an index. +define i32 @f11(i32 %a, i64 %src, i64 %index) { +; CHECK: f11: +; CHECK: xy %r2, 4096({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32 *%ptr + %xor = xor i32 %a, %b + ret i32 %xor +} diff --git a/test/CodeGen/SystemZ/xor-02.ll b/test/CodeGen/SystemZ/xor-02.ll new file mode 100644 index 0000000..c2b52b9 --- /dev/null +++ b/test/CodeGen/SystemZ/xor-02.ll @@ -0,0 +1,40 @@ +; Test 32-bit XORs in which the second operand is constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the lowest useful XILF value. +define i32 @f1(i32 %a) { +; CHECK: f1: +; CHECK: xilf %r2, 1 +; CHECK: br %r14 + %xor = xor i32 %a, 1 + ret i32 %xor +} + +; Check the high end of the signed range. +define i32 @f2(i32 %a) { +; CHECK: f2: +; CHECK: xilf %r2, 2147483647 +; CHECK: br %r14 + %xor = xor i32 %a, 2147483647 + ret i32 %xor +} + +; Check the low end of the signed range, which should be treated +; as a positive value. 
+define i32 @f3(i32 %a) { +; CHECK: f3: +; CHECK: xilf %r2, 2147483648 +; CHECK: br %r14 + %xor = xor i32 %a, -2147483648 + ret i32 %xor +} + +; Check the high end of the XILF range. +define i32 @f4(i32 %a) { +; CHECK: f4: +; CHECK: xilf %r2, 4294967295 +; CHECK: br %r14 + %xor = xor i32 %a, 4294967295 + ret i32 %xor +} diff --git a/test/CodeGen/SystemZ/xor-03.ll b/test/CodeGen/SystemZ/xor-03.ll new file mode 100644 index 0000000..a4851b3 --- /dev/null +++ b/test/CodeGen/SystemZ/xor-03.ll @@ -0,0 +1,94 @@ +; Test 64-bit XORs in which the second operand is variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check XGR. +define i64 @f1(i64 %a, i64 %b) { +; CHECK: f1: +; CHECK: xgr %r2, %r3 +; CHECK: br %r14 + %xor = xor i64 %a, %b + ret i64 %xor +} + +; Check XG with no displacement. +define i64 @f2(i64 %a, i64 *%src) { +; CHECK: f2: +; CHECK: xg %r2, 0(%r3) +; CHECK: br %r14 + %b = load i64 *%src + %xor = xor i64 %a, %b + ret i64 %xor +} + +; Check the high end of the aligned XG range. +define i64 @f3(i64 %a, i64 *%src) { +; CHECK: f3: +; CHECK: xg %r2, 524280(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65535 + %b = load i64 *%ptr + %xor = xor i64 %a, %b + ret i64 %xor +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f4(i64 %a, i64 *%src) { +; CHECK: f4: +; CHECK: agfi %r3, 524288 +; CHECK: xg %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65536 + %b = load i64 *%ptr + %xor = xor i64 %a, %b + ret i64 %xor +} + +; Check the high end of the negative aligned XG range. +define i64 @f5(i64 %a, i64 *%src) { +; CHECK: f5: +; CHECK: xg %r2, -8(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -1 + %b = load i64 *%ptr + %xor = xor i64 %a, %b + ret i64 %xor +} + +; Check the low end of the XG range. 
+define i64 @f6(i64 %a, i64 *%src) { +; CHECK: f6: +; CHECK: xg %r2, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65536 + %b = load i64 *%ptr + %xor = xor i64 %a, %b + ret i64 %xor +} + +; Check the next doubleword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f7(i64 %a, i64 *%src) { +; CHECK: f7: +; CHECK: agfi %r3, -524296 +; CHECK: xg %r2, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65537 + %b = load i64 *%ptr + %xor = xor i64 %a, %b + ret i64 %xor +} + +; Check that XG allows an index. +define i64 @f8(i64 %a, i64 %src, i64 %index) { +; CHECK: f8: +; CHECK: xg %r2, 524280({{%r4,%r3|%r3,%r4}}) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524280 + %ptr = inttoptr i64 %add2 to i64 * + %b = load i64 *%ptr + %xor = xor i64 %a, %b + ret i64 %xor +} diff --git a/test/CodeGen/SystemZ/xor-04.ll b/test/CodeGen/SystemZ/xor-04.ll new file mode 100644 index 0000000..cc141d3 --- /dev/null +++ b/test/CodeGen/SystemZ/xor-04.ll @@ -0,0 +1,69 @@ +; Test 64-bit XORs in which the second operand is constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the lowest useful XILF value. +define i64 @f1(i64 %a) { +; CHECK: f1: +; CHECK: xilf %r2, 1 +; CHECK: br %r14 + %xor = xor i64 %a, 1 + ret i64 %xor +} + +; Check the high end of the XILF range. +define i64 @f2(i64 %a) { +; CHECK: f2: +; CHECK: xilf %r2, 4294967295 +; CHECK: br %r14 + %xor = xor i64 %a, 4294967295 + ret i64 %xor +} + +; Check the lowest useful XIHF value, which is one up from the above. +define i64 @f3(i64 %a) { +; CHECK: f3: +; CHECK: xihf %r2, 1 +; CHECK: br %r14 + %xor = xor i64 %a, 4294967296 + ret i64 %xor +} + +; Check the next value up again, which needs a combination of XIHF and XILF. 
+define i64 @f4(i64 %a) { +; CHECK: f4: +; CHECK: xihf %r2, 1 +; CHECK: xilf %r2, 4294967295 +; CHECK: br %r14 + %xor = xor i64 %a, 8589934591 + ret i64 %xor +} + +; Check the high end of the XIHF range. +define i64 @f5(i64 %a) { +; CHECK: f5: +; CHECK: xihf %r2, 4294967295 +; CHECK: br %r14 + %xor = xor i64 %a, -4294967296 + ret i64 %xor +} + +; Check the next value up, which again must use XIHF and XILF. +define i64 @f6(i64 %a) { +; CHECK: f6: +; CHECK: xihf %r2, 4294967295 +; CHECK: xilf %r2, 1 +; CHECK: br %r14 + %xor = xor i64 %a, -4294967295 + ret i64 %xor +} + +; Check full bitwise negation +define i64 @f7(i64 %a) { +; CHECK: f7: +; CHECK: xihf %r2, 4294967295 +; CHECK: xilf %r2, 4294967295 +; CHECK: br %r14 + %xor = xor i64 %a, -1 + ret i64 %xor +} diff --git a/test/CodeGen/SystemZ/xor-05.ll b/test/CodeGen/SystemZ/xor-05.ll new file mode 100644 index 0000000..9ef0d20 --- /dev/null +++ b/test/CodeGen/SystemZ/xor-05.ll @@ -0,0 +1,165 @@ +; Test XORs of a constant into a byte of memory. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check the lowest useful constant, expressed as a signed integer. +define void @f1(i8 *%ptr) { +; CHECK: f1: +; CHECK: xi 0(%r2), 1 +; CHECK: br %r14 + %val = load i8 *%ptr + %xor = xor i8 %val, -255 + store i8 %xor, i8 *%ptr + ret void +} + +; Check the highest useful constant, expressed as a signed integer. +define void @f2(i8 *%ptr) { +; CHECK: f2: +; CHECK: xi 0(%r2), 254 +; CHECK: br %r14 + %val = load i8 *%ptr + %xor = xor i8 %val, -2 + store i8 %xor, i8 *%ptr + ret void +} + +; Check the lowest useful constant, expressed as an unsigned integer. +define void @f3(i8 *%ptr) { +; CHECK: f3: +; CHECK: xi 0(%r2), 1 +; CHECK: br %r14 + %val = load i8 *%ptr + %xor = xor i8 %val, 1 + store i8 %xor, i8 *%ptr + ret void +} + +; Check the highest useful constant, expressed as an unsigned integer. 
+define void @f4(i8 *%ptr) { +; CHECK: f4: +; CHECK: xi 0(%r2), 254 +; CHECK: br %r14 + %val = load i8 *%ptr + %xor = xor i8 %val, 254 + store i8 %xor, i8 *%ptr + ret void +} + +; Check the high end of the XI range. +define void @f5(i8 *%src) { +; CHECK: f5: +; CHECK: xi 4095(%r2), 127 +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 4095 + %val = load i8 *%ptr + %xor = xor i8 %val, 127 + store i8 %xor, i8 *%ptr + ret void +} + +; Check the next byte up, which should use XIY instead of XI. +define void @f6(i8 *%src) { +; CHECK: f6: +; CHECK: xiy 4096(%r2), 127 +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 4096 + %val = load i8 *%ptr + %xor = xor i8 %val, 127 + store i8 %xor, i8 *%ptr + ret void +} + +; Check the high end of the XIY range. +define void @f7(i8 *%src) { +; CHECK: f7: +; CHECK: xiy 524287(%r2), 127 +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 524287 + %val = load i8 *%ptr + %xor = xor i8 %val, 127 + store i8 %xor, i8 *%ptr + ret void +} + +; Check the next byte up, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f8(i8 *%src) { +; CHECK: f8: +; CHECK: agfi %r2, 524288 +; CHECK: xi 0(%r2), 127 +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 524288 + %val = load i8 *%ptr + %xor = xor i8 %val, 127 + store i8 %xor, i8 *%ptr + ret void +} + +; Check the high end of the negative XIY range. +define void @f9(i8 *%src) { +; CHECK: f9: +; CHECK: xiy -1(%r2), 127 +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 -1 + %val = load i8 *%ptr + %xor = xor i8 %val, 127 + store i8 %xor, i8 *%ptr + ret void +} + +; Check the low end of the XIY range. +define void @f10(i8 *%src) { +; CHECK: f10: +; CHECK: xiy -524288(%r2), 127 +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 -524288 + %val = load i8 *%ptr + %xor = xor i8 %val, 127 + store i8 %xor, i8 *%ptr + ret void +} + +; Check the next byte down, which needs separate address logic. 
+; Other sequences besides this one would be OK. +define void @f11(i8 *%src) { +; CHECK: f11: +; CHECK: agfi %r2, -524289 +; CHECK: xi 0(%r2), 127 +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 -524289 + %val = load i8 *%ptr + %xor = xor i8 %val, 127 + store i8 %xor, i8 *%ptr + ret void +} + +; Check that XI does not allow an index +define void @f12(i64 %src, i64 %index) { +; CHECK: f12: +; CHECK: agr %r2, %r3 +; CHECK: xi 4095(%r2), 127 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4095 + %ptr = inttoptr i64 %add2 to i8 * + %val = load i8 *%ptr + %xor = xor i8 %val, 127 + store i8 %xor, i8 *%ptr + ret void +} + +; Check that XIY does not allow an index +define void @f13(i64 %src, i64 %index) { +; CHECK: f13: +; CHECK: agr %r2, %r3 +; CHECK: xiy 4096(%r2), 127 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i8 * + %val = load i8 *%ptr + %xor = xor i8 %val, 127 + store i8 %xor, i8 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/xor-06.ll b/test/CodeGen/SystemZ/xor-06.ll new file mode 100644 index 0000000..0ffff47 --- /dev/null +++ b/test/CodeGen/SystemZ/xor-06.ll @@ -0,0 +1,108 @@ +; Test that we can use XI for byte operations that are expressed as i32 +; or i64 operations. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Zero extension to 32 bits, negative constant. +define void @f1(i8 *%ptr) { +; CHECK: f1: +; CHECK: xi 0(%r2), 254 +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i32 + %xor = xor i32 %ext, -2 + %trunc = trunc i32 %xor to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; Zero extension to 64 bits, negative constant. +define void @f2(i8 *%ptr) { +; CHECK: f2: +; CHECK: xi 0(%r2), 254 +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i64 + %xor = xor i64 %ext, -2 + %trunc = trunc i64 %xor to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; Zero extension to 32 bits, positive constant. 
+define void @f3(i8 *%ptr) { +; CHECK: f3: +; CHECK: xi 0(%r2), 254 +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i32 + %xor = xor i32 %ext, 254 + %trunc = trunc i32 %xor to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; Zero extension to 64 bits, positive constant. +define void @f4(i8 *%ptr) { +; CHECK: f4: +; CHECK: xi 0(%r2), 254 +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = zext i8 %val to i64 + %xor = xor i64 %ext, 254 + %trunc = trunc i64 %xor to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; Sign extension to 32 bits, negative constant. +define void @f5(i8 *%ptr) { +; CHECK: f5: +; CHECK: xi 0(%r2), 254 +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i32 + %xor = xor i32 %ext, -2 + %trunc = trunc i32 %xor to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; Sign extension to 64 bits, negative constant. +define void @f6(i8 *%ptr) { +; CHECK: f6: +; CHECK: xi 0(%r2), 254 +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i64 + %xor = xor i64 %ext, -2 + %trunc = trunc i64 %xor to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; Sign extension to 32 bits, positive constant. +define void @f7(i8 *%ptr) { +; CHECK: f7: +; CHECK: xi 0(%r2), 254 +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i32 + %xor = xor i32 %ext, 254 + %trunc = trunc i32 %xor to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; Sign extension to 64 bits, positive constant. +define void @f8(i8 *%ptr) { +; CHECK: f8: +; CHECK: xi 0(%r2), 254 +; CHECK: br %r14 + %val = load i8 *%ptr + %ext = sext i8 %val to i64 + %xor = xor i64 %ext, 254 + %trunc = trunc i64 %xor to i8 + store i8 %trunc, i8 *%ptr + ret void +} |