diff options
author | Richard Sandiford <rsandifo@linux.vnet.ibm.com> | 2013-07-09 09:32:42 +0000 |
---|---|---|
committer | Richard Sandiford <rsandifo@linux.vnet.ibm.com> | 2013-07-09 09:32:42 +0000 |
commit | f6ea5e0d8007234fc74c1ff6ac2c3ca316c41d92 (patch) | |
tree | 885d12c6564d8dc4cc6042b030eb521372518cbb | |
parent | fcb7b97892dad5bc6ae55f513f8a111563078996 (diff) | |
download | external_llvm-f6ea5e0d8007234fc74c1ff6ac2c3ca316c41d92.zip external_llvm-f6ea5e0d8007234fc74c1ff6ac2c3ca316c41d92.tar.gz external_llvm-f6ea5e0d8007234fc74c1ff6ac2c3ca316c41d92.tar.bz2 |
[SystemZ] Use "STC;MVC" for memset
Use "STC;MVC" for memsets that are too big for two STCs or MV...I stores, yet
small enough for a single MVC. As with memcpy, I'm leaving longer cases
till later.
The number of tests might seem excessive, but f33 & f34 from memset-04.ll
failed the first cut because I'd not added the "?:" on the calculation
of Size1.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185918 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/SystemZ/SystemZISelLowering.cpp | 8 | ||||
-rw-r--r-- | lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp | 81 | ||||
-rw-r--r-- | lib/Target/SystemZ/SystemZSelectionDAGInfo.h | 6 | ||||
-rw-r--r-- | test/CodeGen/SystemZ/memset-01.ll | 124 | ||||
-rw-r--r-- | test/CodeGen/SystemZ/memset-02.ll | 160 | ||||
-rw-r--r-- | test/CodeGen/SystemZ/memset-03.ll | 396 | ||||
-rw-r--r-- | test/CodeGen/SystemZ/memset-04.ll | 396 |
7 files changed, 1171 insertions, 0 deletions
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index bf35946..b1abc2c 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -245,6 +245,14 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) // We want to use MVC in preference to even a single load/store pair. MaxStoresPerMemcpy = 0; MaxStoresPerMemcpyOptSize = 0; + + // The main memset sequence is a byte store followed by an MVC. + // Two STC or MV..I stores win over that, but the kind of fused stores + // generated by target-independent code don't when the byte value is + // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better + // than "STC;MVC". Handle the choice in target-specific code instead. + MaxStoresPerMemset = 0; + MaxStoresPerMemsetOptSize = 0; } bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp index d2da9d2..4ca9292 100644 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -44,3 +44,84 @@ EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, } return SDValue(); } + +// Handle a memset of 1, 2, 4 or 8 bytes with the operands given by +// Chain, Dst, ByteVal and Size. These cases are expected to use +// MVI, MVHHI, MVHI and MVGHI respectively. 
+static SDValue memsetStore(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Dst, uint64_t ByteVal, uint64_t Size, + unsigned Align, + MachinePointerInfo DstPtrInfo) { + uint64_t StoreVal = ByteVal; + for (unsigned I = 1; I < Size; ++I) + StoreVal |= ByteVal << (I * 8); + return DAG.getStore(Chain, DL, + DAG.getConstant(StoreVal, MVT::getIntegerVT(Size * 8)), + Dst, DstPtrInfo, false, false, Align); +} + +SDValue SystemZSelectionDAGInfo:: +EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Dst, SDValue Byte, SDValue Size, + unsigned Align, bool IsVolatile, + MachinePointerInfo DstPtrInfo) const { + EVT DstVT = Dst.getValueType(); + + if (IsVolatile) + return SDValue(); + + if (ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(Size)) { + uint64_t Bytes = CSize->getZExtValue(); + if (Bytes == 0) + return SDValue(); + if (ConstantSDNode *CByte = dyn_cast<ConstantSDNode>(Byte)) { + // Handle cases that can be done using at most two of + // MVI, MVHHI, MVHI and MVGHI. The latter two can only be + // used if ByteVal is all zeros or all ones; in other cases, + // we can move at most 2 halfwords. + uint64_t ByteVal = CByte->getZExtValue(); + if (ByteVal == 0 || ByteVal == 255 ? + Bytes <= 16 && CountPopulation_64(Bytes) <= 2 : + Bytes <= 4) { + unsigned Size1 = Bytes == 16 ? 8 : 1 << findLastSet(Bytes); + unsigned Size2 = Bytes - Size1; + SDValue Chain1 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size1, + Align, DstPtrInfo); + if (Size2 == 0) + return Chain1; + Dst = DAG.getNode(ISD::ADD, DL, DstVT, Dst, + DAG.getConstant(Size1, DstVT)); + DstPtrInfo = DstPtrInfo.getWithOffset(Size1); + SDValue Chain2 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size2, + std::min(Align, Size1), DstPtrInfo); + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2); + } + } else { + // Handle one and two bytes using STC.
+ if (Bytes <= 2) { + SDValue Chain1 = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, + false, false, Align); + if (Bytes == 1) + return Chain1; + SDValue Dst2 = DAG.getNode(ISD::ADD, DL, DstVT, Dst, + DAG.getConstant(1, DstVT)); + SDValue Chain2 = DAG.getStore(Chain, DL, Byte, Dst2, + DstPtrInfo.getWithOffset(1), + false, false, 1); + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2); + } + } + assert(Bytes >= 2 && "Should have dealt with 0- and 1-byte cases already"); + if (Bytes <= 0x101) { + // Copy the byte to the first location and then use MVC to copy + // it to the rest. + Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, + false, false, Align); + SDValue Dst2 = DAG.getNode(ISD::ADD, DL, DstVT, Dst, + DAG.getConstant(1, DstVT)); + return DAG.getNode(SystemZISD::MVC, DL, MVT::Other, Chain, Dst2, Dst, + DAG.getConstant(Bytes - 1, MVT::i32)); + } + } + return SDValue(); +} diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h index 39c1491..9138a9c 100644 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h @@ -33,6 +33,12 @@ public: MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const LLVM_OVERRIDE; + + virtual SDValue + EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, + SDValue Chain, SDValue Dst, SDValue Byte, + SDValue Size, unsigned Align, bool IsVolatile, + MachinePointerInfo DstPtrInfo) const; }; } diff --git a/test/CodeGen/SystemZ/memset-01.ll b/test/CodeGen/SystemZ/memset-01.ll new file mode 100644 index 0000000..1592318 --- /dev/null +++ b/test/CodeGen/SystemZ/memset-01.ll @@ -0,0 +1,124 @@ +; Test memset in cases where the set value is variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare void @llvm.memset.p0i8.i32(i8 *nocapture, i8, i32, i32, i1) nounwind +declare void @llvm.memset.p0i8.i64(i8 *nocapture, i8, i64, i32, i1) nounwind + +; No bytes, i32 version.
+define void @f1(i8 *%dest, i8 %val) { +; CHECK: f1: +; CHECK-NOT: %r2 +; CHECK-NOT: %r3 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 %val, i32 0, i32 1, i1 false) + ret void +} + +; No bytes, i64 version. +define void @f2(i8 *%dest, i8 %val) { +; CHECK: f2: +; CHECK-NOT: %r2 +; CHECK-NOT: %r3 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 %val, i64 0, i32 1, i1 false) + ret void +} + +; 1 byte, i32 version. +define void @f3(i8 *%dest, i8 %val) { +; CHECK: f3: +; CHECK: stc %r3, 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 %val, i32 1, i32 1, i1 false) + ret void +} + +; 1 byte, i64 version. +define void @f4(i8 *%dest, i8 %val) { +; CHECK: f4: +; CHECK: stc %r3, 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 %val, i64 1, i32 1, i1 false) + ret void +} + +; 2 bytes, i32 version. +define void @f5(i8 *%dest, i8 %val) { +; CHECK: f5: +; CHECK-DAG: stc %r3, 0(%r2) +; CHECK-DAG: stc %r3, 1(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 %val, i32 2, i32 1, i1 false) + ret void +} + +; 2 bytes, i64 version. +define void @f6(i8 *%dest, i8 %val) { +; CHECK: f6: +; CHECK-DAG: stc %r3, 0(%r2) +; CHECK-DAG: stc %r3, 1(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 %val, i64 2, i32 1, i1 false) + ret void +} + +; 3 bytes, i32 version. +define void @f7(i8 *%dest, i8 %val) { +; CHECK: f7: +; CHECK: stc %r3, 0(%r2) +; CHECK: mvc 1(2,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 %val, i32 3, i32 1, i1 false) + ret void +} + +; 3 bytes, i64 version. +define void @f8(i8 *%dest, i8 %val) { +; CHECK: f8: +; CHECK: stc %r3, 0(%r2) +; CHECK: mvc 1(2,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 %val, i64 3, i32 1, i1 false) + ret void +} + +; 257 bytes, i32 version. 
+define void @f9(i8 *%dest, i8 %val) { +; CHECK: f9: +; CHECK: stc %r3, 0(%r2) +; CHECK: mvc 1(256,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 %val, i32 257, i32 1, i1 false) + ret void +} + +; 257 bytes, i64 version. +define void @f10(i8 *%dest, i8 %val) { +; CHECK: f10: +; CHECK: stc %r3, 0(%r2) +; CHECK: mvc 1(256,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 %val, i64 257, i32 1, i1 false) + ret void +} + +; 258 bytes, i32 version. 258 bytes is too big for a single MVC. +; For now expect none, so that the test fails and gets updated when +; large copies are implemented. +define void @f11(i8 *%dest, i8 %val) { +; CHECK: f11: +; CHECK-NOT: mvc +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 %val, i32 258, i32 1, i1 false) + ret void +} + +; 258 bytes, i64 version, with the same comments as above. +define void @f12(i8 *%dest, i8 %val) { +; CHECK: f12: +; CHECK-NOT: mvc +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 %val, i64 258, i32 1, i1 false) + ret void +} diff --git a/test/CodeGen/SystemZ/memset-02.ll b/test/CodeGen/SystemZ/memset-02.ll new file mode 100644 index 0000000..c2c45fb --- /dev/null +++ b/test/CodeGen/SystemZ/memset-02.ll @@ -0,0 +1,160 @@ +; Test memset in cases where the set value is a constant other than 0 and -1. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare void @llvm.memset.p0i8.i32(i8 *nocapture, i8, i32, i32, i1) nounwind +declare void @llvm.memset.p0i8.i64(i8 *nocapture, i8, i64, i32, i1) nounwind + +; No bytes, i32 version. +define void @f1(i8 *%dest) { +; CHECK: f1: +; CHECK-NOT: %r2 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 128, i32 0, i32 1, i1 false) + ret void +} + +; No bytes, i64 version. 
+define void @f2(i8 *%dest) { +; CHECK: f2: +; CHECK-NOT: %r2 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 128, i64 0, i32 1, i1 false) + ret void +} + +; 1 byte, i32 version. +define void @f3(i8 *%dest) { +; CHECK: f3: +; CHECK: mvi 0(%r2), 128 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 128, i32 1, i32 1, i1 false) + ret void +} + +; 1 byte, i64 version. +define void @f4(i8 *%dest) { +; CHECK: f4: +; CHECK: mvi 0(%r2), 128 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 128, i64 1, i32 1, i1 false) + ret void +} + +; 2 bytes, i32 version. +define void @f5(i8 *%dest) { +; CHECK: f5: +; CHECK: mvhhi 0(%r2), -32640 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 128, i32 2, i32 1, i1 false) + ret void +} + +; 2 bytes, i64 version. +define void @f6(i8 *%dest) { +; CHECK: f6: +; CHECK: mvhhi 0(%r2), -32640 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 128, i64 2, i32 1, i1 false) + ret void +} + +; 3 bytes, i32 version. +define void @f7(i8 *%dest) { +; CHECK: f7: +; CHECK-DAG: mvhhi 0(%r2), -32640 +; CHECK-DAG: mvi 2(%r2), 128 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 128, i32 3, i32 1, i1 false) + ret void +} + +; 3 bytes, i64 version. +define void @f8(i8 *%dest) { +; CHECK: f8: +; CHECK-DAG: mvhhi 0(%r2), -32640 +; CHECK-DAG: mvi 2(%r2), 128 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 128, i64 3, i32 1, i1 false) + ret void +} + +; 4 bytes, i32 version. +define void @f9(i8 *%dest) { +; CHECK: f9: +; CHECK: iilf [[REG:%r[0-5]]], 2155905152 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 128, i32 4, i32 1, i1 false) + ret void +} + +; 4 bytes, i64 version. 
+define void @f10(i8 *%dest) { +; CHECK: f10: +; CHECK: iilf [[REG:%r[0-5]]], 2155905152 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 128, i64 4, i32 1, i1 false) + ret void +} + +; 5 bytes, i32 version. +define void @f11(i8 *%dest) { +; CHECK: f11: +; CHECK: mvi 0(%r2), 128 +; CHECK: mvc 1(4,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 128, i32 5, i32 1, i1 false) + ret void +} + +; 5 bytes, i64 version. +define void @f12(i8 *%dest) { +; CHECK: f12: +; CHECK: mvi 0(%r2), 128 +; CHECK: mvc 1(4,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 128, i64 5, i32 1, i1 false) + ret void +} + +; 257 bytes, i32 version. +define void @f13(i8 *%dest) { +; CHECK: f13: +; CHECK: mvi 0(%r2), 128 +; CHECK: mvc 1(256,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 128, i32 257, i32 1, i1 false) + ret void +} + +; 257 bytes, i64 version. +define void @f14(i8 *%dest) { +; CHECK: f14: +; CHECK: mvi 0(%r2), 128 +; CHECK: mvc 1(256,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 128, i64 257, i32 1, i1 false) + ret void +} + +; 258 bytes, i32 version. 258 bytes is too big for a single MVC. +; For now expect none, so that the test fails and gets updated when +; large copies are implemented. +define void @f15(i8 *%dest) { +; CHECK: f15: +; CHECK-NOT: mvc +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 128, i32 258, i32 1, i1 false) + ret void +} + +; 258 bytes, i64 version, with the same comments as above. 
+define void @f16(i8 *%dest) { +; CHECK: f16: +; CHECK-NOT: mvc +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 128, i64 258, i32 1, i1 false) + ret void +} diff --git a/test/CodeGen/SystemZ/memset-03.ll b/test/CodeGen/SystemZ/memset-03.ll new file mode 100644 index 0000000..b18cca4 --- /dev/null +++ b/test/CodeGen/SystemZ/memset-03.ll @@ -0,0 +1,396 @@ +; Test memsets that clear all bits. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare void @llvm.memset.p0i8.i32(i8 *nocapture, i8, i32, i32, i1) nounwind +declare void @llvm.memset.p0i8.i64(i8 *nocapture, i8, i64, i32, i1) nounwind + +; No bytes, i32 version. +define void @f1(i8 *%dest) { +; CHECK: f1: +; CHECK-NOT: %r2 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 0, i32 1, i1 false) + ret void +} + +; No bytes, i64 version. +define void @f2(i8 *%dest) { +; CHECK: f2: +; CHECK-NOT: %r2 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 0, i32 1, i1 false) + ret void +} + +; 1 byte, i32 version. +define void @f3(i8 *%dest) { +; CHECK: f3: +; CHECK: mvi 0(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 1, i32 1, i1 false) + ret void +} + +; 1 byte, i64 version. +define void @f4(i8 *%dest) { +; CHECK: f4: +; CHECK: mvi 0(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 1, i32 1, i1 false) + ret void +} + +; 2 bytes, i32 version. +define void @f5(i8 *%dest) { +; CHECK: f5: +; CHECK: mvhhi 0(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 2, i32 1, i1 false) + ret void +} + +; 2 bytes, i64 version. +define void @f6(i8 *%dest) { +; CHECK: f6: +; CHECK: mvhhi 0(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 2, i32 1, i1 false) + ret void +} + +; 3 bytes, i32 version. 
+define void @f7(i8 *%dest) { +; CHECK: f7: +; CHECK-DAG: mvhhi 0(%r2), 0 +; CHECK-DAG: mvi 2(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 3, i32 1, i1 false) + ret void +} + +; 3 bytes, i64 version. +define void @f8(i8 *%dest) { +; CHECK: f8: +; CHECK-DAG: mvhhi 0(%r2), 0 +; CHECK-DAG: mvi 2(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 3, i32 1, i1 false) + ret void +} + +; 4 bytes, i32 version. +define void @f9(i8 *%dest) { +; CHECK: f9: +; CHECK: mvhi 0(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 4, i32 1, i1 false) + ret void +} + +; 4 bytes, i64 version. +define void @f10(i8 *%dest) { +; CHECK: f10: +; CHECK: mvhi 0(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 4, i32 1, i1 false) + ret void +} + +; 5 bytes, i32 version. +define void @f11(i8 *%dest) { +; CHECK: f11: +; CHECK-DAG: mvhi 0(%r2), 0 +; CHECK-DAG: mvi 4(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 5, i32 1, i1 false) + ret void +} + +; 5 bytes, i64 version. +define void @f12(i8 *%dest) { +; CHECK: f12: +; CHECK-DAG: mvhi 0(%r2), 0 +; CHECK-DAG: mvi 4(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 5, i32 1, i1 false) + ret void +} + +; 6 bytes, i32 version. +define void @f13(i8 *%dest) { +; CHECK: f13: +; CHECK-DAG: mvhi 0(%r2), 0 +; CHECK-DAG: mvhhi 4(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 6, i32 1, i1 false) + ret void +} + +; 6 bytes, i64 version. +define void @f14(i8 *%dest) { +; CHECK: f14: +; CHECK-DAG: mvhi 0(%r2), 0 +; CHECK-DAG: mvhhi 4(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 6, i32 1, i1 false) + ret void +} + +; 7 bytes, i32 version. 
+define void @f15(i8 *%dest) { +; CHECK: f15: +; CHECK: mvi 0(%r2), 0 +; CHECK: mvc 1(6,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 7, i32 1, i1 false) + ret void +} + +; 7 bytes, i64 version. +define void @f16(i8 *%dest) { +; CHECK: f16: +; CHECK: mvi 0(%r2), 0 +; CHECK: mvc 1(6,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 7, i32 1, i1 false) + ret void +} + +; 8 bytes, i32 version. +define void @f17(i8 *%dest) { +; CHECK: f17: +; CHECK: mvghi 0(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 8, i32 1, i1 false) + ret void +} + +; 8 bytes, i64 version. +define void @f18(i8 *%dest) { +; CHECK: f18: +; CHECK: mvghi 0(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 8, i32 1, i1 false) + ret void +} + +; 9 bytes, i32 version. +define void @f19(i8 *%dest) { +; CHECK: f19: +; CHECK-DAG: mvghi 0(%r2), 0 +; CHECK-DAG: mvi 8(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 9, i32 1, i1 false) + ret void +} + +; 9 bytes, i64 version. +define void @f20(i8 *%dest) { +; CHECK: f20: +; CHECK-DAG: mvghi 0(%r2), 0 +; CHECK-DAG: mvi 8(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 9, i32 1, i1 false) + ret void +} + +; 10 bytes, i32 version. +define void @f21(i8 *%dest) { +; CHECK: f21: +; CHECK-DAG: mvghi 0(%r2), 0 +; CHECK-DAG: mvhhi 8(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 10, i32 1, i1 false) + ret void +} + +; 10 bytes, i64 version. +define void @f22(i8 *%dest) { +; CHECK: f22: +; CHECK-DAG: mvghi 0(%r2), 0 +; CHECK-DAG: mvhhi 8(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 10, i32 1, i1 false) + ret void +} + +; 11 bytes, i32 version. 
+define void @f23(i8 *%dest) { +; CHECK: f23: +; CHECK: mvi 0(%r2), 0 +; CHECK: mvc 1(10,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 11, i32 1, i1 false) + ret void +} + +; 11 bytes, i64 version. +define void @f24(i8 *%dest) { +; CHECK: f24: +; CHECK: mvi 0(%r2), 0 +; CHECK: mvc 1(10,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 11, i32 1, i1 false) + ret void +} + +; 12 bytes, i32 version. +define void @f25(i8 *%dest) { +; CHECK: f25: +; CHECK-DAG: mvghi 0(%r2), 0 +; CHECK-DAG: mvhi 8(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 12, i32 1, i1 false) + ret void +} + +; 12 bytes, i64 version. +define void @f26(i8 *%dest) { +; CHECK: f26: +; CHECK-DAG: mvghi 0(%r2), 0 +; CHECK-DAG: mvhi 8(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 12, i32 1, i1 false) + ret void +} + +; 13 bytes, i32 version. +define void @f27(i8 *%dest) { +; CHECK: f27: +; CHECK: mvi 0(%r2), 0 +; CHECK: mvc 1(12,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 13, i32 1, i1 false) + ret void +} + +; 13 bytes, i64 version. +define void @f28(i8 *%dest) { +; CHECK: f28: +; CHECK: mvi 0(%r2), 0 +; CHECK: mvc 1(12,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 13, i32 1, i1 false) + ret void +} + +; 14 bytes, i32 version. +define void @f29(i8 *%dest) { +; CHECK: f29: +; CHECK: mvi 0(%r2), 0 +; CHECK: mvc 1(13,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 14, i32 1, i1 false) + ret void +} + +; 14 bytes, i64 version. +define void @f30(i8 *%dest) { +; CHECK: f30: +; CHECK: mvi 0(%r2), 0 +; CHECK: mvc 1(13,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 14, i32 1, i1 false) + ret void +} + +; 15 bytes, i32 version.
+define void @f31(i8 *%dest) { +; CHECK: f31: +; CHECK: mvi 0(%r2), 0 +; CHECK: mvc 1(14,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 15, i32 1, i1 false) + ret void +} + +; 15 bytes, i64 version. +define void @f32(i8 *%dest) { +; CHECK: f32: +; CHECK: mvi 0(%r2), 0 +; CHECK: mvc 1(14,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 15, i32 1, i1 false) + ret void +} + +; 16 bytes, i32 version. +define void @f33(i8 *%dest) { +; CHECK: f33: +; CHECK-DAG: mvghi 0(%r2), 0 +; CHECK-DAG: mvghi 8(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 16, i32 1, i1 false) + ret void +} + +; 16 bytes, i64 version. +define void @f34(i8 *%dest) { +; CHECK: f34: +; CHECK-DAG: mvghi 0(%r2), 0 +; CHECK-DAG: mvghi 8(%r2), 0 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 16, i32 1, i1 false) + ret void +} + +; 17 bytes, i32 version. +define void @f35(i8 *%dest) { +; CHECK: f35: +; CHECK: mvi 0(%r2), 0 +; CHECK: mvc 1(16,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 17, i32 1, i1 false) + ret void +} + +; 17 bytes, i64 version. +define void @f36(i8 *%dest) { +; CHECK: f36: +; CHECK: mvi 0(%r2), 0 +; CHECK: mvc 1(16,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 17, i32 1, i1 false) + ret void +} + +; 257 bytes, i32 version. +define void @f37(i8 *%dest) { +; CHECK: f37: +; CHECK: mvi 0(%r2), 0 +; CHECK: mvc 1(256,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 257, i32 1, i1 false) + ret void +} + +; 257 bytes, i64 version. +define void @f38(i8 *%dest) { +; CHECK: f38: +; CHECK: mvi 0(%r2), 0 +; CHECK: mvc 1(256,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 257, i32 1, i1 false) + ret void +} + +; 258 bytes, i32 version. 258 bytes is too big for a single MVC. 
+; For now expect none, so that the test fails and gets updated when +; large copies are implemented. +define void @f39(i8 *%dest) { +; CHECK: f39: +; CHECK-NOT: mvc +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 258, i32 1, i1 false) + ret void +} + +; 258 bytes, i64 version, with the same comments as above. +define void @f40(i8 *%dest) { +; CHECK: f40: +; CHECK-NOT: mvc +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 258, i32 1, i1 false) + ret void +} diff --git a/test/CodeGen/SystemZ/memset-04.ll b/test/CodeGen/SystemZ/memset-04.ll new file mode 100644 index 0000000..679e21f --- /dev/null +++ b/test/CodeGen/SystemZ/memset-04.ll @@ -0,0 +1,396 @@ +; Test memsets that set all bits. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare void @llvm.memset.p0i8.i32(i8 *nocapture, i8, i32, i32, i1) nounwind +declare void @llvm.memset.p0i8.i64(i8 *nocapture, i8, i64, i32, i1) nounwind + +; No bytes, i32 version. +define void @f1(i8 *%dest) { +; CHECK: f1: +; CHECK-NOT: %r2 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 0, i32 1, i1 false) + ret void +} + +; No bytes, i64 version. +define void @f2(i8 *%dest) { +; CHECK: f2: +; CHECK-NOT: %r2 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 0, i32 1, i1 false) + ret void +} + +; 1 byte, i32 version. +define void @f3(i8 *%dest) { +; CHECK: f3: +; CHECK: mvi 0(%r2), 255 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 1, i32 1, i1 false) + ret void +} + +; 1 byte, i64 version. +define void @f4(i8 *%dest) { +; CHECK: f4: +; CHECK: mvi 0(%r2), 255 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 1, i32 1, i1 false) + ret void +} + +; 2 bytes, i32 version. +define void @f5(i8 *%dest) { +; CHECK: f5: +; CHECK: mvhhi 0(%r2), -1 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 2, i32 1, i1 false) + ret void +} + +; 2 bytes, i64 version. 
+define void @f6(i8 *%dest) { +; CHECK: f6: +; CHECK: mvhhi 0(%r2), -1 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 2, i32 1, i1 false) + ret void +} + +; 3 bytes, i32 version. +define void @f7(i8 *%dest) { +; CHECK: f7: +; CHECK-DAG: mvhhi 0(%r2), -1 +; CHECK-DAG: mvi 2(%r2), 255 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 3, i32 1, i1 false) + ret void +} + +; 3 bytes, i64 version. +define void @f8(i8 *%dest) { +; CHECK: f8: +; CHECK-DAG: mvhhi 0(%r2), -1 +; CHECK-DAG: mvi 2(%r2), 255 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 3, i32 1, i1 false) + ret void +} + +; 4 bytes, i32 version. +define void @f9(i8 *%dest) { +; CHECK: f9: +; CHECK: mvhi 0(%r2), -1 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 4, i32 1, i1 false) + ret void +} + +; 4 bytes, i64 version. +define void @f10(i8 *%dest) { +; CHECK: f10: +; CHECK: mvhi 0(%r2), -1 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 4, i32 1, i1 false) + ret void +} + +; 5 bytes, i32 version. +define void @f11(i8 *%dest) { +; CHECK: f11: +; CHECK-DAG: mvhi 0(%r2), -1 +; CHECK-DAG: mvi 4(%r2), 255 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 5, i32 1, i1 false) + ret void +} + +; 5 bytes, i64 version. +define void @f12(i8 *%dest) { +; CHECK: f12: +; CHECK-DAG: mvhi 0(%r2), -1 +; CHECK-DAG: mvi 4(%r2), 255 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 5, i32 1, i1 false) + ret void +} + +; 6 bytes, i32 version. +define void @f13(i8 *%dest) { +; CHECK: f13: +; CHECK-DAG: mvhi 0(%r2), -1 +; CHECK-DAG: mvhhi 4(%r2), -1 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 6, i32 1, i1 false) + ret void +} + +; 6 bytes, i64 version. 
+define void @f14(i8 *%dest) { +; CHECK: f14: +; CHECK-DAG: mvhi 0(%r2), -1 +; CHECK-DAG: mvhhi 4(%r2), -1 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 6, i32 1, i1 false) + ret void +} + +; 7 bytes, i32 version. +define void @f15(i8 *%dest) { +; CHECK: f15: +; CHECK: mvi 0(%r2), 255 +; CHECK: mvc 1(6,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 7, i32 1, i1 false) + ret void +} + +; 7 bytes, i64 version. +define void @f16(i8 *%dest) { +; CHECK: f16: +; CHECK: mvi 0(%r2), 255 +; CHECK: mvc 1(6,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 7, i32 1, i1 false) + ret void +} + +; 8 bytes, i32 version. +define void @f17(i8 *%dest) { +; CHECK: f17: +; CHECK: mvghi 0(%r2), -1 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 8, i32 1, i1 false) + ret void +} + +; 8 bytes, i64 version. +define void @f18(i8 *%dest) { +; CHECK: f18: +; CHECK: mvghi 0(%r2), -1 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 8, i32 1, i1 false) + ret void +} + +; 9 bytes, i32 version. +define void @f19(i8 *%dest) { +; CHECK: f19: +; CHECK-DAG: mvghi 0(%r2), -1 +; CHECK-DAG: mvi 8(%r2), 255 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 9, i32 1, i1 false) + ret void +} + +; 9 bytes, i64 version. +define void @f20(i8 *%dest) { +; CHECK: f20: +; CHECK-DAG: mvghi 0(%r2), -1 +; CHECK-DAG: mvi 8(%r2), 255 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 9, i32 1, i1 false) + ret void +} + +; 10 bytes, i32 version. +define void @f21(i8 *%dest) { +; CHECK: f21: +; CHECK-DAG: mvghi 0(%r2), -1 +; CHECK-DAG: mvhhi 8(%r2), -1 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 10, i32 1, i1 false) + ret void +} + +; 10 bytes, i64 version. 
+define void @f22(i8 *%dest) { +; CHECK: f22: +; CHECK-DAG: mvghi 0(%r2), -1 +; CHECK-DAG: mvhhi 8(%r2), -1 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 10, i32 1, i1 false) + ret void +} + +; 11 bytes, i32 version. +define void @f23(i8 *%dest) { +; CHECK: f23: +; CHECK: mvi 0(%r2), 255 +; CHECK: mvc 1(10,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 11, i32 1, i1 false) + ret void +} + +; 11 bytes, i64 version. +define void @f24(i8 *%dest) { +; CHECK: f24: +; CHECK: mvi 0(%r2), 255 +; CHECK: mvc 1(10,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 11, i32 1, i1 false) + ret void +} + +; 12 bytes, i32 version. +define void @f25(i8 *%dest) { +; CHECK: f25: +; CHECK-DAG: mvghi 0(%r2), -1 +; CHECK-DAG: mvhi 8(%r2), -1 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 12, i32 1, i1 false) + ret void +} + +; 12 bytes, i64 version. +define void @f26(i8 *%dest) { +; CHECK: f26: +; CHECK-DAG: mvghi 0(%r2), -1 +; CHECK-DAG: mvhi 8(%r2), -1 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 12, i32 1, i1 false) + ret void +} + +; 13 bytes, i32 version. +define void @f27(i8 *%dest) { +; CHECK: f27: +; CHECK: mvi 0(%r2), 255 +; CHECK: mvc 1(12,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 13, i32 1, i1 false) + ret void +} + +; 13 bytes, i64 version. +define void @f28(i8 *%dest) { +; CHECK: f28: +; CHECK: mvi 0(%r2), 255 +; CHECK: mvc 1(12,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 13, i32 1, i1 false) + ret void +} + +; 14 bytes, i32 version. +define void @f29(i8 *%dest) { +; CHECK: f29: +; CHECK: mvi 0(%r2), 255 +; CHECK: mvc 1(13,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 14, i32 1, i1 false) + ret void +} + +; 14 bytes, i64 version. 
+define void @f30(i8 *%dest) { +; CHECK: f30: +; CHECK: mvi 0(%r2), 255 +; CHECK: mvc 1(13,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 14, i32 1, i1 false) + ret void +} + +; 15 bytes, i32 version. +define void @f31(i8 *%dest) { +; CHECK: f31: +; CHECK: mvi 0(%r2), 255 +; CHECK: mvc 1(14,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 15, i32 1, i1 false) + ret void +} + +; 15 bytes, i64 version. +define void @f32(i8 *%dest) { +; CHECK: f32: +; CHECK: mvi 0(%r2), 255 +; CHECK: mvc 1(14,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 15, i32 1, i1 false) + ret void +} + +; 16 bytes, i32 version. +define void @f33(i8 *%dest) { +; CHECK: f33: +; CHECK-DAG: mvghi 0(%r2), -1 +; CHECK-DAG: mvghi 8(%r2), -1 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 16, i32 1, i1 false) + ret void +} + +; 16 bytes, i64 version. +define void @f34(i8 *%dest) { +; CHECK: f34: +; CHECK-DAG: mvghi 0(%r2), -1 +; CHECK-DAG: mvghi 8(%r2), -1 +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 16, i32 1, i1 false) + ret void +} + +; 17 bytes, i32 version. +define void @f35(i8 *%dest) { +; CHECK: f35: +; CHECK: mvi 0(%r2), 255 +; CHECK: mvc 1(16,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 17, i32 1, i1 false) + ret void +} + +; 17 bytes, i64 version. +define void @f36(i8 *%dest) { +; CHECK: f36: +; CHECK: mvi 0(%r2), 255 +; CHECK: mvc 1(16,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 17, i32 1, i1 false) + ret void +} + +; 257 bytes, i32 version. +define void @f37(i8 *%dest) { +; CHECK: f37: +; CHECK: mvi 0(%r2), 255 +; CHECK: mvc 1(256,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 257, i32 1, i1 false) + ret void +} + +; 257 bytes, i64 version. 
+define void @f38(i8 *%dest) { +; CHECK: f38: +; CHECK: mvi 0(%r2), 255 +; CHECK: mvc 1(256,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 257, i32 1, i1 false) + ret void +} + +; 258 bytes, i32 version. 258 bytes is too big for a single MVC. +; For now expect none, so that the test fails and gets updated when +; large copies are implemented. +define void @f39(i8 *%dest) { +; CHECK: f39: +; CHECK-NOT: mvc +; CHECK: br %r14 + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 258, i32 1, i1 false) + ret void +} + +; 258 bytes, i64 version, with the same comments as above. +define void @f40(i8 *%dest) { +; CHECK: f40: +; CHECK-NOT: mvc +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 258, i32 1, i1 false) + ret void +} |