Diffstat:
 lib/Target/SystemZ/SystemZISelDAGToDAG.cpp |  46 +++
 lib/Target/SystemZ/SystemZInstrFP.td       |   4 +
 lib/Target/SystemZ/SystemZInstrInfo.td     |  22 ++
 lib/Target/SystemZ/SystemZPatterns.td      |  14 +
 test/CodeGen/SystemZ/fp-move-02.ll         |   5 +-
 test/CodeGen/SystemZ/memcpy-02.ll          | 417 ++++++
 6 files changed, 506 insertions(+), 2 deletions(-)
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index f10ba23..0891adc 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "SystemZTargetMachine.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -209,6 +210,8 @@ class SystemZDAGToDAGISel : public SelectionDAGISel { SDNode *splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0, uint64_t UpperVal, uint64_t LowerVal); + bool storeLoadCanUseMVC(SDNode *N) const; + public: SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel) : SelectionDAGISel(TM, OptLevel), @@ -533,6 +536,49 @@ SDNode *SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node, return Or.getNode(); } +// N is a (store (load ...), ...) pattern. Return true if it can use MVC. +bool SystemZDAGToDAGISel::storeLoadCanUseMVC(SDNode *N) const { + StoreSDNode *Store = cast<StoreSDNode>(N); + LoadSDNode *Load = cast<LoadSDNode>(Store->getValue().getNode()); + + // MVC is logically a bytewise copy, so can't be used for volatile accesses. + if (Load->isVolatile() || Store->isVolatile()) + return false; + + // Prefer not to use MVC if either address can use ... RELATIVE LONG + // instructions. + assert(Load->getMemoryVT() == Store->getMemoryVT() && + "Should already have checked that the types match"); + uint64_t Size = Load->getMemoryVT().getStoreSize(); + if (Size > 1 && Size <= 8) { + // Prefer LHRL, LRL and LGRL. + if (Load->getBasePtr().getOpcode() == SystemZISD::PCREL_WRAPPER) + return false; + // Prefer STHRL, STRL and STGRL. + if (Store->getBasePtr().getOpcode() == SystemZISD::PCREL_WRAPPER) + return false; + } + + // There's no chance of overlap if the load is invariant. 
+ if (Load->isInvariant()) + return true; + + // If both operands are aligned, they must be equal or not overlap. + if (Load->getAlignment() >= Size && Store->getAlignment() >= Size) + return true; + + // Otherwise we need to check whether there's an alias. + const Value *V1 = Load->getSrcValue(); + const Value *V2 = Store->getSrcValue(); + if (!V1 || !V2) + return false; + + int64_t End1 = Load->getSrcValueOffset() + Size; + int64_t End2 = Store->getSrcValueOffset() + Size; + return !AA->alias(AliasAnalysis::Location(V1, End1, Load->getTBAAInfo()), + AliasAnalysis::Location(V2, End2, Store->getTBAAInfo())); +} + SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { // Dump information about the Node being selected DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n"); diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td index 4317306..23a3790 100644 --- a/lib/Target/SystemZ/SystemZInstrFP.td +++ b/lib/Target/SystemZ/SystemZInstrFP.td @@ -75,6 +75,10 @@ def : CopySign128<FP64, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_high), def : CopySign128<FP128, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_high), (EXTRACT_SUBREG FP128:$src2, subreg_high))>; +defm LoadStoreF32 : MVCLoadStore<load, store, f32, MVCWrapper, 4>; +defm LoadStoreF64 : MVCLoadStore<load, store, f64, MVCWrapper, 8>; +defm LoadStoreF128 : MVCLoadStore<load, store, f128, MVCWrapper, 16>; + //===----------------------------------------------------------------------===// // Load instructions //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index b4e5c25..5e13c7f 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -294,6 +294,20 @@ let mayLoad = 1, mayStore = 1, usesCustomInserter = 1 in [(z_mvc bdaddr12only:$dest, bdaddr12only:$src, imm32len8:$length)]>; +defm LoadStore8_32 : 
MVCLoadStore<anyextloadi8, truncstorei8, i32, + MVCWrapper, 1>; +defm LoadStore16_32 : MVCLoadStore<anyextloadi16, truncstorei16, i32, + MVCWrapper, 2>; +defm LoadStore32_32 : MVCLoadStore<load, store, i32, MVCWrapper, 4>; + +defm LoadStore8 : MVCLoadStore<anyextloadi8, truncstorei8, i64, + MVCWrapper, 1>; +defm LoadStore16 : MVCLoadStore<anyextloadi16, truncstorei16, i64, + MVCWrapper, 2>; +defm LoadStore32 : MVCLoadStore<anyextloadi32, truncstorei32, i64, + MVCWrapper, 4>; +defm LoadStore64 : MVCLoadStore<load, store, i64, MVCWrapper, 8>; + //===----------------------------------------------------------------------===// // Sign extensions //===----------------------------------------------------------------------===// @@ -339,6 +353,14 @@ def : Pat<(i64 (extloadi8 bdxaddr20only:$src)), (LGB bdxaddr20only:$src)>; def : Pat<(i64 (extloadi16 bdxaddr20only:$src)), (LGH bdxaddr20only:$src)>; def : Pat<(i64 (extloadi32 bdxaddr20only:$src)), (LGF bdxaddr20only:$src)>; +// We want PC-relative addresses to be tried ahead of BD and BDX addresses. +// However, BDXs have two extra operands and are therefore 6 units more +// complex. +let AddedComplexity = 7 in { + def : Pat<(i32 (extloadi16 pcrel32:$src)), (LHRL pcrel32:$src)>; + def : Pat<(i64 (extloadi16 pcrel32:$src)), (LGHRL pcrel32:$src)>; +} + //===----------------------------------------------------------------------===// // Zero extensions //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZPatterns.td b/lib/Target/SystemZ/SystemZPatterns.td index fb6c221..74cc5f0 100644 --- a/lib/Target/SystemZ/SystemZPatterns.td +++ b/lib/Target/SystemZ/SystemZPatterns.td @@ -65,3 +65,17 @@ multiclass InsertMem<string type, Instruction insn, RegisterOperand cls, (load mode:$src2), cls:$src1), (insn cls:$src1, mode:$src2)>; } + +// Use MVC instruction INSN for a load of type LOAD followed by a store +// of type STORE. 
VT is the type of the intermediate register and LENGTH +// is the number of bytes to copy (which may be smaller than VT). +multiclass MVCLoadStore<SDPatternOperator load, SDPatternOperator store, + ValueType vt, Instruction insn, bits<5> length> { + def Pat : PatFrag<(ops node:$dest, node:$src), + (store (vt (load node:$src)), node:$dest), + [{ return storeLoadCanUseMVC(N); }]>; + + def : Pat<(!cast<SDPatternOperator>(NAME##"Pat") bdaddr12only:$dest, + bdaddr12only:$src), + (insn bdaddr12only:$dest, bdaddr12only:$src, length)>; +} diff --git a/test/CodeGen/SystemZ/fp-move-02.ll b/test/CodeGen/SystemZ/fp-move-02.ll index 9d87797..c2bb931 100644 --- a/test/CodeGen/SystemZ/fp-move-02.ll +++ b/test/CodeGen/SystemZ/fp-move-02.ll @@ -58,12 +58,13 @@ define double @f5(i64 %a) { ; Test 128-bit moves from GPRs to FPRs. i128 isn't a legitimate type, ; so this goes through memory. +; FIXME: it would be better to use one MVC here. define void @f6(fp128 *%a, i128 *%b) { ; CHECK: f6: ; CHECK: lg -; CHECK: lg -; CHECK: stg +; CHECK: mvc ; CHECK: stg +; CHECK: br %r14 %val = load i128 *%b %res = bitcast i128 %val to fp128 store fp128 %res, fp128 *%a diff --git a/test/CodeGen/SystemZ/memcpy-02.ll b/test/CodeGen/SystemZ/memcpy-02.ll new file mode 100644 index 0000000..0b576a7 --- /dev/null +++ b/test/CodeGen/SystemZ/memcpy-02.ll @@ -0,0 +1,417 @@ +; Test load/store pairs that act as memcpys. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +@g1 = global i8 1 +@g2 = global i16 2 +@g3 = global i32 3 +@g4 = global i64 4 +@g5 = external global fp128, align 16 + +; Test the simple i8 case. +define void @f1(i8 *%ptr1) { +; CHECK: f1: +; CHECK: mvc 1(1,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i8 *%ptr1, i64 1 + %val = load i8 *%ptr1 + store i8 %val, i8 *%ptr2 + ret void +} + +; Test i8 cases where the value is zero-extended to 32 bits. 
+define void @f2(i8 *%ptr1) { +; CHECK: f2: +; CHECK: mvc 1(1,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i8 *%ptr1, i64 1 + %val = load i8 *%ptr1 + %ext = zext i8 %val to i32 + %trunc = trunc i32 %ext to i8 + store i8 %trunc, i8 *%ptr2 + ret void +} + +; Test i8 cases where the value is zero-extended to 64 bits. +define void @f3(i8 *%ptr1) { +; CHECK: f3: +; CHECK: mvc 1(1,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i8 *%ptr1, i64 1 + %val = load i8 *%ptr1 + %ext = zext i8 %val to i64 + %trunc = trunc i64 %ext to i8 + store i8 %trunc, i8 *%ptr2 + ret void +} + +; Test i8 cases where the value is sign-extended to 32 bits. +define void @f4(i8 *%ptr1) { +; CHECK: f4: +; CHECK: mvc 1(1,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i8 *%ptr1, i64 1 + %val = load i8 *%ptr1 + %ext = sext i8 %val to i32 + %trunc = trunc i32 %ext to i8 + store i8 %trunc, i8 *%ptr2 + ret void +} + +; Test i8 cases where the value is sign-extended to 64 bits. +define void @f5(i8 *%ptr1) { +; CHECK: f5: +; CHECK: mvc 1(1,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i8 *%ptr1, i64 1 + %val = load i8 *%ptr1 + %ext = sext i8 %val to i64 + %trunc = trunc i64 %ext to i8 + store i8 %trunc, i8 *%ptr2 + ret void +} + +; Test the simple i16 case. +define void @f6(i16 *%ptr1) { +; CHECK: f6: +; CHECK: mvc 2(2,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i16 *%ptr1, i64 1 + %val = load i16 *%ptr1 + store i16 %val, i16 *%ptr2 + ret void +} + +; Test i16 cases where the value is zero-extended to 32 bits. +define void @f7(i16 *%ptr1) { +; CHECK: f7: +; CHECK: mvc 2(2,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i16 *%ptr1, i64 1 + %val = load i16 *%ptr1 + %ext = zext i16 %val to i32 + %trunc = trunc i32 %ext to i16 + store i16 %trunc, i16 *%ptr2 + ret void +} + +; Test i16 cases where the value is zero-extended to 64 bits. 
+define void @f8(i16 *%ptr1) { +; CHECK: f8: +; CHECK: mvc 2(2,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i16 *%ptr1, i64 1 + %val = load i16 *%ptr1 + %ext = zext i16 %val to i64 + %trunc = trunc i64 %ext to i16 + store i16 %trunc, i16 *%ptr2 + ret void +} + +; Test i16 cases where the value is sign-extended to 32 bits. +define void @f9(i16 *%ptr1) { +; CHECK: f9: +; CHECK: mvc 2(2,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i16 *%ptr1, i64 1 + %val = load i16 *%ptr1 + %ext = sext i16 %val to i32 + %trunc = trunc i32 %ext to i16 + store i16 %trunc, i16 *%ptr2 + ret void +} + +; Test i16 cases where the value is sign-extended to 64 bits. +define void @f10(i16 *%ptr1) { +; CHECK: f10: +; CHECK: mvc 2(2,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i16 *%ptr1, i64 1 + %val = load i16 *%ptr1 + %ext = sext i16 %val to i64 + %trunc = trunc i64 %ext to i16 + store i16 %trunc, i16 *%ptr2 + ret void +} + +; Test the simple i32 case. +define void @f11(i32 *%ptr1) { +; CHECK: f11: +; CHECK: mvc 4(4,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i32 *%ptr1, i64 1 + %val = load i32 *%ptr1 + store i32 %val, i32 *%ptr2 + ret void +} + +; Test i32 cases where the value is zero-extended to 64 bits. +define void @f12(i32 *%ptr1) { +; CHECK: f12: +; CHECK: mvc 4(4,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i32 *%ptr1, i64 1 + %val = load i32 *%ptr1 + %ext = zext i32 %val to i64 + %trunc = trunc i64 %ext to i32 + store i32 %trunc, i32 *%ptr2 + ret void +} + +; Test i32 cases where the value is sign-extended to 64 bits. +define void @f13(i32 *%ptr1) { +; CHECK: f13: +; CHECK: mvc 4(4,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i32 *%ptr1, i64 1 + %val = load i32 *%ptr1 + %ext = sext i32 %val to i64 + %trunc = trunc i64 %ext to i32 + store i32 %trunc, i32 *%ptr2 + ret void +} + +; Test the i64 case. 
+define void @f14(i64 *%ptr1) { +; CHECK: f14: +; CHECK: mvc 8(8,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i64 *%ptr1, i64 1 + %val = load i64 *%ptr1 + store i64 %val, i64 *%ptr2 + ret void +} + +; Test the f32 case. +define void @f15(float *%ptr1) { +; CHECK: f15: +; CHECK: mvc 4(4,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr float *%ptr1, i64 1 + %val = load float *%ptr1 + store float %val, float *%ptr2 + ret void +} + +; Test the f64 case. +define void @f16(double *%ptr1) { +; CHECK: f16: +; CHECK: mvc 8(8,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr double *%ptr1, i64 1 + %val = load double *%ptr1 + store double %val, double *%ptr2 + ret void +} + +; Test the f128 case. +define void @f17(fp128 *%ptr1) { +; CHECK: f17: +; CHECK: mvc 16(16,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr fp128 *%ptr1, i64 1 + %val = load fp128 *%ptr1 + store fp128 %val, fp128 *%ptr2 + ret void +} + +; Make sure that we don't use MVC if the load is volatile. +define void @f18(i64 *%ptr1) { +; CHECK: f18: +; CHECK-NOT: mvc +; CHECK: br %r14 + %ptr2 = getelementptr i64 *%ptr1, i64 1 + %val = load volatile i64 *%ptr1 + store i64 %val, i64 *%ptr2 + ret void +} + +; ...likewise the store. +define void @f19(i64 *%ptr1) { +; CHECK: f19: +; CHECK-NOT: mvc +; CHECK: br %r14 + %ptr2 = getelementptr i64 *%ptr1, i64 1 + %val = load i64 *%ptr1 + store volatile i64 %val, i64 *%ptr2 + ret void +} + +; Test that MVC is used for aligned loads and stores, even if there is +; no way of telling whether they alias. +define void @f20(i64 *%ptr1, i64 *%ptr2) { +; CHECK: f20: +; CHECK: mvc 0(8,%r3), 0(%r2) +; CHECK: br %r14 + %val = load i64 *%ptr1 + store i64 %val, i64 *%ptr2 + ret void +} + +; ...but if the loads aren't aligned, we can't be sure. 
+define void @f21(i64 *%ptr1, i64 *%ptr2) { +; CHECK: f21: +; CHECK-NOT: mvc +; CHECK: br %r14 + %val = load i64 *%ptr1, align 2 + store i64 %val, i64 *%ptr2, align 2 + ret void +} + +; Test a case where there is definite overlap. +define void @f22(i64 %base) { +; CHECK: f22: +; CHECK-NOT: mvc +; CHECK: br %r14 + %add = add i64 %base, 1 + %ptr1 = inttoptr i64 %base to i64 * + %ptr2 = inttoptr i64 %add to i64 * + %val = load i64 *%ptr1, align 1 + store i64 %val, i64 *%ptr2, align 1 + ret void +} + +; Test that we can use MVC for global addresses for i8. +define void @f23(i8 *%ptr) { +; CHECK: f23: +; CHECK: larl [[REG:%r[0-5]]], g1 +; CHECK: mvc 0(1,%r2), 0([[REG]]) +; CHECK: br %r14 + %val = load i8 *@g1 + store i8 %val, i8 *%ptr + ret void +} + +; ...and again with the global on the store. +define void @f24(i8 *%ptr) { +; CHECK: f24: +; CHECK: larl [[REG:%r[0-5]]], g1 +; CHECK: mvc 0(1,[[REG]]), 0(%r2) +; CHECK: br %r14 + %val = load i8 *%ptr + store i8 %val, i8 *@g1 + ret void +} + +; Test that we use LHRL for i16. +define void @f25(i16 *%ptr) { +; CHECK: f25: +; CHECK: lhrl [[REG:%r[0-5]]], g2 +; CHECK: sth [[REG]], 0(%r2) +; CHECK: br %r14 + %val = load i16 *@g2 + store i16 %val, i16 *%ptr + ret void +} + +; ...likewise STHRL. +define void @f26(i16 *%ptr) { +; CHECK: f26: +; CHECK: lh [[REG:%r[0-5]]], 0(%r2) +; CHECK: sthrl [[REG]], g2 +; CHECK: br %r14 + %val = load i16 *%ptr + store i16 %val, i16 *@g2 + ret void +} + +; Test that we use LRL for i32. +define void @f27(i32 *%ptr) { +; CHECK: f27: +; CHECK: lrl [[REG:%r[0-5]]], g3 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %val = load i32 *@g3 + store i32 %val, i32 *%ptr + ret void +} + +; ...likewise STRL. +define void @f28(i32 *%ptr) { +; CHECK: f28: +; CHECK: l [[REG:%r[0-5]]], 0(%r2) +; CHECK: strl [[REG]], g3 +; CHECK: br %r14 + %val = load i32 *%ptr + store i32 %val, i32 *@g3 + ret void +} + +; Test that we use LGRL for i64. 
+define void @f29(i64 *%ptr) { +; CHECK: f29: +; CHECK: lgrl [[REG:%r[0-5]]], g4 +; CHECK: stg [[REG]], 0(%r2) +; CHECK: br %r14 + %val = load i64 *@g4 + store i64 %val, i64 *%ptr + ret void +} + +; ...likewise STGRL. +define void @f30(i64 *%ptr) { +; CHECK: f30: +; CHECK: lg [[REG:%r[0-5]]], 0(%r2) +; CHECK: stgrl [[REG]], g4 +; CHECK: br %r14 + %val = load i64 *%ptr + store i64 %val, i64 *@g4 + ret void +} + +; Test that we can use MVC for global addresses for fp128. +define void @f31(fp128 *%ptr) { +; CHECK: f31: +; CHECK: larl [[REG:%r[0-5]]], g5 +; CHECK: mvc 0(16,%r2), 0([[REG]]) +; CHECK: br %r14 + %val = load fp128 *@g5, align 16 + store fp128 %val, fp128 *%ptr, align 16 + ret void +} + +; ...and again with the global on the store. +define void @f32(fp128 *%ptr) { +; CHECK: f32: +; CHECK: larl [[REG:%r[0-5]]], g5 +; CHECK: mvc 0(16,[[REG]]), 0(%r2) +; CHECK: br %r14 + %val = load fp128 *%ptr, align 16 + store fp128 %val, fp128 *@g5, align 16 + ret void +} + +; Test a case where offset disambiguation is enough. +define void @f33(i64 *%ptr1) { +; CHECK: f33: +; CHECK: mvc 8(8,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i64 *%ptr1, i64 1 + %val = load i64 *%ptr1, align 1 + store i64 %val, i64 *%ptr2, align 1 + ret void +} + +; Test f21 in cases where TBAA tells us there is no alias. +define void @f34(i64 *%ptr1, i64 *%ptr2) { +; CHECK: f34: +; CHECK: mvc 0(8,%r3), 0(%r2) +; CHECK: br %r14 + %val = load i64 *%ptr1, align 2, !tbaa !1 + store i64 %val, i64 *%ptr2, align 2, !tbaa !2 + ret void +} + +; Test f21 in cases where TBAA is present but doesn't help. +define void @f35(i64 *%ptr1, i64 *%ptr2) { +; CHECK: f35: +; CHECK-NOT: mvc +; CHECK: br %r14 + %val = load i64 *%ptr1, align 2, !tbaa !1 + store i64 %val, i64 *%ptr2, align 2, !tbaa !1 + ret void +} + +!0 = metadata !{ metadata !"root" } +!1 = metadata !{ metadata !"set1", metadata !0 } +!2 = metadata !{ metadata !"set2", metadata !0 } |