diff options
-rw-r--r-- | include/llvm/Target/TargetLibraryInfo.h | 1 | ||||
-rw-r--r-- | include/llvm/Target/TargetSelectionDAGInfo.h | 12 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 35 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 1 | ||||
-rw-r--r-- | lib/Target/SystemZ/SystemZ.h | 5 | ||||
-rw-r--r-- | lib/Target/SystemZ/SystemZISelLowering.cpp | 7 | ||||
-rw-r--r-- | lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp | 30 | ||||
-rw-r--r-- | lib/Target/SystemZ/SystemZSelectionDAGInfo.h | 5 | ||||
-rw-r--r-- | test/CodeGen/SystemZ/memchr-01.ll | 21 | ||||
-rw-r--r-- | test/CodeGen/SystemZ/memchr-02.ll | 57 |
10 files changed, 171 insertions, 3 deletions
diff --git a/include/llvm/Target/TargetLibraryInfo.h b/include/llvm/Target/TargetLibraryInfo.h index 50b16a3..9111cb1 100644 --- a/include/llvm/Target/TargetLibraryInfo.h +++ b/include/llvm/Target/TargetLibraryInfo.h @@ -701,6 +701,7 @@ public: case LibFunc::exp2: case LibFunc::exp2f: case LibFunc::exp2l: case LibFunc::memcmp: case LibFunc::strcmp: case LibFunc::strcpy: case LibFunc::stpcpy: case LibFunc::strlen: case LibFunc::strnlen: + case LibFunc::memchr: return true; } return false; diff --git a/include/llvm/Target/TargetSelectionDAGInfo.h b/include/llvm/Target/TargetSelectionDAGInfo.h index 138ff48..3474ee4 100644 --- a/include/llvm/Target/TargetSelectionDAGInfo.h +++ b/include/llvm/Target/TargetSelectionDAGInfo.h @@ -109,6 +109,18 @@ public: return std::make_pair(SDValue(), SDValue()); } + /// EmitTargetCodeForMemchr - Emit target-specific code that performs a + /// memchr, in cases where that is faster than a libcall. The first + /// returned SDValue is the result of the memchr and the second is + /// the chain. Both SDValues can be null if a normal libcall should + /// be used. + virtual std::pair<SDValue, SDValue> + EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc dl, SDValue Chain, + SDValue Src, SDValue Char, SDValue Length, + MachinePointerInfo SrcPtrInfo) const { + return std::make_pair(SDValue(), SDValue()); + } + /// EmitTargetCodeForStrcpy - Emit target-specific code that performs a /// strcpy or stpcpy, in cases where that is faster than a libcall. /// The first returned SDValue is the result of the copy (the start diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 6feca81..77a3c51 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5647,6 +5647,37 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { return false; } +/// visitMemChrCall -- See if we can lower a memchr call into an optimized +/// form. If so, return true and lower it, otherwise return false and it +/// will be lowered like a normal call. +bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) { + // Verify that the prototype makes sense. void *memchr(void *, int, size_t) + if (I.getNumArgOperands() != 3) + return false; + + const Value *Src = I.getArgOperand(0); + const Value *Char = I.getArgOperand(1); + const Value *Length = I.getArgOperand(2); + if (!Src->getType()->isPointerTy() || + !Char->getType()->isIntegerTy() || + !Length->getType()->isIntegerTy() || + !I.getType()->isPointerTy()) + return false; + + const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = + TSI.EmitTargetCodeForMemchr(DAG, getCurSDLoc(), DAG.getRoot(), + getValue(Src), getValue(Char), getValue(Length), + MachinePointerInfo(Src)); + if (Res.first.getNode()) { + setValue(&I, Res.first); + PendingLoads.push_back(Res.second); + return true; + } + + return false; +} + /// visitStrCpyCall -- See if we can lower a strcpy or stpcpy call into an /// optimized form. If so, return true and lower it, otherwise return false /// and it will be lowered like a normal call. @@ -5904,6 +5935,10 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { if (visitMemCmpCall(I)) return; break; + case LibFunc::memchr: + if (visitMemChrCall(I)) + return; + break; case LibFunc::strcpy: if (visitStrCpyCall(I, false)) return; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 1fe00e0..e995424 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -724,6 +724,7 @@ private: void visitPHI(const PHINode &I); void visitCall(const CallInst &I); bool visitMemCmpCall(const CallInst &I); + bool visitMemChrCall(const CallInst &I); bool visitStrCpyCall(const CallInst &I, bool isStpcpy); bool visitStrCmpCall(const CallInst &I); bool visitStrLenCall(const CallInst &I); diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h index eccc2aa..bb6ceac 100644 --- a/lib/Target/SystemZ/SystemZ.h +++ b/lib/Target/SystemZ/SystemZ.h @@ -52,6 +52,11 @@ namespace llvm { const unsigned CCMASK_CS_NE = CCMASK_1; const unsigned CCMASK_CS = CCMASK_0 | CCMASK_1; + // Condition-code mask assignments for a completed SRST loop. + const unsigned CCMASK_SRST_FOUND = CCMASK_1; + const unsigned CCMASK_SRST_NOTFOUND = CCMASK_2; + const unsigned CCMASK_SRST = CCMASK_1 | CCMASK_2; + // Return true if Val fits an LLILL operand. static inline bool isImmLL(uint64_t Val) { return (Val & ~0x000000000000ffffULL) == 0; diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 0000485..6710f89 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -2345,19 +2345,19 @@ SystemZTargetLowering::emitStringWrapper(MachineInstr *MI, MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); // StartMBB: - // R0W = %CharReg // # fall through to LoopMMB - BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0W).addReg(CharReg); MBB->addSuccessor(LoopMBB); // LoopMBB: // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ] // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ] + // R0W = %CharReg // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0W // JO LoopMBB // # fall through to DoneMMB + // + // The load of R0W can be hoisted by post-RA LICM. MBB = LoopMBB; - MBB->addLiveIn(SystemZ::R0W); BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg) .addReg(Start1Reg).addMBB(StartMBB) @@ -2365,6 +2365,7 @@ SystemZTargetLowering::emitStringWrapper(MachineInstr *MI, BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg) .addReg(Start2Reg).addMBB(StartMBB) .addReg(End2Reg).addMBB(LoopMBB); + BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0W).addReg(CharReg); BuildMI(MBB, DL, TII->get(Opcode)) .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define) .addReg(This1Reg).addReg(This2Reg); diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp index 73bd480..638c3ee 100644 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -159,6 +159,36 @@ EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, } std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: +EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src, SDValue Char, SDValue Length, + MachinePointerInfo SrcPtrInfo) const { + // Use SRST to find the character. End is its address on success. + EVT PtrVT = Src.getValueType(); + SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other, MVT::Glue); + Length = DAG.getZExtOrTrunc(Length, DL, PtrVT); + Char = DAG.getZExtOrTrunc(Char, DL, MVT::i32); + Char = DAG.getNode(ISD::AND, DL, MVT::i32, Char, + DAG.getConstant(255, MVT::i32)); + SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, Length); + SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain, + Limit, Src, Char); + Chain = End.getValue(1); + SDValue Glue = End.getValue(2); + + // Now select between End and null, depending on whether the character + // was found. + SmallVector<SDValue, 5> Ops; + Ops.push_back(End); + Ops.push_back(DAG.getConstant(0, PtrVT)); + Ops.push_back(DAG.getConstant(SystemZ::CCMASK_SRST, MVT::i32)); + Ops.push_back(DAG.getConstant(SystemZ::CCMASK_SRST_FOUND, MVT::i32)); + Ops.push_back(Glue); + VTs = DAG.getVTList(PtrVT, MVT::Glue); + End = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, &Ops[0], Ops.size()); + return std::make_pair(End, Chain); +} + +std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: EmitTargetCodeForStrcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, SDValue Dest, SDValue Src, MachinePointerInfo DestPtrInfo, diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h index 26d6488..281d1e2 100644 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h @@ -47,6 +47,11 @@ public: MachinePointerInfo Op2PtrInfo) const LLVM_OVERRIDE; virtual std::pair<SDValue, SDValue> + EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src, SDValue Char, SDValue Length, + MachinePointerInfo SrcPtrInfo) const LLVM_OVERRIDE; + + virtual std::pair<SDValue, SDValue> EmitTargetCodeForStrcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, SDValue Dest, SDValue Src, MachinePointerInfo DestPtrInfo, diff --git a/test/CodeGen/SystemZ/memchr-01.ll b/test/CodeGen/SystemZ/memchr-01.ll new file mode 100644 index 0000000..c51690b --- /dev/null +++ b/test/CodeGen/SystemZ/memchr-01.ll @@ -0,0 +1,21 @@ +; Test memchr using SRST, with a weird but usable prototype. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i8 *@memchr(i8 *%src, i16 %char, i32 %len) + +; Test a simple forwarded call. +define i8 *@f1(i8 *%src, i16 %char, i32 %len) { +; CHECK-LABEL: f1: +; CHECK-DAG: lgr [[REG:%r[1-5]]], %r2 +; CHECK-DAG: algfr %r2, %r4 +; CHECK-DAG: llcr %r0, %r3 +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: srst %r2, [[REG]] +; CHECK-NEXT: jo [[LABEL]] +; CHECK: jl {{\.L.*}} +; CHECK: lghi %r2, 0 +; CHECK: br %r14 + %res = call i8 *@memchr(i8 *%src, i16 %char, i32 %len) + ret i8 *%res +} diff --git a/test/CodeGen/SystemZ/memchr-02.ll b/test/CodeGen/SystemZ/memchr-02.ll new file mode 100644 index 0000000..982b396 --- /dev/null +++ b/test/CodeGen/SystemZ/memchr-02.ll @@ -0,0 +1,57 @@ +; Test memchr using SRST, with the correct prototype. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i8 *@memchr(i8 *%src, i32 %char, i64 %len) + +; Test a simple forwarded call. +define i8 *@f1(i64 %len, i8 *%src, i32 %char) { +; CHECK-LABEL: f1: +; CHECK-DAG: agr %r2, %r3 +; CHECK-DAG: llcr %r0, %r4 +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: srst %r2, %r3 +; CHECK-NEXT: jo [[LABEL]] +; CHECK: jl {{\.L.*}} +; CHECK: lghi %r2, 0 +; CHECK: br %r14 + %res = call i8 *@memchr(i8 *%src, i32 %char, i64 %len) + ret i8 *%res +} + +; Test a doubled call with no use of %r0 in between. There should be a +; single load of %r0. +define i8 *@f2(i8 *%src, i8 *%charptr, i64 %len) { +; CHECK-LABEL: f2: +; CHECK: llc %r0, 0(%r3) +; CHECK-NOT: %r0 +; CHECK: srst [[RES1:%r[1-5]]], %r2 +; CHECK-NOT: %r0 +; CHECK: srst %r2, [[RES1]] +; CHECK: br %r14 + %char = load volatile i8 *%charptr + %charext = zext i8 %char to i32 + %res1 = call i8 *@memchr(i8 *%src, i32 %charext, i64 %len) + %res2 = call i8 *@memchr(i8 *%res1, i32 %charext, i64 %len) + ret i8 *%res2 +} + +; Test a doubled call with a use of %r0 in between. %r0 must be loaded +; for each loop. +define i8 *@f3(i8 *%src, i8 *%charptr, i64 %len) { +; CHECK-LABEL: f3: +; CHECK: llc [[CHAR:%r[1-5]]], 0(%r3) +; CHECK: lr %r0, [[CHAR]] +; CHECK: srst [[RES1:%r[1-5]]], %r2 +; CHECK: lhi %r0, 0 +; CHECK: blah %r0 +; CHECK: lr %r0, [[CHAR]] +; CHECK: srst %r2, [[RES1]] +; CHECK: br %r14 + %char = load volatile i8 *%charptr + %charext = zext i8 %char to i32 + %res1 = call i8 *@memchr(i8 *%src, i32 %charext, i64 %len) + call void asm sideeffect "blah $0", "{r0}" (i32 0) + %res2 = call i8 *@memchr(i8 *%res1, i32 %charext, i64 %len) + ret i8 *%res2 +} |