diff options
author | Richard Sandiford <rsandifo@linux.vnet.ibm.com> | 2013-08-12 10:28:10 +0000 |
---|---|---|
committer | Richard Sandiford <rsandifo@linux.vnet.ibm.com> | 2013-08-12 10:28:10 +0000 |
commit | ac168b8bc8773a083a10902f64e4ae57a925aee4 (patch) | |
tree | f01bc6788d39669344e52a5304fbe7859ebc8702 /lib | |
parent | e03a56d62fc623e2f72d623b816f91b293d5904b (diff) | |
download | external_llvm-ac168b8bc8773a083a10902f64e4ae57a925aee4.zip external_llvm-ac168b8bc8773a083a10902f64e4ae57a925aee4.tar.gz external_llvm-ac168b8bc8773a083a10902f64e4ae57a925aee4.tar.bz2 |
[SystemZ] Use CLC and IPM to implement memcmp
For now this is restricted to fixed-length comparisons with a length
in the range [1, 256], as for memcpy() and MVC.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188163 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 21 | ||||
-rw-r--r-- | lib/Target/SystemZ/README.txt | 6 | ||||
-rw-r--r-- | lib/Target/SystemZ/SystemZISelLowering.cpp | 12 | ||||
-rw-r--r-- | lib/Target/SystemZ/SystemZISelLowering.h | 8 | ||||
-rw-r--r-- | lib/Target/SystemZ/SystemZInstrInfo.cpp | 93 | ||||
-rw-r--r-- | lib/Target/SystemZ/SystemZInstrInfo.h | 6 | ||||
-rw-r--r-- | lib/Target/SystemZ/SystemZInstrInfo.td | 2 | ||||
-rw-r--r-- | lib/Target/SystemZ/SystemZOperators.td | 5 | ||||
-rw-r--r-- | lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp | 27 | ||||
-rw-r--r-- | lib/Target/SystemZ/SystemZSelectionDAGInfo.h | 8 |
10 files changed, 176 insertions, 12 deletions
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index e23c5be..1101ee1 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -58,6 +58,7 @@ #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetSelectionDAGInfo.h" #include <algorithm> using namespace llvm; @@ -5463,6 +5464,26 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { return false; const ConstantInt *Size = dyn_cast<ConstantInt>(I.getArgOperand(2)); + if (Size && Size->getZExtValue() == 0) { + EVT CallVT = TM.getTargetLowering()->getValueType(I.getType(), true); + setValue(&I, DAG.getConstant(0, CallVT)); + return true; + } + + const Value *Arg0 = I.getArgOperand(0); + const Value *Arg1 = I.getArgOperand(1); + const Value *Arg2 = I.getArgOperand(2); + const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = + TSI.EmitTargetCodeForMemcmp(DAG, getCurSDLoc(), DAG.getRoot(), + getValue(Arg0), getValue(Arg1), getValue(Arg2), + MachinePointerInfo(Arg0), + MachinePointerInfo(Arg1)); + if (Res.first.getNode()) { + setValue(&I, Res.first); + DAG.setRoot(Res.second); + return true; + } // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0 // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0 diff --git a/lib/Target/SystemZ/README.txt b/lib/Target/SystemZ/README.txt index 563513b..eebc4e4 100644 --- a/lib/Target/SystemZ/README.txt +++ b/lib/Target/SystemZ/README.txt @@ -67,12 +67,12 @@ condition codes. For example, we could use LCDFR instead of LCDBR. -- We don't optimize block memory operations, except using single MVCs -for memcpy. +for memcpy and single CLCs for memcmp. -It's definitely worth using things like CLC, NC, XC and OC with +It's definitely worth using things like NC, XC and OC with constant lengths. 
MVCIN may be worthwhile too. -We should probably implement things like memcpy using MVC with EXECUTE. +We should probably implement general memcpy using MVC with EXECUTE. Likewise memcmp and CLC. MVCLE and CLCLE could be useful too. -- diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index a51f016..899b08c 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1702,6 +1702,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(UDIVREM64); OPCODE(MVC); OPCODE(CLC); + OPCODE(IPM); OPCODE(ATOMIC_SWAPW); OPCODE(ATOMIC_LOADW_ADD); OPCODE(ATOMIC_LOADW_SUB); @@ -2240,8 +2241,9 @@ SystemZTargetLowering::emitExt128(MachineInstr *MI, } MachineBasicBlock * -SystemZTargetLowering::emitMVCWrapper(MachineInstr *MI, - MachineBasicBlock *MBB) const { +SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI, + MachineBasicBlock *MBB, + unsigned Opcode) const { const SystemZInstrInfo *TII = TM.getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); @@ -2251,7 +2253,7 @@ SystemZTargetLowering::emitMVCWrapper(MachineInstr *MI, uint64_t SrcDisp = MI->getOperand(3).getImm(); uint64_t Length = MI->getOperand(4).getImm(); - BuildMI(*MBB, MI, DL, TII->get(SystemZ::MVC)) + BuildMI(*MBB, MI, DL, TII->get(Opcode)) .addOperand(DestBase).addImm(DestDisp).addImm(Length) .addOperand(SrcBase).addImm(SrcDisp); @@ -2483,7 +2485,9 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { case SystemZ::ATOMIC_CMP_SWAPW: return emitAtomicCmpSwapW(MI, MBB); case SystemZ::MVCWrapper: - return emitMVCWrapper(MI, MBB); + return emitMemMemWrapper(MI, MBB, SystemZ::MVC); + case SystemZ::CLCWrapper: + return emitMemMemWrapper(MI, MBB, SystemZ::CLC); default: llvm_unreachable("Unexpected instr type to insert"); } diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 4098ff3..0036ce8 100644 --- 
a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -84,6 +84,9 @@ namespace SystemZISD { // as for MVC. CLC, + // Store the CC value in bits 29 and 28 of an integer. + IPM, + // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or // ATOMIC_LOAD_<op>. // @@ -234,8 +237,9 @@ private: unsigned BitSize) const; MachineBasicBlock *emitAtomicCmpSwapW(MachineInstr *MI, MachineBasicBlock *BB) const; - MachineBasicBlock *emitMVCWrapper(MachineInstr *MI, - MachineBasicBlock *BB) const; + MachineBasicBlock *emitMemMemWrapper(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned Opcode) const; }; } // end namespace llvm diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index 9ee60aa..54a8669 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -293,6 +293,99 @@ SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, return Count; } +bool SystemZInstrInfo::analyzeCompare(const MachineInstr *MI, + unsigned &SrcReg, unsigned &SrcReg2, + int &Mask, int &Value) const { + assert(MI->isCompare() && "Caller should have checked for a comparison"); + + if (MI->getNumExplicitOperands() == 2 && + MI->getOperand(0).isReg() && + MI->getOperand(1).isImm()) { + SrcReg = MI->getOperand(0).getReg(); + SrcReg2 = 0; + Value = MI->getOperand(1).getImm(); + Mask = ~0; + return true; + } + + return false; +} + +// If Reg is a virtual register that is used by only a single non-debug +// instruction, return the defining instruction, otherwise return null. 
+static MachineInstr *getDefSingleUse(const MachineRegisterInfo *MRI, + unsigned Reg) { + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + return 0; + + MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg); + MachineRegisterInfo::use_nodbg_iterator E = MRI->use_nodbg_end(); + if (I == E || llvm::next(I) != E) + return 0; + + return MRI->getUniqueVRegDef(Reg); +} + +// Return true if MI is a shift of type Opcode by Imm bits. +static bool isShift(MachineInstr *MI, int Opcode, int64_t Imm) { + return (MI->getOpcode() == Opcode && + !MI->getOperand(2).getReg() && + MI->getOperand(3).getImm() == Imm); +} + +// Compare compares SrcReg against zero. Check whether SrcReg contains +// the result of an IPM sequence that is only used by Compare. Try to +// delete both of them if so and return true if a change was made. +static bool removeIPM(MachineInstr *Compare, unsigned SrcReg, + const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI) { + MachineInstr *SRA = getDefSingleUse(MRI, SrcReg); + if (!SRA || !isShift(SRA, SystemZ::SRA, 30)) + return false; + + MachineInstr *SLL = getDefSingleUse(MRI, SRA->getOperand(1).getReg()); + if (!SLL || !isShift(SLL, SystemZ::SLL, 2)) + return false; + + MachineInstr *IPM = getDefSingleUse(MRI, SLL->getOperand(1).getReg()); + if (!IPM || IPM->getOpcode() != SystemZ::IPM) + return false; + + // Check that there are no assignments to CC between the IPM and Compare, + // except for the SRA that we'd like to delete. We can ignore SLL because + // it does not assign to CC. We can also ignore uses of the SRA CC result, + // since it is effectively restoring CC to the value it had before IPM + // (for all current use cases). 
+ if (IPM->getParent() != Compare->getParent()) + return false; + MachineBasicBlock::iterator MBBI = IPM, MBBE = Compare; + for (++MBBI; MBBI != MBBE; ++MBBI) { + MachineInstr *MI = MBBI; + if (MI != SRA && MI->modifiesRegister(SystemZ::CC, TRI)) + return false; + } + + IPM->eraseFromParent(); + SLL->eraseFromParent(); + SRA->eraseFromParent(); + Compare->eraseFromParent(); + return true; +} + +bool +SystemZInstrInfo::optimizeCompareInstr(MachineInstr *Compare, + unsigned SrcReg, unsigned SrcReg2, + int Mask, int Value, + const MachineRegisterInfo *MRI) const { + assert(!SrcReg2 && "Only optimizing constant comparisons so far"); + bool IsLogical = (Compare->getDesc().TSFlags & SystemZII::IsLogical) != 0; + if (Value == 0 && + !IsLogical && + removeIPM(Compare, SrcReg, MRI, TM.getRegisterInfo())) + return true; + return false; +} + // If Opcode is a move that has a conditional variant, return that variant, // otherwise return 0. static unsigned getConditionalMove(unsigned Opcode) { diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index 276fd3b..3c4e8af 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -129,6 +129,12 @@ public: MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const LLVM_OVERRIDE; + bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, + unsigned &SrcReg2, int &Mask, int &Value) const + LLVM_OVERRIDE; + bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, + unsigned SrcReg2, int Mask, int Value, + const MachineRegisterInfo *MRI) const LLVM_OVERRIDE; virtual bool isPredicable(MachineInstr *MI) const LLVM_OVERRIDE; virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index a7181d6..834ffed 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ 
b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -1117,7 +1117,7 @@ let Defs = [CC] in { // Extract CC into bits 29 and 28 of a register. let Uses = [CC] in - def IPM : InherentRRE<"ipm", 0xB222, GR32, (null_frag)>; + def IPM : InherentRRE<"ipm", 0xB222, GR32, (z_ipm)>; // Read a 32-bit access register into a GR32. As with all GR32 operations, // the upper 32 bits of the enclosing GR64 remain unchanged, which is useful diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index dae04de..8a5b909 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -58,6 +58,7 @@ def SDT_ZMemMemLength : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisPtrTy<1>, SDTCisVT<2, i32>]>; +def SDT_ZI32Intrinsic : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>; //===----------------------------------------------------------------------===// // Node definitions @@ -112,7 +113,9 @@ def z_atomic_cmp_swapw : AtomicWOp<"ATOMIC_CMP_SWAPW", SDT_ZAtomicCmpSwapW>; def z_mvc : SDNode<"SystemZISD::MVC", SDT_ZMemMemLength, [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; def z_clc : SDNode<"SystemZISD::CLC", SDT_ZMemMemLength, - [SDNPHasChain, SDNPMayLoad]>; + [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>; +def z_ipm : SDNode<"SystemZISD::IPM", SDT_ZI32Intrinsic, + [SDNPInGlue]>; //===----------------------------------------------------------------------===// // Pattern fragments diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp index 4ca9292..341dc94 100644 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -125,3 +125,30 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain, } return SDValue(); } + +std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: +EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src1, SDValue Src2, SDValue Size, + MachinePointerInfo Op1PtrInfo, + 
+ MachinePointerInfo Op2PtrInfo) const { + if (ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(Size)) { + uint64_t Bytes = CSize->getZExtValue(); + if (Bytes >= 1 && Bytes <= 0x100) { + // A single CLC. + SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); + Chain = DAG.getNode(SystemZISD::CLC, DL, VTs, Chain, + Src1, Src2, Size); + SDValue Glue = Chain.getValue(1); + // IPM inserts the CC value into bits 29 and 28; CLC sets CC to 0 for "equal", + // 1 for "first operand low" and 2 for "first operand high". NOTE(review): + // SHL 2 then SRA 30 turns these into 0, 1 and -2, the reverse of memcmp's sign. + SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue); + SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, IPM, + DAG.getConstant(2, MVT::i32)); + SDValue SRA = DAG.getNode(ISD::SRA, DL, MVT::i32, SHL, + DAG.getConstant(30, MVT::i32)); + return std::make_pair(SRA, Chain); + } + } + return std::make_pair(SDValue(), SDValue()); +} diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h index 9138a9c..c757e16 100644 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h @@ -38,7 +38,13 @@ public: EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain, SDValue Dst, SDValue Byte, SDValue Size, unsigned Align, bool IsVolatile, - MachinePointerInfo DstPtrInfo) const; + MachinePointerInfo DstPtrInfo) const LLVM_OVERRIDE; + + virtual std::pair<SDValue, SDValue> + EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src1, SDValue Src2, SDValue Size, + MachinePointerInfo Op1PtrInfo, + MachinePointerInfo Op2PtrInfo) const LLVM_OVERRIDE; }; } |