diff options
author | Craig Topper <craig.topper@gmail.com> | 2012-08-06 06:22:36 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@gmail.com> | 2012-08-06 06:22:36 +0000 |
commit | 4feb647283db0ea4660941d3ac4202947b1ce196 (patch) | |
tree | f0023cf9921ed257e256469d86369b72bab65d70 /lib/Target/X86 | |
parent | d230d20b73948f535ec11a24cdabe7059cb8cf29 (diff) | |
download | external_llvm-4feb647283db0ea4660941d3ac4202947b1ce196.zip external_llvm-4feb647283db0ea4660941d3ac4202947b1ce196.tar.gz external_llvm-4feb647283db0ea4660941d3ac4202947b1ce196.tar.bz2 |
Implement proper handling for pcmpistri/pcmpestri intrinsics. Requires custom handling in DAGISelToDAG due to limitations in TableGen's implicit def handling. Fixes PR11305.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@161318 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86')
-rw-r--r-- | lib/Target/X86/X86ISelDAGToDAG.cpp | 79 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 77 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 4 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrFragmentsSIMD.td | 11 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 71 |
5 files changed, 187 insertions, 55 deletions
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 0d35b88..3160758 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2600,6 +2600,85 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { return Result; } + + // FIXME: Custom handling because TableGen doesn't support multiple implicit + // defs in an instruction pattern + case X86ISD::PCMPESTRI: { + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + SDValue N2 = Node->getOperand(2); + SDValue N3 = Node->getOperand(3); + SDValue N4 = Node->getOperand(4); + + // Make sure last argument is a constant + ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N4); + if (!Cst) + break; + + uint64_t Imm = Cst->getZExtValue(); + + SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, + X86::EAX, N1, SDValue()).getValue(1); + InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX, + N3, InFlag).getValue(1); + + SDValue Ops[] = { N0, N2, getI8Imm(Imm), InFlag }; + unsigned Opc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr : + X86::PCMPESTRIrr; + InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops, + array_lengthof(Ops)), 0); + + if (!SDValue(Node, 0).use_empty()) { + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + X86::ECX, NVT, InFlag); + InFlag = Result.getValue(2); + ReplaceUses(SDValue(Node, 0), Result); + } + if (!SDValue(Node, 1).use_empty()) { + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + X86::EFLAGS, NVT, InFlag); + InFlag = Result.getValue(2); + ReplaceUses(SDValue(Node, 1), Result); + } + + return NULL; + } + + // FIXME: Custom handling because TableGen doesn't support multiple implicit + // defs in an instruction pattern + case X86ISD::PCMPISTRI: { + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + SDValue N2 = Node->getOperand(2); + + // Make sure last argument is a constant + ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N2); + if (!Cst) + break; + + uint64_t Imm = Cst->getZExtValue(); + + SDValue Ops[] = { N0, N1, getI8Imm(Imm) }; + unsigned Opc = Subtarget->hasAVX() ? X86::VPCMPISTRIrr : + X86::PCMPISTRIrr; + SDValue InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops, + array_lengthof(Ops)), 0); + + if (!SDValue(Node, 0).use_empty()) { + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + X86::ECX, NVT, InFlag); + InFlag = Result.getValue(2); + ReplaceUses(SDValue(Node, 0), Result); + } + if (!SDValue(Node, 1).use_empty()) { + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + X86::EFLAGS, NVT, InFlag); + InFlag = Result.getValue(2); + ReplaceUses(SDValue(Node, 1), Result); + } + + return NULL; + } } SDNode *ResNode = SelectCode(Node); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 041ae15..8125b4c 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -9845,6 +9845,83 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const DAG.getConstant(NewIntNo, MVT::i32), Op.getOperand(1), ShAmt); } + case Intrinsic::x86_sse42_pcmpistria128: + case Intrinsic::x86_sse42_pcmpestria128: + case Intrinsic::x86_sse42_pcmpistric128: + case Intrinsic::x86_sse42_pcmpestric128: + case Intrinsic::x86_sse42_pcmpistrio128: + case Intrinsic::x86_sse42_pcmpestrio128: + case Intrinsic::x86_sse42_pcmpistris128: + case Intrinsic::x86_sse42_pcmpestris128: + case Intrinsic::x86_sse42_pcmpistriz128: + case Intrinsic::x86_sse42_pcmpestriz128: { + unsigned Opcode; + unsigned X86CC; + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::x86_sse42_pcmpistria128: + Opcode = X86ISD::PCMPISTRI; + X86CC = X86::COND_A; + break; + case Intrinsic::x86_sse42_pcmpestria128: + Opcode = X86ISD::PCMPESTRI; + X86CC = X86::COND_A; + break; + case Intrinsic::x86_sse42_pcmpistric128: + Opcode = X86ISD::PCMPISTRI; + X86CC = X86::COND_B; + break; + case Intrinsic::x86_sse42_pcmpestric128: + Opcode = X86ISD::PCMPESTRI; + X86CC = X86::COND_B; + break; + case Intrinsic::x86_sse42_pcmpistrio128: + Opcode = X86ISD::PCMPISTRI; + X86CC = X86::COND_O; + break; + case Intrinsic::x86_sse42_pcmpestrio128: + Opcode = X86ISD::PCMPESTRI; + X86CC = X86::COND_O; + break; + case Intrinsic::x86_sse42_pcmpistris128: + Opcode = X86ISD::PCMPISTRI; + X86CC = X86::COND_S; + break; + case Intrinsic::x86_sse42_pcmpestris128: + Opcode = X86ISD::PCMPESTRI; + X86CC = X86::COND_S; + break; + case Intrinsic::x86_sse42_pcmpistriz128: + Opcode = X86ISD::PCMPISTRI; + X86CC = X86::COND_E; + break; + case Intrinsic::x86_sse42_pcmpestriz128: + Opcode = X86ISD::PCMPESTRI; + X86CC = X86::COND_E; + break; + } + SmallVector<SDValue, 5> NewOps; + NewOps.append(Op->op_begin()+1, Op->op_end()); + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); + SDValue PCMP = DAG.getNode(Opcode, dl, VTs, NewOps.data(), NewOps.size()); + SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(X86CC, MVT::i8), + SDValue(PCMP.getNode(), 1)); + return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); + } + case Intrinsic::x86_sse42_pcmpistri128: + case Intrinsic::x86_sse42_pcmpestri128: { + unsigned Opcode; + if (IntNo == Intrinsic::x86_sse42_pcmpistri128) + Opcode = X86ISD::PCMPISTRI; + else + Opcode = X86ISD::PCMPESTRI; + + SmallVector<SDValue, 5> NewOps; + NewOps.append(Op->op_begin()+1, Op->op_end()); + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); + return DAG.getNode(Opcode, dl, VTs, NewOps.data(), NewOps.size()); + } } } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 1b2d3dd..b84af82 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -333,6 +333,10 @@ namespace llvm { // RDRAND - Get a random integer and indicate whether it is valid in CF. RDRAND, + // PCMP*STRI + PCMPISTRI, + PCMPESTRI, + // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG, // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG - // Atomic 64-bit binary operations. diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 19de855..d13167b 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -173,6 +173,17 @@ def X86Fnmsub : SDNode<"X86ISD::FNMSUB", SDTFma>; def X86Fmaddsub : SDNode<"X86ISD::FMSUBADD", SDTFma>; def X86Fmsubadd : SDNode<"X86ISD::FMADDSUB", SDTFma>; +def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, + SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>, + SDTCisVT<4, i8>]>; +def SDT_PCMPESTRI : SDTypeProfile<2, 5, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, + SDTCisVT<2, v16i8>, SDTCisVT<3, i32>, + SDTCisVT<4, v16i8>, SDTCisVT<5, i32>, + SDTCisVT<6, i8>]>; + +def X86pcmpistri : SDNode<"X86ISD::PCMPISTRI", SDT_PCMPISTRI>; +def X86pcmpestri : SDNode<"X86ISD::PCMPESTRI", SDT_PCMPESTRI>; + //===----------------------------------------------------------------------===// // SSE Complex Patterns //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index edb7f6c..e4c35b9 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -6841,81 +6841,42 @@ let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in { } // Packed Compare Implicit Length Strings, Return Index -let Defs = [ECX, EFLAGS] in { - multiclass SS42AI_pcmpistri<Intrinsic IntId128, string asm = "pcmpistri"> { +let Defs = [ECX, EFLAGS], neverHasSideEffects = 1 in { + multiclass SS42AI_pcmpistri<string asm> { def rr : SS42AI<0x63, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), - [(set ECX, (IntId128 VR128:$src1, VR128:$src2, imm:$src3)), - (implicit EFLAGS)]>, OpSize; + []>, OpSize; + let mayLoad = 1 in def rm : SS42AI<0x63, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), - [(set ECX, (IntId128 VR128:$src1, (load addr:$src2), imm:$src3)), - (implicit EFLAGS)]>, OpSize; + []>, OpSize; } } -let Predicates = [HasAVX] in { -defm VPCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128, "vpcmpistri">, - VEX; -defm VPCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128, "vpcmpistri">, - VEX; -defm VPCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128, "vpcmpistri">, - VEX; -defm VPCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128, "vpcmpistri">, - VEX; -defm VPCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128, "vpcmpistri">, - VEX; -defm VPCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128, "vpcmpistri">, - VEX; -} - -defm PCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128>; -defm PCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128>; -defm PCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128>; -defm PCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128>; -defm PCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128>; -defm PCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128>; +let Predicates = [HasAVX] in +defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX; +defm PCMPISTRI : SS42AI_pcmpistri<"pcmpistri">; // Packed Compare Explicit Length Strings, Return Index -let Defs = [ECX, EFLAGS], Uses = [EAX, EDX] in { - multiclass SS42AI_pcmpestri<Intrinsic IntId128, string asm = "pcmpestri"> { +let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in { + multiclass SS42AI_pcmpestri<string asm> { def rr : SS42AI<0x61, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src3, i8imm:$src5), !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), - [(set ECX, (IntId128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5)), - (implicit EFLAGS)]>, OpSize; + []>, OpSize; + let mayLoad = 1 in def rm : SS42AI<0x61, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src3, i8imm:$src5), !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), - [(set ECX, - (IntId128 VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5)), - (implicit EFLAGS)]>, OpSize; + []>, OpSize; } } -let Predicates = [HasAVX] in { -defm VPCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128, "vpcmpestri">, - VEX; -defm VPCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128, "vpcmpestri">, - VEX; -defm VPCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128, "vpcmpestri">, - VEX; -defm VPCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128, "vpcmpestri">, - VEX; -defm VPCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128, "vpcmpestri">, - VEX; -defm VPCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128, "vpcmpestri">, - VEX; -} - -defm PCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128>; -defm PCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128>; -defm PCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128>; -defm PCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128>; -defm PCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128>; -defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>; +let Predicates = [HasAVX] in +defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX; +defm PCMPESTRI : SS42AI_pcmpestri<"pcmpestri">; //===----------------------------------------------------------------------===// // SSE4.2 - CRC Instructions |