diff options
author | Nate Begeman <natebegeman@mac.com> | 2008-07-25 19:05:58 +0000 |
---|---|---|
committer | Nate Begeman <natebegeman@mac.com> | 2008-07-25 19:05:58 +0000 |
commit | 6357f9df4579d1d498aed8967e6ea2391606da34 (patch) | |
tree | 9cbdf23a142bf9fbeacc8a96409b2521513ac791 | |
parent | bbc2af03524f2db2155b9ff484d2dfd96115c862 (diff) | |
download | external_llvm-6357f9df4579d1d498aed8967e6ea2391606da34.zip external_llvm-6357f9df4579d1d498aed8967e6ea2391606da34.tar.gz external_llvm-6357f9df4579d1d498aed8967e6ea2391606da34.tar.bz2 |
Disable mov{L, LP, HP, HLP, *DUP} shuffles for mmx
MMX needs its own fancy shuffle logic based on unpack; for now we get correct but awful code.
Also commit Mon Ping's VSETCC patch.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@54039 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 38 | ||||
-rw-r--r-- | test/CodeGen/X86/vec_insert-7.ll | 8 |
2 files changed, 31 insertions, 15 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index e8e1b2e..00c8eea 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -3881,14 +3881,15 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { return V2; if (ISD::isBuildVectorAllZeros(V1.Val)) return getVZextMovL(VT, VT, V2, DAG, Subtarget); - return Op; + if (!isMMX) + return Op; } - if (X86::isMOVSHDUPMask(PermMask.Val) || - X86::isMOVSLDUPMask(PermMask.Val) || - X86::isMOVHLPSMask(PermMask.Val) || - X86::isMOVHPMask(PermMask.Val) || - X86::isMOVLPMask(PermMask.Val)) + if (!isMMX && (X86::isMOVSHDUPMask(PermMask.Val) || + X86::isMOVSLDUPMask(PermMask.Val) || + X86::isMOVHLPSMask(PermMask.Val) || + X86::isMOVHPMask(PermMask.Val) || + X86::isMOVLPMask(PermMask.Val))) return Op; if (ShouldXformToMOVHLPS(PermMask.Val) || @@ -4772,6 +4773,7 @@ SDOperand X86TargetLowering::LowerVSETCC(SDOperand Op, SelectionDAG &DAG) { switch (SetCCOpcode) { default: break; + case ISD::SETOEQ: case ISD::SETEQ: SSECC = 0; break; case ISD::SETOGT: case ISD::SETGT: Swap = true; // Fallthrough @@ -4782,7 +4784,7 @@ SDOperand X86TargetLowering::LowerVSETCC(SDOperand Op, SelectionDAG &DAG) { case ISD::SETLE: case ISD::SETOLE: SSECC = 2; break; case ISD::SETUO: SSECC = 3; break; - case ISD::SETONE: + case ISD::SETUNE: case ISD::SETNE: SSECC = 4; break; case ISD::SETULE: Swap = true; case ISD::SETUGE: SSECC = 5; break; @@ -4793,15 +4795,21 @@ SDOperand X86TargetLowering::LowerVSETCC(SDOperand Op, SelectionDAG &DAG) { if (Swap) std::swap(Op0, Op1); - // In the one special case we can't handle, emit two comparisons. + // In the two special cases we can't handle, emit two comparisons. 
if (SSECC == 8) { - SDOperand UNORD, EQ; - - assert(SetCCOpcode == ISD::SETUEQ && "Illegal FP comparison"); - - UNORD = DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(3, MVT::i8)); - EQ = DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(0, MVT::i8)); - return DAG.getNode(ISD::OR, VT, UNORD, EQ); + if (SetCCOpcode == ISD::SETUEQ) { + SDOperand UNORD, EQ; + UNORD = DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(3, MVT::i8)); + EQ = DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(0, MVT::i8)); + return DAG.getNode(ISD::OR, VT, UNORD, EQ); + } + else if (SetCCOpcode == ISD::SETONE) { + SDOperand ORD, NEQ; + ORD = DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(7, MVT::i8)); + NEQ = DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(4, MVT::i8)); + return DAG.getNode(ISD::AND, VT, ORD, NEQ); + } + assert(0 && "Illegal FP comparison"); } // Handle all other FP comparisons here. return DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(SSECC, MVT::i8)); diff --git a/test/CodeGen/X86/vec_insert-7.ll b/test/CodeGen/X86/vec_insert-7.ll new file mode 100644 index 0000000..93af9f4 --- /dev/null +++ b/test/CodeGen/X86/vec_insert-7.ll @@ -0,0 +1,8 @@ +; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep punpckldq + +define <2 x i32> @mmx_movzl(<2 x i32> %x) nounwind { +entry: + %tmp3 = insertelement <2 x i32> %x, i32 32, i32 0 ; <<2 x i32>> [#uses=1] + %tmp8 = insertelement <2 x i32> %tmp3, i32 0, i32 1 ; <<2 x i32>> [#uses=1] + ret <2 x i32> %tmp8 +} |