aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNate Begeman <natebegeman@mac.com>2008-07-25 19:05:58 +0000
committerNate Begeman <natebegeman@mac.com>2008-07-25 19:05:58 +0000
commit6357f9df4579d1d498aed8967e6ea2391606da34 (patch)
tree9cbdf23a142bf9fbeacc8a96409b2521513ac791
parentbbc2af03524f2db2155b9ff484d2dfd96115c862 (diff)
downloadexternal_llvm-6357f9df4579d1d498aed8967e6ea2391606da34.zip
external_llvm-6357f9df4579d1d498aed8967e6ea2391606da34.tar.gz
external_llvm-6357f9df4579d1d498aed8967e6ea2391606da34.tar.bz2
Disable mov{L, LP, HP, HLP, *DUP} shuffles for mmx
mmx needs its own fancy shuffle logic based on unpack; for now we get correct but awful code. Also commit Mon Ping's VSETCC patch git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@54039 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp38
-rw-r--r--test/CodeGen/X86/vec_insert-7.ll8
2 files changed, 31 insertions, 15 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index e8e1b2e..00c8eea 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -3881,14 +3881,15 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
return V2;
if (ISD::isBuildVectorAllZeros(V1.Val))
return getVZextMovL(VT, VT, V2, DAG, Subtarget);
- return Op;
+ if (!isMMX)
+ return Op;
}
- if (X86::isMOVSHDUPMask(PermMask.Val) ||
- X86::isMOVSLDUPMask(PermMask.Val) ||
- X86::isMOVHLPSMask(PermMask.Val) ||
- X86::isMOVHPMask(PermMask.Val) ||
- X86::isMOVLPMask(PermMask.Val))
+ if (!isMMX && (X86::isMOVSHDUPMask(PermMask.Val) ||
+ X86::isMOVSLDUPMask(PermMask.Val) ||
+ X86::isMOVHLPSMask(PermMask.Val) ||
+ X86::isMOVHPMask(PermMask.Val) ||
+ X86::isMOVLPMask(PermMask.Val)))
return Op;
if (ShouldXformToMOVHLPS(PermMask.Val) ||
@@ -4772,6 +4773,7 @@ SDOperand X86TargetLowering::LowerVSETCC(SDOperand Op, SelectionDAG &DAG) {
switch (SetCCOpcode) {
default: break;
+ case ISD::SETOEQ:
case ISD::SETEQ: SSECC = 0; break;
case ISD::SETOGT:
case ISD::SETGT: Swap = true; // Fallthrough
@@ -4782,7 +4784,7 @@ SDOperand X86TargetLowering::LowerVSETCC(SDOperand Op, SelectionDAG &DAG) {
case ISD::SETLE:
case ISD::SETOLE: SSECC = 2; break;
case ISD::SETUO: SSECC = 3; break;
- case ISD::SETONE:
+ case ISD::SETUNE:
case ISD::SETNE: SSECC = 4; break;
case ISD::SETULE: Swap = true;
case ISD::SETUGE: SSECC = 5; break;
@@ -4793,15 +4795,21 @@ SDOperand X86TargetLowering::LowerVSETCC(SDOperand Op, SelectionDAG &DAG) {
if (Swap)
std::swap(Op0, Op1);
- // In the one special case we can't handle, emit two comparisons.
+ // In the two special cases we can't handle, emit two comparisons.
if (SSECC == 8) {
- SDOperand UNORD, EQ;
-
- assert(SetCCOpcode == ISD::SETUEQ && "Illegal FP comparison");
-
- UNORD = DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(3, MVT::i8));
- EQ = DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(0, MVT::i8));
- return DAG.getNode(ISD::OR, VT, UNORD, EQ);
+ if (SetCCOpcode == ISD::SETUEQ) {
+ SDOperand UNORD, EQ;
+ UNORD = DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(3, MVT::i8));
+ EQ = DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(0, MVT::i8));
+ return DAG.getNode(ISD::OR, VT, UNORD, EQ);
+ }
+ else if (SetCCOpcode == ISD::SETONE) {
+ SDOperand ORD, NEQ;
+ ORD = DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(7, MVT::i8));
+ NEQ = DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(4, MVT::i8));
+ return DAG.getNode(ISD::AND, VT, ORD, NEQ);
+ }
+ assert(0 && "Illegal FP comparison");
}
// Handle all other FP comparisons here.
return DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(SSECC, MVT::i8));
diff --git a/test/CodeGen/X86/vec_insert-7.ll b/test/CodeGen/X86/vec_insert-7.ll
new file mode 100644
index 0000000..93af9f4
--- /dev/null
+++ b/test/CodeGen/X86/vec_insert-7.ll
@@ -0,0 +1,8 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep punpckldq
+
+define <2 x i32> @mmx_movzl(<2 x i32> %x) nounwind {
+entry:
+ %tmp3 = insertelement <2 x i32> %x, i32 32, i32 0 ; <<2 x i32>> [#uses=1]
+ %tmp8 = insertelement <2 x i32> %tmp3, i32 0, i32 1 ; <<2 x i32>> [#uses=1]
+ ret <2 x i32> %tmp8
+}