diff options
author | Evan Cheng <evan.cheng@apple.com> | 2008-03-24 21:52:23 +0000 |
---|---|---|
committer | Evan Cheng <evan.cheng@apple.com> | 2008-03-24 21:52:23 +0000 |
commit | 62a3f1538cf50f0373c2a5eeb440d6288604f969 (patch) | |
tree | de5c3aaf38da4e7959c9eb52eb8ed4a68f88a42d /lib | |
parent | aec960038920c206505268eb2e8f0849364124fe (diff) | |
download | external_llvm-62a3f1538cf50f0373c2a5eeb440d6288604f969.zip external_llvm-62a3f1538cf50f0373c2a5eeb440d6288604f969.tar.gz external_llvm-62a3f1538cf50f0373c2a5eeb440d6288604f969.tar.bz2 |
- SSE4.1 extractps extracts an f32 into a GR32 register. Very useful! Not. Fix the instruction specification and teach the lowering code to use it only when the only use is a store instruction.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@48746 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 22 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 13 |
2 files changed, 27 insertions, 8 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index dd2d784..1d72e1f 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -699,7 +699,7 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); if (Subtarget->is64Bit()) { setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Legal); @@ -3718,6 +3718,19 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDOperand Op, SDOperand Assert = DAG.getNode(ISD::AssertZext, MVT::i32, Extract, DAG.getValueType(VT)); return DAG.getNode(ISD::TRUNCATE, VT, Assert); + } else if (VT == MVT::f32) { + // EXTRACTPS outputs to a GPR32 register which will require a movd to copy + // the result back to FR32 register. It's only worth matching if the + // result has a single use which is a store. + if (!Op.hasOneUse()) + return SDOperand(); + SDNode *User = *Op.Val->use_begin(); + if (User->getOpcode() != ISD::STORE) + return SDOperand(); + SDOperand Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, + DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, Op.getOperand(0)), + Op.getOperand(1)); + return DAG.getNode(ISD::BIT_CONVERT, MVT::f32, Extract); } return SDOperand(); } @@ -3728,8 +3741,11 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { if (!isa<ConstantSDNode>(Op.getOperand(1))) return SDOperand(); - if (Subtarget->hasSSE41()) - return LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG); + if (Subtarget->hasSSE41()) { + SDOperand Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG); + if (Res.Val) + return Res; + } MVT::ValueType VT = Op.getValueType(); // TODO: handle v16i8. 
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 787414b..9a3b2f6 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3380,19 +3380,22 @@ multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> { defm PEXTRD : SS41I_extract32<0x16, "pextrd">; -/// SS41I_extractf32 - SSE 4.1 extract 32 bits to fp reg or memory destination +/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory +/// destination multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> { - def rr : SS4AIi8<opc, MRMSrcReg, (outs FR32:$dst), + // Not worth matching to rr form of extractps since the result is in GPR32. + def rr : SS4AIi8<opc, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set FR32:$dst, - (extractelt (v4f32 VR128:$src1), imm:$src2))]>, OpSize; + [/*(set GR32:$dst, + (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))*/]>, + OpSize; def mr : SS4AIi8<opc, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(store (extractelt (v4f32 VR128:$src1), imm:$src2), + [(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2), addr:$dst)]>, OpSize; } |