diff options
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 47 |
1 files changed, 26 insertions, 21 deletions
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 5c411b8..aa95c8d 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -788,7 +788,7 @@ def MOVLPSrm : PSI<0x12, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2), [(set VR128:$dst, (v4f32 (vector_shuffle VR128:$src1, (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))), - MOVLP_shuffle_mask)))]>, Cost<20>; + MOVLP_shuffle_mask)))]>; def MOVLPDrm : PDI<0x12, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2), "movlpd {$src2, $dst|$dst, $src2}", [(set VR128:$dst, @@ -800,7 +800,7 @@ def MOVHPSrm : PSI<0x16, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2), [(set VR128:$dst, (v4f32 (vector_shuffle VR128:$src1, (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))), - MOVHP_shuffle_mask)))]>, Cost<20>; + MOVHP_shuffle_mask)))]>; def MOVHPDrm : PDI<0x16, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2), "movhpd {$src2, $dst|$dst, $src2}", [(set VR128:$dst, @@ -2353,22 +2353,6 @@ def : Pat<(v8i16 (X86zexts2vec R16:$src)), def : Pat<(v16i8 (X86zexts2vec R8:$src)), (MOVLDI2PDIrr (V_SET0_PI), (MOVZX32rr8 R8:$src))>, Requires<[HasSSE2]>; -// MOVLP{S|D}rm / MOVHP{S|D}rm. -let AddedCost = 10 in { -def : Pat<(v4f32 (vector_shuffle VR128:$src1, (loadv4f32 addr:$src2), - MOVLP_shuffle_mask)), - (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>; -def : Pat<(v2f64 (vector_shuffle VR128:$src1, (loadv2f64 addr:$src2), - MOVLP_shuffle_mask)), - (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v4f32 (vector_shuffle VR128:$src1, (loadv4f32 addr:$src2), - MOVHP_shuffle_mask)), - (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>; -def : Pat<(v2f64 (vector_shuffle VR128:$src1, (loadv2f64 addr:$src2), - MOVHP_shuffle_mask)), - (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; -} - // Splat v2f64 / v2i64 def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), SSE_splat_v2_mask:$sm), (v2f64 (UNPCKLPDrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>; @@ -2431,13 +2415,34 @@ def : Pat<(v4i32 (vector_shuffle (bc_v4i32 (loadv2i64 addr:$src)), (undef), MOVSLDUP_shuffle_mask)), (MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>; -// vector_shuffle v1, v2 <4, 1, 2, 3> +// vector_shuffle v1, v2 <4, 1, 2, 3> using MOV{H|L}P{S|D} +let AddedCost = 10 in { +def : Pat<(v4f32 (vector_shuffle VR128:$src1, (loadv4f32 addr:$src2), + MOVLP_shuffle_mask)), + (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>; +def : Pat<(v2f64 (vector_shuffle VR128:$src1, (loadv2f64 addr:$src2), + MOVLP_shuffle_mask)), + (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; +def : Pat<(v4f32 (vector_shuffle VR128:$src1, (loadv4f32 addr:$src2), + MOVHP_shuffle_mask)), + (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>; +def : Pat<(v2f64 (vector_shuffle VR128:$src1, (loadv2f64 addr:$src2), + MOVHP_shuffle_mask)), + (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; + def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2, MOVS_shuffle_mask)), - (MOVLPSrr VR128:$src1, VR128:$src2)>; + (MOVLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; +def : Pat<(v4i32 (vector_shuffle VR128:$src1, (bc_v4i32 (loadv2i64 addr:$src2)), + MOVLP_shuffle_mask)), + (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; def : Pat<(v2i64 (vector_shuffle VR128:$src1, VR128:$src2, MOVS_shuffle_mask)), - (MOVLPDrr VR128:$src1, VR128:$src2)>; + (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; +def : Pat<(v2i64 (vector_shuffle VR128:$src1, (loadv2i64 addr:$src2), + MOVHP_shuffle_mask)), + (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; +} // 128-bit logical shifts def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2), |