Diffstat (limited to 'lib')
-rw-r--r--   lib/Target/X86/README-SSE.txt    5
-rw-r--r--   lib/Target/X86/X86InstrSSE.td   34
2 files changed, 21 insertions, 18 deletions
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index 7110b31..bc51b53 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -907,3 +907,8 @@ We should be able to use:
cvtsi2ss 8($esp), %xmm0
since we know the stack slot is already zext'd.
+//===---------------------------------------------------------------------===//
+
+Consider using movlps instead of movsd to implement (scalar_to_vector (loadf64))
+when code size is critical. movlps is slower than movsd on core2 but it's one
+byte shorter.
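
As a rough sketch of the size argument in the note above (illustrative operands,
not taken from the commit):

    movsd  (%eax), %xmm0    # F2 0F 10 00   (4 bytes; zeroes the upper 64 bits)
    movlps (%eax), %xmm0    # 0F 12 00      (3 bytes; preserves the upper 64 bits)

The saved byte is the mandatory F2 prefix on movsd. Since (scalar_to_vector
(loadf64)) leaves the upper lanes undefined anyway, the difference in
upper-half behavior should not matter for this use.
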
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index b7a959a..4fc1044 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3019,62 +3019,60 @@ def : Pat<(v4i32 (vector_shuffle VR128:$src1, (undef),
let AddedComplexity = 20 in {
// vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
// vector_shuffle v1, (load v2) <0, 1, 4, 5> using MOVHPS
-def : Pat<(v4f32 (vector_shuffle VR128:$src1, (memop addr:$src2),
+def : Pat<(v4f32 (vector_shuffle VR128:$src1, (load addr:$src2),
MOVLP_shuffle_mask)),
(MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
-def : Pat<(v2f64 (vector_shuffle VR128:$src1, (memop addr:$src2),
+def : Pat<(v2f64 (vector_shuffle VR128:$src1, (load addr:$src2),
MOVLP_shuffle_mask)),
(MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v4f32 (vector_shuffle VR128:$src1, (memop addr:$src2),
+def : Pat<(v4f32 (vector_shuffle VR128:$src1, (load addr:$src2),
MOVHP_shuffle_mask)),
(MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
-def : Pat<(v2f64 (vector_shuffle VR128:$src1, (memop addr:$src2),
+def : Pat<(v2f64 (vector_shuffle VR128:$src1, (load addr:$src2),
MOVHP_shuffle_mask)),
(MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v4i32 (vector_shuffle VR128:$src1,
- (bc_v4i32 (memopv2i64 addr:$src2)),
+def : Pat<(v4i32 (vector_shuffle VR128:$src1, (load addr:$src2),
MOVLP_shuffle_mask)),
(MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v2i64 (vector_shuffle VR128:$src1, (memop addr:$src2),
+def : Pat<(v2i64 (vector_shuffle VR128:$src1, (load addr:$src2),
MOVLP_shuffle_mask)),
(MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v4i32 (vector_shuffle VR128:$src1,
- (bc_v4i32 (memopv2i64 addr:$src2)),
+def : Pat<(v4i32 (vector_shuffle VR128:$src1, (load addr:$src2),
MOVHP_shuffle_mask)),
(MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
-def : Pat<(v2i64 (vector_shuffle VR128:$src1, (memop addr:$src2),
+def : Pat<(v2i64 (vector_shuffle VR128:$src1, (load addr:$src2),
MOVHP_shuffle_mask)),
(MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
}
// (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
// (store (vector_shuffle (load addr), v2, <0, 1, 4, 5>), addr) using MOVHPS
-def : Pat<(store (v4f32 (vector_shuffle (memop addr:$src1), VR128:$src2,
+def : Pat<(store (v4f32 (vector_shuffle (load addr:$src1), VR128:$src2,
MOVLP_shuffle_mask)), addr:$src1),
(MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
-def : Pat<(store (v2f64 (vector_shuffle (memop addr:$src1), VR128:$src2,
+def : Pat<(store (v2f64 (vector_shuffle (load addr:$src1), VR128:$src2,
MOVLP_shuffle_mask)), addr:$src1),
(MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(store (v4f32 (vector_shuffle (memop addr:$src1), VR128:$src2,
+def : Pat<(store (v4f32 (vector_shuffle (load addr:$src1), VR128:$src2,
MOVHP_shuffle_mask)), addr:$src1),
(MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
-def : Pat<(store (v2f64 (vector_shuffle (memop addr:$src1), VR128:$src2,
+def : Pat<(store (v2f64 (vector_shuffle (load addr:$src1), VR128:$src2,
MOVHP_shuffle_mask)), addr:$src1),
(MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
def : Pat<(store (v4i32 (vector_shuffle
- (bc_v4i32 (memopv2i64 addr:$src1)), VR128:$src2,
+ (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2,
MOVLP_shuffle_mask)), addr:$src1),
(MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
-def : Pat<(store (v2i64 (vector_shuffle (memop addr:$src1), VR128:$src2,
+def : Pat<(store (v2i64 (vector_shuffle (load addr:$src1), VR128:$src2,
MOVLP_shuffle_mask)), addr:$src1),
(MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
def : Pat<(store (v4i32 (vector_shuffle
- (bc_v4i32 (memopv2i64 addr:$src1)), VR128:$src2,
+ (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2,
MOVHP_shuffle_mask)), addr:$src1),
(MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
-def : Pat<(store (v2i64 (vector_shuffle (memop addr:$src1), VR128:$src2,
+def : Pat<(store (v2i64 (vector_shuffle (load addr:$src1), VR128:$src2,
MOVHP_shuffle_mask)), addr:$src1),
(MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
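
As an illustration of what the <4, 5, 2, 3> and <0, 1, 4, 5> shuffle cases
above select (hypothetical operands, assuming v1 is already in %xmm0 and v2
sits in memory at (%eax)):

    movlps (%eax), %xmm0    # result = <v2[0], v2[1], v1[2], v1[3]>
    movhps (%eax), %xmm0    # result = <v1[0], v1[1], v2[0], v2[1]>

movlps replaces only the low two lanes of the destination from memory and
movhps only the high two, which is exactly the shape of the
MOVLP_shuffle_mask and MOVHP_shuffle_mask patterns matched here.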