diff options
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 8 | ||||
-rw-r--r-- | test/CodeGen/X86/sse2.ll | 13 |
2 files changed, 17 insertions, 4 deletions
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index ad80860..9c20e84 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -5597,9 +5597,9 @@ def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)), // Shuffle with UNPCKLPD def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))), - (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; + (VUNPCKLPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))), - (UNPCKLPSrm VR128:$src1, addr:$src2)>; + (UNPCKLPDrm VR128:$src1, addr:$src2)>; def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)), (VUNPCKLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>; @@ -5608,9 +5608,9 @@ def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)), // Shuffle with UNPCKHPD def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))), - (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; + (VUNPCKHPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))), - (UNPCKLPSrm VR128:$src1, addr:$src2)>; + (UNPCKHPDrm VR128:$src1, addr:$src2)>; def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)), (VUNPCKHPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>; diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll index 6fc0190..a9d718d 100644 --- a/test/CodeGen/X86/sse2.ll +++ b/test/CodeGen/X86/sse2.ll @@ -192,3 +192,16 @@ entry: ; CHECK: test15: ; CHECK: movhlps %xmm1, %xmm0 } + +; PR8900 +; CHECK: test16: +; CHECK: unpcklpd +; CHECK: ret + +define <2 x double> @test16(<4 x double> * nocapture %srcA, <2 x double>* nocapture %dst) { + %i5 = getelementptr inbounds <4 x double>* %srcA, i32 3 + %i6 = load <4 x double>* %i5, align 32 + %i7 = shufflevector <4 x double> %i6, <4 x double> undef, <2 x i32> <i32 0, i32 2> + ret <2 x double> %i7 +} + |