From 7bc389b6b00e26e07fa5ac64e43d9b06b73828c1 Mon Sep 17 00:00:00 2001
From: Evan Cheng
Date: Tue, 8 Nov 2011 00:31:58 +0000
Subject: Add x86 isel logic and patterns to match movlps from clang generated IR for _mm_loadl_pi(). rdar://10134392, rdar://10050222

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144052 91177308-0d34-0410-b5e6-96231b3b80d8
---
 test/CodeGen/X86/vec_shuffle-38.ll |  3 +--
 test/CodeGen/X86/vec_shuffle-39.ll | 51 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+), 2 deletions(-)
 create mode 100644 test/CodeGen/X86/vec_shuffle-39.ll

(limited to 'test/CodeGen')

NOTE(review): this copy of the patch was recovered from a whitespace-collapsed
export. Two things were reconstructed and should be confirmed against the
upstream revision (r144052) before relying on them:
  * The shufflevector mask operands in vec_shuffle-39.ll were missing. They
    were rebuilt from the CHECK lines: movlps (%rdi), %xmm0 replaces the low
    two lanes of %a with the loaded 64 bits and keeps the high two lanes,
    i.e. mask <4, 5, 2, 3>; the 2 -> 4 lane widening uses <0, 1, undef, undef>.
  * Leading indentation inside hunk lines is conventional (two spaces for
    instructions), not recovered. If the vec_shuffle-38.ll context fails to
    match, apply with whitespace differences ignored.

diff --git a/test/CodeGen/X86/vec_shuffle-38.ll b/test/CodeGen/X86/vec_shuffle-38.ll
index 69a2ede..3531e32 100644
--- a/test/CodeGen/X86/vec_shuffle-38.ll
+++ b/test/CodeGen/X86/vec_shuffle-38.ll
@@ -48,8 +48,7 @@ entry:
 ; CHECK: f
 define <4 x float> @f(<4 x float> %x, double* nocapture %y) nounwind uwtable readonly ssp {
 entry:
-  ; CHECK: movsd (%
-  ; CHECK-NEXT: movsd %xmm
+  ; CHECK: movlps (%rdi), %xmm0
   %u110.i = load double* %y, align 1
   %tmp8.i = insertelement <2 x double> undef, double %u110.i, i32 0
   %tmp9.i = bitcast <2 x double> %tmp8.i to <4 x float>
diff --git a/test/CodeGen/X86/vec_shuffle-39.ll b/test/CodeGen/X86/vec_shuffle-39.ll
new file mode 100644
index 0000000..6c889b8
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-39.ll
@@ -0,0 +1,51 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; rdar://10050222, rdar://10134392
+
+define <4 x float> @t1(<4 x float> %a, <1 x i64>* nocapture %p) nounwind {
+entry:
+; CHECK: t1:
+; CHECK: movlps (%rdi), %xmm0
+; CHECK: ret
+  %p.val = load <1 x i64>* %p, align 1
+  %0 = bitcast <1 x i64> %p.val to <2 x float>
+  %shuffle.i = shufflevector <2 x float> %0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %shuffle1.i = shufflevector <4 x float> %a, <4 x float> %shuffle.i, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+  ret <4 x float> %shuffle1.i
+}
+
+define <4 x float> @t1a(<4 x float> %a, <1 x i64>* nocapture %p) nounwind {
+entry:
+; CHECK: t1a:
+; CHECK: movlps (%rdi), %xmm0
+; CHECK: ret
+  %0 = bitcast <1 x i64>* %p to double*
+  %1 = load double* %0
+  %2 = insertelement <2 x double> undef, double %1, i32 0
+  %3 = bitcast <2 x double> %2 to <4 x float>
+  %4 = shufflevector <4 x float> %a, <4 x float> %3, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+  ret <4 x float> %4
+}
+
+define void @t2(<1 x i64>* nocapture %p, <4 x float> %a) nounwind {
+entry:
+; CHECK: t2:
+; CHECK: movlps %xmm0, (%rdi)
+; CHECK: ret
+  %cast.i = bitcast <4 x float> %a to <2 x i64>
+  %extract.i = extractelement <2 x i64> %cast.i, i32 0
+  %0 = getelementptr inbounds <1 x i64>* %p, i64 0, i64 0
+  store i64 %extract.i, i64* %0, align 8
+  ret void
+}
+
+define void @t2a(<1 x i64>* nocapture %p, <4 x float> %a) nounwind {
+entry:
+; CHECK: t2a:
+; CHECK: movlps %xmm0, (%rdi)
+; CHECK: ret
+  %0 = bitcast <1 x i64>* %p to double*
+  %1 = bitcast <4 x float> %a to <2 x double>
+  %2 = extractelement <2 x double> %1, i32 0
+  store double %2, double* %0
+  ret void
+}
-- 
cgit v1.1