diff options
Diffstat (limited to 'test/CodeGen/X86/vec_ss_load_fold.ll')
-rw-r--r-- | test/CodeGen/X86/vec_ss_load_fold.ll | 37 |
1 files changed, 34 insertions, 3 deletions
diff --git a/test/CodeGen/X86/vec_ss_load_fold.ll b/test/CodeGen/X86/vec_ss_load_fold.ll index b1613fb..c8b2927 100644 --- a/test/CodeGen/X86/vec_ss_load_fold.ll +++ b/test/CodeGen/X86/vec_ss_load_fold.ll @@ -1,6 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse,+sse2 -o %t -; RUN: grep minss %t | grep CPI | count 2 -; RUN: grep CPI %t | not grep movss +; RUN: llc < %s -march=x86 -mattr=+sse,+sse2,+sse41 | FileCheck %s target datalayout = "e-p:32:32" target triple = "i686-apple-darwin8.7.2" @@ -17,6 +15,10 @@ define i16 @test1(float %f) nounwind { %tmp.upgrd.1 = tail call i32 @llvm.x86.sse.cvttss2si( <4 x float> %tmp59 ) ; <i32> [#uses=1] %tmp69 = trunc i32 %tmp.upgrd.1 to i16 ; <i16> [#uses=1] ret i16 %tmp69 +; CHECK: test1: +; CHECK: subss LCPI1_ +; CHECK: mulss LCPI1_ +; CHECK: minss LCPI1_ } define i16 @test2(float %f) nounwind { @@ -28,6 +30,10 @@ define i16 @test2(float %f) nounwind { %tmp = tail call i32 @llvm.x86.sse.cvttss2si( <4 x float> %tmp59 ) ; <i32> [#uses=1] %tmp69 = trunc i32 %tmp to i16 ; <i16> [#uses=1] ret i16 %tmp69 +; CHECK: test2: +; CHECK: addss LCPI2_ +; CHECK: mulss LCPI2_ +; CHECK: minss LCPI2_ } declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) @@ -39,3 +45,28 @@ declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) + + +declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) +declare <4 x float> @f() + +define <4 x float> @test3(<4 x float> %A, float *%b, i32 %C) nounwind { + %a = load float *%b + %B = insertelement <4 x float> undef, float %a, i32 0 + %X = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %A, <4 x float> %B, i32 4) + ret <4 x float> %X +; CHECK: test3: +; CHECK: roundss $4, (%eax), %xmm0 +} + +define <4 x float> @test4(<4 x float> %A, float *%b, i32 %C) nounwind { + %a = load float *%b + %B = insertelement <4 x float> undef, float %a, i32 0 + %q = call <4 x float> @f() + %X = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %q, <4 x float> %B, i32 4) + ret <4 x float> %X +; CHECK: test4: +; CHECK: movss (%eax), %xmm +; CHECK: call +; CHECK: roundss $4, %xmm{{.*}}, %xmm0 +} |