diff options
Diffstat (limited to 'test/CodeGen/X86/sse-scalar-fp-arith.ll')
-rw-r--r-- | test/CodeGen/X86/sse-scalar-fp-arith.ll | 149 |
1 file changed, 148 insertions, 1 deletion
diff --git a/test/CodeGen/X86/sse-scalar-fp-arith.ll b/test/CodeGen/X86/sse-scalar-fp-arith.ll index b122ef6..8b1c6d0 100644 --- a/test/CodeGen/X86/sse-scalar-fp-arith.ll +++ b/test/CodeGen/X86/sse-scalar-fp-arith.ll @@ -370,8 +370,155 @@ define <4 x float> @test_multiple_div_ss(<4 x float> %a, <4 x float> %b) { ret <4 x float> %3 } +; With SSE4.1 or greater, the shuffles in the following tests may +; be lowered to X86Blendi nodes. + +define <4 x float> @blend_add_ss(<4 x float> %a, float %b) { +; SSE-LABEL: blend_add_ss: +; SSE: # BB#0: +; SSE-NEXT: addss %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: blend_add_ss: +; AVX: # BB#0: +; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq + + %ext = extractelement <4 x float> %a, i32 0 + %op = fadd float %b, %ext + %ins = insertelement <4 x float> undef, float %op, i32 0 + %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7> + ret <4 x float> %shuf +} + +define <4 x float> @blend_sub_ss(<4 x float> %a, float %b) { +; SSE-LABEL: blend_sub_ss: +; SSE: # BB#0: +; SSE-NEXT: subss %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: blend_sub_ss: +; AVX: # BB#0: +; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq + + %ext = extractelement <4 x float> %a, i32 0 + %op = fsub float %ext, %b + %ins = insertelement <4 x float> undef, float %op, i32 0 + %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7> + ret <4 x float> %shuf +} + +define <4 x float> @blend_mul_ss(<4 x float> %a, float %b) { +; SSE-LABEL: blend_mul_ss: +; SSE: # BB#0: +; SSE-NEXT: mulss %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: blend_mul_ss: +; AVX: # BB#0: +; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq + + %ext = extractelement <4 x float> %a, i32 0 + %op = fmul float %b, %ext + %ins = insertelement <4 x float> undef, float %op, i32 0 + %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7> + ret <4 x float> %shuf 
+} + +define <4 x float> @blend_div_ss(<4 x float> %a, float %b) { +; SSE-LABEL: blend_div_ss: +; SSE: # BB#0: +; SSE-NEXT: divss %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: blend_div_ss: +; AVX: # BB#0: +; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq + + %ext = extractelement <4 x float> %a, i32 0 + %op = fdiv float %ext, %b + %ins = insertelement <4 x float> undef, float %op, i32 0 + %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7> + ret <4 x float> %shuf +} + +define <2 x double> @blend_add_sd(<2 x double> %a, double %b) { +; SSE-LABEL: blend_add_sd: +; SSE: # BB#0: +; SSE-NEXT: addsd %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: blend_add_sd: +; AVX: # BB#0: +; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq + + %ext = extractelement <2 x double> %a, i32 0 + %op = fadd double %b, %ext + %ins = insertelement <2 x double> undef, double %op, i32 0 + %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3> + ret <2 x double> %shuf +} + +define <2 x double> @blend_sub_sd(<2 x double> %a, double %b) { +; SSE-LABEL: blend_sub_sd: +; SSE: # BB#0: +; SSE-NEXT: subsd %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: blend_sub_sd: +; AVX: # BB#0: +; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq + + %ext = extractelement <2 x double> %a, i32 0 + %op = fsub double %ext, %b + %ins = insertelement <2 x double> undef, double %op, i32 0 + %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3> + ret <2 x double> %shuf +} + +define <2 x double> @blend_mul_sd(<2 x double> %a, double %b) { +; SSE-LABEL: blend_mul_sd: +; SSE: # BB#0: +; SSE-NEXT: mulsd %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: blend_mul_sd: +; AVX: # BB#0: +; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq + + %ext = extractelement <2 x double> %a, i32 0 + %op = fmul double %b, %ext + %ins = insertelement <2 x double> undef, double %op, i32 0 + %shuf = shufflevector <2 x 
double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3> + ret <2 x double> %shuf +} + +define <2 x double> @blend_div_sd(<2 x double> %a, double %b) { +; SSE-LABEL: blend_div_sd: +; SSE: # BB#0: +; SSE-NEXT: divsd %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: blend_div_sd: +; AVX: # BB#0: +; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq + + %ext = extractelement <2 x double> %a, i32 0 + %op = fdiv double %ext, %b + %ins = insertelement <2 x double> undef, double %op, i32 0 + %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3> + ret <2 x double> %shuf +} + ; Ensure that the backend selects SSE/AVX scalar fp instructions -; from a packed fp instrution plus a vector insert. +; from a packed fp instruction plus a vector insert. define <4 x float> @insert_test_add_ss(<4 x float> %a, <4 x float> %b) { ; SSE-LABEL: insert_test_add_ss: |