diff options
Diffstat (limited to 'test/CodeGen/X86/sse_partial_update.ll')
-rw-r--r-- | test/CodeGen/X86/sse_partial_update.ll | 66 |
1 files changed, 61 insertions, 5 deletions
diff --git a/test/CodeGen/X86/sse_partial_update.ll b/test/CodeGen/X86/sse_partial_update.ll
index 2c16a55..377c3b7 100644
--- a/test/CodeGen/X86/sse_partial_update.ll
+++ b/test/CodeGen/X86/sse_partial_update.ll
@@ -5,11 +5,18 @@
 ; There is a mismatch between the intrinsic and the actual instruction.
 ; The actual instruction has a partial update of dest, while the intrinsic
 ; passes through the upper FP values. Here, we make sure the source and
-; destination of rsqrtss are the same.
-define void @t1(<4 x float> %a) nounwind uwtable ssp {
+; destination of each scalar unary op are the same.
+
+define void @rsqrtss(<4 x float> %a) nounwind uwtable ssp {
 entry:
-; CHECK-LABEL: t1:
+; CHECK-LABEL: rsqrtss:
 ; CHECK: rsqrtss %xmm0, %xmm0
+; CHECK-NEXT: cvtss2sd %xmm0
+; CHECK-NEXT: movshdup
+; CHECK-NEXT: cvtss2sd %xmm0
+; CHECK-NEXT: movap
+; CHECK-NEXT: jmp
+
   %0 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a) nounwind
   %a.addr.0.extract = extractelement <4 x float> %0, i32 0
   %conv = fpext float %a.addr.0.extract to double
@@ -21,10 +28,16 @@ entry:
 declare void @callee(double, double)
 declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
 
-define void @t2(<4 x float> %a) nounwind uwtable ssp {
+define void @rcpss(<4 x float> %a) nounwind uwtable ssp {
 entry:
-; CHECK-LABEL: t2:
+; CHECK-LABEL: rcpss:
 ; CHECK: rcpss %xmm0, %xmm0
+; CHECK-NEXT: cvtss2sd %xmm0
+; CHECK-NEXT: movshdup
+; CHECK-NEXT: cvtss2sd %xmm0
+; CHECK-NEXT: movap
+; CHECK-NEXT: jmp
+
   %0 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a) nounwind
   %a.addr.0.extract = extractelement <4 x float> %0, i32 0
   %conv = fpext float %a.addr.0.extract to double
@@ -34,3 +47,46 @@ entry:
   ret void
 }
 declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
+
+define void @sqrtss(<4 x float> %a) nounwind uwtable ssp {
+entry:
+; CHECK-LABEL: sqrtss:
+; CHECK: sqrtss %xmm0, %xmm0
+; CHECK-NEXT: cvtss2sd %xmm0
+; CHECK-NEXT: movshdup
+; CHECK-NEXT: cvtss2sd %xmm0
+; CHECK-NEXT: movap
+; CHECK-NEXT: jmp
+
+  %0 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a) nounwind
+  %a.addr.0.extract = extractelement <4 x float> %0, i32 0
+  %conv = fpext float %a.addr.0.extract to double
+  %a.addr.4.extract = extractelement <4 x float> %0, i32 1
+  %conv3 = fpext float %a.addr.4.extract to double
+  tail call void @callee(double %conv, double %conv3) nounwind
+  ret void
+}
+declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
+
+define void @sqrtsd(<2 x double> %a) nounwind uwtable ssp {
+entry:
+; CHECK-LABEL: sqrtsd:
+; CHECK: sqrtsd %xmm0, %xmm0
+; CHECK-NEXT: cvtsd2ss %xmm0
+; CHECK-NEXT: shufpd
+; CHECK-NEXT: cvtsd2ss %xmm0
+; CHECK-NEXT: movap
+; CHECK-NEXT: jmp
+
+  %0 = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a) nounwind
+  %a0 = extractelement <2 x double> %0, i32 0
+  %conv = fptrunc double %a0 to float
+  %a1 = extractelement <2 x double> %0, i32 1
+  %conv3 = fptrunc double %a1 to float
+  tail call void @callee2(float %conv, float %conv3) nounwind
+  ret void
+}
+
+declare void @callee2(float, float)
+declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
+