From 2adc503f291d69763c5fc59a8e35d318ee22b77a Mon Sep 17 00:00:00 2001
From: Manman Ren
Date: Tue, 13 Nov 2012 19:13:05 +0000
Subject: X86: when constructing VZEXT_LOAD from other loads, make sure its
 output chain is correctly set up.

As an example, if the original load must happen before later stores, we need
to make sure the constructed VZEXT_LOAD is constrained to be before the
stores.

rdar://12684358

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167859 91177308-0d34-0410-b5e6-96231b3b80d8
---
 test/CodeGen/X86/avx-shuffle.ll | 51 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

(limited to 'test/CodeGen/X86')

diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll
index ec11654..904f048 100644
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@@ -246,3 +246,54 @@ define <8 x float> @test19(<8 x float> %A, <8 x float>%B) nounwind {
   ret <8 x float>%S
 }
 
+; rdar://12684358
+; Make sure loads happen before stores.
+; CHECK: swap8doubles
+; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
+; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
+; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
+; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
+; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}}
+; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}}
+; CHECK: vmovaps %xmm{{[0-9]+}}, {{[0-9]*}}(%rdi)
+; CHECK: vextractf128
+; CHECK: vmovaps %xmm{{[0-9]+}}, {{[0-9]*}}(%rdi)
+; CHECK: vextractf128
+; CHECK: vmovaps %ymm{{[0-9]+}}, {{[0-9]*}}(%rsi)
+; CHECK: vmovaps %ymm{{[0-9]+}}, {{[0-9]*}}(%rsi)
+define void @swap8doubles(double* nocapture %A, double* nocapture %C) nounwind uwtable ssp {
+entry:
+  %add.ptr = getelementptr inbounds double* %A, i64 2
+  %v.i = bitcast double* %A to <2 x double>*
+  %0 = load <2 x double>* %v.i, align 1
+  %shuffle.i.i = shufflevector <2 x double> %0, <2 x double> <double 0.000000e+00, double 0.000000e+00>, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+  %v1.i = bitcast double* %add.ptr to <2 x double>*
+  %1 = load <2 x double>* %v1.i, align 1
+  %2 = tail call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %shuffle.i.i, <2 x double> %1, i8 1) nounwind
+  %add.ptr1 = getelementptr inbounds double* %A, i64 6
+  %add.ptr2 = getelementptr inbounds double* %A, i64 4
+  %v.i27 = bitcast double* %add.ptr2 to <2 x double>*
+  %3 = load <2 x double>* %v.i27, align 1
+  %shuffle.i.i28 = shufflevector <2 x double> %3, <2 x double> <double 0.000000e+00, double 0.000000e+00>, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+  %v1.i29 = bitcast double* %add.ptr1 to <2 x double>*
+  %4 = load <2 x double>* %v1.i29, align 1
+  %5 = tail call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %shuffle.i.i28, <2 x double> %4, i8 1) nounwind
+  %6 = bitcast double* %C to <4 x double>*
+  %7 = load <4 x double>* %6, align 32
+  %add.ptr5 = getelementptr inbounds double* %C, i64 4
+  %8 = bitcast double* %add.ptr5 to <4 x double>*
+  %9 = load <4 x double>* %8, align 32
+  %shuffle.i26 = shufflevector <4 x double> %7, <4 x double> undef, <2 x i32> <i32 0, i32 1>
+  %10 = tail call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %7, i8 1)
+  %shuffle.i = shufflevector <4 x double> %9, <4 x double> undef, <2 x i32> <i32 0, i32 1>
+  %11 = tail call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %9, i8 1)
+  store <2 x double> %shuffle.i26, <2 x double>* %v.i, align 16
+  store <2 x double> %10, <2 x double>* %v1.i, align 16
+  store <2 x double> %shuffle.i, <2 x double>* %v.i27, align 16
+  store <2 x double> %11, <2 x double>* %v1.i29, align 16
+  store <4 x double> %2, <4 x double>* %6, align 32
+  store <4 x double> %5, <4 x double>* %8, align 32
+  ret void
+}
+declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone
+declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone
--
cgit v1.1
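
Editor's note: the diff above is limited to test/CodeGen/X86, so the code
change this test exercises (in lib/Target/X86/X86ISelLowering.cpp, where
consecutive loads are folded into a single X86ISD::VZEXT_LOAD) is not shown.
Below is a minimal sketch of the chain fix-up the commit message describes,
written against the LLVM 3.2-era SelectionDAG API. The helper name
EmitVZextLoad and its parameters are illustrative, not the verbatim commit.

    // Assumes LLVM 3.2-era headers; X86ISD lives in the target-private
    // header lib/Target/X86/X86ISelLowering.h.
    #include "X86ISelLowering.h"
    #include "llvm/CodeGen/SelectionDAG.h"
    #include "llvm/CodeGen/SelectionDAGNodes.h"

    using namespace llvm;

    // Build a VZEXT_LOAD that replaces LDBase (and the loads folded into
    // it), keeping the new node in the same chain position as LDBase.
    static SDValue EmitVZextLoad(SelectionDAG &DAG, DebugLoc dl,
                                 LoadSDNode *LDBase, EVT VT) {
      // The node produces the vector value plus an output chain; here it
      // reads 64 bits of memory (MVT::i64) and zero-extends to VT.
      SDVTList Tys = DAG.getVTList(VT, MVT::Other);
      SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
      SDValue ResNode =
          DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2,
                                  MVT::i64, LDBase->getPointerInfo(),
                                  LDBase->getAlignment(),
                                  false /*isVolatile*/, true /*ReadMem*/,
                                  false /*WriteMem*/);

      // Keep the new load in the same dependency position as LDBase:
      // merge both output chains in a TokenFactor and point every user of
      // LDBase's chain at it, so stores ordered after LDBase are also
      // ordered after ResNode.
      if (LDBase->hasAnyUseOfValue(1)) {
        SDValue NewChain =
            DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        SDValue(LDBase, 1), SDValue(ResNode.getNode(), 1));
        DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain);
        // RAUW also rewrote the TokenFactor's own operand to point at
        // itself; restore the intended operands.
        DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1),
                               SDValue(ResNode.getNode(), 1));
      }
      return ResNode;
    }

Without the TokenFactor re-wiring, the VZEXT_LOAD carries a fresh chain and
can legally be scheduled after stores that overwrite its source memory;
that is exactly the load/store ordering the vmovups/vmovaps CHECK lines in
swap8doubles guard against.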