diff options
-rw-r--r-- | lib/Target/X86/X86VZeroUpper.cpp | 10 | ||||
-rw-r--r-- | test/CodeGen/X86/avx-intel-ocl.ll | 40 |
2 files changed, 50 insertions, 0 deletions
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp index c4a5887..0f77948 100644 --- a/lib/Target/X86/X86VZeroUpper.cpp +++ b/lib/Target/X86/X86VZeroUpper.cpp @@ -120,9 +120,19 @@ static bool checkFnHasLiveInYmm(MachineRegisterInfo &MRI) { return false; } +static bool clobbersAllYmmRegs(const MachineOperand &MO) { + for (unsigned reg = X86::YMM0; reg < X86::YMM15; ++reg) { + if (!MO.clobbersPhysReg(reg)) + return false; + } + return true; +} + static bool hasYmmReg(MachineInstr *MI) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); + if (MI->isCall() && MO.isRegMask() && !clobbersAllYmmRegs(MO)) + return true; if (!MO.isReg()) continue; if (MO.isDebug()) diff --git a/test/CodeGen/X86/avx-intel-ocl.ll b/test/CodeGen/X86/avx-intel-ocl.ll index 0fec965..0550720 100644 --- a/test/CodeGen/X86/avx-intel-ocl.ll +++ b/test/CodeGen/X86/avx-intel-ocl.ll @@ -127,3 +127,43 @@ define i32 @test_int(i32 %a, i32 %b) nounwind { %c = add i32 %c2, %b ret i32 %c } + +; WIN64: test_float4 +; WIN64-NOT: vzeroupper +; WIN64: call +; WIN64-NOT: vzeroupper +; WIN64: call +; WIN64: ret + +; X64: test_float4 +; X64-NOT: vzeroupper +; X64: call +; X64-NOT: vzeroupper +; X64: call +; X64: ret + +; X32: test_float4 +; X32: vzeroupper +; X32: call +; X32: vzeroupper +; X32: call +; X32: ret + +declare <4 x float> @func_float4(<4 x float>, <4 x float>, <4 x float>) + +define <8 x float> @test_float4(<8 x float> %a, <8 x float> %b, <8 x float> %c) nounwind readnone { +entry: + %0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %1 = shufflevector <8 x float> %b, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %2 = shufflevector <8 x float> %c, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %call.i = tail call intel_ocl_bicc <4 x float> @func_float4(<4 x float> %0, <4 x float> %1, <4 x float> %2) nounwind + %3 = shufflevector <4 x float> %call.i, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> + %4 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %5 = shufflevector <8 x float> %b, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %6 = shufflevector <8 x float> %c, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %call.i2 = tail call intel_ocl_bicc <4 x float> @func_float4(<4 x float> %4, <4 x float> %5, <4 x float> %6) nounwind + %7 = shufflevector <4 x float> %call.i2, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> + %8 = shufflevector <8 x float> %3, <8 x float> %7, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> + ret <8 x float> %8 +} + |