aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lib/Target/X86/X86VZeroUpper.cpp35
-rw-r--r--test/CodeGen/X86/pr17631.ll20
2 files changed, 43 insertions, 12 deletions
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
index 0d37a7d..a944652 100644
--- a/lib/Target/X86/X86VZeroUpper.cpp
+++ b/lib/Target/X86/X86VZeroUpper.cpp
@@ -148,6 +148,25 @@ static bool hasYmmReg(MachineInstr *MI) {
return false;
}
+/// clobbersAnyYmmReg() - Check if any YMM register will be clobbered by this
+/// instruction.
+static bool clobbersAnyYmmReg(MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isRegMask())
+ continue;
+ for (unsigned reg = X86::YMM0; reg < X86::YMM31; ++reg) {
+ if (MO.clobbersPhysReg(reg))
+ return true;
+ }
+ for (unsigned reg = X86::ZMM0; reg < X86::ZMM31; ++reg) {
+ if (MO.clobbersPhysReg(reg))
+ return true;
+ }
+ }
+ return false;
+}
+
/// runOnMachineFunction - Loop over all of the basic blocks, inserting
/// vzero upper instructions before function calls.
bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
@@ -234,14 +253,6 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF,
DebugLoc dl = I->getDebugLoc();
MachineInstr *MI = I;
- // Don't need to check instructions added in prolog.
- // In prolog, special function calls may be added for specific targets
- // (e.g. on Windows, a prolog helper '_chkstk' is called when the local
- // variables exceed 4K bytes on stack.) These helpers won't use/def YMM/XMM
- // registers.
- if (MI->getFlag(MachineInstr::FrameSetup))
- continue;
-
bool isControlFlow = MI->isCall() || MI->isReturn();
// Shortcut: don't need to check regular instructions in dirty state.
@@ -260,6 +271,14 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF,
if (!isControlFlow)
continue;
+ // If the call won't clobber any YMM register, skip it as well. It usually
+ // happens on helper function calls (such as '_chkstk', '_ftol2') where
+ // standard calling convention is not used (RegMask is not used to mark
+ // register clobbered and register usage (def/imp-def/use) is well-dfined
+ // and explicitly specified.
+ if (MI->isCall() && !clobbersAnyYmmReg(MI))
+ continue;
+
BBHasCall = true;
// The VZEROUPPER instruction resets the upper 128 bits of all Intel AVX
diff --git a/test/CodeGen/X86/pr17631.ll b/test/CodeGen/X86/pr17631.ll
index a572ff2..98f951f 100644
--- a/test/CodeGen/X86/pr17631.ll
+++ b/test/CodeGen/X86/pr17631.ll
@@ -1,16 +1,16 @@
; RUN: llc < %s -mcpu=core-avx-i -mtriple=i386-pc-win32 | FileCheck %s
-
+
%struct_type = type { [64 x <8 x float>], <8 x float> }
-
+
; Function Attrs: nounwind readnone
declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>)
-
+
; Function Attrs: nounwind
define i32 @equal(<8 x i32> %A) {
allocas:
%first_alloc = alloca [64 x <8 x i32>]
%second_alloc = alloca %struct_type
-
+
%A1 = bitcast <8 x i32> %A to <8 x float>
%A2 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %A1)
ret i32 %A2
@@ -20,3 +20,15 @@ allocas:
; CHECK-NOT: vzeroupper
; CHECK: _chkstk
; CHECK: ret
+
+define <8 x float> @foo(<8 x float> %y, i64* %p, double %x) {
+ %i = fptoui double %x to i64
+ store i64 %i, i64* %p
+ %ret = fadd <8 x float> %y, %y
+ ret <8 x float> %ret
+}
+
+; CHECK: foo
+; CHECK-NOT: vzeroupper
+; CHECK: _ftol2
+; CHECK: ret