aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorBill Wendling <isanbard@gmail.com>2013-12-07 09:39:35 +0000
committerBill Wendling <isanbard@gmail.com>2013-12-07 09:39:35 +0000
commit2990853ea8bf4888b179ac6c493836b83769e87b (patch)
tree1996e9d95c75d32bbdc981d2cfb151c3b82a5a55 /lib
parent31928dfc03d92322f9f2fb1c4a7878024d3cc9d1 (diff)
downloadexternal_llvm-2990853ea8bf4888b179ac6c493836b83769e87b.zip
external_llvm-2990853ea8bf4888b179ac6c493836b83769e87b.tar.gz
external_llvm-2990853ea8bf4888b179ac6c493836b83769e87b.tar.bz2
Merging r196261:
------------------------------------------------------------------------ r196261 | hliao | 2013-12-03 01:17:32 -0800 (Tue, 03 Dec 2013) | 13 lines Enhance the fix of PR17631 - The fix to PR17631 fixes part of the cases where 'vzeroupper' should not be issued before 'call' insn. There are other cases where helper calls will be inserted not limited to epilog. These helper calls do not follow the standard calling convention and won't clobber any YMM registers. (So far, all call conventions will clobber any or part of YMM registers.) This patch enhances the previous fix to cover more cases where 'vzeroupper' should not be inserted by checking if that function call won't clobber any YMM registers and skipping it if so. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@196652 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r--lib/Target/X86/X86VZeroUpper.cpp35
1 files changed, 27 insertions, 8 deletions
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
index 0d37a7d..a944652 100644
--- a/lib/Target/X86/X86VZeroUpper.cpp
+++ b/lib/Target/X86/X86VZeroUpper.cpp
@@ -148,6 +148,25 @@ static bool hasYmmReg(MachineInstr *MI) {
return false;
}
+/// clobbersAnyYmmReg() - Return true if any register-mask operand of MI
+/// clobbers a register in YMM0..YMM31 or ZMM0..ZMM31 (both bounds inclusive).
+static bool clobbersAnyYmmReg(MachineInstr *MI) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isRegMask())
+      continue;
+    for (unsigned reg = X86::YMM0; reg <= X86::YMM31; ++reg) {
+      if (MO.clobbersPhysReg(reg))
+        return true;
+    }
+    for (unsigned reg = X86::ZMM0; reg <= X86::ZMM31; ++reg) {
+      if (MO.clobbersPhysReg(reg))
+        return true;
+    }
+  }
+  return false;
+}
+
/// runOnMachineFunction - Loop over all of the basic blocks, inserting
/// vzero upper instructions before function calls.
bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
@@ -234,14 +253,6 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF,
DebugLoc dl = I->getDebugLoc();
MachineInstr *MI = I;
- // Don't need to check instructions added in prolog.
- // In prolog, special function calls may be added for specific targets
- // (e.g. on Windows, a prolog helper '_chkstk' is called when the local
- // variables exceed 4K bytes on stack.) These helpers won't use/def YMM/XMM
- // registers.
- if (MI->getFlag(MachineInstr::FrameSetup))
- continue;
-
bool isControlFlow = MI->isCall() || MI->isReturn();
// Shortcut: don't need to check regular instructions in dirty state.
@@ -260,6 +271,14 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF,
if (!isControlFlow)
continue;
+ // If the call won't clobber any YMM register, skip it as well. It usually
+ // happens on helper function calls (such as '_chkstk', '_ftol2') where
+ // standard calling convention is not used (RegMask is not used to mark
+ // register clobbered and register usage (def/imp-def/use) is well-defined
+ // and explicitly specified).
+ if (MI->isCall() && !clobbersAnyYmmReg(MI))
+ continue;
+
BBHasCall = true;
// The VZEROUPPER instruction resets the upper 128 bits of all Intel AVX