Diffstat (limited to 'lib/Target/X86/X86VZeroUpper.cpp')
-rw-r--r-- | lib/Target/X86/X86VZeroUpper.cpp | 32 ++++++++++++++++++--------------
1 file changed, 18 insertions(+), 14 deletions(-)
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
index d93baeb..99ba4c0 100644
--- a/lib/Target/X86/X86VZeroUpper.cpp
+++ b/lib/Target/X86/X86VZeroUpper.cpp
@@ -9,7 +9,7 @@
 //
 // This file defines the pass which inserts x86 AVX vzeroupper instructions
 // before calls to SSE encoded functions. This avoids transition latency
-// penalty when tranfering control between AVX encoded instructions and old
+// penalty when transferring control between AVX encoded instructions and old
 // SSE encoding mode.
 //
 //===----------------------------------------------------------------------===//
@@ -171,7 +171,7 @@ void VZeroUpperInserter::addDirtySuccessor(MachineBasicBlock &MBB) {
 }
 
 /// processBasicBlock - Loop over all of the instructions in the basic block,
-/// inserting vzero upper instructions before function calls.
+/// inserting vzeroupper instructions before function calls.
 void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
 
   // Start by assuming that the block PASS_THROUGH, which implies no unguarded
@@ -202,7 +202,7 @@ void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
     // If the call won't clobber any YMM register, skip it as well. It usually
     // happens on helper function calls (such as '_chkstk', '_ftol2') where
     // standard calling convention is not used (RegMask is not used to mark
-    // register clobbered and register usage (def/imp-def/use) is well-dfined
+    // register clobbered and register usage (def/imp-def/use) is well-defined
     // and explicitly specified.
     if (MI->isCall() && !callClobbersAnyYmmReg(MI))
       continue;
@@ -245,25 +245,29 @@
 }
 
 /// runOnMachineFunction - Loop over all of the basic blocks, inserting
-/// vzero upper instructions before function calls.
+/// vzeroupper instructions before function calls.
 bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
-  const X86Subtarget &ST = MF.getTarget().getSubtarget<X86Subtarget>();
+  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
   if (!ST.hasAVX() || ST.hasAVX512())
     return false;
-  TII = MF.getSubtarget().getInstrInfo();
+  TII = ST.getInstrInfo();
   MachineRegisterInfo &MRI = MF.getRegInfo();
   EverMadeChange = false;
 
+  bool FnHasLiveInYmm = checkFnHasLiveInYmm(MRI);
+
   // Fast check: if the function doesn't use any ymm registers, we don't need
   // to insert any VZEROUPPER instructions. This is constant-time, so it is
   // cheap in the common case of no ymm use.
-  bool YMMUsed = false;
-  const TargetRegisterClass *RC = &X86::VR256RegClass;
-  for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end();
-       i != e; i++) {
-    if (!MRI.reg_nodbg_empty(*i)) {
-      YMMUsed = true;
-      break;
+  bool YMMUsed = FnHasLiveInYmm;
+  if (!YMMUsed) {
+    const TargetRegisterClass *RC = &X86::VR256RegClass;
+    for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end(); i != e;
+         i++) {
+      if (!MRI.reg_nodbg_empty(*i)) {
+        YMMUsed = true;
+        break;
+      }
     }
   }
   if (!YMMUsed) {
@@ -282,7 +286,7 @@ bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
 
   // If any YMM regs are live in to this function, add the entry block to the
   // DirtySuccessors list
-  if (checkFnHasLiveInYmm(MRI))
+  if (FnHasLiveInYmm)
     addDirtySuccessor(MF.front());
 
   // Re-visit all blocks that are successors of EXITS_DIRTY bsocks. Add
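The most substantive change above is that checkFnHasLiveInYmm(MRI) is now evaluated once, stored in FnHasLiveInYmm, and reused in two places: to seed the fast YMMUsed early-exit and, later, to decide whether the entry block is added to the DirtySuccessors list. Below is a minimal standalone sketch of that hoist-and-reuse pattern. It deliberately avoids LLVM: RegUseInfo, LiveInYmm, and YmmHasUses are hypothetical stand-ins for MachineRegisterInfo, the checkFnHasLiveInYmm() result, and the !MRI.reg_nodbg_empty(Reg) scan over X86::VR256RegClass, so treat it as an illustration of the control flow in the diff, not of the LLVM API.

#include <array>
#include <cstdio>

// Hypothetical stand-in for MachineRegisterInfo, reduced to the two facts
// the pass queries: whether any YMM register is live in to the function,
// and whether each YMM register has any non-debug uses.
struct RegUseInfo {
  bool LiveInYmm = false;            // stand-in for checkFnHasLiveInYmm(MRI)
  std::array<bool, 16> YmmHasUses{}; // stand-in for !MRI.reg_nodbg_empty(Reg)
};

// Mirrors the patched early-exit logic in runOnMachineFunction: the live-in
// check is computed once and reused, and the register-class scan only runs
// when the live-in check alone has not already proven YMM activity.
static bool functionTouchesYmm(const RegUseInfo &MRI) {
  // Hoisted: this result is also needed later to seed DirtySuccessors.
  bool FnHasLiveInYmm = MRI.LiveInYmm;

  bool YMMUsed = FnHasLiveInYmm;
  if (!YMMUsed) {
    for (bool HasUses : MRI.YmmHasUses) {
      if (HasUses) {
        YMMUsed = true;
        break;
      }
    }
  }
  return YMMUsed; // false => the pass can return immediately
}

int main() {
  RegUseInfo NoYmm;  // no YMM activity at all: early exit taken
  RegUseInfo LiveIn; // a YMM register is live in, but has no recorded uses
  LiveIn.LiveInYmm = true;
  std::printf("NoYmm: %d, LiveIn: %d\n",
              functionTouchesYmm(NoYmm), functionTouchesYmm(LiveIn));
  return 0;
}

One behavioral consequence is visible directly in the diff: because YMMUsed is now seeded with FnHasLiveInYmm, a function whose only YMM activity is a live-in register no longer takes the early exit, which keeps the later addDirtySuccessor(MF.front()) call reachable for it.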