Diffstat (limited to 'lib/Target/X86/X86VZeroUpper.cpp')
-rw-r--r--  lib/Target/X86/X86VZeroUpper.cpp  |  32
1 file changed, 18 insertions(+), 14 deletions(-)
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
index d93baeb..99ba4c0 100644
--- a/lib/Target/X86/X86VZeroUpper.cpp
+++ b/lib/Target/X86/X86VZeroUpper.cpp
@@ -9,7 +9,7 @@
//
// This file defines the pass which inserts x86 AVX vzeroupper instructions
// before calls to SSE encoded functions. This avoids transition latency
-// penalty when tranfering control between AVX encoded instructions and old
+// penalty when transferring control between AVX encoded instructions and old
// SSE encoding mode.
//
//===----------------------------------------------------------------------===//
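
For context: mixing VEX-encoded AVX instructions with legacy-encoded SSE instructions forces the CPU to save and restore the upper 128 bits of the YMM registers, a stall that can cost on the order of tens of cycles per transition on Sandy Bridge-class hardware; executing vzeroupper before leaving AVX code avoids it. The insertion itself is a single BuildMI call; a minimal sketch of the pass's helper, paraphrased from elsewhere in this file (not part of this diff):

  // Emit a VZEROUPPER immediately before iterator I in block MBB.
  void VZeroUpperInserter::insertVZeroUpper(MachineBasicBlock::iterator I,
                                            MachineBasicBlock &MBB) {
    DebugLoc dl = I->getDebugLoc();
    BuildMI(MBB, I, dl, TII->get(X86::VZEROUPPER));
  }
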
@@ -171,7 +171,7 @@ void VZeroUpperInserter::addDirtySuccessor(MachineBasicBlock &MBB) {
}
/// processBasicBlock - Loop over all of the instructions in the basic block,
-/// inserting vzero upper instructions before function calls.
+/// inserting vzeroupper instructions before function calls.
void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
// Start by assuming that the block is PASS_THROUGH, which implies no unguarded
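
PASS_THROUGH is one of three per-block exit states the pass propagates; the other two record whether the upper YMM halves are clean or dirty when control leaves the block. For orientation, the state enum declared earlier in this file (not part of this diff) is roughly:

  // Exit state of a basic block with respect to the upper YMM halves:
  //  - PASS_THROUGH: no YMM-dirtying instruction and no vzeroupper here,
  //                  so the exit state mirrors the (unknown) entry state.
  //  - EXITS_CLEAN:  a vzeroupper follows any YMM-dirtying instructions.
  //  - EXITS_DIRTY:  a YMM-dirtying instruction has no vzeroupper after it.
  enum BlockExitState { PASS_THROUGH, EXITS_CLEAN, EXITS_DIRTY };
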
@@ -202,7 +202,7 @@ void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
// If the call won't clobber any YMM register, skip it as well. It usually
// happens on helper function calls (such as '_chkstk', '_ftol2') where
// standard calling convention is not used (RegMask is not used to mark
- // register clobbered and register usage (def/imp-def/use) is well-dfined
+ // register clobbered and register usage (def/imp-def/use) is well-defined
// and explicitly specified.
if (MI->isCall() && !callClobbersAnyYmmReg(MI))
continue;
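
The "won't clobber" test reads the call's RegMask operand, which is how a normal calling convention publishes its clobber set; if none of YMM0-YMM15 is in the mask, the upper halves survive the call and no vzeroupper is warranted. The callClobbersAnyYmmReg helper referenced above is, from memory of this file, along these lines:

  static bool callClobbersAnyYmmReg(MachineInstr *MI) {
    assert(MI->isCall() && "Can only be called on call instructions!");
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = MI->getOperand(i);
      if (!MO.isRegMask())
        continue;                      // only the clobber mask matters here
      for (unsigned reg = X86::YMM0; reg <= X86::YMM15; ++reg)
        if (MO.clobbersPhysReg(reg))   // a YMM reg is in the clobber set
          return true;
    }
    return false;                      // no RegMask, or mask spares all YMMs
  }
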
@@ -245,25 +245,29 @@ void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
}
/// runOnMachineFunction - Loop over all of the basic blocks, inserting
-/// vzero upper instructions before function calls.
+/// vzeroupper instructions before function calls.
bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
- const X86Subtarget &ST = MF.getTarget().getSubtarget<X86Subtarget>();
+ const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
if (!ST.hasAVX() || ST.hasAVX512())
return false;
- TII = MF.getSubtarget().getInstrInfo();
+ TII = ST.getInstrInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
EverMadeChange = false;
+ bool FnHasLiveInYmm = checkFnHasLiveInYmm(MRI);
+
// Fast check: if the function doesn't use any ymm registers, we don't need
// to insert any VZEROUPPER instructions. This is constant-time, so it is
// cheap in the common case of no ymm use.
- bool YMMUsed = false;
- const TargetRegisterClass *RC = &X86::VR256RegClass;
- for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end();
- i != e; i++) {
- if (!MRI.reg_nodbg_empty(*i)) {
- YMMUsed = true;
- break;
+ bool YMMUsed = FnHasLiveInYmm;
+ if (!YMMUsed) {
+ const TargetRegisterClass *RC = &X86::VR256RegClass;
+ for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end(); i != e;
+ i++) {
+ if (!MRI.reg_nodbg_empty(*i)) {
+ YMMUsed = true;
+ break;
+ }
}
}
if (!YMMUsed) {
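
Hoisting checkFnHasLiveInYmm lets one scan serve both consumers: the fast YMM-used test above and the DirtySuccessors seeding in the next hunk. The live-in case matters because a YMM value can arrive from the caller without any def or use of a VR256 register in the function body, which the register-list walk alone could miss. The helper itself is a constant-time loop, roughly (recalled from this file, not part of this diff):

  // Does any YMM register carry a live value into the function?
  static bool checkFnHasLiveInYmm(MachineRegisterInfo &MRI) {
    for (unsigned reg = X86::YMM0; reg <= X86::YMM15; ++reg)
      if (MRI.isLiveIn(reg))
        return true;
    return false;
  }
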
@@ -282,7 +286,7 @@ bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
// If any YMM regs are live in to this function, add the entry block to the
// DirtySuccessors list
- if (checkFnHasLiveInYmm(MRI))
+ if (FnHasLiveInYmm)
addDirtySuccessor(MF.front());
// Re-visit all blocks that are successors of EXITS_DIRTY blocks. Add