diff options
author | Stephen Hines <srhines@google.com> | 2015-04-01 18:49:24 +0000 |
---|---|---|
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2015-04-01 18:49:26 +0000 |
commit | 3fa16bd6062e23bcdb82ed4dd965674792e6b761 (patch) | |
tree | 9348fc507292f7e8715d22d64ce5a32131b4f875 /lib/Target/X86/X86VZeroUpper.cpp | |
parent | beed47390a60f6f0c77532b3d3f76bb47ef49423 (diff) | |
parent | ebe69fe11e48d322045d5949c83283927a0d790b (diff) | |
download | external_llvm-3fa16bd6062e23bcdb82ed4dd965674792e6b761.zip external_llvm-3fa16bd6062e23bcdb82ed4dd965674792e6b761.tar.gz external_llvm-3fa16bd6062e23bcdb82ed4dd965674792e6b761.tar.bz2 |
Merge "Update aosp/master LLVM for rebase to r230699."
Diffstat (limited to 'lib/Target/X86/X86VZeroUpper.cpp')
-rw-r--r-- | lib/Target/X86/X86VZeroUpper.cpp | 32 |
1 files changed, 18 insertions, 14 deletions
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp index d93baeb..99ba4c0 100644 --- a/lib/Target/X86/X86VZeroUpper.cpp +++ b/lib/Target/X86/X86VZeroUpper.cpp @@ -9,7 +9,7 @@ // // This file defines the pass which inserts x86 AVX vzeroupper instructions // before calls to SSE encoded functions. This avoids transition latency -// penalty when tranfering control between AVX encoded instructions and old +// penalty when transferring control between AVX encoded instructions and old // SSE encoding mode. // //===----------------------------------------------------------------------===// @@ -171,7 +171,7 @@ void VZeroUpperInserter::addDirtySuccessor(MachineBasicBlock &MBB) { } /// processBasicBlock - Loop over all of the instructions in the basic block, -/// inserting vzero upper instructions before function calls. +/// inserting vzeroupper instructions before function calls. void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) { // Start by assuming that the block PASS_THROUGH, which implies no unguarded @@ -202,7 +202,7 @@ void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) { // If the call won't clobber any YMM register, skip it as well. It usually // happens on helper function calls (such as '_chkstk', '_ftol2') where // standard calling convention is not used (RegMask is not used to mark - // register clobbered and register usage (def/imp-def/use) is well-dfined + // register clobbered and register usage (def/imp-def/use) is well-defined // and explicitly specified. if (MI->isCall() && !callClobbersAnyYmmReg(MI)) continue; @@ -245,25 +245,29 @@ void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) { } /// runOnMachineFunction - Loop over all of the basic blocks, inserting -/// vzero upper instructions before function calls. +/// vzeroupper instructions before function calls. 
bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) { - const X86Subtarget &ST = MF.getTarget().getSubtarget<X86Subtarget>(); + const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>(); if (!ST.hasAVX() || ST.hasAVX512()) return false; - TII = MF.getSubtarget().getInstrInfo(); + TII = ST.getInstrInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); EverMadeChange = false; + bool FnHasLiveInYmm = checkFnHasLiveInYmm(MRI); + // Fast check: if the function doesn't use any ymm registers, we don't need // to insert any VZEROUPPER instructions. This is constant-time, so it is // cheap in the common case of no ymm use. - bool YMMUsed = false; - const TargetRegisterClass *RC = &X86::VR256RegClass; - for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end(); - i != e; i++) { - if (!MRI.reg_nodbg_empty(*i)) { - YMMUsed = true; - break; + bool YMMUsed = FnHasLiveInYmm; + if (!YMMUsed) { + const TargetRegisterClass *RC = &X86::VR256RegClass; + for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end(); i != e; + i++) { + if (!MRI.reg_nodbg_empty(*i)) { + YMMUsed = true; + break; + } } } if (!YMMUsed) { @@ -282,7 +286,7 @@ bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) { // If any YMM regs are live in to this function, add the entry block to the // DirtySuccessors list - if (checkFnHasLiveInYmm(MRI)) + if (FnHasLiveInYmm) addDirtySuccessor(MF.front()); // Re-visit all blocks that are successors of EXITS_DIRTY blocks. Add |