aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lib/Target/X86/CMakeLists.txt1
-rw-r--r--lib/Target/X86/X86.h5
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp16
-rw-r--r--lib/Target/X86/X86VZeroUpper.cpp105
-rw-r--r--test/CodeGen/X86/avx-vzeroupper.ll26
5 files changed, 153 insertions, 0 deletions
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index 1fd5512..c481eb9 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -32,6 +32,7 @@ set(sources
X86Subtarget.cpp
X86TargetMachine.cpp
X86TargetObjectFile.cpp
+ X86VZeroUpper.cpp
)
if( CMAKE_CL_64 )
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index d1e1933..d480d0c 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -48,6 +48,11 @@ FunctionPass *createX86FloatingPointStackifierPass();
/// crossings.
FunctionPass *createSSEDomainFixPass();
+/// createX86IssueVZeroUpperPass - This pass inserts AVX vzeroupper instructions
+/// before each call to a function that is not defined in the current module,
+/// to avoid the transition penalty between code encoded with AVX and SSE.
+FunctionPass *createX86IssueVZeroUpperPass();
+
/// createX86CodeEmitterPass - Return a pass that emits the collected X86 code
/// to the specified MCE object.
FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM,
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 569c040..95e7021 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -16,6 +16,7 @@
#include "llvm/PassManager.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegistry.h"
@@ -92,6 +93,16 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT,
}
//===----------------------------------------------------------------------===//
+// Command line options for x86
+//===----------------------------------------------------------------------===//
+bool UseVZeroUpper; // Backing storage for the -x86-use-vzeroupper flag below.
+
+static cl::opt<bool, true>
+VZeroUpper("x86-use-vzeroupper", // Flag name as spelled on the command line.
+ cl::desc("Minimize AVX to SSE transition penalty"),
+ cl::location(UseVZeroUpper), cl::init(false)); // Stored externally; off by default.
+
+//===----------------------------------------------------------------------===//
// Pass Pipeline Configuration
//===----------------------------------------------------------------------===//
@@ -125,6 +136,11 @@ bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM,
PM.add(createSSEDomainFixPass());
return true;
}
+
+ if (Subtarget.hasAVX() && UseVZeroUpper) {
+ PM.add(createX86IssueVZeroUpperPass());
+ return true;
+ }
return false;
}
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
new file mode 100644
index 0000000..d87efc9
--- /dev/null
+++ b/lib/Target/X86/X86VZeroUpper.cpp
@@ -0,0 +1,105 @@
+//===-- X86VZeroUpper.cpp - AVX vzeroupper instruction inserter -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the pass which inserts x86 AVX vzeroupper instructions
+// before calls to SSE encoded functions. This avoids transition latency
+// penalty when transferring control between AVX encoded instructions and old
+// SSE encoding mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-codegen"
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Target/TargetInstrInfo.h"
+using namespace llvm;
+
+STATISTIC(NumVZU, "Number of vzeroupper instructions inserted");
+
+namespace {
+  /// VZeroUpperInserter - Machine function pass that visits every basic block
+  /// of a function and inserts vzeroupper instructions via processBasicBlock.
+  struct VZeroUpperInserter : public MachineFunctionPass {
+    static char ID; // Pass identification, passed to the base class.
+
+    // Start with null pointers so the members never hold indeterminate
+    // values; they are assigned while a function is being processed.
+    VZeroUpperInserter() : MachineFunctionPass(ID), TII(0), MBB(0) {}
+
+    /// Entry point: processes each block of \p MF and returns the accumulated
+    /// result of processBasicBlock.
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+    /// Handles a single basic block \p MBB of \p MF.
+    bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
+
+    virtual const char *getPassName() const { return "X86 vzeroupper inserter";}
+
+  private:
+    const TargetInstrInfo *TII; // Machine instruction info.
+    MachineBasicBlock *MBB;     // Current basic block
+  };
+  char VZeroUpperInserter::ID = 0;
+}
+
+/// createX86IssueVZeroUpperPass - Allocate a new vzeroupper inserter pass and
+/// hand it back to the caller as a generic FunctionPass pointer.
+FunctionPass *llvm::createX86IssueVZeroUpperPass() {
+  FunctionPass *Inserter = new VZeroUpperInserter();
+  return Inserter;
+}
+
+/// runOnMachineFunction - Cache the target's instruction info, then hand every
+/// basic block of \p MF to processBasicBlock. Returns true if any of those
+/// calls returned true.
+bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
+  TII = MF.getTarget().getInstrInfo();
+
+  bool Modified = false;
+  MachineFunction::iterator BlockIt = MF.begin();
+  const MachineFunction::iterator BlockEnd = MF.end();
+
+  // Visit the blocks in function layout order; every block is processed even
+  // after an earlier one has already reported a change.
+  while (BlockIt != BlockEnd) {
+    if (processBasicBlock(MF, *BlockIt))
+      Modified = true;
+    ++BlockIt;
+  }
+
+  return Modified;
+}
+
+/// isCallToModuleFn - Decide whether the call \p MI targets a global that
+/// lives in this module.
+///
+/// Looks at the first global-address operand of \p MI and returns true when
+/// that global has internal or private linkage, or has external linkage and is
+/// not a mere declaration. Returns false in every other case, including when
+/// the instruction carries no global operand at all.
+///
+/// \p MI must be a call instruction (asserted).
+///
+/// Declared static: the helper is only used inside this file and should not
+/// export a symbol from it.
+static bool isCallToModuleFn(const MachineInstr *MI) {
+  assert(MI->getDesc().isCall() && "Isn't a call instruction");
+
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+
+    if (!MO.isGlobal())
+      continue;
+
+    // Only the first global operand is examined; it alone decides the result.
+    const GlobalValue *GV = MO.getGlobal();
+    const GlobalValue::LinkageTypes LT = GV->getLinkage();
+    return GlobalValue::isInternalLinkage(LT) ||
+           GlobalValue::isPrivateLinkage(LT) ||
+           (GlobalValue::isExternalLinkage(LT) && !GV->isDeclaration());
+  }
+
+  // No global operand was found, so the callee cannot be identified here.
+  return false;
+}
+
+/// processBasicBlock - Walk all instructions of \p BB and insert a vzeroupper
+/// instruction directly in front of every call for which isCallToModuleFn
+/// returns false, i.e. every call not known to target this module.
+///
+/// \returns true if at least one vzeroupper was inserted into \p BB.
+bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF,
+                                           MachineBasicBlock &BB) {
+  bool Changed = false;
+  MBB = &BB;
+
+  for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
+    MachineInstr *MI = I;
+
+    // Skip everything that is not a call, and calls that stay in this module.
+    if (!MI->getDesc().isCall() || isCallToModuleFn(MI))
+      continue;
+
+    // Insert a vzeroupper instruction before each control transfer
+    // to functions outside this module. It reuses the call's debug location.
+    DebugLoc dl = MI->getDebugLoc();
+    BuildMI(*MBB, I, dl, TII->get(X86::VZEROUPPER));
+    ++NumVZU;
+
+    // Record that the block was modified; without this the function (and
+    // therefore the whole pass) would always report "no change".
+    Changed = true;
+  }
+
+  return Changed;
+}
diff --git a/test/CodeGen/X86/avx-vzeroupper.ll b/test/CodeGen/X86/avx-vzeroupper.ll
new file mode 100644
index 0000000..eaf236c
--- /dev/null
+++ b/test/CodeGen/X86/avx-vzeroupper.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+; do_sse_local is defined inside this module; test00 calls it and expects that
+; call to have no vzeroupper in front of it.
+define <4 x float> @do_sse_local(<4 x float> %a) nounwind uwtable readnone ssp {
+entry:
+ %add.i = fadd <4 x float> %a, %a
+ ret <4 x float> %add.i
+}
+
+; CHECK: _test00
+; test00 calls the external declaration @do_sse, then the locally defined
+; @do_sse_local, then @do_sse again as the final tail call. A vzeroupper is
+; expected right before both @do_sse transfers and not before the local call.
+define <4 x float> @test00(<4 x float> %a, <4 x float> %b) nounwind uwtable ssp {
+entry:
+ %add.i = fadd <4 x float> %a, %b
+ ; CHECK: vzeroupper
+ ; CHECK-NEXT: callq _do_sse
+ %call3 = tail call <4 x float> @do_sse(<4 x float> %add.i) nounwind
+ %sub.i = fsub <4 x float> %call3, %add.i
+ ; CHECK-NOT: vzeroupper
+ ; CHECK: callq _do_sse_local
+ %call8 = tail call <4 x float> @do_sse_local(<4 x float> %sub.i)
+ ; CHECK: vzeroupper
+ ; CHECK-NEXT: jmp _do_sse
+ %call10 = tail call <4 x float> @do_sse(<4 x float> %call8) nounwind
+ ret <4 x float> %call10
+}
+
+declare <4 x float> @do_sse(<4 x float>)