aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Chris Lattner <sabre@nondot.org> 2010-05-21 18:17:54 +0000
committer Chris Lattner <sabre@nondot.org> 2010-05-21 18:17:54 +0000
commit a26a8471bdb132f963f26e4df2091ed204024f36 (patch)
tree e88cfb3999012b41a256078917946f4a03dd1dc5
parent d596c90030fc7a4dbfdfcab32ce0eae10171018e (diff)
download external_llvm-a26a8471bdb132f963f26e4df2091ed204024f36.zip
external_llvm-a26a8471bdb132f963f26e4df2091ed204024f36.tar.gz
external_llvm-a26a8471bdb132f963f26e4df2091ed204024f36.tar.bz2
Now that FP reg kill insertion happens as a separate pass after isel
instead of being interleaved with it, we can trust that all the code for a function has been isel'd before the pass runs.

The practical impact of this is that we can scan for machine instr PHIs instead of doing a fuzzy match on the LLVM BB for phi nodes. Doing the fuzzy match required knowing when isel would produce an fp reg stack phi, which was gross. It was also wrong in cases where a select got lowered to a branch tree because cmovs aren't available (PR6828). Just do the scan on machine PHIs, which is simpler, faster, and more correct. This fixes PR6828.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@104333 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/Target/X86/X86FloatingPointRegKill.cpp 71
-rw-r--r-- test/CodeGen/X86/fp-stack.ll 25
2 files changed, 58 insertions, 38 deletions
diff --git a/lib/Target/X86/X86FloatingPointRegKill.cpp b/lib/Target/X86/X86FloatingPointRegKill.cpp
index d9c69f5..747683d 100644
--- a/lib/Target/X86/X86FloatingPointRegKill.cpp
+++ b/lib/Target/X86/X86FloatingPointRegKill.cpp
@@ -14,7 +14,6 @@
#define DEBUG_TYPE "x86-codegen"
#include "X86.h"
#include "X86InstrInfo.h"
-#include "X86Subtarget.h"
#include "llvm/Instructions.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -53,10 +52,26 @@ FunctionPass *llvm::createX87FPRegKillInserterPass() {
return new FPRegKiller();
}
+/// isFPStackVReg - Return true if the specified vreg is from a fp stack
+/// register class.
+static bool isFPStackVReg(unsigned RegNo, const MachineRegisterInfo &MRI) {
+ if (!TargetRegisterInfo::isVirtualRegister(RegNo))
+ return false;
+
+ switch (MRI.getRegClass(RegNo)->getID()) {
+ default: return false;
+ case X86::RFP32RegClassID:
+ case X86::RFP64RegClassID:
+ case X86::RFP80RegClassID:
+ return true;
+ }
+}
+
+
/// ContainsFPStackCode - Return true if the specific MBB has floating point
/// stack code, and thus needs an FP_REG_KILL.
-static bool ContainsFPStackCode(MachineBasicBlock *MBB, unsigned SSELevel,
- MachineRegisterInfo &MRI) {
+static bool ContainsFPStackCode(MachineBasicBlock *MBB,
+ const MachineRegisterInfo &MRI) {
// Scan the block, looking for instructions that define fp stack vregs.
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
I != E; ++I) {
@@ -64,40 +79,27 @@ static bool ContainsFPStackCode(MachineBasicBlock *MBB, unsigned SSELevel,
continue;
for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
- if (!I->getOperand(op).isReg() || !I->getOperand(op).isDef() ||
- !TargetRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()))
+ if (!I->getOperand(op).isReg() || !I->getOperand(op).isDef())
continue;
- const TargetRegisterClass *RegClass =
- MRI.getRegClass(I->getOperand(op).getReg());
-
- switch (RegClass->getID()) {
- default: break;
- case X86::RFP32RegClassID:
- case X86::RFP64RegClassID:
- case X86::RFP80RegClassID:
+ if (isFPStackVReg(I->getOperand(op).getReg(), MRI))
return true;
- }
}
}
// Check PHI nodes in successor blocks. These PHI's will be lowered to have
- // a copy of the input value in this block. In SSE mode, we only care about
- // 80-bit values.
-
- // Final check, check LLVM BB's that are successors to the LLVM BB
- // corresponding to BB for FP PHI nodes.
- const BasicBlock *LLVMBB = MBB->getBasicBlock();
- for (succ_const_iterator SI = succ_begin(LLVMBB), E = succ_end(LLVMBB);
- SI != E; ++SI) {
- const PHINode *PN;
- for (BasicBlock::const_iterator II = SI->begin();
- (PN = dyn_cast<PHINode>(II)); ++II) {
- if (PN->getType()->isX86_FP80Ty() ||
- (SSELevel == 0 && PN->getType()->isFloatingPointTy()) ||
- (SSELevel < 2 && PN->getType()->isDoubleTy())) {
+ // a copy of the input value in this block, which is a definition of the
+ // value.
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ E = MBB->succ_end(); SI != E; ++ SI) {
+ MachineBasicBlock *SuccBB = *SI;
+ for (MachineBasicBlock::iterator I = SuccBB->begin(), E = SuccBB->end();
+ I != E; ++I) {
+ // All PHI nodes are at the top of the block.
+ if (!I->isPHI()) break;
+
+ if (isFPStackVReg(I->getOperand(0).getReg(), MRI))
return true;
- }
}
}
@@ -120,19 +122,12 @@ bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) {
// Fast-path: If nothing is using the x87 registers, we don't need to do
// any scanning.
- MachineRegisterInfo &MRI = MF.getRegInfo();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
if (MRI.getRegClassVirtRegs(X86::RFP80RegisterClass).empty() &&
MRI.getRegClassVirtRegs(X86::RFP64RegisterClass).empty() &&
MRI.getRegClassVirtRegs(X86::RFP32RegisterClass).empty())
return false;
- const X86Subtarget &Subtarget = MF.getTarget().getSubtarget<X86Subtarget>();
- unsigned SSELevel = 0;
- if (Subtarget.hasSSE2())
- SSELevel = 2;
- else if (Subtarget.hasSSE1())
- SSELevel = 1;
-
bool Changed = false;
MachineFunction::iterator MBBI = MF.begin();
MachineFunction::iterator EndMBB = MF.end();
@@ -149,7 +144,7 @@ bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) {
}
// If we find any FP stack code, emit the FP_REG_KILL instruction.
- if (ContainsFPStackCode(MBB, SSELevel, MRI)) {
+ if (ContainsFPStackCode(MBB, MRI)) {
BuildMI(*MBB, MBBI->getFirstTerminator(), DebugLoc(),
MF.getTarget().getInstrInfo()->get(X86::FP_REG_KILL));
++NumFPKill;
diff --git a/test/CodeGen/X86/fp-stack.ll b/test/CodeGen/X86/fp-stack.ll
new file mode 100644
index 0000000..dca644d
--- /dev/null
+++ b/test/CodeGen/X86/fp-stack.ll
@@ -0,0 +1,25 @@
+; RUN: llc %s -o - -mcpu=pentium
+; PR6828
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i386-pc-linux-gnu"
+
+define void @foo() nounwind {
+entry:
+ %tmp6 = load x86_fp80* undef ; <x86_fp80> [#uses=2]
+ %tmp15 = load x86_fp80* undef ; <x86_fp80> [#uses=2]
+ %tmp24 = load x86_fp80* undef ; <x86_fp80> [#uses=1]
+ br i1 undef, label %return, label %bb.nph
+
+bb.nph: ; preds = %entry
+ %cmp139 = fcmp ogt x86_fp80 %tmp15, %tmp6 ; <i1> [#uses=1]
+ %maxdiag.0 = select i1 %cmp139, x86_fp80 %tmp15, x86_fp80 %tmp6 ; <x86_fp80> [#uses=1]
+ %cmp139.1 = fcmp ogt x86_fp80 %tmp24, %maxdiag.0 ; <i1> [#uses=1]
+ br i1 %cmp139.1, label %sw.bb372, label %return
+
+sw.bb372: ; preds = %bb.nph
+ ret void
+
+return: ; preds = %bb.nph, %entry
+ ret void
+}
+