aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJakob Stoklund Olesen <stoklund@2pi.dk>2011-08-08 17:15:43 +0000
committerJakob Stoklund Olesen <stoklund@2pi.dk>2011-08-08 17:15:43 +0000
commit66b0f515d5f7d4b830c3407a273facde405e4f86 (patch)
tree86571b1582994d48b0654eed8d0d8f32dd1d004f
parentbf13ee1941a312998f0e754016921045626698e9 (diff)
downloadexternal_llvm-66b0f515d5f7d4b830c3407a273facde405e4f86.zip
external_llvm-66b0f515d5f7d4b830c3407a273facde405e4f86.tar.gz
external_llvm-66b0f515d5f7d4b830c3407a273facde405e4f86.tar.bz2
Don't clobber pending ST regs when FP regs are killed.
X86FloatingPoint keeps track of pending ST registers for an upcoming inline asm instruction with fixed stack register constraints. It does this by remembering which FP register holds the value that should appear at a fixed stack position for the inline asm. When that FP register is killed before the inline asm, make sure to duplicate it to a scratch register, so the ST register still has a live FP reference. This could happen when the same FP register was copied to two ST registers, or when a spill instruction is inserted between the ST copy and the inline asm. This fixes PR10602. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@137050 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/X86/X86FloatingPoint.cpp20
-rw-r--r--test/CodeGen/X86/inline-asm-fpstack.ll11
2 files changed, 31 insertions, 0 deletions
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 6f619a9..a9b6fdd 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -260,6 +260,21 @@ namespace {
BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg);
}
+ /// duplicatePendingSTBeforeKill - The instruction at I is about to kill
+ /// RegNo. If any PendingST registers still need the RegNo value, duplicate
+ /// them to new scratch registers.
+ void duplicatePendingSTBeforeKill(unsigned RegNo, MachineInstr *I) {
+ for (unsigned i = 0; i != NumPendingSTs; ++i) {
+ if (PendingST[i] != RegNo)
+ continue;
+ unsigned SR = getScratchReg();
+ DEBUG(dbgs() << "Duplicating pending ST" << i
+ << " in FP" << RegNo << " to FP" << SR << '\n');
+ duplicateToTop(RegNo, SR, I);
+ PendingST[i] = SR;
+ }
+ }
+
/// popStackAfter - Pop the current value off of the top of the FP stack
/// after the specified instruction.
void popStackAfter(MachineBasicBlock::iterator &I);
@@ -973,6 +988,9 @@ void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
unsigned Reg = getFPReg(MI->getOperand(NumOps-1));
bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
+ if (KillsSrc)
+ duplicatePendingSTBeforeKill(Reg, I);
+
// FISTP64m is strange because there isn't a non-popping versions.
// If we have one _and_ we don't want to pop the operand, duplicate the value
// on the stack instead of moving it. This ensure that popping the value is
@@ -1036,6 +1054,7 @@ void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) {
bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
if (KillsSrc) {
+ duplicatePendingSTBeforeKill(Reg, I);
// If this is the last use of the source register, just make sure it's on
// the top of the stack.
moveToTop(Reg, I);
@@ -1322,6 +1341,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
// When the source is killed, allocate a scratch FP register.
if (KillsSrc) {
+ duplicatePendingSTBeforeKill(SrcFP, I);
unsigned Slot = getSlot(SrcFP);
unsigned SR = getScratchReg();
PendingST[DstST] = SR;
diff --git a/test/CodeGen/X86/inline-asm-fpstack.ll b/test/CodeGen/X86/inline-asm-fpstack.ll
index 8e48bbe..c9a1c1c 100644
--- a/test/CodeGen/X86/inline-asm-fpstack.ll
+++ b/test/CodeGen/X86/inline-asm-fpstack.ll
@@ -329,3 +329,14 @@ entry:
%asmresult = extractvalue %complex %0, 0
ret float %asmresult
}
+
+; Pass the same value in two fixed stack slots.
+; CHECK: PR10602
+; CHECK: flds LCPI
+; CHECK: fld %st(0)
+; CHECK: fcomi %st(1), %st(0)
+define i32 @PR10602() nounwind ssp {
+entry:
+ %0 = tail call i32 asm "fcomi $2, $1; pushf; pop $0", "=r,{st},{st(1)},~{dirflag},~{fpsr},~{flags}"(double 2.000000e+00, double 2.000000e+00) nounwind
+ ret i32 %0
+}