aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEvan Cheng <evan.cheng@apple.com>2010-11-22 18:12:04 +0000
committerEvan Cheng <evan.cheng@apple.com>2010-11-22 18:12:04 +0000
commitab5c703fdb0b07c6d89271519fd1cca7105b3eae (patch)
tree52b8ff64598ea781f1e347405867c74989a11920
parent4d5a29979000e1d5afc6c31239fe12d888027f4c (diff)
downloadexternal_llvm-ab5c703fdb0b07c6d89271519fd1cca7105b3eae.zip
external_llvm-ab5c703fdb0b07c6d89271519fd1cca7105b3eae.tar.gz
external_llvm-ab5c703fdb0b07c6d89271519fd1cca7105b3eae.tar.bz2
Fix epilogue codegen to avoid leaving the stack pointer in an invalid
state. Previously Thumb2 would restore sp from fp like this: mov sp, r7 sub, sp, #4 If an interrupt is taken after the 'mov' but before the 'sub', callee-saved registers might be clobbered by the interrupt handler. Instead, try restoring directly from sp: add sp, #4 Or, if necessary (with VLA, etc.) use a scratch register to compute sp and then restore it: sub.w r4, r7, #8 mov sp, r7 rdar://8465407 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@119977 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp7
-rw-r--r--lib/Target/ARM/ARMFrameInfo.cpp33
-rw-r--r--lib/Target/ARM/Thumb1FrameInfo.cpp33
-rw-r--r--test/CodeGen/ARM/hello.ll2
-rw-r--r--test/CodeGen/Thumb/dyn-stackalloc.ll23
-rw-r--r--test/CodeGen/Thumb/large-stack.ll8
-rw-r--r--test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll9
-rw-r--r--test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll12
-rw-r--r--test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll34
9 files changed, 124 insertions, 37 deletions
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 171b323..42159c9 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -694,10 +694,11 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
MachineFrameInfo *MFI = MF.getFrameInfo();
// Spill R4 if Thumb2 function requires stack realignment - it will be used as
- // scratch register.
+ // scratch register. Also spill R4 if Thumb2 function has varsized objects,
+ // since it's always posible to restore sp from fp in a single instruction.
// FIXME: It will be better just to find spare register here.
- if (needsStackRealignment(MF) &&
- AFI->isThumb2Function())
+ if (AFI->isThumb2Function() &&
+ (MFI->hasVarSizedObjects() || needsStackRealignment(MF)))
MF.getRegInfo().setPhysRegUsed(ARM::R4);
// Spill LR if Thumb1 function uses variable length argument lists.
diff --git a/lib/Target/ARM/ARMFrameInfo.cpp b/lib/Target/ARM/ARMFrameInfo.cpp
index 7766b1f..62f3f98 100644
--- a/lib/Target/ARM/ARMFrameInfo.cpp
+++ b/lib/Target/ARM/ARMFrameInfo.cpp
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -212,15 +213,21 @@ void ARMFrameInfo::emitPrologue(MachineFunction &MF) const {
if (NumBytes) {
// Adjust SP after all the callee-save spills.
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes);
- if (HasFP)
+ if (HasFP && isARM)
+ // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
+ // Note it's not safe to do this in Thumb2 mode because it would have
+ // taken two instructions:
+ // mov sp, r7
+ // sub sp, #24
+ // If an interrupt is taken between the two instructions, then sp is in
+ // an inconsistent state (pointing to the middle of callee-saved area).
+ // The interrupt handler can end up clobbering the registers.
AFI->setShouldRestoreSPFromFP(true);
}
- if (STI.isTargetELF() && hasFP(MF)) {
+ if (STI.isTargetELF() && hasFP(MF))
MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
AFI->getFramePtrSpillOffset());
- AFI->setShouldRestoreSPFromFP(true);
- }
AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
@@ -275,7 +282,7 @@ void ARMFrameInfo::emitPrologue(MachineFunction &MF) const {
// If the frame has variable sized objects then the epilogue must restore
// the sp from fp.
- if (!AFI->shouldRestoreSPFromFP() && MFI->hasVarSizedObjects())
+ if (MFI->hasVarSizedObjects())
AFI->setShouldRestoreSPFromFP(true);
}
@@ -326,9 +333,21 @@ void ARMFrameInfo::emitEpilogue(MachineFunction &MF,
if (isARM)
emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
ARMCC::AL, 0, TII);
- else
- emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
+ else {
+ // It's not possible to restore SP from FP in a single instruction.
+ // For Darwin, this looks like:
+ // mov sp, r7
+ // sub sp, #24
+ // This is bad, if an interrupt is taken after the mov, sp is in an
+ // inconsistent state.
+ // Use the first callee-saved register as a scratch register.
+ assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) &&
+ "No scratch register to restore SP from FP!");
+ emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
ARMCC::AL, 0, TII);
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP)
+ .addReg(ARM::R4);
+ }
} else {
// Thumb2 or ARM.
if (isARM)
diff --git a/lib/Target/ARM/Thumb1FrameInfo.cpp b/lib/Target/ARM/Thumb1FrameInfo.cpp
index 6a3c1e1..bb1faf5 100644
--- a/lib/Target/ARM/Thumb1FrameInfo.cpp
+++ b/lib/Target/ARM/Thumb1FrameInfo.cpp
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
@@ -117,13 +118,6 @@ void Thumb1FrameInfo::emitPrologue(MachineFunction &MF) const {
dl = MBBI->getDebugLoc();
}
- // Adjust FP so it point to the stack slot that contains the previous FP.
- if (hasFP(MF)) {
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
- .addFrameIndex(FramePtrSpillFI).addImm(0);
- AFI->setShouldRestoreSPFromFP(true);
- }
-
// Determine starting offsets of spill areas.
unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
@@ -132,12 +126,21 @@ void Thumb1FrameInfo::emitPrologue(MachineFunction &MF) const {
AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
-
NumBytes = DPRCSOffset;
- if (NumBytes) {
+
+ // Adjust FP so it point to the stack slot that contains the previous FP.
+ if (hasFP(MF)) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
+ .addFrameIndex(FramePtrSpillFI).addImm(0);
+ if (NumBytes > 7)
+ // If offset is > 7 then sp cannot be adjusted in a single instruction,
+ // try restoring from fp instead.
+ AFI->setShouldRestoreSPFromFP(true);
+ }
+
+ if (NumBytes)
// Insert it after all the callee-save spills.
emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes);
- }
if (STI.isTargetELF() && hasFP(MF))
MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
@@ -219,10 +222,14 @@ void Thumb1FrameInfo::emitEpilogue(MachineFunction &MF,
NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
// Reset SP based on frame pointer only if the stack frame extends beyond
// frame pointer stack slot or target is ELF and the function has FP.
- if (NumBytes)
- emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, FramePtr, -NumBytes,
+ if (NumBytes) {
+ assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) &&
+ "No scratch register to restore SP from FP!");
+ emitThumbRegPlusImmediate(MBB, MBBI, ARM::R4, FramePtr, -NumBytes,
TII, *RegInfo, dl);
- else
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
+ .addReg(ARM::R4);
+ } else
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
.addReg(FramePtr);
} else {
diff --git a/test/CodeGen/ARM/hello.ll b/test/CodeGen/ARM/hello.ll
index ccdc7bf..bfed7a6 100644
--- a/test/CodeGen/ARM/hello.ll
+++ b/test/CodeGen/ARM/hello.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=arm
; RUN: llc < %s -mtriple=arm-linux-gnueabi | grep mov | count 1
; RUN: llc < %s -mtriple=arm-linux-gnu --disable-fp-elim | \
-; RUN: grep mov | count 3
+; RUN: grep mov | count 2
; RUN: llc < %s -mtriple=arm-apple-darwin | grep mov | count 2
@str = internal constant [12 x i8] c"Hello World\00"
diff --git a/test/CodeGen/Thumb/dyn-stackalloc.ll b/test/CodeGen/Thumb/dyn-stackalloc.ll
index 5c8ad97..1f31dca 100644
--- a/test/CodeGen/Thumb/dyn-stackalloc.ll
+++ b/test/CodeGen/Thumb/dyn-stackalloc.ll
@@ -1,12 +1,15 @@
-; RUN: llc < %s -march=thumb | not grep {ldr sp}
-; RUN: llc < %s -mtriple=thumb-apple-darwin | \
-; RUN: not grep {sub.*r7}
-; RUN: llc < %s -march=thumb | grep {mov.*r6, sp}
+; RUN: llc < %s -mtriple=thumb-apple-darwin | FileCheck %s
%struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* }
%struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* }
define void @t1(%struct.state* %v) {
+; CHECK: t1:
+; CHECK: push
+; CHECK: add r7, sp, #12
+; CHECK: mov r2, sp
+; CHECK: subs r4, r2, r1
+; CHECK: mov sp, r4
%tmp6 = load i32* null
%tmp8 = alloca float, i32 %tmp6
store i32 1, i32* null
@@ -34,6 +37,18 @@ declare fastcc void @f2(float*, float*, float*, i32)
@str215 = external global [2 x i8]
define void @t2(%struct.comment* %vc, i8* %tag, i8* %contents) {
+; CHECK: t2:
+; CHECK: push
+; CHECK: add r7, sp, #12
+; CHECK: sub sp, #8
+; CHECK: mov r6, sp
+; CHECK: str r2, [r6, #4]
+; CHECK: str r0, [r6]
+; CHECK-NOT: ldr r0, [sp
+; CHECK: ldr r0, [r6, #4]
+; CHECK: mov r0, sp
+; CHECK: subs r5, r0, r1
+; CHECK: mov sp, r5
%tmp1 = call i32 @strlen( i8* %tag )
%tmp3 = call i32 @strlen( i8* %contents )
%tmp4 = add i32 %tmp1, 2
diff --git a/test/CodeGen/Thumb/large-stack.ll b/test/CodeGen/Thumb/large-stack.ll
index b289484..f4dd3e0 100644
--- a/test/CodeGen/Thumb/large-stack.ll
+++ b/test/CodeGen/Thumb/large-stack.ll
@@ -12,8 +12,8 @@ define void @test2() {
; CHECK: test2:
; CHECK: ldr r0, LCPI
; CHECK: add sp, r0
-; CHECK: mov sp, r7
-; CHECK: sub sp, #4
+; CHECK: subs r4, r7, #4
+; CHECK: mov sp, r4
%tmp = alloca [ 4168 x i8 ] , align 4
ret void
}
@@ -24,8 +24,8 @@ define i32 @test3() {
; CHECK: add sp, r2
; CHECK: ldr r1, LCPI
; CHECK: add r1, sp
-; CHECK: mov sp, r7
-; CHECK: sub sp, #4
+; CHECK: subs r4, r7, #4
+; CHECK: mov sp, r4
%retval = alloca i32, align 4
%tmp = alloca i32, align 4
%a = alloca [805306369 x i8], align 16
diff --git a/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll b/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll
index f26c6d1..550b3ef 100644
--- a/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll
+++ b/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll
@@ -5,8 +5,13 @@
define hidden i32 @__gcov_execlp(i8* %path, i8* %arg, ...) nounwind {
entry:
; CHECK: __gcov_execlp:
-; CHECK: mov sp, r7
-; CHECK: sub sp, #4
+; CHECK: sub sp, #8
+; CHECK: push
+; CHECK: add r7, sp, #4
+; CHECK: subs r4, r7, #4
+; CHECK: mov sp, r4
+; CHECK-NOT: mov sp, r7
+; CHECK: add sp, #8
call void @__gcov_flush() nounwind
br i1 undef, label %bb5, label %bb
diff --git a/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll b/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll
index abcf13a..41f7f29 100644
--- a/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll
+++ b/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll
@@ -5,6 +5,10 @@
define internal fastcc i32 @Callee(i32 %i) nounwind {
entry:
; CHECK: Callee:
+; CHECK: push
+; CHECK: mov r4, sp
+; CHECK: sub.w r12, r4, #1000
+; CHECK: mov sp, r12
%0 = icmp eq i32 %i, 0 ; <i1> [#uses=1]
br i1 %0, label %bb2, label %bb
@@ -17,9 +21,11 @@ bb: ; preds = %entry
ret i32 %4
bb2: ; preds = %entry
-; Must restore sp from fp here
-; CHECK: mov sp, r7
-; CHECK: sub sp, #8
+; Must restore sp from fp here. Make sure not to leave sp in a temporarily invalid
+; state though. rdar://8465407
+; CHECK-NOT: mov sp, r7
+; CHECK: sub.w r4, r7, #8
+; CHECK: mov sp, r4
; CHECK: pop
ret i32 0
}
diff --git a/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll b/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll
new file mode 100644
index 0000000..313728c
--- /dev/null
+++ b/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll
@@ -0,0 +1,34 @@
+; rdar://8465407
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+
+%struct.buf = type opaque
+
+declare void @bar() nounwind optsize
+
+define void @foo() nounwind optsize {
+; CHECK: foo:
+; CHECK: push
+; CHECK: add r7, sp, #4
+; CHECK: sub sp, #4
+entry:
+ %m.i = alloca %struct.buf*, align 4
+ br label %bb
+
+bb:
+ br i1 undef, label %bb3, label %bb2
+
+bb2:
+ call void @bar() nounwind optsize
+ br i1 undef, label %bb, label %bb3
+
+bb3:
+ br i1 undef, label %return, label %bb
+
+return:
+; CHECK: %return
+; 'mov sp, r7' would have left sp in an invalid state
+; CHECK-NOT: mov sp, r7
+; CHECK-NOT: sub, sp, #4
+; CHECK: add sp, #4
+ ret void
+}