From ab5c703fdb0b07c6d89271519fd1cca7105b3eae Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Mon, 22 Nov 2010 18:12:04 +0000 Subject: Fix epilogue codegen to avoid leaving the stack pointer in an invalid state. Previously Thumb2 would restore sp from fp like this: mov sp, r7 sub, sp, #4 If an interrupt is taken after the 'mov' but before the 'sub', callee-saved registers might be clobbered by the interrupt handler. Instead, try restoring directly from sp: add sp, #4 Or, if necessary (with VLA, etc.) use a scratch register to compute sp and then restore it: sub.w r4, r7, #8 mov sp, r7 rdar://8465407 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@119977 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMBaseRegisterInfo.cpp | 7 +++-- lib/Target/ARM/ARMFrameInfo.cpp | 33 ++++++++++++++++----- lib/Target/ARM/Thumb1FrameInfo.cpp | 33 ++++++++++++--------- test/CodeGen/ARM/hello.ll | 2 +- test/CodeGen/Thumb/dyn-stackalloc.ll | 23 ++++++++++++--- test/CodeGen/Thumb/large-stack.ll | 8 ++--- test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll | 9 ++++-- .../CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll | 12 ++++++-- test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll | 34 ++++++++++++++++++++++ 9 files changed, 124 insertions(+), 37 deletions(-) create mode 100644 test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 171b323..42159c9 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -694,10 +694,11 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, MachineFrameInfo *MFI = MF.getFrameInfo(); // Spill R4 if Thumb2 function requires stack realignment - it will be used as - // scratch register. + // scratch register. Also spill R4 if Thumb2 function has varsized objects, + // since it's always posible to restore sp from fp in a single instruction. // FIXME: It will be better just to find spare register here. - if (needsStackRealignment(MF) && - AFI->isThumb2Function()) + if (AFI->isThumb2Function() && + (MFI->hasVarSizedObjects() || needsStackRealignment(MF))) MF.getRegInfo().setPhysRegUsed(ARM::R4); // Spill LR if Thumb1 function uses variable length argument lists. diff --git a/lib/Target/ARM/ARMFrameInfo.cpp b/lib/Target/ARM/ARMFrameInfo.cpp index 7766b1f..62f3f98 100644 --- a/lib/Target/ARM/ARMFrameInfo.cpp +++ b/lib/Target/ARM/ARMFrameInfo.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; @@ -212,15 +213,21 @@ void ARMFrameInfo::emitPrologue(MachineFunction &MF) const { if (NumBytes) { // Adjust SP after all the callee-save spills. emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes); - if (HasFP) + if (HasFP && isARM) + // Restore from fp only in ARM mode: e.g. sub sp, r7, #24 + // Note it's not safe to do this in Thumb2 mode because it would have + // taken two instructions: + // mov sp, r7 + // sub sp, #24 + // If an interrupt is taken between the two instructions, then sp is in + // an inconsistent state (pointing to the middle of callee-saved area). + // The interrupt handler can end up clobbering the registers. AFI->setShouldRestoreSPFromFP(true); } - if (STI.isTargetELF() && hasFP(MF)) { + if (STI.isTargetELF() && hasFP(MF)) MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - AFI->getFramePtrSpillOffset()); - AFI->setShouldRestoreSPFromFP(true); - } AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); @@ -275,7 +282,7 @@ void ARMFrameInfo::emitPrologue(MachineFunction &MF) const { // If the frame has variable sized objects then the epilogue must restore // the sp from fp. - if (!AFI->shouldRestoreSPFromFP() && MFI->hasVarSizedObjects()) + if (MFI->hasVarSizedObjects()) AFI->setShouldRestoreSPFromFP(true); } @@ -326,9 +333,21 @@ void ARMFrameInfo::emitEpilogue(MachineFunction &MF, if (isARM) emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes, ARMCC::AL, 0, TII); - else - emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes, + else { + // It's not possible to restore SP from FP in a single instruction. + // For Darwin, this looks like: + // mov sp, r7 + // sub sp, #24 + // This is bad, if an interrupt is taken after the mov, sp is in an + // inconsistent state. + // Use the first callee-saved register as a scratch register. + assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) && + "No scratch register to restore SP from FP!"); + emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, ARMCC::AL, 0, TII); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP) + .addReg(ARM::R4); + } } else { // Thumb2 or ARM. if (isARM) diff --git a/lib/Target/ARM/Thumb1FrameInfo.cpp b/lib/Target/ARM/Thumb1FrameInfo.cpp index 6a3c1e1..bb1faf5 100644 --- a/lib/Target/ARM/Thumb1FrameInfo.cpp +++ b/lib/Target/ARM/Thumb1FrameInfo.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" using namespace llvm; @@ -117,13 +118,6 @@ void Thumb1FrameInfo::emitPrologue(MachineFunction &MF) const { dl = MBBI->getDebugLoc(); } - // Adjust FP so it point to the stack slot that contains the previous FP. - if (hasFP(MF)) { - BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) - .addFrameIndex(FramePtrSpillFI).addImm(0); - AFI->setShouldRestoreSPFromFP(true); - } - // Determine starting offsets of spill areas. unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; @@ -132,12 +126,21 @@ void Thumb1FrameInfo::emitPrologue(MachineFunction &MF) const { AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); - NumBytes = DPRCSOffset; - if (NumBytes) { + + // Adjust FP so it point to the stack slot that contains the previous FP. + if (hasFP(MF)) { + BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) + .addFrameIndex(FramePtrSpillFI).addImm(0); + if (NumBytes > 7) + // If offset is > 7 then sp cannot be adjusted in a single instruction, + // try restoring from fp instead. + AFI->setShouldRestoreSPFromFP(true); + } + + if (NumBytes) // Insert it after all the callee-save spills. emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes); - } if (STI.isTargetELF() && hasFP(MF)) MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - @@ -219,10 +222,14 @@ void Thumb1FrameInfo::emitEpilogue(MachineFunction &MF, NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; // Reset SP based on frame pointer only if the stack frame extends beyond // frame pointer stack slot or target is ELF and the function has FP. - if (NumBytes) - emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, FramePtr, -NumBytes, + if (NumBytes) { + assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) && + "No scratch register to restore SP from FP!"); + emitThumbRegPlusImmediate(MBB, MBBI, ARM::R4, FramePtr, -NumBytes, TII, *RegInfo, dl); - else + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP) + .addReg(ARM::R4); + } else BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP) .addReg(FramePtr); } else { diff --git a/test/CodeGen/ARM/hello.ll b/test/CodeGen/ARM/hello.ll index ccdc7bf..bfed7a6 100644 --- a/test/CodeGen/ARM/hello.ll +++ b/test/CodeGen/ARM/hello.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=arm ; RUN: llc < %s -mtriple=arm-linux-gnueabi | grep mov | count 1 ; RUN: llc < %s -mtriple=arm-linux-gnu --disable-fp-elim | \ -; RUN: grep mov | count 3 +; RUN: grep mov | count 2 ; RUN: llc < %s -mtriple=arm-apple-darwin | grep mov | count 2 @str = internal constant [12 x i8] c"Hello World\00" diff --git a/test/CodeGen/Thumb/dyn-stackalloc.ll b/test/CodeGen/Thumb/dyn-stackalloc.ll index 5c8ad97..1f31dca 100644 --- a/test/CodeGen/Thumb/dyn-stackalloc.ll +++ b/test/CodeGen/Thumb/dyn-stackalloc.ll @@ -1,12 +1,15 @@ -; RUN: llc < %s -march=thumb | not grep {ldr sp} -; RUN: llc < %s -mtriple=thumb-apple-darwin | \ -; RUN: not grep {sub.*r7} -; RUN: llc < %s -march=thumb | grep {mov.*r6, sp} +; RUN: llc < %s -mtriple=thumb-apple-darwin | FileCheck %s %struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* } %struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* } define void @t1(%struct.state* %v) { +; CHECK: t1: +; CHECK: push +; CHECK: add r7, sp, #12 +; CHECK: mov r2, sp +; CHECK: subs r4, r2, r1 +; CHECK: mov sp, r4 %tmp6 = load i32* null %tmp8 = alloca float, i32 %tmp6 store i32 1, i32* null @@ -34,6 +37,18 @@ declare fastcc void @f2(float*, float*, float*, i32) @str215 = external global [2 x i8] define void @t2(%struct.comment* %vc, i8* %tag, i8* %contents) { +; CHECK: t2: +; CHECK: push +; CHECK: add r7, sp, #12 +; CHECK: sub sp, #8 +; CHECK: mov r6, sp +; CHECK: str r2, [r6, #4] +; CHECK: str r0, [r6] +; CHECK-NOT: ldr r0, [sp +; CHECK: ldr r0, [r6, #4] +; CHECK: mov r0, sp +; CHECK: subs r5, r0, r1 +; CHECK: mov sp, r5 %tmp1 = call i32 @strlen( i8* %tag ) %tmp3 = call i32 @strlen( i8* %contents ) %tmp4 = add i32 %tmp1, 2 diff --git a/test/CodeGen/Thumb/large-stack.ll b/test/CodeGen/Thumb/large-stack.ll index b289484..f4dd3e0 100644 --- a/test/CodeGen/Thumb/large-stack.ll +++ b/test/CodeGen/Thumb/large-stack.ll @@ -12,8 +12,8 @@ define void @test2() { ; CHECK: test2: ; CHECK: ldr r0, LCPI ; CHECK: add sp, r0 -; CHECK: mov sp, r7 -; CHECK: sub sp, #4 +; CHECK: subs r4, r7, #4 +; CHECK: mov sp, r4 %tmp = alloca [ 4168 x i8 ] , align 4 ret void } @@ -24,8 +24,8 @@ define i32 @test3() { ; CHECK: add sp, r2 ; CHECK: ldr r1, LCPI ; CHECK: add r1, sp -; CHECK: mov sp, r7 -; CHECK: sub sp, #4 +; CHECK: subs r4, r7, #4 +; CHECK: mov sp, r4 %retval = alloca i32, align 4 %tmp = alloca i32, align 4 %a = alloca [805306369 x i8], align 16 diff --git a/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll b/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll index f26c6d1..550b3ef 100644 --- a/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll +++ b/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll @@ -5,8 +5,13 @@ define hidden i32 @__gcov_execlp(i8* %path, i8* %arg, ...) nounwind { entry: ; CHECK: __gcov_execlp: -; CHECK: mov sp, r7 -; CHECK: sub sp, #4 +; CHECK: sub sp, #8 +; CHECK: push +; CHECK: add r7, sp, #4 +; CHECK: subs r4, r7, #4 +; CHECK: mov sp, r4 +; CHECK-NOT: mov sp, r7 +; CHECK: add sp, #8 call void @__gcov_flush() nounwind br i1 undef, label %bb5, label %bb diff --git a/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll b/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll index abcf13a..41f7f29 100644 --- a/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll +++ b/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll @@ -5,6 +5,10 @@ define internal fastcc i32 @Callee(i32 %i) nounwind { entry: ; CHECK: Callee: +; CHECK: push +; CHECK: mov r4, sp +; CHECK: sub.w r12, r4, #1000 +; CHECK: mov sp, r12 %0 = icmp eq i32 %i, 0 ; [#uses=1] br i1 %0, label %bb2, label %bb @@ -17,9 +21,11 @@ bb: ; preds = %entry ret i32 %4 bb2: ; preds = %entry -; Must restore sp from fp here -; CHECK: mov sp, r7 -; CHECK: sub sp, #8 +; Must restore sp from fp here. Make sure not to leave sp in a temporarily invalid +; state though. rdar://8465407 +; CHECK-NOT: mov sp, r7 +; CHECK: sub.w r4, r7, #8 +; CHECK: mov sp, r4 ; CHECK: pop ret i32 0 } diff --git a/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll b/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll new file mode 100644 index 0000000..313728c --- /dev/null +++ b/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll @@ -0,0 +1,34 @@ +; rdar://8465407 +; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s + +%struct.buf = type opaque + +declare void @bar() nounwind optsize + +define void @foo() nounwind optsize { +; CHECK: foo: +; CHECK: push +; CHECK: add r7, sp, #4 +; CHECK: sub sp, #4 +entry: + %m.i = alloca %struct.buf*, align 4 + br label %bb + +bb: + br i1 undef, label %bb3, label %bb2 + +bb2: + call void @bar() nounwind optsize + br i1 undef, label %bb, label %bb3 + +bb3: + br i1 undef, label %return, label %bb + +return: +; CHECK: %return +; 'mov sp, r7' would have left sp in an invalid state +; CHECK-NOT: mov sp, r7 +; CHECK-NOT: sub, sp, #4 +; CHECK: add sp, #4 + ret void +} -- cgit v1.1