aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDale Johannesen <dalej@apple.com>2010-02-12 21:35:34 +0000
committerDale Johannesen <dalej@apple.com>2010-02-12 21:35:34 +0000
commitc12da8d30a1394847ee4608fcd54daa24b889b37 (patch)
treeb92f35058dbf699b7adadb2a616e7eaf9ac55995
parentfdfeb6976f07ad10d809b922ed7376ba2a3539be (diff)
downloadexternal_llvm-c12da8d30a1394847ee4608fcd54daa24b889b37.zip
external_llvm-c12da8d30a1394847ee4608fcd54daa24b889b37.tar.gz
external_llvm-c12da8d30a1394847ee4608fcd54daa24b889b37.tar.bz2
When saving/restoring CR in the prolog/epilog of a function with a large stack frame, the prolog/epilog code was using the same register for the copy of CR and the address of the save slot. Oops. This is fixed here for Darwin, sort of, by reserving R2 for this case. A better way would be to do the store before the decrement of SP, which is safe on Darwin due to the red zone. SVR4 probably has the same problem, but I don't know how to fix it; there is no red zone and R2 is already used for something else. I'm going to leave it to someone interested in that target. Better still would be to rewrite the CR-saving code completely; spilling each CR subregister individually is horrible code.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@96015 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.cpp44
-rw-r--r--test/CodeGen/PowerPC/2010-02-12-saveCR.ll30
2 files changed, 60 insertions, 14 deletions
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index af7d812..3db623a 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -421,22 +421,30 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
FrameIdx));
return true;
} else {
- // FIXME: We use R0 here, because it isn't available for RA. We need to
- // store the CR in the low 4-bits of the saved value. First, issue a MFCR
- // to save all of the CRBits.
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCR), PPC::R0));
+ // FIXME: We need a scratch reg here. The trouble with using R0 is that
+ // it's possible for the stack frame to be so big the save location is
+ // out of range of immediate offsets, necessitating another register.
+ // We hack this on Darwin by reserving R2. It's probably broken on Linux
+ // at the moment.
+
+ // We need to store the CR in the low 4-bits of the saved value. First,
+ // issue a MFCR to save all of the CRBits.
+ unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
+ PPC::R2 : PPC::R0;
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCR), ScratchReg));
// If the saved register wasn't CR0, shift the bits left so that they are
// in CR0's slot.
if (SrcReg != PPC::CR0) {
unsigned ShiftBits = PPCRegisterInfo::getRegisterNumbering(SrcReg)*4;
- // rlwinm r0, r0, ShiftBits, 0, 31.
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), PPC::R0)
- .addReg(PPC::R0).addImm(ShiftBits).addImm(0).addImm(31));
+ // rlwinm scratch, scratch, ShiftBits, 0, 31.
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
+ .addReg(ScratchReg).addImm(ShiftBits)
+ .addImm(0).addImm(31));
}
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
- .addReg(PPC::R0,
+ .addReg(ScratchReg,
getKillRegState(isKill)),
FrameIdx));
}
@@ -540,20 +548,28 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg),
FrameIdx));
} else if (RC == PPC::CRRCRegisterClass) {
- // FIXME: We use R0 here, because it isn't available for RA.
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), PPC::R0),
- FrameIdx));
+ // FIXME: We need a scratch reg here. The trouble with using R0 is that
+ // it's possible for the stack frame to be so big the save location is
+ // out of range of immediate offsets, necessitating another register.
+ // We hack this on Darwin by reserving R2. It's probably broken on Linux
+ // at the moment.
+ unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
+ PPC::R2 : PPC::R0;
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
+ ScratchReg), FrameIdx));
// If the reloaded register isn't CR0, shift the bits right so that they are
// in the right CR's slot.
if (DestReg != PPC::CR0) {
unsigned ShiftBits = PPCRegisterInfo::getRegisterNumbering(DestReg)*4;
// rlwinm r11, r11, 32-ShiftBits, 0, 31.
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), PPC::R0)
- .addReg(PPC::R0).addImm(32-ShiftBits).addImm(0).addImm(31));
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
+ .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0)
+ .addImm(31));
}
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg).addReg(PPC::R0));
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg)
+ .addReg(ScratchReg));
} else if (RC == PPC::CRBITRCRegisterClass) {
unsigned Reg = 0;
diff --git a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
new file mode 100644
index 0000000..b73382e
--- /dev/null
+++ b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -mtriple=powerpc-apple-darwin | FileCheck %s
+; ModuleID = 'hh.c'
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"
+target triple = "powerpc-apple-darwin9.6"
+; This formerly used R0 for both the stack address and CR.
+
+define void @foo() nounwind {
+entry:
+;CHECK: mfcr r2
+;CHECK: rlwinm r2, r2, 8, 0, 31
+;CHECK: lis r0, 1
+;CHECK: ori r0, r0, 34540
+;CHECK: stwx r2, r1, r0
+ %x = alloca [100000 x i8] ; <[100000 x i8]*> [#uses=1]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ %x1 = bitcast [100000 x i8]* %x to i8* ; <i8*> [#uses=1]
+ call void @bar(i8* %x1) nounwind
+ call void asm sideeffect "", "~{cr2}"() nounwind
+ br label %return
+
+return: ; preds = %entry
+;CHECK: lis r0, 1
+;CHECK: ori r0, r0, 34540
+;CHECK: lwzx r2, r1, r0
+;CHECK: rlwinm r2, r2, 24, 0, 31
+;CHECK: mtcrf 32, r2
+ ret void
+}
+
+declare void @bar(i8*)