aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTim Northover <Tim.Northover@arm.com>2013-04-21 11:57:07 +0000
committerTim Northover <Tim.Northover@arm.com>2013-04-21 11:57:07 +0000
commit4cc1407b84dc6dbbc0d62b1d1b8db7c0ddec86cc (patch)
tree9bff7608272aef75d3b7f50402d567ee277424dc
parent7aad829243dd0b14515e556f87bfada139c56f81 (diff)
downloadexternal_llvm-4cc1407b84dc6dbbc0d62b1d1b8db7c0ddec86cc.zip
external_llvm-4cc1407b84dc6dbbc0d62b1d1b8db7c0ddec86cc.tar.gz
external_llvm-4cc1407b84dc6dbbc0d62b1d1b8db7c0ddec86cc.tar.bz2
ARM: Use ldrd/strd to spill 64-bit pairs when available.
This allows common sp-offsets to be part of the instruction and is probably faster on modern CPUs too. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179977 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp55
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.h4
-rw-r--r--lib/Target/ARM/Thumb2InstrInfo.cpp84
-rw-r--r--test/CodeGen/ARM/gpr-paired-spill.ll40
4 files changed, 133 insertions, 50 deletions
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 7a8077e..0d1417d 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -747,10 +747,10 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Mov->addRegisterKilled(SrcReg, TRI);
}
-static const
-MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB,
- unsigned Reg, unsigned SubIdx, unsigned State,
- const TargetRegisterInfo *TRI) {
+const MachineInstrBuilder &
+ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
+ unsigned SubIdx, unsigned State,
+ const TargetRegisterInfo *TRI) const {
if (!SubIdx)
return MIB.addReg(Reg, State);
@@ -795,12 +795,22 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
} else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA))
- .addFrameIndex(FI))
- .addMemOperand(MMO);
- MIB = AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
- AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
+ if (Subtarget.hasV5TEOps()) {
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD));
+ AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
+ AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
+ MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);
+
+ AddDefaultPred(MIB);
+ } else {
+ // Fallback to STM instruction, which has existed since the dawn of
+ // time.
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA))
+ .addFrameIndex(FI).addMemOperand(MMO));
+ AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
+ AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
+ }
} else
llvm_unreachable("Unknown reg class!");
break;
@@ -948,7 +958,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
MachineFrameInfo &MFI = *MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
MachineMemOperand *MMO =
@@ -975,12 +984,24 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
} else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
- unsigned LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA : ARM::LDMIA;
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(LdmOpc))
- .addFrameIndex(FI).addMemOperand(MMO));
- MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
+ MachineInstrBuilder MIB;
+
+ if (Subtarget.hasV5TEOps()) {
+ MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
+ AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
+ AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
+ MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);
+
+ AddDefaultPred(MIB);
+ } else {
+ // Fallback to LDM instruction, which has existed since the dawn of
+ // time.
+ MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDMIA))
+ .addFrameIndex(FI).addMemOperand(MMO));
+ MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
+ }
+
if (TargetRegisterInfo::isPhysicalRegister(DestReg))
MIB.addReg(DestReg, RegState::ImplicitDefine);
} else
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 7c107bb..2ef659c 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -141,6 +141,10 @@ public:
MachineInstr *commuteInstruction(MachineInstr*, bool=false) const;
+ const MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
+ unsigned SubIdx, unsigned State,
+ const TargetRegisterInfo *TRI) const;
+
virtual bool produceSameValue(const MachineInstr *MI0,
const MachineInstr *MI1,
const MachineRegisterInfo *MRI) const;
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 67e8ec7..a1b48c2 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/CommandLine.h"
@@ -126,25 +127,41 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+
if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass ||
RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
RC == &ARM::GPRnopcRegClass) {
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
-
- MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
- MachineMemOperand::MOStore,
- MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2STRi12))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
return;
}
+ if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
+ // Thumb2 STRD expects its dest-registers to be in rGPR. Not a problem for
+ // gsub_0, but needs an extra constraint for gsub_1 (which could be sp
+ // otherwise).
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+ MRI->constrainRegClass(SrcReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass);
+
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2STRDi8));
+ AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
+ AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
+ MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ AddDefaultPred(MIB);
+ return;
+ }
+
ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC, TRI);
}
@@ -153,24 +170,42 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass ||
RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
RC == &ARM::GPRnopcRegClass) {
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
-
- MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
- MachineMemOperand::MOLoad,
- MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
return;
}
+ if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
+ // Thumb2 LDRD expects its dest-registers to be in rGPR. Not a problem for
+ // gsub_0, but needs an extra constraint for gsub_1 (which could be sp
+ // otherwise).
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+ MRI->constrainRegClass(DestReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass);
+
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2LDRDi8));
+ AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
+ AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
+ MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ AddDefaultPred(MIB);
+
+ if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+ MIB.addReg(DestReg, RegState::ImplicitDefine);
+ return;
+ }
+
ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI);
}
@@ -514,6 +549,15 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
Offset = -Offset;
isSub = true;
}
+ } else if (AddrMode == ARMII::AddrModeT2_i8s4) {
+ Offset += MI.getOperand(FrameRegIdx + 1).getImm() * 4;
+ NumBits = 8;
+ // MCInst operand has already scaled value.
+ Scale = 1;
+ if (Offset < 0) {
+ isSub = true;
+ Offset = -Offset;
+ }
} else {
llvm_unreachable("Unsupported addressing mode!");
}
diff --git a/test/CodeGen/ARM/gpr-paired-spill.ll b/test/CodeGen/ARM/gpr-paired-spill.ll
index 400d69d..8608832 100644
--- a/test/CodeGen/ARM/gpr-paired-spill.ll
+++ b/test/CodeGen/ARM/gpr-paired-spill.ll
@@ -1,29 +1,43 @@
-; RUN: llc -mtriple=armv7-none-linux-gnueabi < %s | FileCheck %s
+; RUN: llc -mtriple=armv7-none-linux-gnueabi -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-WITH-LDRD
+; RUN: llc -mtriple=armv4-none-linux-gnueabi -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-WITHOUT-LDRD
+; RUN: llc -mtriple=thumbv7-none-linux-gnueabi -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-WITH-LDRD
+; RUN: llc -mtriple=thumbv7-none-linux-gnueabi -debug -o /dev/null < %s 2>&1 | FileCheck %s --check-prefix=INSTRS-ARE-THUMB
define void @foo(i64* %addr) {
- %val1 = tail call i64 asm sideeffect "ldrd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
- %val2 = tail call i64 asm sideeffect "ldrd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
- %val3 = tail call i64 asm sideeffect "ldrd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
- %val4 = tail call i64 asm sideeffect "ldrd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
- %val5 = tail call i64 asm sideeffect "ldrd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
- %val6 = tail call i64 asm sideeffect "ldrd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
- %val7 = tail call i64 asm sideeffect "ldrd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
+ %val1 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
+ %val2 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
+ %val3 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
+ %val4 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
+ %val5 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
+ %val6 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
+ %val7 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
; Key point is that enough 64-bit paired GPR values are live that
; one of them has to be spilled. This used to cause an abort because
; an LDMIA was created with both a FrameIndex and an offset, which
; is not allowed.
+; CHECK-WITH-LDRD: strd {{r[0-9]+}}, {{r[0-9]+}}, [sp, #8]
+; CHECK-WITH-LDRD: strd {{r[0-9]+}}, {{r[0-9]+}}, [sp]
+
+; CHECK-WITH-LDRD: ldrd {{r[0-9]+}}, {{r[0-9]+}}, [sp, #8]
+; CHECK-WITH-LDRD: ldrd {{r[0-9]+}}, {{r[0-9]+}}, [sp]
+
; We also want to ensure the register scavenger is working (i.e. an
; offset from sp can be generated), so we need two spills.
-; CHECK: add [[ADDRREG:[a-z0-9]+]], sp, #{{[0-9]+}}
-; CHECK: stm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}}
-; CHECK: stm sp, {r{{[0-9]+}}, r{{[0-9]+}}}
+; CHECK-WITHOUT-LDRD: add [[ADDRREG:[a-z0-9]+]], sp, #{{[0-9]+}}
+; CHECK-WITHOUT-LDRD: stm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}}
+; CHECK-WITHOUT-LDRD: stm sp, {r{{[0-9]+}}, r{{[0-9]+}}}
; In principle LLVM may have to recalculate the offset. At the moment
; it reuses the original though.
-; CHECK: ldm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}}
-; CHECK: ldm sp, {r{{[0-9]+}}, r{{[0-9]+}}}
+; CHECK-WITHOUT-LDRD: ldm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}}
+; CHECK-WITHOUT-LDRD: ldm sp, {r{{[0-9]+}}, r{{[0-9]+}}}
+
+ ; Make sure we are actually creating the Thumb versions of the spill
+ ; instructions.
+; INSTRS-ARE-THUMB: t2STRDi8
+; INSTRS-ARE-THUMB: t2LDRDi8
store volatile i64 %val1, i64* %addr
store volatile i64 %val2, i64* %addr