Diffstat (limited to 'lib/Target')
 lib/Target/ARM/ARM.h                         |   8
 lib/Target/ARM/ARMAsmBackend.cpp             |  17
 lib/Target/ARM/ARMBaseRegisterInfo.cpp       |   6
 lib/Target/ARM/ARMISelLowering.cpp           |  10
 lib/Target/ARM/ARMInstrInfo.td               |  21
 lib/Target/ARM/ARMInstrThumb.td              |  19
 lib/Target/ARM/ARMInstrThumb2.td             |  68
 lib/Target/ARM/ARMMachObjectWriter.cpp       |  32
 lib/Target/ARM/ARMRegisterInfo.td            | 249
 lib/Target/ARM/CMakeLists.txt                |   1
 lib/Target/Blackfin/BlackfinRegisterInfo.td  |  12
 lib/Target/CBackend/CBackend.cpp             | 174
 lib/Target/CppBackend/CPPBackend.cpp         |  11
 lib/Target/Mips/MipsAsmPrinter.cpp           |  16
 lib/Target/Mips/MipsCallingConv.td           |   4
 lib/Target/Mips/MipsISelDAGToDAG.cpp         |  12
 lib/Target/Mips/MipsISelLowering.cpp         | 108
 lib/Target/Mips/MipsISelLowering.h           |   4
 lib/Target/Mips/MipsInstrInfo.td             |  16
 lib/Target/Mips/MipsMachineFunction.h        |  18
 lib/Target/Mips/MipsRegisterInfo.cpp         |  10
 lib/Target/PTX/PTX.td                        |   4
 lib/Target/PTX/PTXAsmPrinter.cpp             |  25
 lib/Target/PTX/PTXInstrInfo.cpp              |  71
 lib/Target/PTX/PTXInstrInfo.h                |  23
 lib/Target/PTX/PTXInstrInfo.td               | 123
 lib/Target/PTX/PTXRegisterInfo.cpp           |  27
 lib/Target/PTX/PTXRegisterInfo.h             |   6
 lib/Target/PowerPC/PPCISelDAGToDAG.cpp       |  37
 lib/Target/PowerPC/PPCISelLowering.cpp       |  14
 lib/Target/PowerPC/PPCRegisterInfo.cpp       |   3
 lib/Target/TargetLoweringObjectFile.cpp      |   6
 lib/Target/TargetRegisterInfo.cpp            |   6
 lib/Target/X86/X86ISelLowering.cpp           |   7
 lib/Target/X86/X86InstrInfo.cpp              |   6
 lib/Target/X86/X86InstrSSE.td                |  16
 lib/Target/X86/X86RegisterInfo.cpp           |  18
 lib/Target/X86/X86RegisterInfo.h             |   6
 lib/Target/X86/X86RegisterInfo.td            |  73
 lib/Target/XCore/XCoreAsmPrinter.cpp         |   2
 40 files changed, 728 insertions(+), 561 deletions(-)
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 4679f74..8f77b04 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -16,6 +16,7 @@
#define TARGET_ARM_H
#include "ARMBaseInfo.h"
+#include "llvm/Support/DataTypes.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
@@ -27,6 +28,7 @@ class FunctionPass;
class JITCodeEmitter;
class formatted_raw_ostream;
class MCCodeEmitter;
+class MCObjectWriter;
class TargetAsmBackend;
class MachineInstr;
class ARMAsmPrinter;
@@ -58,6 +60,12 @@ extern Target TheARMTarget, TheThumbTarget;
void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
ARMAsmPrinter &AP);
+/// createARMMachObjectWriter - Construct an ARM Mach-O object writer.
+MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype);
+
} // end namespace llvm;
#endif
diff --git a/lib/Target/ARM/ARMAsmBackend.cpp b/lib/Target/ARM/ARMAsmBackend.cpp
index db132da..79e9897 100644
--- a/lib/Target/ARM/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/ARMAsmBackend.cpp
@@ -28,14 +28,6 @@
using namespace llvm;
namespace {
-class ARMMachObjectWriter : public MCMachObjectTargetWriter {
-public:
- ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType,
- uint32_t CPUSubtype)
- : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
- /*UseAggressiveSymbolFolding=*/true) {}
-};
-
class ARMELFObjectWriter : public MCELFObjectTargetWriter {
public:
ARMELFObjectWriter(Triple::OSType OSType)
@@ -423,12 +415,9 @@ public:
: ARMAsmBackend(T), Subtype(st) { }
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createMachObjectWriter(new ARMMachObjectWriter(
- /*Is64Bit=*/false,
- object::mach::CTM_ARM,
- Subtype),
- OS,
- /*IsLittleEndian=*/true);
+ return createARMMachObjectWriter(OS, /*Is64Bit=*/false,
+ object::mach::CTM_ARM,
+ Subtype);
}
void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 2adcd2c..9dc51b8 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -100,6 +100,12 @@ getReservedRegs(const MachineFunction &MF) const {
// Some targets reserve R9.
if (STI.isR9Reserved())
Reserved.set(ARM::R9);
+ // Reserve D16-D31 if the subtarget doesn't support them.
+ if (!STI.hasVFP3() || STI.hasD16()) {
+ assert(ARM::D31 == ARM::D16 + 15);
+ for (unsigned i = 0; i != 16; ++i)
+ Reserved.set(ARM::D16 + i);
+ }
return Reserved;
}
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 7c44c10..4ae4af1 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -5587,7 +5587,6 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
// Create VPADDL node.
SelectionDAG &DAG = DCI.DAG;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- DebugLoc DL = N->getDebugLoc();
// Build operand list.
SmallVector<SDValue, 8> Ops;
@@ -7380,9 +7379,12 @@ ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
case 'l': return C_RegisterClass;
case 'w': return C_RegisterClass;
}
- } else {
- if (Constraint == "Uv")
- return C_Memory;
+ } else if (Constraint.size() == 2) {
+ switch (Constraint[0]) {
+ default: break;
+ // All 'U+' constraints are addresses.
+ case 'U': return C_Memory;
+ }
}
return TargetLowering::getConstraintType(Constraint);
}
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 2537fc3..5c013de 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -3008,35 +3008,22 @@ def REV : AMiscA1I<0b01101011, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
IIC_iUNAr, "rev", "\t$Rd, $Rm",
[(set GPR:$Rd, (bswap GPR:$Rm))]>, Requires<[IsARM, HasV6]>;
+let AddedComplexity = 5 in
def REV16 : AMiscA1I<0b01101011, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
IIC_iUNAr, "rev16", "\t$Rd, $Rm",
- [(set GPR:$Rd,
- (or (and (srl GPR:$Rm, (i32 8)), 0xFF),
- (or (and (shl GPR:$Rm, (i32 8)), 0xFF00),
- (or (and (srl GPR:$Rm, (i32 8)), 0xFF0000),
- (and (shl GPR:$Rm, (i32 8)), 0xFF000000)))))]>,
+ [(set GPR:$Rd, (rotr (bswap GPR:$Rm), (i32 16)))]>,
Requires<[IsARM, HasV6]>;
+let AddedComplexity = 5 in
def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
IIC_iUNAr, "revsh", "\t$Rd, $Rm",
- [(set GPR:$Rd,
- (sext_inreg
- (or (srl GPR:$Rm, (i32 8)),
- (shl GPR:$Rm, (i32 8))), i16))]>,
+ [(set GPR:$Rd, (sra (bswap GPR:$Rm), (i32 16)))]>,
Requires<[IsARM, HasV6]>;
-def : ARMV6Pat<(sext_inreg (or (srl (and GPR:$Rm, 0xFF00), (i32 8)),
- (shl GPR:$Rm, (i32 8))), i16),
- (REVSH GPR:$Rm)>;
-
def : ARMV6Pat<(or (sra (shl GPR:$Rm, (i32 24)), (i32 16)),
(and (srl GPR:$Rm, (i32 8)), 0xFF)),
(REVSH GPR:$Rm)>;
-// Need the AddedComplexity or else MOVs + REV would be chosen.
-let AddedComplexity = 5 in
-def : ARMV6Pat<(sra (bswap GPR:$Rm), (i32 16)), (REVSH GPR:$Rm)>;
-
def lsl_shift_imm : SDNodeXForm<imm, [{
unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::lsl, N->getZExtValue());
return CurDAG->getTargetConstant(Sh, MVT::i32);
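[Aside, not part of the patch: the rewritten REV16/REVSH patterns here, and their Thumb/Thumb2 twins below, rest on two identities: rev16(x) == rotr(bswap(x), 16) and revsh(x) == sra(bswap(x), 16). A standalone C check of both, with helper names made up for illustration:]

#include <assert.h>
#include <stdint.h>

/* Byte-reverse a 32-bit word, as the bswap DAG node does. */
static uint32_t bswap32(uint32_t x) {
  return (x >> 24) | ((x >> 8) & 0xFF00u) | ((x << 8) & 0xFF0000u) | (x << 24);
}
/* Rotate right, as the rotr DAG node does (n must be 1..31 here). */
static uint32_t rotr32(uint32_t x, unsigned n) {
  return (x >> n) | (x << (32u - n));
}

int main(void) {
  uint32_t x = 0x11223344u;
  /* REV16 swaps the bytes within each halfword. */
  assert(rotr32(bswap32(x), 16) == 0x22114433u);
  /* REVSH byte-swaps the low halfword and sign-extends it; >> on a
     negative signed value is arithmetic on the targets LLVM supports. */
  assert(((int32_t)bswap32(x) >> 16) == 0x4433);
  assert(((int32_t)bswap32(0x000080FFu) >> 16) == -128); /* 0xFF80 -> -128 */
  return 0;
}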
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 8430aa3..44fbc02 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -1176,31 +1176,16 @@ def tREV16 : // A8.6.135
T1pIMiscEncode<{1,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
IIC_iUNAr,
"rev16", "\t$Rd, $Rm",
- [(set tGPR:$Rd,
- (or (and (srl tGPR:$Rm, (i32 8)), 0xFF),
- (or (and (shl tGPR:$Rm, (i32 8)), 0xFF00),
- (or (and (srl tGPR:$Rm, (i32 8)), 0xFF0000),
- (and (shl tGPR:$Rm, (i32 8)), 0xFF000000)))))]>,
+ [(set tGPR:$Rd, (rotr (bswap tGPR:$Rm), (i32 16)))]>,
Requires<[IsThumb, IsThumb1Only, HasV6]>;
def tREVSH : // A8.6.136
T1pIMiscEncode<{1,0,1,0,1,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
IIC_iUNAr,
"revsh", "\t$Rd, $Rm",
- [(set tGPR:$Rd,
- (sext_inreg
- (or (srl tGPR:$Rm, (i32 8)),
- (shl tGPR:$Rm, (i32 8))), i16))]>,
+ [(set tGPR:$Rd, (sra (bswap tGPR:$Rm), (i32 16)))]>,
Requires<[IsThumb, IsThumb1Only, HasV6]>;
-def : T1Pat<(sext_inreg (or (srl (and tGPR:$Rm, 0xFF00), (i32 8)),
- (shl tGPR:$Rm, (i32 8))), i16),
- (tREVSH tGPR:$Rm)>,
- Requires<[IsThumb, IsThumb1Only, HasV6]>;
-
-def : T1Pat<(sra (bswap tGPR:$Rm), (i32 16)), (tREVSH tGPR:$Rm)>,
- Requires<[IsThumb, IsThumb1Only, HasV6]>;
-
// Rotate right register
def tROR : // A8.6.139
T1sItDPEncode<0b0111, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 53b9cec..090670b 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -1206,29 +1206,6 @@ def t2SUBrSPs : T2sTwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, t2_so_reg:$imm),
}
} // end isCodeGenOnly = 1
-// Signed and unsigned division on v7-M
-def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
- "sdiv", "\t$Rd, $Rn, $Rm",
- [(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>,
- Requires<[HasDivide, IsThumb2]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-21} = 0b011100;
- let Inst{20} = 0b1;
- let Inst{15-12} = 0b1111;
- let Inst{7-4} = 0b1111;
-}
-
-def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
- "udiv", "\t$Rd, $Rn, $Rm",
- [(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>,
- Requires<[HasDivide, IsThumb2]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-21} = 0b011101;
- let Inst{20} = 0b1;
- let Inst{15-12} = 0b1111;
- let Inst{7-4} = 0b1111;
-}
-
//===----------------------------------------------------------------------===//
// Load / store Instructions.
//
@@ -2560,6 +2537,32 @@ def t2SMLSLDX : T2FourReg_mac<1, 0b101, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
"\t$Ra, $Rd, $Rm, $Rn", []>;
//===----------------------------------------------------------------------===//
+// Division Instructions.
+// Signed and unsigned division on v7-M
+//
+def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
+ "sdiv", "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>,
+ Requires<[HasDivide, IsThumb2]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-21} = 0b011100;
+ let Inst{20} = 0b1;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = 0b1111;
+}
+
+def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
+ "udiv", "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>,
+ Requires<[HasDivide, IsThumb2]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-21} = 0b011101;
+ let Inst{20} = 0b1;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = 0b1111;
+}
+
+//===----------------------------------------------------------------------===//
// Misc. Arithmetic Instructions.
//
@@ -2587,29 +2590,16 @@ def t2REV : T2I_misc<0b01, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
def t2REV16 : T2I_misc<0b01, 0b01, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
"rev16", ".w\t$Rd, $Rm",
- [(set rGPR:$Rd,
- (or (and (srl rGPR:$Rm, (i32 8)), 0xFF),
- (or (and (shl rGPR:$Rm, (i32 8)), 0xFF00),
- (or (and (srl rGPR:$Rm, (i32 8)), 0xFF0000),
- (and (shl rGPR:$Rm, (i32 8)), 0xFF000000)))))]>;
+ [(set rGPR:$Rd, (rotr (bswap rGPR:$Rm), (i32 16)))]>;
def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
"revsh", ".w\t$Rd, $Rm",
- [(set rGPR:$Rd,
- (sext_inreg
- (or (srl rGPR:$Rm, (i32 8)),
- (shl rGPR:$Rm, (i32 8))), i16))]>;
-
-def : T2Pat<(sext_inreg (or (srl (and rGPR:$Rm, 0xFF00), (i32 8)),
- (shl rGPR:$Rm, (i32 8))), i16),
- (t2REVSH rGPR:$Rm)>;
+ [(set rGPR:$Rd, (sra (bswap rGPR:$Rm), (i32 16)))]>;
def : T2Pat<(or (sra (shl rGPR:$Rm, (i32 24)), (i32 16)),
- (and (srl rGPR:$Rm, (i32 8)), 0xFF)),
+ (and (srl rGPR:$Rm, (i32 8)), 0xFF)),
(t2REVSH rGPR:$Rm)>;
-def : T2Pat<(sra (bswap rGPR:$Rm), (i32 16)), (t2REVSH rGPR:$Rm)>;
-
def t2PKHBT : T2ThreeReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, shift_imm:$sh),
IIC_iBITsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh",
diff --git a/lib/Target/ARM/ARMMachObjectWriter.cpp b/lib/Target/ARM/ARMMachObjectWriter.cpp
new file mode 100644
index 0000000..4c35d0b
--- /dev/null
+++ b/lib/Target/ARM/ARMMachObjectWriter.cpp
@@ -0,0 +1,32 @@
+//===-- ARMMachObjectWriter.cpp - ARM Mach Object Writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+using namespace llvm;
+
+namespace {
+class ARMMachObjectWriter : public MCMachObjectTargetWriter {
+public:
+ ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType,
+ uint32_t CPUSubtype)
+ : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
+ /*UseAggressiveSymbolFolding=*/true) {}
+};
+}
+
+MCObjectWriter *llvm::createARMMachObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype) {
+ return createMachObjectWriter(new ARMMachObjectWriter(Is64Bit,
+ CPUType,
+ CPUSubtype),
+ OS, /*IsLittleEndian=*/true);
+}
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index f4fbae3..7741410 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -202,42 +202,14 @@ def FPEXC : ARMReg<8, "fpexc">;
//
def GPR : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12),
SP, LR, PC)> {
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- static const unsigned ARM_GPR_AO[] = {
- ARM::R0, ARM::R1, ARM::R2, ARM::R3,
- ARM::R12,ARM::LR,
- ARM::R4, ARM::R5, ARM::R6, ARM::R7,
- ARM::R8, ARM::R9, ARM::R10, ARM::R11 };
-
- // For Thumb1 mode, we don't want to allocate hi regs at all, as we
- // don't know how to spill them. If we make our prologue/epilogue code
- // smarter at some point, we can go back to using the above allocation
- // orders for the Thumb1 instructions that know how to use hi regs.
- static const unsigned THUMB_GPR_AO[] = {
- ARM::R0, ARM::R1, ARM::R2, ARM::R3,
- ARM::R4, ARM::R5, ARM::R6, ARM::R7 };
-
- GPRClass::iterator
- GPRClass::allocation_order_begin(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
- if (Subtarget.isThumb1Only())
- return THUMB_GPR_AO;
- return ARM_GPR_AO;
- }
-
- GPRClass::iterator
- GPRClass::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
- if (Subtarget.isThumb1Only())
- return THUMB_GPR_AO + (sizeof(THUMB_GPR_AO)/sizeof(unsigned));
- return ARM_GPR_AO + (sizeof(ARM_GPR_AO)/sizeof(unsigned));
- }
+ // Allocate LR as the first CSR since it is always saved anyway.
+ // For Thumb1 mode, we don't want to allocate hi regs at all, as we don't
+ // know how to spill them. If we make our prologue/epilogue code smarter at
+ // some point, we can go back to using the above allocation orders for the
+ // Thumb1 instructions that know how to use hi regs.
+ let AltOrders = [(add LR, GPR), (trunc GPR, 8)];
+ let AltOrderSelect = [{
+ return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
}];
}
@@ -246,44 +218,9 @@ def GPR : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12),
// or SP (R13 or R15) are used. The ARM ISA refers to these operands
// via the BadReg() pseudo-code description.
def rGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, SP, PC)> {
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- static const unsigned ARM_rGPR_AO[] = {
- ARM::R0, ARM::R1, ARM::R2, ARM::R3,
- ARM::R12,ARM::LR,
- ARM::R4, ARM::R5, ARM::R6, ARM::R7,
- ARM::R8, ARM::R9, ARM::R10,
- ARM::R11 };
-
- // For Thumb1 mode, we don't want to allocate hi regs at all, as we
- // don't know how to spill them. If we make our prologue/epilogue code
- // smarter at some point, we can go back to using the above allocation
- // orders for the Thumb1 instructions that know how to use hi regs.
- static const unsigned THUMB_rGPR_AO[] = {
- ARM::R0, ARM::R1, ARM::R2, ARM::R3,
- ARM::R4, ARM::R5, ARM::R6, ARM::R7 };
-
- rGPRClass::iterator
- rGPRClass::allocation_order_begin(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
- if (Subtarget.isThumb1Only())
- return THUMB_rGPR_AO;
- return ARM_rGPR_AO;
- }
-
- rGPRClass::iterator
- rGPRClass::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
-
- if (Subtarget.isThumb1Only())
- return THUMB_rGPR_AO + (sizeof(THUMB_rGPR_AO)/sizeof(unsigned));
- return ARM_rGPR_AO + (sizeof(ARM_rGPR_AO)/sizeof(unsigned));
- }
+ let AltOrders = [(add LR, rGPR), (trunc rGPR, 8)];
+ let AltOrderSelect = [{
+ return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
}];
}
@@ -296,52 +233,12 @@ def tGPR : RegisterClass<"ARM", [i32], 32, (trunc GPR, 8)>;
// Note, getMinimalPhysRegClass(R0) returns tGPR because of the names of
// this class and the preceding one(!) This is what we want.
def tcGPR : RegisterClass<"ARM", [i32], 32, (add R0, R1, R2, R3, R9, R12)> {
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- // R9 is available.
- static const unsigned ARM_GPR_R9_TC[] = {
- ARM::R0, ARM::R1, ARM::R2, ARM::R3,
- ARM::R9, ARM::R12 };
- // R9 is not available.
- static const unsigned ARM_GPR_NOR9_TC[] = {
- ARM::R0, ARM::R1, ARM::R2, ARM::R3,
- ARM::R12 };
-
- // For Thumb1 mode, we don't want to allocate hi regs at all, as we
- // don't know how to spill them. If we make our prologue/epilogue code
- // smarter at some point, we can go back to using the above allocation
- // orders for the Thumb1 instructions that know how to use hi regs.
- static const unsigned THUMB_GPR_AO_TC[] = {
- ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
-
- tcGPRClass::iterator
- tcGPRClass::allocation_order_begin(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
- if (Subtarget.isThumb1Only())
- return THUMB_GPR_AO_TC;
- return Subtarget.isTargetDarwin() ? ARM_GPR_R9_TC : ARM_GPR_NOR9_TC;
- }
-
- tcGPRClass::iterator
- tcGPRClass::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
-
- if (Subtarget.isThumb1Only())
- return THUMB_GPR_AO_TC + (sizeof(THUMB_GPR_AO_TC)/sizeof(unsigned));
-
- return Subtarget.isTargetDarwin() ?
- ARM_GPR_R9_TC + (sizeof(ARM_GPR_R9_TC)/sizeof(unsigned)) :
- ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned));
- }
+ let AltOrders = [(and tcGPR, tGPR)];
+ let AltOrderSelect = [{
+ return MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
}];
}
-
// Scalar single precision floating point register class.
def SPR : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 31)>;
@@ -355,48 +252,9 @@ def SPR_8 : RegisterClass<"ARM", [f32], 32, (trunc SPR, 16)>;
// is double-word alignment though.
def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
(sequence "D%u", 0, 31)> {
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- // VFP2 / VFPv3-D16
- static const unsigned ARM_DPR_VFP2[] = {
- ARM::D0, ARM::D1, ARM::D2, ARM::D3,
- ARM::D4, ARM::D5, ARM::D6, ARM::D7,
- ARM::D8, ARM::D9, ARM::D10, ARM::D11,
- ARM::D12, ARM::D13, ARM::D14, ARM::D15 };
- // VFP3: D8-D15 are callee saved and should be allocated last.
- // Save other low registers for use as DPR_VFP2 and DPR_8 classes.
- static const unsigned ARM_DPR_VFP3[] = {
- ARM::D16, ARM::D17, ARM::D18, ARM::D19,
- ARM::D20, ARM::D21, ARM::D22, ARM::D23,
- ARM::D24, ARM::D25, ARM::D26, ARM::D27,
- ARM::D28, ARM::D29, ARM::D30, ARM::D31,
- ARM::D0, ARM::D1, ARM::D2, ARM::D3,
- ARM::D4, ARM::D5, ARM::D6, ARM::D7,
- ARM::D8, ARM::D9, ARM::D10, ARM::D11,
- ARM::D12, ARM::D13, ARM::D14, ARM::D15 };
-
- DPRClass::iterator
- DPRClass::allocation_order_begin(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
- if (Subtarget.hasVFP3() && !Subtarget.hasD16())
- return ARM_DPR_VFP3;
- return ARM_DPR_VFP2;
- }
-
- DPRClass::iterator
- DPRClass::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
- if (Subtarget.hasVFP3() && !Subtarget.hasD16())
- return ARM_DPR_VFP3 + (sizeof(ARM_DPR_VFP3)/sizeof(unsigned));
- else
- return ARM_DPR_VFP2 + (sizeof(ARM_DPR_VFP2)/sizeof(unsigned));
- }
- }];
+ // Allocate non-VFP2 registers D16-D31 first.
+ let AltOrders = [(rotl DPR, 16)];
+ let AltOrderSelect = [{ return 1; }];
}
// Subset of DPR that are accessible with VFP2 (and so that also have
@@ -417,29 +275,9 @@ def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
(sequence "Q%u", 0, 15)> {
let SubRegClasses = [(DPR dsub_0, dsub_1)];
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- // Q4-Q7 are callee saved and should be allocated last.
- // Save other low registers for use as QPR_VFP2 and QPR_8 classes.
- static const unsigned ARM_QPR[] = {
- ARM::Q8, ARM::Q9, ARM::Q10, ARM::Q11,
- ARM::Q12, ARM::Q13, ARM::Q14, ARM::Q15,
- ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3,
- ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7 };
-
- QPRClass::iterator
- QPRClass::allocation_order_begin(const MachineFunction &MF) const {
- return ARM_QPR;
- }
-
- QPRClass::iterator
- QPRClass::allocation_order_end(const MachineFunction &MF) const {
- return ARM_QPR + (sizeof(ARM_QPR)/sizeof(unsigned));
- }
- }];
+ // Allocate non-VFP2 aliases Q8-Q15 first.
+ let AltOrders = [(rotl QPR, 8)];
+ let AltOrderSelect = [{ return 1; }];
}
// Subset of QPR that have 32-bit SPR subregs.
@@ -461,27 +299,9 @@ def QPR_8 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
def QQPR : RegisterClass<"ARM", [v4i64], 256, (sequence "QQ%u", 0, 7)> {
let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3),
(QPR qsub_0, qsub_1)];
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- // QQ2-QQ3 are callee saved and should be allocated last.
- // Save other low registers for use as QPR_VFP2 and QPR_8 classes.
- static const unsigned ARM_QQPR[] = {
- ARM::QQ4, ARM::QQ5, ARM::QQ6, ARM::QQ7,
- ARM::QQ0, ARM::QQ1, ARM::QQ2, ARM::QQ3 };
-
- QQPRClass::iterator
- QQPRClass::allocation_order_begin(const MachineFunction &MF) const {
- return ARM_QQPR;
- }
-
- QQPRClass::iterator
- QQPRClass::allocation_order_end(const MachineFunction &MF) const {
- return ARM_QQPR + (sizeof(ARM_QQPR)/sizeof(unsigned));
- }
- }];
+ // Allocate non-VFP2 aliases first.
+ let AltOrders = [(rotl QQPR, 4)];
+ let AltOrderSelect = [{ return 1; }];
}
// Subset of QQPR that have 32-bit SPR subregs.
@@ -498,26 +318,9 @@ def QQQQPR : RegisterClass<"ARM", [v8i64], 256, (sequence "QQQQ%u", 0, 3)> {
let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3,
dsub_4, dsub_5, dsub_6, dsub_7),
(QPR qsub_0, qsub_1, qsub_2, qsub_3)];
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- // QQQQ1 is callee saved and should be allocated last.
- // Save QQQQ0 for use as QPR_VFP2 and QPR_8 classes.
- static const unsigned ARM_QQQQPR[] = {
- ARM::QQQQ2, ARM::QQQQ3, ARM::QQQQ0, ARM::QQQQ1 };
-
- QQQQPRClass::iterator
- QQQQPRClass::allocation_order_begin(const MachineFunction &MF) const {
- return ARM_QQQQPR;
- }
-
- QQQQPRClass::iterator
- QQQQPRClass::allocation_order_end(const MachineFunction &MF) const {
- return ARM_QQQQPR + (sizeof(ARM_QQQQPR)/sizeof(unsigned));
- }
- }];
+ // Allocate non-VFP2 aliases first.
+ let AltOrders = [(rotl QQQQPR, 2)];
+ let AltOrderSelect = [{ return 1; }];
}
// Condition code registers.
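[Aside: an AltOrders entry like (rotl DPR, 16) keeps the same register set but starts allocation 16 entries into the sequence. A trivial C sketch of the order it produces:]

#include <stdio.h>

int main(void) {
  /* (rotl DPR, 16): D16..D31 are tried first, then D0..D15, so the
     registers invisible to VFP2 are consumed before the shared ones. */
  for (int i = 0; i < 32; ++i)
    printf("D%d ", (16 + i) % 32);
  printf("\n");
  return 0;
}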
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index d3b8b54..edc0054 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -34,6 +34,7 @@ add_llvm_target(ARMCodeGen
ARMISelLowering.cpp
ARMInstrInfo.cpp
ARMJITInfo.cpp
+ ARMMachObjectWriter.cpp
ARMMCCodeEmitter.cpp
ARMMCExpr.cpp
ARMLoadStoreOptimizer.cpp
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.td b/lib/Target/Blackfin/BlackfinRegisterInfo.td
index 9e2f79f..0d502fd 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.td
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.td
@@ -254,17 +254,7 @@ def PI : RegisterClass<"BF", [i32], 32, (add P, I)>;
let CopyCost = -1, Size = 8 in {
def JustCC : RegisterClass<"BF", [i32], 8, (add CC)>;
def NotCC : RegisterClass<"BF", [i32], 8, (add NCC)>;
-def AnyCC : RegisterClass<"BF", [i32], 8, (add CC, NCC)> {
- let MethodProtos = [{
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- AnyCCClass::iterator
- AnyCCClass::allocation_order_end(const MachineFunction &MF) const {
- return allocation_order_begin(MF)+1;
- }
- }];
-}
+def AnyCC : RegisterClass<"BF", [i32], 8, (add CC, NCC)>;
def StatBit : RegisterClass<"BF", [i1], 8,
(add AZ, AN, CC, AQ, AC0, AC1, AV0, AV0S, AV1, AV1S, V, VS)>;
}
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index 7c24037..ec4020e 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -205,6 +205,9 @@ namespace {
std::string InterpretASMConstraint(InlineAsm::ConstraintInfo& c);
void lowerIntrinsics(Function &F);
+ /// Prints the definition of the intrinsic function F. Supports the
+ /// intrinsics which need to be explicitly defined in the CBackend.
+ void printIntrinsicDefinition(const Function &F, raw_ostream &Out);
void printModuleTypes(const TypeSymbolTable &ST);
void printContainedStructs(const Type *Ty, std::set<const Type *> &);
@@ -1777,6 +1780,7 @@ bool CWriter::doInitialization(Module &M) {
Out << "/* Provide Declarations */\n";
Out << "#include <stdarg.h>\n"; // Varargs support
Out << "#include <setjmp.h>\n"; // Unwind support
+ Out << "#include <limits.h>\n"; // With overflow intrinsics support.
generateCompilerSpecificCode(Out, TD);
// Provide a definition for `bool' if not compiling with a C++ compiler.
@@ -1855,29 +1859,46 @@ bool CWriter::doInitialization(Module &M) {
Out << "float fmodf(float, float);\n";
Out << "long double fmodl(long double, long double);\n";
+ // Store the intrinsics which will be declared/defined below.
+ SmallVector<const Function*, 8> intrinsicsToDefine;
+
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
// Don't print declarations for intrinsic functions.
- if (!I->isIntrinsic() && I->getName() != "setjmp" &&
- I->getName() != "longjmp" && I->getName() != "_setjmp") {
- if (I->hasExternalWeakLinkage())
- Out << "extern ";
- printFunctionSignature(I, true);
- if (I->hasWeakLinkage() || I->hasLinkOnceLinkage())
- Out << " __ATTRIBUTE_WEAK__";
- if (I->hasExternalWeakLinkage())
- Out << " __EXTERNAL_WEAK__";
- if (StaticCtors.count(I))
- Out << " __ATTRIBUTE_CTOR__";
- if (StaticDtors.count(I))
- Out << " __ATTRIBUTE_DTOR__";
- if (I->hasHiddenVisibility())
- Out << " __HIDDEN__";
-
- if (I->hasName() && I->getName()[0] == 1)
- Out << " LLVM_ASM(\"" << I->getName().substr(1) << "\")";
+ // Store the used intrinsics, which need to be explicitly defined.
+ if (I->isIntrinsic()) {
+ switch (I->getIntrinsicID()) {
+ default:
+ break;
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ intrinsicsToDefine.push_back(I);
+ break;
+ }
+ continue;
+ }
+
+ if (I->getName() == "setjmp" ||
+ I->getName() == "longjmp" || I->getName() == "_setjmp")
+ continue;
+
+ if (I->hasExternalWeakLinkage())
+ Out << "extern ";
+ printFunctionSignature(I, true);
+ if (I->hasWeakLinkage() || I->hasLinkOnceLinkage())
+ Out << " __ATTRIBUTE_WEAK__";
+ if (I->hasExternalWeakLinkage())
+ Out << " __EXTERNAL_WEAK__";
+ if (StaticCtors.count(I))
+ Out << " __ATTRIBUTE_CTOR__";
+ if (StaticDtors.count(I))
+ Out << " __ATTRIBUTE_DTOR__";
+ if (I->hasHiddenVisibility())
+ Out << " __HIDDEN__";
+
+ if (I->hasName() && I->getName()[0] == 1)
+ Out << " LLVM_ASM(\"" << I->getName().substr(1) << "\")";
- Out << ";\n";
- }
+ Out << ";\n";
}
// Output the global variable declarations
@@ -2012,6 +2033,14 @@ bool CWriter::doInitialization(Module &M) {
Out << "return X <= Y ; }\n";
Out << "static inline int llvm_fcmp_oge(double X, double Y) { ";
Out << "return X >= Y ; }\n";
+
+ // Emit definitions of the intrinsics.
+ for (SmallVector<const Function*, 8>::const_iterator
+ I = intrinsicsToDefine.begin(),
+ E = intrinsicsToDefine.end(); I != E; ++I) {
+ printIntrinsicDefinition(**I, Out);
+ }
+
return false;
}
@@ -2786,6 +2815,101 @@ void CWriter::visitSelectInst(SelectInst &I) {
Out << "))";
}
+// Returns the macro name or value of the max or min of an integer type
+// (as defined in limits.h).
+static void printLimitValue(const IntegerType &Ty, bool isSigned, bool isMax,
+ raw_ostream &Out) {
+ const char* type;
+ const char* sprefix = "";
+
+ unsigned NumBits = Ty.getBitWidth();
+ if (NumBits <= 8) {
+ type = "CHAR";
+ sprefix = "S";
+ } else if (NumBits <= 16) {
+ type = "SHRT";
+ } else if (NumBits <= 32) {
+ type = "INT";
+ } else if (NumBits <= 64) {
+ type = "LLONG";
+ } else {
+ llvm_unreachable("Bit widths > 64 not implemented yet");
+ }
+
+ if (isSigned)
+ Out << sprefix << type << (isMax ? "_MAX" : "_MIN");
+  else if (isMax)
+    Out << "U" << type << "_MAX";
+  else
+    Out << "0"; // the unsigned minimum is simply 0
+}
+
+static bool isSupportedIntegerSize(const IntegerType &T) {
+ return T.getBitWidth() == 8 || T.getBitWidth() == 16 ||
+ T.getBitWidth() == 32 || T.getBitWidth() == 64;
+}
+
+void CWriter::printIntrinsicDefinition(const Function &F, raw_ostream &Out) {
+ const FunctionType *funT = F.getFunctionType();
+ const Type *retT = F.getReturnType();
+ const IntegerType *elemT = cast<IntegerType>(funT->getParamType(1));
+
+ assert(isSupportedIntegerSize(*elemT) &&
+ "CBackend does not support arbitrary size integers.");
+ assert(cast<StructType>(retT)->getElementType(0) == elemT &&
+ elemT == funT->getParamType(0) && funT->getNumParams() == 2);
+
+ switch (F.getIntrinsicID()) {
+ default:
+ llvm_unreachable("Unsupported Intrinsic.");
+ case Intrinsic::uadd_with_overflow:
+ // static inline Rty uadd_ixx(unsigned ixx a, unsigned ixx b) {
+ // Rty r;
+ // r.field0 = a + b;
+ // r.field1 = (r.field0 < a);
+ // return r;
+ // }
+ Out << "static inline ";
+ printType(Out, retT);
+ Out << GetValueName(&F);
+ Out << "(";
+ printSimpleType(Out, elemT, false);
+ Out << "a,";
+ printSimpleType(Out, elemT, false);
+ Out << "b) {\n ";
+ printType(Out, retT);
+ Out << "r;\n";
+ Out << " r.field0 = a + b;\n";
+ Out << " r.field1 = (r.field0 < a);\n";
+ Out << " return r;\n}\n";
+ break;
+
+ case Intrinsic::sadd_with_overflow:
+ // static inline Rty sadd_ixx(ixx a, ixx b) {
+ // Rty r;
+ // r.field1 = (b > 0 && a > XX_MAX - b) ||
+ // (b < 0 && a < XX_MIN - b);
+ // r.field0 = r.field1 ? 0 : a + b;
+ // return r;
+ // }
+ Out << "static ";
+ printType(Out, retT);
+ Out << GetValueName(&F);
+ Out << "(";
+ printSimpleType(Out, elemT, true);
+ Out << "a,";
+ printSimpleType(Out, elemT, true);
+ Out << "b) {\n ";
+ printType(Out, retT);
+ Out << "r;\n";
+ Out << " r.field1 = (b > 0 && a > ";
+ printLimitValue(*elemT, true, true, Out);
+ Out << " - b) || (b < 0 && a < ";
+ printLimitValue(*elemT, true, false, Out);
+ Out << " - b);\n";
+ Out << " r.field0 = r.field1 ? 0 : a + b;\n";
+ Out << " return r;\n}\n";
+ break;
+ }
+}
void CWriter::lowerIntrinsics(Function &F) {
// This is used to keep track of intrinsics that get generated to a lowered
@@ -2816,6 +2940,8 @@ void CWriter::lowerIntrinsics(Function &F) {
case Intrinsic::x86_sse2_cmp_sd:
case Intrinsic::x86_sse2_cmp_pd:
case Intrinsic::ppc_altivec_lvsl:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
// We directly implement these intrinsics
break;
default:
@@ -3109,6 +3235,14 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
writeOperand(I.getArgOperand(0));
Out << ")";
return true;
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ Out << GetValueName(I.getCalledFunction()) << "(";
+ writeOperand(I.getArgOperand(0));
+ Out << ", ";
+ writeOperand(I.getArgOperand(1));
+ Out << ")";
+ return true;
}
}
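[Aside: for reference, a hand-written C analogue of the helper printIntrinsicDefinition emits for llvm.sadd.with.overflow.i32. The struct and field names mirror the Rty/field0/field1 placeholders in the comments above; the names actually emitted come from printType/GetValueName:]

#include <limits.h>

struct Rty { int field0; unsigned field1; };

static struct Rty sadd_i32(int a, int b) {
  struct Rty r;
  /* Detect overflow without performing the overflowing add. */
  r.field1 = (b > 0 && a > INT_MAX - b) || (b < 0 && a < INT_MIN - b);
  r.field0 = r.field1 ? 0 : a + b;
  return r;
}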
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 797cfd5..0d15514 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -989,12 +989,12 @@ void CppWriter::printVariableUses(const GlobalVariable *GV) {
nl(Out);
printType(GV->getType());
if (GV->hasInitializer()) {
- Constant *Init = GV->getInitializer();
+ const Constant *Init = GV->getInitializer();
printType(Init->getType());
- if (Function *F = dyn_cast<Function>(Init)) {
+ if (const Function *F = dyn_cast<Function>(Init)) {
nl(Out) << "// Function Declarations"; nl(Out);
printFunctionHead(F);
- } else if (GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) {
+ } else if (const GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) {
nl(Out) << "// Global Variable Declarations"; nl(Out);
printVariableHead(gv);
@@ -1353,9 +1353,10 @@ void CppWriter::printInstruction(const Instruction *I,
printEscapedString(phi->getName());
Out << "\", " << bbname << ");";
nl(Out);
- for (unsigned i = 0; i < phi->getNumOperands(); i+=2) {
+ for (unsigned i = 0; i < phi->getNumIncomingValues(); ++i) {
Out << iName << "->addIncoming("
- << opNames[i] << ", " << opNames[i+1] << ");";
+ << opNames[PHINode::getOperandNumForIncomingValue(i)] << ", "
+ << getOpName(phi->getIncomingBlock(i)) << ");";
nl(Out);
}
break;
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 8caa7cd..6f69ba3 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -56,6 +56,9 @@ namespace {
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
raw_ostream &O);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
void printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O);
void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
@@ -304,6 +307,19 @@ bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
return false;
}
+bool MipsAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNum, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ assert(MO.isReg() && "unexpected inline asm memory operand");
+ O << "0($" << MipsAsmPrinter::getRegisterName(MO.getReg()) << ")";
+ return false;
+}
+
void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
raw_ostream &O) {
const MachineOperand &MO = MI->getOperand(opNum);
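[Aside: a minimal C use that reaches the new PrintAsmMemoryOperand hook — an "m"-constrained inline-asm operand, which this patch prints in base+offset form, e.g. 0($4). The asm text itself is illustrative:]

int load_word(int *p) {
  int v;
  asm("lw %0, %1" : "=r"(v) : "m"(*p)); /* %1 is rendered as 0($reg) */
  return v;
}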
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
index 57aeb1d..876f0fc 100644
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -20,8 +20,8 @@ class CCIfSubtarget<string F, CCAction A>:
// Only the return rules are defined here for O32. The rules for argument
// passing are defined in MipsISelLowering.cpp.
def RetCC_MipsO32 : CallingConv<[
- // i32 are returned in registers V0, V1
- CCIfType<[i32], CCAssignToReg<[V0, V1]>>,
+ // i32 are returned in registers V0, V1, A0, A1
+ CCIfType<[i32], CCAssignToReg<[V0, V1, A0, A1]>>,
// f32 are returned in registers F0, F2
CCIfType<[f32], CCAssignToReg<[F0, F2]>>,
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index d8a84ce..c35c852 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -94,6 +94,10 @@ private:
inline SDValue getI32Imm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i32);
}
+
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
};
}
@@ -462,6 +466,14 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
return ResNode;
}
+bool MipsDAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
+ OutOps.push_back(Op);
+ return false;
+}
+
/// createMipsISelDag - This pass converts a legalized DAG into a
/// MIPS-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createMipsISelDag(MipsTargetMachine &TM) {
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index c42054e..01624c5 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -59,6 +59,7 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::BuildPairF64: return "MipsISD::BuildPairF64";
case MipsISD::ExtractElementF64: return "MipsISD::ExtractElementF64";
case MipsISD::WrapperPIC: return "MipsISD::WrapperPIC";
+ case MipsISD::DynAlloc: return "MipsISD::DynAlloc";
default: return NULL;
}
}
@@ -1189,9 +1190,10 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
SDValue MipsTargetLowering::
LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
{
- unsigned StackAlignment =
- getTargetMachine().getFrameLowering()->getStackAlignment();
- assert(StackAlignment >=
+ MachineFunction &MF = DAG.getMachineFunction();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ assert(getTargetMachine().getFrameLowering()->getStackAlignment() >=
cast<ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue() &&
"Cannot lower if the alignment of the allocated space is larger than \
that of the stack.");
@@ -1211,24 +1213,14 @@ LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
// must be placed in the stack pointer register.
Chain = DAG.getCopyToReg(StackPointer.getValue(1), dl, Mips::SP, Sub,
SDValue());
- // Retrieve updated $sp. There is a glue input to prevent instructions that
- // clobber $sp from being inserted between copytoreg and copyfromreg.
- SDValue NewSP = DAG.getCopyFromReg(Chain, dl, Mips::SP, MVT::i32,
- Chain.getValue(1));
-
- // The stack space reserved by alloca is located right above the argument
- // area. It is aligned on a boundary that is a multiple of StackAlignment.
- MachineFunction &MF = DAG.getMachineFunction();
- MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
- unsigned SPOffset = (MipsFI->getMaxCallFrameSize() + StackAlignment - 1) /
- StackAlignment * StackAlignment;
- SDValue AllocPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, NewSP,
- DAG.getConstant(SPOffset, MVT::i32));
// This node always has two return values: a new stack pointer
// value and a chain
- SDValue Ops[2] = { AllocPtr, NewSP.getValue(1) };
- return DAG.getMergeValues(Ops, 2, dl);
+ SDVTList VTLs = DAG.getVTList(MVT::i32, MVT::Other);
+ SDValue Ptr = DAG.getFrameIndex(MipsFI->getDynAllocFI(), getPointerTy());
+ SDValue Ops[] = { Chain, Ptr, Chain.getValue(1) };
+
+ return DAG.getNode(MipsISD::DynAlloc, dl, VTLs, Ops, 3);
}
SDValue MipsTargetLowering::
@@ -1358,7 +1350,7 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
// General Dynamic TLS Model
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32,
- 0, MipsII::MO_TLSGD);
+ 0, MipsII::MO_TLSGD);
SDValue Tlsgd = DAG.getNode(MipsISD::TlsGd, dl, MVT::i32, TGA);
SDValue GP = DAG.getRegister(Mips::GP, MVT::i32);
SDValue Argument = DAG.getNode(ISD::ADD, dl, MVT::i32, GP, Tlsgd);
@@ -1370,36 +1362,36 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
Args.push_back(Entry);
std::pair<SDValue, SDValue> CallResult =
LowerCallTo(DAG.getEntryNode(),
- (const Type *) Type::getInt32Ty(*DAG.getContext()),
- false, false, false, false,
- 0, CallingConv::C, false, true,
- DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
+ (const Type *) Type::getInt32Ty(*DAG.getContext()),
+ false, false, false, false, 0, CallingConv::C, false, true,
+ DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG,
+ dl);
return CallResult.first;
- } else {
- SDValue Offset;
- if (GV->isDeclaration()) {
- // Initial Exec TLS Model
- SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
- MipsII::MO_GOTTPREL);
- Offset = DAG.getLoad(MVT::i32, dl,
- DAG.getEntryNode(), TGA, MachinePointerInfo(),
- false, false, 0);
- } else {
- // Local Exec TLS Model
- SDVTList VTs = DAG.getVTList(MVT::i32);
- SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
- MipsII::MO_TPREL_HI);
- SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
- MipsII::MO_TPREL_LO);
- SDValue Hi = DAG.getNode(MipsISD::TprelHi, dl, VTs, &TGAHi, 1);
- SDValue Lo = DAG.getNode(MipsISD::TprelLo, dl, MVT::i32, TGALo);
- Offset = DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo);
- }
+ }
- SDValue ThreadPointer = DAG.getNode(MipsISD::ThreadPointer, dl, PtrVT);
- return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
+ SDValue Offset;
+ if (GV->isDeclaration()) {
+ // Initial Exec TLS Model
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ MipsII::MO_GOTTPREL);
+ Offset = DAG.getLoad(MVT::i32, dl,
+ DAG.getEntryNode(), TGA, MachinePointerInfo(),
+ false, false, 0);
+ } else {
+ // Local Exec TLS Model
+ SDVTList VTs = DAG.getVTList(MVT::i32);
+ SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ MipsII::MO_TPREL_HI);
+ SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ MipsII::MO_TPREL_LO);
+ SDValue Hi = DAG.getNode(MipsISD::TprelHi, dl, VTs, &TGAHi, 1);
+ SDValue Lo = DAG.getNode(MipsISD::TprelLo, dl, MVT::i32, TGALo);
+ Offset = DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo);
}
+
+ SDValue ThreadPointer = DAG.getNode(MipsISD::ThreadPointer, dl, PtrVT);
+ return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
}
SDValue MipsTargetLowering::
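[Aside: the restructured code above covers the three standard TLS models. A variable like the one in this C sketch lowers through General Dynamic under PIC; without PIC it takes Initial Exec when the variable is only an external declaration and Local Exec when it is defined locally, as here:]

__thread int counter; /* locally defined: Local Exec when non-PIC */

int bump(void) { return ++counter; }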
@@ -1770,6 +1762,10 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (IsPIC && !MipsFI->getGPFI())
MipsFI->setGPFI(MFI->CreateFixedObject(4, 0, true));
+ // Get the frame index of the stack frame object that points to the location
+ // of dynamically allocated area on the stack.
+ int DynAllocFI = MipsFI->getDynAllocFI();
+
// Update size of the maximum argument space.
// For O32, a minimum of four words (16 bytes) of argument space is
// allocated.
@@ -1781,14 +1777,17 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (MaxCallFrameSize < NextStackOffset) {
MipsFI->setMaxCallFrameSize(NextStackOffset);
- if (IsPIC) {
- // $gp restore slot must be aligned.
- unsigned StackAlignment = TFL->getStackAlignment();
- NextStackOffset = (NextStackOffset + StackAlignment - 1) /
- StackAlignment * StackAlignment;
- int GPFI = MipsFI->getGPFI();
- MFI->setObjectOffset(GPFI, NextStackOffset);
- }
+ // Set the offsets relative to $sp of the $gp restore slot and dynamically
+ // allocated stack space. These offsets must be aligned to a boundary
+ // determined by the stack alignment of the ABI.
+ unsigned StackAlignment = TFL->getStackAlignment();
+ NextStackOffset = (NextStackOffset + StackAlignment - 1) /
+ StackAlignment * StackAlignment;
+
+ if (IsPIC)
+ MFI->setObjectOffset(MipsFI->getGPFI(), NextStackOffset);
+
+ MFI->setObjectOffset(DynAllocFI, NextStackOffset);
}
// With EABI is it possible to have 16 args on registers.
@@ -1965,7 +1964,8 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
InFlag = Chain.getValue(1);
// Create the CALLSEQ_END node.
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NextStackOffset, true),
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getIntPtrConstant(NextStackOffset, true),
DAG.getIntPtrConstant(0, true), InFlag);
InFlag = Chain.getValue(1);
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index fbcedfd..b7b85fd 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -79,7 +79,9 @@ namespace llvm {
BuildPairF64,
ExtractElementF64,
- WrapperPIC
+ WrapperPIC,
+
+ DynAlloc
};
}
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 329a002..0651322 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -39,6 +39,9 @@ def SDT_MipsDivRem : SDTypeProfile<0, 2,
def SDT_MipsThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
+def SDT_MipsDynAlloc : SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
+ SDTCisVT<1, iPTR>]>;
+
// Call
def MipsJmpLink : SDNode<"MipsISD::JmpLink",SDT_MipsJmpLink,
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
@@ -99,6 +102,10 @@ def MipsDivRemU : SDNode<"MipsISD::DivRemU", SDT_MipsDivRem,
def MipsWrapperPIC : SDNode<"MipsISD::WrapperPIC", SDTIntUnaryOp>;
+// Pointer to dynamically allocated stack area.
+def MipsDynAlloc : SDNode<"MipsISD::DynAlloc", SDT_MipsDynAlloc,
+ [SDNPHasChain, SDNPInGlue]>;
+
//===----------------------------------------------------------------------===//
// Mips Instruction Predicate Definitions.
//===----------------------------------------------------------------------===//
@@ -675,6 +682,12 @@ let addr=0 in
// can be matched. It's similar to Sparc LEA_ADDRi
def LEA_ADDiu : EffectiveAddress<"addiu\t$dst, ${addr:stackloc}">;
+// DynAlloc node points to dynamically allocated stack space.
+// $sp is added to the list of implicitly used registers to prevent dead code
+// elimination from removing instructions that modify $sp.
+let Uses = [SP] in
+def DynAlloc : EffectiveAddress<"addiu\t$dst, ${addr:stackloc}">;
+
// MADD*/MSUB*
def MADD : MArithR<0, "madd", MipsMAdd, 1>;
def MADDU : MArithR<1, "maddu", MipsMAddu, 1>;
@@ -852,6 +865,9 @@ def : Pat<(setge CPURegs:$lhs, immSExt16:$rhs),
def : Pat<(setuge CPURegs:$lhs, immSExt16:$rhs),
(XORi (SLTiu CPURegs:$lhs, immSExt16:$rhs), 1)>;
+// select MipsDynAlloc
+def : Pat<(MipsDynAlloc addr:$f), (DynAlloc addr:$f)>;
+
//===----------------------------------------------------------------------===//
// Floating Point Support
//===----------------------------------------------------------------------===//
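[Aside: a C sketch that exercises the new node — the variable-size __builtin_alloca lowers to DYNAMIC_STACKALLOC, and the MipsDynAlloc pattern above materializes the address of the reserved area as an addiu off $sp:]

#include <string.h>

int sum_copy(const int *src, int n) {
  /* Size is not a compile-time constant, so this is a dynamic alloca. */
  int *buf = (int *)__builtin_alloca(n * sizeof(int));
  memcpy(buf, src, n * sizeof(int));
  int s = 0;
  for (int i = 0; i < n; ++i)
    s += buf[i];
  return s;
}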
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index df40e6c..dbb7a67 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -27,6 +27,7 @@ namespace llvm {
class MipsFunctionInfo : public MachineFunctionInfo {
private:
+ MachineFunction& MF;
/// SRetReturnReg - Some subtargets require that sret lowering includes
/// returning the value of the returned struct in a register. This field
/// holds the virtual register into which the sret argument is passed.
@@ -47,6 +48,7 @@ private:
// LowerCall except for the frame object for restoring $gp.
std::pair<int, int> InArgFIRange, OutArgFIRange;
int GPFI; // Index of the frame object for restoring $gp
+ mutable int DynAllocFI; // Frame index of dynamically allocated stack area.
unsigned MaxCallFrameSize;
/// AtomicFrameIndex - To implement atomic.swap and atomic.cmp.swap
@@ -55,10 +57,10 @@ private:
int AtomicFrameIndex;
public:
MipsFunctionInfo(MachineFunction& MF)
- : SRetReturnReg(0), GlobalBaseReg(0),
+ : MF(MF), SRetReturnReg(0), GlobalBaseReg(0),
VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)),
- OutArgFIRange(std::make_pair(-1, 0)), GPFI(0), MaxCallFrameSize(0),
- AtomicFrameIndex(-1)
+ OutArgFIRange(std::make_pair(-1, 0)), GPFI(0), DynAllocFI(0),
+ MaxCallFrameSize(0), AtomicFrameIndex(-1)
{}
bool isInArgFI(int FI) const {
@@ -81,6 +83,16 @@ public:
bool needGPSaveRestore() const { return getGPFI(); }
bool isGPFI(int FI) const { return GPFI && GPFI == FI; }
+ // The first call to this function creates a frame object for dynamically
+ // allocated stack area.
+ int getDynAllocFI() const {
+ if (!DynAllocFI)
+ DynAllocFI = MF.getFrameInfo()->CreateFixedObject(4, 0, true);
+
+ return DynAllocFI;
+ }
+ bool isDynAllocFI(int FI) const { return DynAllocFI && DynAllocFI == FI; }
+
unsigned getSRetReturnReg() const { return SRetReturnReg; }
void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index b0984af..fa64f63 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -179,12 +179,14 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
int Offset;
// Calculate final offset.
- // - There is no need to change the offset if the frame object is an outgoing
- // argument or a $gp restore location,
+ // - There is no need to change the offset if the frame object is one of the
+ // following: an outgoing argument, pointer to a dynamically allocated
+ // stack space or a $gp restore location,
// - If the frame object is any of the following, its offset must be adjusted
// by adding the size of the stack:
// incoming argument, callee-saved register location or local variable.
- if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isGPFI(FrameIndex))
+ if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isGPFI(FrameIndex) ||
+ MipsFI->isDynAllocFI(FrameIndex))
Offset = spOffset;
else
Offset = spOffset + stackSize;
@@ -213,7 +215,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
// 3. Locations for callee-saved registers.
// Everything else is referenced relative to whatever register
// getFrameRegister() returns.
- if (MipsFI->isOutArgFI(FrameIndex) ||
+ if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isDynAllocFI(FrameIndex) ||
(FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI))
FrameReg = Mips::SP;
else
diff --git a/lib/Target/PTX/PTX.td b/lib/Target/PTX/PTX.td
index 2c7bd3b..6a36b24 100644
--- a/lib/Target/PTX/PTX.td
+++ b/lib/Target/PTX/PTX.td
@@ -16,7 +16,7 @@
include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
-// Subtarget Features.
+// Subtarget Features
//===----------------------------------------------------------------------===//
//===- Architectural Features ---------------------------------------------===//
@@ -57,7 +57,7 @@ def FeatureSM20 : SubtargetFeature<"sm20", "PTXShaderModel", "PTX_SM_2_0",
[FeatureSM13]>;
//===----------------------------------------------------------------------===//
-// PTX supported processors.
+// PTX supported processors
//===----------------------------------------------------------------------===//
class Proc<string Name, list<SubtargetFeature> Features>
diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp
index 1142144..b1f7c1e 100644
--- a/lib/Target/PTX/PTXAsmPrinter.cpp
+++ b/lib/Target/PTX/PTXAsmPrinter.cpp
@@ -23,6 +23,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
@@ -162,6 +163,13 @@ void PTXAsmPrinter::EmitStartOfAsmFile(Module &M)
OutStreamer.EmitRawText(Twine("\t.target " + ST.getTargetString() +
(ST.supportsDouble() ? ""
: ", map_f64_to_f32")));
+ // .address_size directive is optional, but it must immediately follow
+ // the .target directive if present within a module
+ if (ST.supportsPTX23()) {
+ std::string addrSize = ST.is64Bit() ? "64" : "32";
+ OutStreamer.EmitRawText(Twine("\t.address_size " + addrSize));
+ }
+
OutStreamer.AddBlankLine();
// declare global variables
@@ -194,6 +202,21 @@ void PTXAsmPrinter::EmitFunctionBodyStart() {
def += ';';
OutStreamer.EmitRawText(Twine(def));
}
+
+ const MachineFrameInfo* FrameInfo = MF->getFrameInfo();
+ DEBUG(dbgs() << "Have " << FrameInfo->getNumObjects()
+ << " frame object(s)\n");
+ for (unsigned i = 0, e = FrameInfo->getNumObjects(); i != e; ++i) {
+ DEBUG(dbgs() << "Size of object: " << FrameInfo->getObjectSize(i) << "\n");
+ if (FrameInfo->getObjectSize(i) > 0) {
+ std::string def = "\t.reg .b";
+ def += utostr(FrameInfo->getObjectSize(i)*8); // Convert to bits
+ def += " s";
+ def += utostr(i);
+ def += ";";
+ OutStreamer.EmitRawText(Twine(def));
+ }
+ }
}
void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
@@ -346,7 +369,7 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
if (gv->hasInitializer())
{
- Constant *C = gv->getInitializer();
+ const Constant *C = gv->getInitializer();
if (const ConstantArray *CA = dyn_cast<ConstantArray>(C))
{
decl += " = {";
diff --git a/lib/Target/PTX/PTXInstrInfo.cpp b/lib/Target/PTX/PTXInstrInfo.cpp
index c305c05..5bdac89 100644
--- a/lib/Target/PTX/PTXInstrInfo.cpp
+++ b/lib/Target/PTX/PTXInstrInfo.cpp
@@ -288,6 +288,77 @@ InsertBranch(MachineBasicBlock &MBB,
}
}
+// Memory operand folding for spills
+void PTXInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MII,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ MachineInstr& MI = *MII;
+ DebugLoc DL = MI.getDebugLoc();
+
+ DEBUG(dbgs() << "storeRegToStackSlot: " << MI);
+
+ int OpCode;
+
+ // Select the appropriate opcode based on the register class
+ if (RC == PTX::RegI16RegisterClass) {
+ OpCode = PTX::STACKSTOREI16;
+ } else if (RC == PTX::RegI32RegisterClass) {
+ OpCode = PTX::STACKSTOREI32;
+ } else if (RC == PTX::RegI64RegisterClass) {
+ OpCode = PTX::STACKSTOREI64;
+ } else if (RC == PTX::RegF32RegisterClass) {
+ OpCode = PTX::STACKSTOREF32;
+ } else if (RC == PTX::RegF64RegisterClass) {
+ OpCode = PTX::STACKSTOREF64;
+ } else {
+ llvm_unreachable("Unknown PTX register class!");
+ }
+
+ // Build the store instruction (really a mov)
+ MachineInstrBuilder MIB = BuildMI(MBB, MII, DL, get(OpCode));
+ MIB.addFrameIndex(FrameIdx);
+ MIB.addReg(SrcReg);
+
+ AddDefaultPredicate(MIB);
+}
+
+void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MII,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ MachineInstr& MI = *MII;
+ DebugLoc DL = MI.getDebugLoc();
+
+ DEBUG(dbgs() << "loadRegFromStackSlot: " << MI);
+
+ int OpCode;
+
+ // Select the appropriate opcode based on the register class
+ if (RC == PTX::RegI16RegisterClass) {
+ OpCode = PTX::STACKLOADI16;
+ } else if (RC == PTX::RegI32RegisterClass) {
+ OpCode = PTX::STACKLOADI32;
+ } else if (RC == PTX::RegI64RegisterClass) {
+ OpCode = PTX::STACKLOADI64;
+ } else if (RC == PTX::RegF32RegisterClass) {
+ OpCode = PTX::STACKLOADF32;
+ } else if (RC == PTX::RegF64RegisterClass) {
+ OpCode = PTX::STACKLOADF64;
+ } else {
+ llvm_unreachable("Unknown PTX register class!");
+ }
+
+ // Build the load instruction (really a mov)
+ MachineInstrBuilder MIB = BuildMI(MBB, MII, DL, get(OpCode));
+ MIB.addReg(DestReg);
+ MIB.addFrameIndex(FrameIdx);
+
+ AddDefaultPredicate(MIB);
+}
+
// static helper routines
MachineSDNode *PTXInstrInfo::
diff --git a/lib/Target/PTX/PTXInstrInfo.h b/lib/Target/PTX/PTXInstrInfo.h
index a04be77..a2eea25 100644
--- a/lib/Target/PTX/PTXInstrInfo.h
+++ b/lib/Target/PTX/PTXInstrInfo.h
@@ -84,6 +84,29 @@ public:
const SmallVectorImpl<MachineOperand> &Cond,
DebugLoc DL) const;
+ // Memory operand folding for spills
+ // TODO: Implement this eventually and get rid of storeRegToStackSlot and
+ // loadRegFromStackSlot. Doing so will get rid of the "stack" registers
+ // we currently use to spill, though I doubt the overall effect on ptxas
+ // output will be large. I have yet to see a case where ptxas is unable
+ // to see through the "stack" register usage and hence generates
+ // efficient code anyway.
+ // virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ // MachineInstr* MI,
+ // const SmallVectorImpl<unsigned> &Ops,
+ // int FrameIndex) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock& MBB,
+ MachineBasicBlock::iterator MII,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass* RC,
+ const TargetRegisterInfo* TRI) const;
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MII,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
// static helper routines
static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td
index 71f7cc3..cc74944 100644
--- a/lib/Target/PTX/PTXInstrInfo.td
+++ b/lib/Target/PTX/PTXInstrInfo.td
@@ -584,24 +584,39 @@ defm REM : INT3<"rem", urem>;
defm FNEG : PTX_FLOAT_2OP<"neg", fneg>;
// Standard Binary Operations
-defm FADD : PTX_FLOAT_3OP<"add", fadd>;
-defm FSUB : PTX_FLOAT_3OP<"sub", fsub>;
-defm FMUL : PTX_FLOAT_3OP<"mul", fmul>;
-
-// TODO: Allow user selection of rounding modes for fdiv.
-// For division, we need to have f32 and f64 differently.
-// For f32, we just always use .approx since it is supported on all hardware
-// for PTX 1.4+, which is our minimum target.
-def FDIVrr32 : InstPTX<(outs RegF32:$d),
+defm FADD : PTX_FLOAT_3OP<"add.rn", fadd>;
+defm FSUB : PTX_FLOAT_3OP<"sub.rn", fsub>;
+defm FMUL : PTX_FLOAT_3OP<"mul.rn", fmul>;
+
+// For floating-point division:
+// SM 1.3+ defaults to .rn for f32 and f64;
+// SM 1.0 must *not* specify a rounding mode.
+
+// TODO:
+// - Allow user selection of rounding modes for fdiv
+// - Add support for -prec-div=false (.approx)
+
+def FDIVrr32SM13 : InstPTX<(outs RegF32:$d),
+ (ins RegF32:$a, RegF32:$b),
+ "div.rn.f32\t$d, $a, $b",
+ [(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>,
+ Requires<[SupportsSM13]>;
+def FDIVri32SM13 : InstPTX<(outs RegF32:$d),
+ (ins RegF32:$a, f32imm:$b),
+ "div.rn.f32\t$d, $a, $b",
+ [(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>,
+ Requires<[SupportsSM13]>;
+def FDIVrr32SM10 : InstPTX<(outs RegF32:$d),
(ins RegF32:$a, RegF32:$b),
- "div.approx.f32\t$d, $a, $b",
- [(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>;
-def FDIVri32 : InstPTX<(outs RegF32:$d),
+ "div.f32\t$d, $a, $b",
+ [(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>,
+ Requires<[DoesNotSupportSM13]>;
+def FDIVri32SM10 : InstPTX<(outs RegF32:$d),
(ins RegF32:$a, f32imm:$b),
- "div.approx.f32\t$d, $a, $b",
- [(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>;
+ "div.f32\t$d, $a, $b",
+ [(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>,
+ Requires<[DoesNotSupportSM13]>;
-// For f64, we must specify a rounding for sm 1.3+ but *not* for sm 1.0.
def FDIVrr64SM13 : InstPTX<(outs RegF64:$d),
(ins RegF64:$a, RegF64:$b),
"div.rn.f64\t$d, $a, $b",
@@ -681,6 +696,10 @@ defm SETPLTu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETULT, "lt">;
defm SETPLEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETULE, "le">;
defm SETPGTu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETUGT, "gt">;
defm SETPGEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETUGE, "ge">;
+defm SETPLTs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETLT, "lt">;
+defm SETPLEs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETLE, "le">;
+defm SETPGTs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETGT, "gt">;
+defm SETPGEs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETGE, "ge">;
// Compare u32
@@ -690,6 +709,10 @@ defm SETPLTu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETULT, "lt">;
defm SETPLEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETULE, "le">;
defm SETPGTu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETUGT, "gt">;
defm SETPGEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETUGE, "ge">;
+defm SETPLTs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETLT, "lt">;
+defm SETPLEs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETLE, "le">;
+defm SETPGTs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETGT, "gt">;
+defm SETPGEs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETGE, "ge">;
// Compare u64
@@ -699,6 +722,10 @@ defm SETPLTu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETULT, "lt">;
defm SETPLEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETULE, "le">;
defm SETPGTu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETUGT, "gt">;
defm SETPGEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETUGE, "ge">;
+defm SETPLTs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETLT, "lt">;
+defm SETPLEs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETLE, "le">;
+defm SETPGTs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETGT, "gt">;
+defm SETPGEs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETGE, "ge">;
// Compare f32
@@ -811,31 +838,35 @@ defm STs : PTX_ST_ALL<"st.shared", store_shared>;
// TODO: Do something with st.param if/when it is needed.
// Conversion to pred
-
+// PTX does not directly support converting to a predicate type, so we fake it
+// by performing a greater-than test between the value and zero. This follows
+// the C convention that any non-zero value is equivalent to 'true'.
def CVT_pred_u16
- : InstPTX<(outs RegPred:$d), (ins RegI16:$a), "cvt.pred.u16\t$d, $a",
+ : InstPTX<(outs RegPred:$d), (ins RegI16:$a), "setp.gt.b16\t$d, $a, 0",
[(set RegPred:$d, (trunc RegI16:$a))]>;
def CVT_pred_u32
- : InstPTX<(outs RegPred:$d), (ins RegI32:$a), "cvt.pred.u32\t$d, $a",
+ : InstPTX<(outs RegPred:$d), (ins RegI32:$a), "setp.gt.b32\t$d, $a, 0",
[(set RegPred:$d, (trunc RegI32:$a))]>;
def CVT_pred_u64
- : InstPTX<(outs RegPred:$d), (ins RegI64:$a), "cvt.pred.u64\t$d, $a",
+ : InstPTX<(outs RegPred:$d), (ins RegI64:$a), "setp.gt.b64\t$d, $a, 0",
[(set RegPred:$d, (trunc RegI64:$a))]>;
def CVT_pred_f32
- : InstPTX<(outs RegPred:$d), (ins RegF32:$a), "cvt.rni.pred.f32\t$d, $a",
+ : InstPTX<(outs RegPred:$d), (ins RegF32:$a), "setp.gt.b32\t$d, $a, 0",
[(set RegPred:$d, (fp_to_uint RegF32:$a))]>;
def CVT_pred_f64
- : InstPTX<(outs RegPred:$d), (ins RegF64:$a), "cvt.rni.pred.f64\t$d, $a",
+ : InstPTX<(outs RegPred:$d), (ins RegF64:$a), "setp.gt.b64\t$d, $a, 0",
[(set RegPred:$d, (fp_to_uint RegF64:$a))]>;
// Conversion to u16
-
+// PTX does not directly support converting a predicate to a value, so we
+// use a select instruction to select either 0 or 1 (integer or fp) based
+// on the truth value of the predicate.
def CVT_u16_pred
- : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "cvt.u16.pred\t$d, $a",
+ : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a",
[(set RegI16:$d, (zext RegPred:$a))]>;
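In C terms, the two conversions described in the comments above reduce to a compare-with-zero and a two-way select; an illustrative sketch (function names are not from the patch):

    bool toPred(uint16_t v)   { return v != 0; }     // trunc: setp against zero
    uint16_t fromPred(bool p) { return p ? 1 : 0; }  // zext: selp between 1 and 0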
def CVT_u16_u32
@@ -847,17 +878,17 @@ def CVT_u16_u64
[(set RegI16:$d, (trunc RegI64:$a))]>;
def CVT_u16_f32
- : InstPTX<(outs RegI16:$d), (ins RegF32:$a), "cvt.rni.u16.f32\t$d, $a",
+ : InstPTX<(outs RegI16:$d), (ins RegF32:$a), "cvt.rzi.u16.f32\t$d, $a",
[(set RegI16:$d, (fp_to_uint RegF32:$a))]>;
def CVT_u16_f64
- : InstPTX<(outs RegI16:$d), (ins RegF64:$a), "cvt.rni.u16.f64\t$d, $a",
+ : InstPTX<(outs RegI16:$d), (ins RegF64:$a), "cvt.rzi.u16.f64\t$d, $a",
[(set RegI16:$d, (fp_to_uint RegF64:$a))]>;
// Conversion to u32
def CVT_u32_pred
- : InstPTX<(outs RegI32:$d), (ins RegPred:$a), "cvt.u32.pred\t$d, $a",
+ : InstPTX<(outs RegI32:$d), (ins RegPred:$a), "selp.u32\t$d, 1, 0, $a",
[(set RegI32:$d, (zext RegPred:$a))]>;
def CVT_u32_u16
@@ -869,17 +900,17 @@ def CVT_u32_u64
[(set RegI32:$d, (trunc RegI64:$a))]>;
def CVT_u32_f32
- : InstPTX<(outs RegI32:$d), (ins RegF32:$a), "cvt.rni.u32.f32\t$d, $a",
+ : InstPTX<(outs RegI32:$d), (ins RegF32:$a), "cvt.rzi.u32.f32\t$d, $a",
[(set RegI32:$d, (fp_to_uint RegF32:$a))]>;
def CVT_u32_f64
- : InstPTX<(outs RegI32:$d), (ins RegF64:$a), "cvt.rni.u32.f64\t$d, $a",
+ : InstPTX<(outs RegI32:$d), (ins RegF64:$a), "cvt.rzi.u32.f64\t$d, $a",
[(set RegI32:$d, (fp_to_uint RegF64:$a))]>;
// Conversion to u64
def CVT_u64_pred
- : InstPTX<(outs RegI64:$d), (ins RegPred:$a), "cvt.u64.pred\t$d, $a",
+ : InstPTX<(outs RegI64:$d), (ins RegPred:$a), "selp.u64\t$d, 1, 0, $a",
[(set RegI64:$d, (zext RegPred:$a))]>;
def CVT_u64_u16
@@ -891,17 +922,18 @@ def CVT_u64_u32
[(set RegI64:$d, (zext RegI32:$a))]>;
def CVT_u64_f32
- : InstPTX<(outs RegI64:$d), (ins RegF32:$a), "cvt.rni.u64.f32\t$d, $a",
+ : InstPTX<(outs RegI64:$d), (ins RegF32:$a), "cvt.rzi.u64.f32\t$d, $a",
[(set RegI64:$d, (fp_to_uint RegF32:$a))]>;
def CVT_u64_f64
- : InstPTX<(outs RegI64:$d), (ins RegF64:$a), "cvt.rni.u64.f64\t$d, $a",
+ : InstPTX<(outs RegI64:$d), (ins RegF64:$a), "cvt.rzi.u64.f64\t$d, $a",
[(set RegI64:$d, (fp_to_uint RegF64:$a))]>;
// Conversion to f32
def CVT_f32_pred
- : InstPTX<(outs RegF32:$d), (ins RegPred:$a), "cvt.rn.f32.pred\t$d, $a",
+ : InstPTX<(outs RegF32:$d), (ins RegPred:$a),
+ "selp.f32\t$d, 0F3F800000, 0F00000000, $a", // 1.0
[(set RegF32:$d, (uint_to_fp RegPred:$a))]>;
def CVT_f32_u16
@@ -923,7 +955,8 @@ def CVT_f32_f64
// Conversion to f64
def CVT_f64_pred
- : InstPTX<(outs RegF64:$d), (ins RegPred:$a), "cvt.rn.f64.pred\t$d, $a",
+ : InstPTX<(outs RegF64:$d), (ins RegPred:$a),
+ "selp.f64\t$d, 0D3F80000000000000, 0D0000000000000000, $a", // 1.0
[(set RegF64:$d, (uint_to_fp RegPred:$a))]>;
def CVT_f64_u16
@@ -962,6 +995,30 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
def RET : InstPTX<(outs), (ins), "ret", [(PTXret)]>;
}
+///===- Spill Instructions ------------------------------------------------===//
+// Special instructions used for stack spilling
+def STACKSTOREI16 : InstPTX<(outs), (ins i32imm:$d, RegI16:$a),
+ "mov.u16\ts$d, $a", []>;
+def STACKSTOREI32 : InstPTX<(outs), (ins i32imm:$d, RegI32:$a),
+ "mov.u32\ts$d, $a", []>;
+def STACKSTOREI64 : InstPTX<(outs), (ins i32imm:$d, RegI64:$a),
+ "mov.u64\ts$d, $a", []>;
+def STACKSTOREF32 : InstPTX<(outs), (ins i32imm:$d, RegF32:$a),
+ "mov.f32\ts$d, $a", []>;
+def STACKSTOREF64 : InstPTX<(outs), (ins i32imm:$d, RegF64:$a),
+ "mov.f64\ts$d, $a", []>;
+
+def STACKLOADI16 : InstPTX<(outs), (ins RegI16:$d, i32imm:$a),
+ "mov.u16\t$d, s$a", []>;
+def STACKLOADI32 : InstPTX<(outs), (ins RegI32:$d, i32imm:$a),
+ "mov.u32\t$d, s$a", []>;
+def STACKLOADI64 : InstPTX<(outs), (ins RegI64:$d, i32imm:$a),
+ "mov.u64\t$d, s$a", []>;
+def STACKLOADF32 : InstPTX<(outs), (ins RegF32:$d, i32imm:$a),
+ "mov.f32\t$d, s$a", []>;
+def STACKLOADF64 : InstPTX<(outs), (ins RegF64:$d, i32imm:$a),
+ "mov.f64\t$d, s$a", []>;
+
///===- Intrinsic Instructions --------------------------------------------===//
include "PTXIntrinsicInstrInfo.td"
diff --git a/lib/Target/PTX/PTXRegisterInfo.cpp b/lib/Target/PTX/PTXRegisterInfo.cpp
index 0f3e7bc..b7c7ee5 100644
--- a/lib/Target/PTX/PTXRegisterInfo.cpp
+++ b/lib/Target/PTX/PTXRegisterInfo.cpp
@@ -13,7 +13,34 @@
#include "PTX.h"
#include "PTXRegisterInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#include "PTXGenRegisterInfo.inc"
+
+
+void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj,
+ RegScavenger *RS) const {
+ unsigned Index;
+ MachineInstr& MI = *II;
+
+ Index = 0;
+ while (!MI.getOperand(Index).isFI()) {
+ ++Index;
+ assert(Index < MI.getNumOperands() &&
+ "Instr does not have a FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(Index).getIndex();
+
+ DEBUG(dbgs() << "eliminateFrameIndex: " << MI);
+ DEBUG(dbgs() << "- SPAdj: " << SPAdj << "\n");
+ DEBUG(dbgs() << "- FrameIndex: " << FrameIndex << "\n");
+
+ // Stack slot re-use has already assigned the final frame index by this
+ // point, so the frame index can be lowered directly to an immediate.
+ MI.getOperand(Index).ChangeToImmediate(FrameIndex);
+}
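The operand scan above can also be written as a bounded loop; a minimal equivalent sketch (the helper name is hypothetical):

    static unsigned findFrameIndexOperand(const MachineInstr &MI) {
      // Return the index of the first frame-index operand.
      for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i)
        if (MI.getOperand(i).isFI())
          return i;
      llvm_unreachable("Instr does not have a FrameIndex operand!");
    }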
diff --git a/lib/Target/PTX/PTXRegisterInfo.h b/lib/Target/PTX/PTXRegisterInfo.h
index dc56352..223e965 100644
--- a/lib/Target/PTX/PTXRegisterInfo.h
+++ b/lib/Target/PTX/PTXRegisterInfo.h
@@ -38,11 +38,9 @@ struct PTXRegisterInfo : public PTXGenRegisterInfo {
return Reserved; // reserve no regs
}
- virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ virtual void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj,
- RegScavenger *RS = NULL) const {
- llvm_unreachable("PTX does not support general function call");
- }
+ RegScavenger *RS = NULL) const;
virtual unsigned getFrameRegister(const MachineFunction &MF) const {
llvm_unreachable("PTX does not have a frame register");
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 511bb22..2176c02 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -610,6 +610,9 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
unsigned Imm;
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = (PtrVT == MVT::i64);
+
if (isInt32Immediate(N->getOperand(1), Imm)) {
// We can codegen setcc op, imm very efficiently compared to a brcond.
// Check for those cases here.
@@ -624,6 +627,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
}
case ISD::SETNE: {
+ if (isPPC64) break;
SDValue AD =
SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
Op, getI32Imm(~0U)), 0);
@@ -647,6 +651,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
switch (CC) {
default: break;
case ISD::SETEQ:
+ if (isPPC64) break;
Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
Op, getI32Imm(1)), 0);
return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
@@ -655,6 +660,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
getI32Imm(0)), 0),
Op.getValue(1));
case ISD::SETNE: {
+ if (isPPC64) break;
Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
Op, getI32Imm(~0U));
@@ -996,22 +1002,25 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
}
case ISD::SELECT_CC: {
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+ EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = (PtrVT == MVT::i64);
// Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
- if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
- if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
- if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
- if (N1C->isNullValue() && N3C->isNullValue() &&
- N2C->getZExtValue() == 1ULL && CC == ISD::SETNE &&
- // FIXME: Implement this optzn for PPC64.
- N->getValueType(0) == MVT::i32) {
- SDNode *Tmp =
- CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
- N->getOperand(0), getI32Imm(~0U));
- return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32,
- SDValue(Tmp, 0), N->getOperand(0),
- SDValue(Tmp, 1));
- }
+ if (!isPPC64)
+ if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
+ if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
+ if (N1C->isNullValue() && N3C->isNullValue() &&
+ N2C->getZExtValue() == 1ULL && CC == ISD::SETNE &&
+ // FIXME: Implement this optzn for PPC64.
+ N->getValueType(0) == MVT::i32) {
+ SDNode *Tmp =
+ CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
+ N->getOperand(0), getI32Imm(~0U));
+ return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32,
+ SDValue(Tmp, 0), N->getOperand(0),
+ SDValue(Tmp, 1));
+ }
SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
unsigned BROpc = getPredicateForSetCC(CC);
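For readers unfamiliar with the ADDIC/SUBFE idiom guarded above: addic t, x, -1 produces t = x - 1 and sets the carry exactly when x != 0; subfe then computes x - t - 1 + carry, which is 1 when x is non-zero and 0 otherwise. A worked sketch in plain C++ (illustrative only):

    #include <cstdint>

    uint32_t isNonZeroViaCarry(uint32_t x) {
      uint32_t t = x + 0xFFFFFFFFu; // addic: t = x - 1 (mod 2^32)
      uint32_t ca = t < x;          // carry out of the add: 1 iff x != 0
      return x - t - 1u + ca;       // subfe: yields 1 when x != 0, else 0
    }

The trick as written is 32-bit, which is presumably why these hunks disable the transform on PPC64 (see the FIXME above).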
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 55c15ec..c9b490b 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1870,7 +1870,11 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
InVals.push_back(FIN);
if (ObjSize==1 || ObjSize==2) {
if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+ unsigned VReg;
+ if (isPPC64)
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ else
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
MachinePointerInfo(),
@@ -1889,7 +1893,11 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
// to memory. ArgVal will be address of the beginning of
// the object.
if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+ unsigned VReg;
+ if (isPPC64)
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ else
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
@@ -4675,7 +4683,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
.addReg(TmpReg).addReg(MaskReg);
BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
.addReg(Tmp3Reg).addReg(Tmp2Reg);
- BuildMI(BB, dl, TII->get(PPC::STWCX))
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
.addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 3374e9b..fd62a88 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -504,6 +504,7 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC;
unsigned Reg = findScratchRegister(II, RS, RC, SPAdj);
unsigned SrcReg = MI.getOperand(0).getReg();
+ bool LP64 = Subtarget.isPPC64();
// We need to store the CR in the low 4 bits of the saved value. First, issue
// an MFCRpseud to save all of the CRBits and, if needed, kill the SrcReg.
@@ -520,7 +521,7 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
.addImm(0)
.addImm(31);
- addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::STW))
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::STW8 : PPC::STW))
.addReg(Reg, getKillRegState(MI.getOperand(1).getImm())),
FrameIndex);
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index 3343384..130a553 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -43,6 +43,7 @@ TargetLoweringObjectFile::TargetLoweringObjectFile() : Ctx(0) {
StaticCtorSection = 0;
StaticDtorSection = 0;
LSDASection = 0;
+ CompactUnwindSection = 0;
CommDirectiveSupportsAlignment = true;
DwarfAbbrevSection = 0;
@@ -60,13 +61,14 @@ TargetLoweringObjectFile::TargetLoweringObjectFile() : Ctx(0) {
IsFunctionEHFrameSymbolPrivate = true;
SupportsWeakOmittedEHFrame = true;
+ SupportsCompactUnwindInfo = false;
}
TargetLoweringObjectFile::~TargetLoweringObjectFile() {
}
static bool isSuitableForBSS(const GlobalVariable *GV) {
- Constant *C = GV->getInitializer();
+ const Constant *C = GV->getInitializer();
// Must have zero initializer.
if (!C->isNullValue())
@@ -168,7 +170,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
return SectionKind::getBSS();
}
- Constant *C = GVar->getInitializer();
+ const Constant *C = GVar->getInitializer();
// If the global is marked constant, we can put it into a mergable section,
// a mergable string section, or general .data if it contains relocations.
diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp
index e36e136..bae3343 100644
--- a/lib/Target/TargetRegisterInfo.cpp
+++ b/lib/Target/TargetRegisterInfo.cpp
@@ -79,9 +79,9 @@ TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const {
/// registers for the specific register class.
static void getAllocatableSetForRC(const MachineFunction &MF,
const TargetRegisterClass *RC, BitVector &R){
- for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
- E = RC->allocation_order_end(MF); I != E; ++I)
- R.set(*I);
+ ArrayRef<unsigned> Order = RC->getRawAllocationOrder(MF);
+ for (unsigned i = 0; i != Order.size(); ++i)
+ R.set(Order[i]);
}
BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF,
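For context, a hedged caller-side sketch of the getAllocatableSet wrapper shown above (the TRI, MF, and RC variables are assumptions):

    // Enumerate every allocatable physical register in RC for this function.
    BitVector Allocatable = TRI->getAllocatableSet(MF, RC);
    for (int Reg = Allocatable.find_first(); Reg != -1;
         Reg = Allocatable.find_next(Reg))
      dbgs() << TRI->getName(Reg) << "\n";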
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 1cdf2b6..6cd03d0 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -12186,8 +12186,8 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
-static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, const X86TargetLowering *XTLI) {
- DebugLoc dl = N->getDebugLoc();
+static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
+ const X86TargetLowering *XTLI) {
SDValue Op0 = N->getOperand(0);
// Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have
// a 32-bit target where SSE doesn't support i64->FP operations.
@@ -12198,7 +12198,8 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, const X86T
ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() &&
!XTLI->getSubtarget()->is64Bit() &&
!DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
- SDValue FILDChain = XTLI->BuildFILD(SDValue(N, 0), Ld->getValueType(0), Ld->getChain(), Op0, DAG);
+ SDValue FILDChain = XTLI->BuildFILD(SDValue(N, 0), Ld->getValueType(0),
+ Ld->getChain(), Op0, DAG);
DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), FILDChain.getValue(1));
return FILDChain;
}
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index b3237d5..aebf8dc 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -2082,7 +2082,8 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
const MachineFunction &MF = *MBB.getParent();
assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() &&
"Stack slot too small for store");
- bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF);
+ bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= 16) ||
+ RI.canRealignStack(MF);
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
DebugLoc DL = MBB.findDebugLoc(MI);
addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx)
@@ -2114,7 +2115,8 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
const MachineFunction &MF = *MBB.getParent();
- bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF);
+ bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= 16) ||
+ RI.canRealignStack(MF);
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
DebugLoc DL = MBB.findDebugLoc(MI);
addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx);
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 7774057..8377c3a 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -2968,6 +2968,22 @@ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
(MOVZDI2PDIrm addr:$src)>;
}
+// These are the correct encodings of the instructions so that we know how to
+// read correct assembly, even though we continue to emit the wrong ones for
+// compatibility with Darwin's buggy assembler.
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+ (MOV64toPQIrr VR128:$dst, GR64:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+ (MOV64toSDrr FR64:$dst, GR64:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+ (MOVPQIto64rr GR64:$dst, VR128:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+ (MOVSDto64rr GR64:$dst, FR64:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+ (VMOVZQI2PQIrr VR128:$dst, GR64:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+ (MOVZQI2PQIrr VR128:$dst, GR64:$src), 0>;
+
//===---------------------------------------------------------------------===//
// SSE2 - Move Quadword
//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 1ad6203..fa3e3f8 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -60,7 +60,6 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
Is64Bit = Subtarget->is64Bit();
IsWin64 = Subtarget->isTargetWin64();
- StackAlign = TM.getFrameLowering()->getStackAlignment();
if (Is64Bit) {
SlotSize = 8;
@@ -517,13 +516,20 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// Reserve the registers that only exist in 64-bit mode.
if (!Is64Bit) {
+ // These 8-bit registers are part of the x86-64 extension even though
+ // their super-registers are the old 32-bit registers.
+ Reserved.set(X86::SIL);
+ Reserved.set(X86::DIL);
+ Reserved.set(X86::BPL);
+ Reserved.set(X86::SPL);
+
for (unsigned n = 0; n != 8; ++n) {
+ // R8, R9, ...
const unsigned GPR64[] = {
X86::R8, X86::R9, X86::R10, X86::R11,
X86::R12, X86::R13, X86::R14, X86::R15
};
- for (const unsigned *AI = getOverlaps(GPR64[n]); unsigned Reg = *AI;
- ++AI)
+ for (const unsigned *AI = getOverlaps(GPR64[n]); unsigned Reg = *AI; ++AI)
Reserved.set(Reg);
// XMM8, XMM9, ...
@@ -550,6 +556,7 @@ bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *F = MF.getFunction();
+ unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
F->hasFnAttr(Attribute::StackAlignment));
@@ -625,6 +632,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// We need to keep the stack aligned properly. To do this, we round the
// amount of space needed for the outgoing arguments up to the next
// alignment boundary.
+ unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
MachineInstr *New = 0;
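The expression above is the standard round-up-to-a-multiple idiom. A standalone sketch with concrete numbers (the helper name is hypothetical):

    // With a 16-byte stack alignment: 20 -> 32, 32 -> 32, 0 -> 0.
    static unsigned roundUpToStackAlign(unsigned Amount, unsigned StackAlign) {
      return (Amount + StackAlign - 1) / StackAlign * StackAlign;
    }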
@@ -920,10 +928,10 @@ namespace {
virtual bool runOnMachineFunction(MachineFunction &MF) {
const X86TargetMachine *TM =
static_cast<const X86TargetMachine *>(&MF.getTarget());
- const X86RegisterInfo *X86RI = TM->getRegisterInfo();
+ const TargetFrameLowering *TFI = TM->getFrameLowering();
MachineRegisterInfo &RI = MF.getRegInfo();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
- unsigned StackAlignment = X86RI->getStackAlignment();
+ unsigned StackAlignment = TFI->getStackAlignment();
// Be over-conservative: scan over all vreg defs and find whether vector
// registers are used. If yes, there is a possibility that vector register
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index dd3d3dc..9fd6ed5 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -56,10 +56,6 @@ private:
///
unsigned SlotSize;
- /// StackAlign - Default stack alignment.
- ///
- unsigned StackAlign;
-
/// StackPtr - X86 physical register used as stack ptr.
///
unsigned StackPtr;
@@ -75,8 +71,6 @@ public:
/// register identifier.
static unsigned getX86RegNum(unsigned RegNo);
- unsigned getStackAlignment() const { return StackAlign; }
-
/// getDwarfRegNum - allows modification of X86GenRegisterInfo::getDwarfRegNum
/// (created by TableGen) for target dependencies.
int getDwarfRegNum(unsigned RegNum, bool isEH) const;
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 590b38b..14d6d64 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -281,44 +281,9 @@ let Namespace = "X86" in {
def GR8 : RegisterClass<"X86", [i8], 8,
(add AL, CL, DL, AH, CH, DH, BL, BH, SIL, DIL, BPL, SPL,
R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B)> {
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- static const unsigned X86_GR8_AO_64[] = {
- X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL,
- X86::R8B, X86::R9B, X86::R10B, X86::R11B,
- X86::BL, X86::R14B, X86::R15B, X86::R12B, X86::R13B, X86::BPL
- };
-
- GR8Class::iterator
- GR8Class::allocation_order_begin(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- if (Subtarget.is64Bit())
- return X86_GR8_AO_64;
- else
- return begin();
- }
-
- GR8Class::iterator
- GR8Class::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
- // Does the function dedicate RBP / EBP to being a frame ptr?
- if (!Subtarget.is64Bit())
- // In 32-mode, none of the 8-bit registers aliases EBP or ESP.
- return begin() + 8;
- else if (TFI->hasFP(MF) || MFI->getReserveFP())
- // If so, don't allocate SPL or BPL.
- return array_endof(X86_GR8_AO_64) - 1;
- else
- // If not, just don't allocate SPL.
- return array_endof(X86_GR8_AO_64);
- }
+ let AltOrders = [(sub GR8, AH, BH, CH, DH)];
+ let AltOrderSelect = [{
+ return MF.getTarget().getSubtarget<X86Subtarget>().is64Bit();
}];
}
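Roughly, TableGen expands AltOrders/AltOrderSelect into a getRawAllocationOrder override: the AltOrderSelect body returns an order index, where 0 is the class's default order and 1 the first alternative. A simplified hand-written sketch of the generated shape (an assumption, not the actual emitted code):

    ArrayRef<unsigned> GR8Class::getRawAllocationOrder(const MachineFunction &MF) const {
      // Alternative order: GR8 minus the H registers (AH, BH, CH, DH).
      static const unsigned AltOrder1[] = {
        X86::AL, X86::CL, X86::DL, X86::BL, X86::SIL, X86::DIL, X86::BPL,
        X86::SPL, X86::R8B, X86::R9B, X86::R10B, X86::R11B, X86::R14B,
        X86::R15B, X86::R12B, X86::R13B
      };
      if (MF.getTarget().getSubtarget<X86Subtarget>().is64Bit())
        return ArrayRef<unsigned>(AltOrder1);
      return ArrayRef<unsigned>(begin(), getNumRegs());
    }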
@@ -394,35 +359,9 @@ def GR64_TCW64 : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX,
// GR8_NOREX - GR8 registers which do not require a REX prefix.
def GR8_NOREX : RegisterClass<"X86", [i8], 8,
(add AL, CL, DL, AH, CH, DH, BL, BH)> {
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- // In 64-bit mode, it's not safe to blindly allocate H registers.
- static const unsigned X86_GR8_NOREX_AO_64[] = {
- X86::AL, X86::CL, X86::DL, X86::BL
- };
-
- GR8_NOREXClass::iterator
- GR8_NOREXClass::allocation_order_begin(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- if (Subtarget.is64Bit())
- return X86_GR8_NOREX_AO_64;
- else
- return begin();
- }
-
- GR8_NOREXClass::iterator
- GR8_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- if (Subtarget.is64Bit())
- return array_endof(X86_GR8_NOREX_AO_64);
- else
- return end();
- }
+ let AltOrders = [(sub GR8_NOREX, AH, BH, CH, DH)];
+ let AltOrderSelect = [{
+ return MF.getTarget().getSubtarget<X86Subtarget>().is64Bit();
}];
}
// GR16_NOREX - GR16 registers which do not require a REX prefix.
diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp
index 8f06dd3..6df8ce0 100644
--- a/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -114,7 +114,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
MCSymbol *GVSym = Mang->getSymbol(GV);
- Constant *C = GV->getInitializer();
+ const Constant *C = GV->getInitializer();
unsigned Align = (unsigned)TD->getPreferredTypeAlignmentShift(C->getType());
// Mark the start of the global