Diffstat (limited to 'lib/Target/R600/SIISelLowering.cpp')
 lib/Target/R600/SIISelLowering.cpp | 233
 1 file changed, 172 insertions(+), 61 deletions(-)
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index d5d2b68..0b55411 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -14,6 +14,7 @@
#include "SIISelLowering.h"
#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
#include "AMDILIntrinsicInfo.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
@@ -24,13 +25,10 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Function.h"
-const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
-
using namespace llvm;
SITargetLowering::SITargetLowering(TargetMachine &TM) :
AMDGPUTargetLowering(TM) {
-
addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass);
addRegisterClass(MVT::i64, &AMDGPU::VSrc_64RegClass);
@@ -76,7 +74,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16f32, Expand);
- setOperationAction(ISD::ADD, MVT::i64, Legal);
setOperationAction(ISD::ADD, MVT::i32, Legal);
setOperationAction(ISD::ADDC, MVT::i32, Legal);
setOperationAction(ISD::ADDE, MVT::i32, Legal);
@@ -97,13 +94,18 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::LOAD, MVT::i64, Custom);
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v8i32, Custom);
+ setOperationAction(ISD::STORE, MVT::i1, Custom);
setOperationAction(ISD::STORE, MVT::i32, Custom);
setOperationAction(ISD::STORE, MVT::i64, Custom);
setOperationAction(ISD::STORE, MVT::i128, Custom);
setOperationAction(ISD::STORE, MVT::v2i32, Custom);
setOperationAction(ISD::STORE, MVT::v4i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::i64, Custom);
+ setOperationAction(ISD::SELECT, MVT::f64, Promote);
+ AddPromotedToType(ISD::SELECT, MVT::f64, MVT::i64);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
@@ -125,11 +127,20 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::i32, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, Expand);
setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::i32, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setTruncStoreAction(MVT::i32, MVT::i8, Custom);
+ setTruncStoreAction(MVT::i32, MVT::i16, Custom);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::i64, MVT::i32, Expand);
setTruncStoreAction(MVT::i128, MVT::i64, Expand);
@@ -137,10 +148,50 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
- setOperationAction(ISD::FrameIndex, MVT::i64, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+ setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
- setTargetDAGCombine(ISD::SELECT_CC);
+ // We only support LOAD/STORE and vector manipulation ops for vectors
+ // with > 4 elements.
+ MVT VecTypes[] = {
+ MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32
+ };
+
+ const size_t NumVecTypes = array_lengthof(VecTypes);
+ for (unsigned Type = 0; Type < NumVecTypes; ++Type) {
+ for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
+ switch (Op) {
+ case ISD::LOAD:
+ case ISD::STORE:
+ case ISD::BUILD_VECTOR:
+ case ISD::BITCAST:
+ case ISD::EXTRACT_VECTOR_ELT:
+ case ISD::INSERT_VECTOR_ELT:
+ case ISD::CONCAT_VECTORS:
+ case ISD::INSERT_SUBVECTOR:
+ case ISD::EXTRACT_SUBVECTOR:
+ break;
+ default:
+ setOperationAction(Op, VecTypes[Type], Expand);
+ break;
+ }
+ }
+ }
+
+ for (int I = MVT::v1f64; I <= MVT::v8f64; ++I) {
+ MVT::SimpleValueType VT = static_cast<MVT::SimpleValueType>(I);
+ setOperationAction(ISD::FTRUNC, VT, Expand);
+ setOperationAction(ISD::FCEIL, VT, Expand);
+ setOperationAction(ISD::FFLOOR, VT, Expand);
+ }
+ if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
+ setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f64, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
+ }
+
+ setTargetDAGCombine(ISD::SELECT_CC);
setTargetDAGCombine(ISD::SETCC);
setSchedulingPreference(Sched::RegPressure);
@@ -151,6 +202,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
//===----------------------------------------------------------------------===//
bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
+ unsigned AddrSpace,
bool *IsFast) const {
// XXX: This depends on the address space, and we may also want to revisit
// the alignment values we specify in the DataLayout.
@@ -159,8 +211,15 @@ bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
return VT.bitsGT(MVT::i32);
}
-bool SITargetLowering::shouldSplitVectorElementType(EVT VT) const {
- return VT.bitsLE(MVT::i16);
+bool SITargetLowering::shouldSplitVectorType(EVT VT) const {
+ return VT.getScalarType().bitsLE(MVT::i16);
+}
+
+bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
+ Type *Ty) const {
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+ return TII->isInlineConstant(Imm);
}
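
The new hook defers to SIInstrInfo::isInlineConstant, so constant-pool loads are folded to immediates only when the immediate is free to encode. As a rough illustration of the kind of test that implies (the real rule lives in SIInstrInfo and also admits certain floating-point bit patterns; this integer-only version is an assumption):

    #include <cstdint>

    // SI can encode small integers directly in the instruction word as
    // inline constants, so loading such values from memory is a
    // pessimization.
    bool looksLikeInlineConstant(int64_t Imm) {
      return Imm >= -16 && Imm <= 64;  // assumed integer inline-constant range
    }
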
SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
@@ -346,16 +405,16 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
unsigned SuperReg = MI->getOperand(0).getReg();
- unsigned SubRegLo = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
- unsigned SubRegHi = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
- unsigned SubRegHiHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
- unsigned SubRegHiLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ unsigned SubRegLo = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
+ unsigned SubRegHi = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
+ unsigned SubRegHiHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned SubRegHiLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B64), SubRegLo)
.addOperand(MI->getOperand(1));
BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), SubRegHiLo)
.addImm(0);
BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), SubRegHiHi)
- .addImm(RSRC_DATA_FORMAT >> 32);
+ .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::REG_SEQUENCE), SubRegHi)
.addReg(SubRegHiLo)
.addImm(AMDGPU::sub0)
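
For reference, the register sequence assembled here appears to build a 128-bit resource word: the 64-bit pointer operand in the low doubleword, zero in the next dword, and the high half of AMDGPU::RSRC_DATA_FORMAT (0xf00000000000, the value of the file-local constant this patch deletes) in the top dword. A plain-C++ sketch under that reading (names are illustrative):

    #include <cstdint>

    struct Rsrc128 { uint64_t Lo, Hi; };  // sub0_sub1 : sub2_sub3

    Rsrc128 buildAddr64Rsrc(uint64_t Ptr) {
      const uint64_t RsrcDataFormat = 0xf00000000000ULL;
      Rsrc128 R;
      R.Lo = Ptr;                           // S_MOV_B64 into SubRegLo
      R.Hi = (RsrcDataFormat >> 32) << 32;  // SubRegHiHi:SubRegHiLo = fmt:0
      return R;
    }
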
@@ -439,13 +498,14 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
switch (Op.getOpcode()) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
- case ISD::ADD: return LowerADD(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::LOAD: {
LoadSDNode *Load = dyn_cast<LoadSDNode>(Op);
- if ((Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
- Load->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
- Op.getValueType().isVector()) {
+ if (Op.getValueType().isVector() &&
+ (Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
+ Load->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
+ (Load->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
+ Op.getValueType().getVectorNumElements() > 4))) {
SDValue MergedValues[2] = {
SplitVectorLoad(Op, DAG),
Load->getChain()
@@ -456,6 +516,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
}
}
+ case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
@@ -576,33 +637,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
-SDValue SITargetLowering::LowerADD(SDValue Op,
- SelectionDAG &DAG) const {
- if (Op.getValueType() != MVT::i64)
- return SDValue();
-
- SDLoc DL(Op);
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
-
- SDValue Zero = DAG.getConstant(0, MVT::i32);
- SDValue One = DAG.getConstant(1, MVT::i32);
-
- SDValue Lo0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS, Zero);
- SDValue Hi0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS, One);
-
- SDValue Lo1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, RHS, Zero);
- SDValue Hi1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, RHS, One);
-
- SDVTList VTList = DAG.getVTList(MVT::i32, MVT::Glue);
-
- SDValue AddLo = DAG.getNode(ISD::ADDC, DL, VTList, Lo0, Lo1);
- SDValue Carry = AddLo.getValue(1);
- SDValue AddHi = DAG.getNode(ISD::ADDE, DL, VTList, Hi0, Hi1, Carry);
-
- return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, AddLo, AddHi.getValue(0));
-}
-
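
The deleted LowerADD built an ADDC/ADDE carry chain by hand; with the i64 ADD action line also removed above, 64-bit adds are presumably matched by instruction patterns or expanded by common legalization instead. For reference, the computation the removed DAG code expressed is the usual split 64-bit add:

    #include <cstdint>

    // The two-step carry chain the removed code emitted as DAG nodes.
    uint64_t Add64(uint64_t A, uint64_t B) {
      uint32_t Lo = uint32_t(A) + uint32_t(B);    // ADDC on the low halves
      uint32_t Carry = Lo < uint32_t(A);          // carry-out of the low add
      uint32_t Hi = uint32_t(A >> 32) + uint32_t(B >> 32) + Carry;  // ADDE
      return (uint64_t(Hi) << 32) | Lo;           // BUILD_PAIR
    }
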
/// \brief Helper function for LowerBRCOND
static SDNode *findUser(SDValue Value, unsigned Opcode) {
@@ -700,23 +734,42 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
LoadSDNode *Load = cast<LoadSDNode>(Op);
+ SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
+ SDValue MergedValues[2];
+ MergedValues[1] = Load->getChain();
+ if (Ret.getNode()) {
+ MergedValues[0] = Ret;
+ return DAG.getMergeValues(MergedValues, 2, DL);
+ }
- if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
+ if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
return SDValue();
+ }
+
+ EVT MemVT = Load->getMemoryVT();
+
+ assert(!MemVT.isVector() && "Private loads should be scalarized");
+ assert(!MemVT.isFloatingPoint() && "FP loads should be promoted to int");
- SDValue TruncPtr = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
- Load->getBasePtr(), DAG.getConstant(0, MVT::i32));
- SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, TruncPtr,
+ SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(),
DAG.getConstant(2, MVT::i32));
+ Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
+ Load->getChain(), Ptr,
+ DAG.getTargetConstant(0, MVT::i32),
+ Op.getOperand(2));
+ if (MemVT.getSizeInBits() == 64) {
+ SDValue IncPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
+ DAG.getConstant(1, MVT::i32));
+
+ SDValue LoadUpper = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
+ Load->getChain(), IncPtr,
+ DAG.getTargetConstant(0, MVT::i32),
+ Op.getOperand(2));
+
+ Ret = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ret, LoadUpper);
+ }
- SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
- Load->getChain(), Ptr,
- DAG.getTargetConstant(0, MVT::i32),
- Op.getOperand(2));
- SDValue MergedValues[2] = {
- Ret,
- Load->getChain()
- };
+ MergedValues[0] = Ret;
return DAG.getMergeValues(MergedValues, 2, DL);
}
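
Because the base pointer has already been shifted right by two, it is a dword index into private memory, so the upper half of a 64-bit load sits at the next index. Sketched in plain C++ (illustrative, not the DAG form):

    #include <cstdint>

    uint64_t LoadPrivate64(const uint32_t *PrivateMem, uint32_t ByteAddr) {
      uint32_t Ptr = ByteAddr >> 2;        // SRL by 2: dword index
      uint32_t Lo  = PrivateMem[Ptr];      // first REGISTER_LOAD
      uint32_t Hi  = PrivateMem[Ptr + 1];  // second REGISTER_LOAD via IncPtr
      return (uint64_t(Hi) << 32) | Lo;    // BUILD_PAIR to i64
    }
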
@@ -744,6 +797,33 @@ SDValue SITargetLowering::LowerSampleIntrinsic(unsigned Opcode,
Op.getOperand(4));
}
+SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
+ if (Op.getValueType() != MVT::i64)
+ return SDValue();
+
+ SDLoc DL(Op);
+ SDValue Cond = Op.getOperand(0);
+
+ SDValue Zero = DAG.getConstant(0, MVT::i32);
+ SDValue One = DAG.getConstant(1, MVT::i32);
+
+ SDValue LHS = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Op.getOperand(1));
+ SDValue RHS = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Op.getOperand(2));
+
+ SDValue Lo0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, LHS, Zero);
+ SDValue Lo1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, RHS, Zero);
+
+ SDValue Lo = DAG.getSelect(DL, MVT::i32, Cond, Lo0, Lo1);
+
+ SDValue Hi0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, LHS, One);
+ SDValue Hi1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, RHS, One);
+
+ SDValue Hi = DAG.getSelect(DL, MVT::i32, Cond, Hi0, Hi1);
+
+ SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2i32, Lo, Hi);
+ return DAG.getNode(ISD::BITCAST, DL, MVT::i64, Res);
+}
+
SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
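
LowerSELECT never selects a 64-bit value directly: it bitcasts both operands to v2i32 and emits two 32-bit selects sharing one condition, which is also the path f64 selects take after the Promote set up earlier in this patch. The equivalent scalar computation, as a plain-C++ reference:

    #include <cstdint>

    uint64_t Select64(bool Cond, uint64_t A, uint64_t B) {
      uint32_t LoA = uint32_t(A), HiA = uint32_t(A >> 32);  // elements 0, 1
      uint32_t LoB = uint32_t(B), HiB = uint32_t(B >> 32);
      uint32_t Lo = Cond ? LoA : LoB;    // first i32 select
      uint32_t Hi = Cond ? HiA : HiB;    // second i32 select, same condition
      return (uint64_t(Hi) << 32) | Lo;  // BUILD_VECTOR + BITCAST to i64
    }
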
@@ -790,16 +870,47 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
if (VT.isVector() && VT.getVectorNumElements() >= 8)
return SplitVectorStore(Op, DAG);
+ if (VT == MVT::i1)
+ return DAG.getTruncStore(Store->getChain(), DL,
+ DAG.getSExtOrTrunc(Store->getValue(), DL, MVT::i32),
+ Store->getBasePtr(), MVT::i1, Store->getMemOperand());
+
if (Store->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
return SDValue();
- SDValue TruncPtr = DAG.getZExtOrTrunc(Store->getBasePtr(), DL, MVT::i32);
- SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, TruncPtr,
+ SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Store->getBasePtr(),
DAG.getConstant(2, MVT::i32));
SDValue Chain = Store->getChain();
SmallVector<SDValue, 8> Values;
- if (VT == MVT::i64) {
+ if (Store->isTruncatingStore()) {
+ unsigned Mask = 0;
+ if (Store->getMemoryVT() == MVT::i8) {
+ Mask = 0xff;
+ } else if (Store->getMemoryVT() == MVT::i16) {
+ Mask = 0xffff;
+ }
+ SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
+ Chain, Store->getBasePtr(),
+ DAG.getConstant(0, MVT::i32));
+ SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, Store->getBasePtr(),
+ DAG.getConstant(0x3, MVT::i32));
+ SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
+ DAG.getConstant(3, MVT::i32));
+ SDValue MaskedValue = DAG.getNode(ISD::AND, DL, MVT::i32, Store->getValue(),
+ DAG.getConstant(Mask, MVT::i32));
+ SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
+ MaskedValue, ShiftAmt);
+ SDValue RotrAmt = DAG.getNode(ISD::SUB, DL, MVT::i32,
+ DAG.getConstant(32, MVT::i32), ShiftAmt);
+ // ROTR(~Mask, 32 - ShiftAmt) == ~(Mask << ShiftAmt): clear only the
+ // bits being stored and keep the rest of the destination dword.
+ SDValue DstMask = DAG.getNode(ISD::ROTR, DL, MVT::i32,
+ DAG.getConstant(~Mask, MVT::i32),
+ RotrAmt);
+ Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
+ Dst = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
+
+ Values.push_back(Dst);
+ } else if (VT == MVT::i64) {
for (unsigned i = 0; i < 2; ++i) {
Values.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
Store->getValue(), DAG.getConstant(i, MVT::i32)));
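
The truncating-store path is a read-modify-write on the dword containing the byte or halfword; ROTR(~Mask, 32 - ShiftAmt) is simply ~(Mask << ShiftAmt) expressed with a rotate, since ShiftAmt never exceeds 24. A plain-C++ sketch of the value it computes (illustrative names; the i16 case uses mask 0xffff):

    #include <cstdint>

    uint32_t StoreByteInDword(uint32_t OldDword, uint32_t Addr,
                              uint32_t Value) {
      const uint32_t Mask = 0xff;               // MVT::i8 memory type
      uint32_t ByteIdx  = Addr & 0x3;           // byte offset in the dword
      uint32_t ShiftAmt = ByteIdx << 3;         // ByteIdx * 8 bit positions
      uint32_t Shifted  = (Value & Mask) << ShiftAmt;
      uint32_t DstMask  = ~(Mask << ShiftAmt);  // ROTR(~Mask, 32 - ShiftAmt)
      return (OldDword & DstMask) | Shifted;    // merge and write back
    }
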
@@ -1047,7 +1158,7 @@ void SITargetLowering::ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand,
else
return;
- // Nothing todo if they fit naturaly
+ // Nothing to do if they fit naturally
if (fitsRegClass(DAG, Operand, RegClass))
return;