Diffstat (limited to 'lib/Target/R600/R600ISelLowering.cpp')
-rw-r--r--  lib/Target/R600/R600ISelLowering.cpp | 566 +++++++++++++++++++++++++++++++--------
 1 file changed, 463 insertions(+), 103 deletions(-)
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index a66baca..9cedadb 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -26,8 +26,7 @@
using namespace llvm;
R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
- AMDGPUTargetLowering(TM),
- TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo())) {
+ AMDGPUTargetLowering(TM) {
addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
@@ -43,11 +42,25 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
setOperationAction(ISD::AND, MVT::v4i32, Expand);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
+ setOperationAction(ISD::MUL, MVT::v2i32, Expand);
+ setOperationAction(ISD::MUL, MVT::v4i32, Expand);
+ setOperationAction(ISD::OR, MVT::v4i32, Expand);
+ setOperationAction(ISD::OR, MVT::v2i32, Expand);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
+ setOperationAction(ISD::SHL, MVT::v4i32, Expand);
+ setOperationAction(ISD::SHL, MVT::v2i32, Expand);
+ setOperationAction(ISD::SRL, MVT::v4i32, Expand);
+ setOperationAction(ISD::SRL, MVT::v2i32, Expand);
+ setOperationAction(ISD::SRA, MVT::v4i32, Expand);
+ setOperationAction(ISD::SRA, MVT::v2i32, Expand);
+ setOperationAction(ISD::SUB, MVT::v4i32, Expand);
+ setOperationAction(ISD::SUB, MVT::v2i32, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
setOperationAction(ISD::UDIV, MVT::v4i32, Expand);
setOperationAction(ISD::UREM, MVT::v4i32, Expand);
setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
+ setOperationAction(ISD::XOR, MVT::v4i32, Expand);
+ setOperationAction(ISD::XOR, MVT::v2i32, Expand);
setOperationAction(ISD::BR_CC, MVT::i32, Expand);
setOperationAction(ISD::BR_CC, MVT::f32, Expand);
@@ -58,8 +71,6 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
- setOperationAction(ISD::ROTL, MVT::i32, Custom);
-
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
@@ -70,6 +81,9 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
setOperationAction(ISD::SELECT, MVT::i32, Custom);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
+ setOperationAction(ISD::VSELECT, MVT::v4i32, Expand);
+ setOperationAction(ISD::VSELECT, MVT::v2i32, Expand);
+
// Legalize loads and stores to the private address space.
setOperationAction(ISD::LOAD, MVT::i32, Custom);
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
@@ -102,6 +116,8 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
MachineFunction * MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
MachineBasicBlock::iterator I = *MI;
+ const R600InstrInfo *TII =
+ static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());
switch (MI->getOpcode()) {
default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
@@ -171,23 +187,99 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
case AMDGPU::TXD: {
unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
-
+ MachineOperand &RID = MI->getOperand(4);
+ MachineOperand &SID = MI->getOperand(5);
+ unsigned TextureId = MI->getOperand(6).getImm();
+ unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
+ unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
+
+ switch (TextureId) {
+ case 5: // Rect
+ CTX = CTY = 0;
+ break;
+ case 6: // Shadow1D
+ SrcW = SrcZ;
+ break;
+ case 7: // Shadow2D
+ SrcW = SrcZ;
+ break;
+ case 8: // ShadowRect
+ CTX = CTY = 0;
+ SrcW = SrcZ;
+ break;
+ case 9: // 1DArray
+ SrcZ = SrcY;
+ CTZ = 0;
+ break;
+ case 10: // 2DArray
+ CTZ = 0;
+ break;
+ case 11: // Shadow1DArray
+ SrcZ = SrcY;
+ CTZ = 0;
+ break;
+ case 12: // Shadow2DArray
+ CTZ = 0;
+ break;
+ }
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
.addOperand(MI->getOperand(3))
- .addOperand(MI->getOperand(4))
- .addOperand(MI->getOperand(5))
- .addOperand(MI->getOperand(6));
+ .addImm(SrcX)
+ .addImm(SrcY)
+ .addImm(SrcZ)
+ .addImm(SrcW)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(1)
+ .addImm(2)
+ .addImm(3)
+ .addOperand(RID)
+ .addOperand(SID)
+ .addImm(CTX)
+ .addImm(CTY)
+ .addImm(CTZ)
+ .addImm(CTW);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
.addOperand(MI->getOperand(2))
- .addOperand(MI->getOperand(4))
- .addOperand(MI->getOperand(5))
- .addOperand(MI->getOperand(6));
+ .addImm(SrcX)
+ .addImm(SrcY)
+ .addImm(SrcZ)
+ .addImm(SrcW)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(1)
+ .addImm(2)
+ .addImm(3)
+ .addOperand(RID)
+ .addOperand(SID)
+ .addImm(CTX)
+ .addImm(CTY)
+ .addImm(CTZ)
+ .addImm(CTW);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
- .addOperand(MI->getOperand(4))
- .addOperand(MI->getOperand(5))
- .addOperand(MI->getOperand(6))
+ .addImm(SrcX)
+ .addImm(SrcY)
+ .addImm(SrcZ)
+ .addImm(SrcW)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(1)
+ .addImm(2)
+ .addImm(3)
+ .addOperand(RID)
+ .addOperand(SID)
+ .addImm(CTX)
+ .addImm(CTY)
+ .addImm(CTZ)
+ .addImm(CTW)
.addReg(T0, RegState::Implicit)
.addReg(T1, RegState::Implicit);
break;
@@ -196,23 +288,100 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
case AMDGPU::TXD_SHADOW: {
unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
+ MachineOperand &RID = MI->getOperand(4);
+ MachineOperand &SID = MI->getOperand(5);
+ unsigned TextureId = MI->getOperand(6).getImm();
+ unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
+ unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
+
+ switch (TextureId) {
+ case 5: // Rect
+ CTX = CTY = 0;
+ break;
+ case 6: // Shadow1D
+ SrcW = SrcZ;
+ break;
+ case 7: // Shadow2D
+ SrcW = SrcZ;
+ break;
+ case 8: // ShadowRect
+ CTX = CTY = 0;
+ SrcW = SrcZ;
+ break;
+ case 9: // 1DArray
+ SrcZ = SrcY;
+ CTZ = 0;
+ break;
+ case 10: // 2DArray
+ CTZ = 0;
+ break;
+ case 11: // Shadow1DArray
+ SrcZ = SrcY;
+ CTZ = 0;
+ break;
+ case 12: // Shadow2DArray
+ CTZ = 0;
+ break;
+ }
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
.addOperand(MI->getOperand(3))
- .addOperand(MI->getOperand(4))
- .addOperand(MI->getOperand(5))
- .addOperand(MI->getOperand(6));
+ .addImm(SrcX)
+ .addImm(SrcY)
+ .addImm(SrcZ)
+ .addImm(SrcW)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(1)
+ .addImm(2)
+ .addImm(3)
+ .addOperand(RID)
+ .addOperand(SID)
+ .addImm(CTX)
+ .addImm(CTY)
+ .addImm(CTZ)
+ .addImm(CTW);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
.addOperand(MI->getOperand(2))
- .addOperand(MI->getOperand(4))
- .addOperand(MI->getOperand(5))
- .addOperand(MI->getOperand(6));
+ .addImm(SrcX)
+ .addImm(SrcY)
+ .addImm(SrcZ)
+ .addImm(SrcW)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(1)
+ .addImm(2)
+ .addImm(3)
+ .addOperand(RID)
+ .addOperand(SID)
+ .addImm(CTX)
+ .addImm(CTY)
+ .addImm(CTZ)
+ .addImm(CTW);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
- .addOperand(MI->getOperand(4))
- .addOperand(MI->getOperand(5))
- .addOperand(MI->getOperand(6))
+ .addImm(SrcX)
+ .addImm(SrcY)
+ .addImm(SrcZ)
+ .addImm(SrcW)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(1)
+ .addImm(2)
+ .addImm(3)
+ .addOperand(RID)
+ .addOperand(SID)
+ .addImm(CTX)
+ .addImm(CTY)
+ .addImm(CTZ)
+ .addImm(CTW)
.addReg(T0, RegState::Implicit)
.addReg(T1, RegState::Implicit);
break;
@@ -304,13 +473,9 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//
-using namespace llvm::Intrinsic;
-using namespace llvm::AMDGPUIntrinsic;
-
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
- case ISD::ROTL: return LowerROTL(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
@@ -327,7 +492,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
MFI->LiveOuts.push_back(Reg);
- return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
+ return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
}
case AMDGPUIntrinsic::R600_store_swizzle: {
const SDValue Args[8] = {
@@ -340,7 +505,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
DAG.getConstant(2, MVT::i32), // SWZ_Z
DAG.getConstant(3, MVT::i32) // SWZ_W
};
- return DAG.getNode(AMDGPUISD::EXPORT, Op.getDebugLoc(), Op.getValueType(),
+ return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
Args, 8);
}
@@ -354,13 +519,17 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
unsigned IntrinsicID =
cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
EVT VT = Op.getValueType();
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
switch(IntrinsicID) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
case AMDGPUIntrinsic::R600_load_input: {
int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
- return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MRI.addLiveIn(Reg);
+ return DAG.getCopyFromReg(DAG.getEntryNode(),
+ SDLoc(DAG.getEntryNode()), Reg, VT);
}
case AMDGPUIntrinsic::R600_interp_input: {
@@ -368,6 +537,9 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
MachineSDNode *interp;
if (ijb < 0) {
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const R600InstrInfo *TII =
+ static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
return DAG.getTargetExtractSubreg(
@@ -375,59 +547,153 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
DL, MVT::f32, SDValue(interp, 0));
}
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
+ unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
+ MRI.addLiveIn(RegisterI);
+ MRI.addLiveIn(RegisterJ);
+ SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
+ SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
+ SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
+ SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
+
if (slot % 4 < 2)
interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
- CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
- AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32),
- CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
- AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32));
+ RegisterJNode, RegisterINode);
else
interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
- CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
- AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32),
- CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
- AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32));
-
+ RegisterJNode, RegisterINode);
return SDValue(interp, slot % 2);
}
- case r600_read_ngroups_x:
+ case AMDGPUIntrinsic::R600_tex:
+ case AMDGPUIntrinsic::R600_texc:
+ case AMDGPUIntrinsic::R600_txl:
+ case AMDGPUIntrinsic::R600_txlc:
+ case AMDGPUIntrinsic::R600_txb:
+ case AMDGPUIntrinsic::R600_txbc:
+ case AMDGPUIntrinsic::R600_txf:
+ case AMDGPUIntrinsic::R600_txq:
+ case AMDGPUIntrinsic::R600_ddx:
+ case AMDGPUIntrinsic::R600_ddy: {
+ unsigned TextureOp;
+ switch (IntrinsicID) {
+ case AMDGPUIntrinsic::R600_tex:
+ TextureOp = 0;
+ break;
+ case AMDGPUIntrinsic::R600_texc:
+ TextureOp = 1;
+ break;
+ case AMDGPUIntrinsic::R600_txl:
+ TextureOp = 2;
+ break;
+ case AMDGPUIntrinsic::R600_txlc:
+ TextureOp = 3;
+ break;
+ case AMDGPUIntrinsic::R600_txb:
+ TextureOp = 4;
+ break;
+ case AMDGPUIntrinsic::R600_txbc:
+ TextureOp = 5;
+ break;
+ case AMDGPUIntrinsic::R600_txf:
+ TextureOp = 6;
+ break;
+ case AMDGPUIntrinsic::R600_txq:
+ TextureOp = 7;
+ break;
+ case AMDGPUIntrinsic::R600_ddx:
+ TextureOp = 8;
+ break;
+ case AMDGPUIntrinsic::R600_ddy:
+ TextureOp = 9;
+ break;
+ default:
+ llvm_unreachable("Unknow Texture Operation");
+ }
+ SDValue TexArgs[19] = {
+ DAG.getConstant(TextureOp, MVT::i32),
+ Op.getOperand(1),
+ DAG.getConstant(0, MVT::i32),
+ DAG.getConstant(1, MVT::i32),
+ DAG.getConstant(2, MVT::i32),
+ DAG.getConstant(3, MVT::i32),
+ Op.getOperand(2),
+ Op.getOperand(3),
+ Op.getOperand(4),
+ DAG.getConstant(0, MVT::i32),
+ DAG.getConstant(1, MVT::i32),
+ DAG.getConstant(2, MVT::i32),
+ DAG.getConstant(3, MVT::i32),
+ Op.getOperand(5),
+ Op.getOperand(6),
+ Op.getOperand(7),
+ Op.getOperand(8),
+ Op.getOperand(9),
+ Op.getOperand(10)
+ };
+ return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
+ }
+ case AMDGPUIntrinsic::AMDGPU_dp4: {
+ SDValue Args[8] = {
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
+ DAG.getConstant(0, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
+ DAG.getConstant(0, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
+ DAG.getConstant(1, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
+ DAG.getConstant(1, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
+ DAG.getConstant(2, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
+ DAG.getConstant(2, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
+ DAG.getConstant(3, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
+ DAG.getConstant(3, MVT::i32))
+ };
+ return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
+ }
+
+ case Intrinsic::r600_read_ngroups_x:
return LowerImplicitParameter(DAG, VT, DL, 0);
- case r600_read_ngroups_y:
+ case Intrinsic::r600_read_ngroups_y:
return LowerImplicitParameter(DAG, VT, DL, 1);
- case r600_read_ngroups_z:
+ case Intrinsic::r600_read_ngroups_z:
return LowerImplicitParameter(DAG, VT, DL, 2);
- case r600_read_global_size_x:
+ case Intrinsic::r600_read_global_size_x:
return LowerImplicitParameter(DAG, VT, DL, 3);
- case r600_read_global_size_y:
+ case Intrinsic::r600_read_global_size_y:
return LowerImplicitParameter(DAG, VT, DL, 4);
- case r600_read_global_size_z:
+ case Intrinsic::r600_read_global_size_z:
return LowerImplicitParameter(DAG, VT, DL, 5);
- case r600_read_local_size_x:
+ case Intrinsic::r600_read_local_size_x:
return LowerImplicitParameter(DAG, VT, DL, 6);
- case r600_read_local_size_y:
+ case Intrinsic::r600_read_local_size_y:
return LowerImplicitParameter(DAG, VT, DL, 7);
- case r600_read_local_size_z:
+ case Intrinsic::r600_read_local_size_z:
return LowerImplicitParameter(DAG, VT, DL, 8);
- case r600_read_tgid_x:
+ case Intrinsic::r600_read_tgid_x:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T1_X, VT);
- case r600_read_tgid_y:
+ case Intrinsic::r600_read_tgid_y:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T1_Y, VT);
- case r600_read_tgid_z:
+ case Intrinsic::r600_read_tgid_z:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T1_Z, VT);
- case r600_read_tidig_x:
+ case Intrinsic::r600_read_tidig_x:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T0_X, VT);
- case r600_read_tidig_y:
+ case Intrinsic::r600_read_tidig_y:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T0_Y, VT);
- case r600_read_tidig_z:
+ case Intrinsic::r600_read_tidig_z:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T0_Z, VT);
}
@@ -464,7 +730,7 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(
ISD::SETCC,
- Op.getDebugLoc(),
+ SDLoc(Op),
MVT::i1,
Op, DAG.getConstantFP(0.0f, MVT::f32),
DAG.getCondCode(ISD::SETNE)
@@ -472,7 +738,7 @@ SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
}
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
- DebugLoc DL,
+ SDLoc DL,
unsigned DwordOffset) const {
unsigned ByteOffset = DwordOffset * 4;
PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
@@ -501,18 +767,6 @@ SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const
return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
}
-SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
- DebugLoc DL = Op.getDebugLoc();
- EVT VT = Op.getValueType();
-
- return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
- Op.getOperand(0),
- Op.getOperand(0),
- DAG.getNode(ISD::SUB, DL, VT,
- DAG.getConstant(32, MVT::i32),
- Op.getOperand(1)));
-}
-
bool R600TargetLowering::isZero(SDValue Op) const {
if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
return Cst->isNullValue();
@@ -524,7 +778,7 @@ bool R600TargetLowering::isZero(SDValue Op) const {
}
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
EVT VT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
@@ -645,7 +899,7 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::SELECT_CC,
- Op.getDebugLoc(),
+ SDLoc(Op),
Op.getValueType(),
Op.getOperand(0),
DAG.getConstant(0, MVT::i32),
@@ -676,7 +930,7 @@ SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
default: llvm_unreachable("Invalid stack width");
}
- return DAG.getNode(ISD::SRL, Ptr.getDebugLoc(), Ptr.getValueType(), Ptr,
+ return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
DAG.getConstant(SRLPad, MVT::i32));
}
@@ -710,7 +964,7 @@ void R600TargetLowering::getStackAddress(unsigned StackWidth,
}
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
SDValue Chain = Op.getOperand(0);
SDValue Value = Op.getOperand(1);
@@ -772,7 +1026,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
}
Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
- DAG.getTargetConstant(0, MVT::i32)); // Channel
+ DAG.getTargetConstant(0, MVT::i32)); // Channel
}
return Chain;
@@ -822,7 +1076,7 @@ ConstantAddressBlock(unsigned AddressSpace) {
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
EVT VT = Op.getValueType();
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
SDValue Chain = Op.getOperand(0);
SDValue Ptr = Op.getOperand(1);
@@ -851,7 +1105,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
DAG.getConstant(LoadNode->getAddressSpace() -
- AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
+ AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
);
}
@@ -924,7 +1178,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc DL, SelectionDAG &DAG,
+ SDLoc DL, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
unsigned ParamOffsetBytes = 36;
Function::const_arg_iterator FuncArg =
@@ -955,11 +1209,105 @@ SDValue R600TargetLowering::LowerFormalArguments(
return Chain;
}
-EVT R600TargetLowering::getSetCCResultType(EVT VT) const {
+EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
if (!VT.isVector()) return MVT::i32;
return VT.changeVectorElementTypeToInteger();
}
+static SDValue
+CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
+ DenseMap<unsigned, unsigned> &RemapSwizzle) {
+ assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
+ assert(RemapSwizzle.empty());
+ SDValue NewBldVec[4] = {
+ VectorEntry.getOperand(0),
+ VectorEntry.getOperand(1),
+ VectorEntry.getOperand(2),
+ VectorEntry.getOperand(3)
+ };
+
+ for (unsigned i = 0; i < 4; i++) {
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
+ if (C->isZero()) {
+ RemapSwizzle[i] = 4; // SEL_0
+ NewBldVec[i] = DAG.getUNDEF(MVT::f32);
+ } else if (C->isExactlyValue(1.0)) {
+ RemapSwizzle[i] = 5; // SEL_1
+ NewBldVec[i] = DAG.getUNDEF(MVT::f32);
+ }
+ }
+
+ if (NewBldVec[i].getOpcode() == ISD::UNDEF)
+ continue;
+ for (unsigned j = 0; j < i; j++) {
+ if (NewBldVec[i] == NewBldVec[j]) {
+ NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
+ RemapSwizzle[i] = j;
+ break;
+ }
+ }
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
+ VectorEntry.getValueType(), NewBldVec, 4);
+}
+
+static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
+ DenseMap<unsigned, unsigned> &RemapSwizzle) {
+ assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
+ assert(RemapSwizzle.empty());
+ SDValue NewBldVec[4] = {
+ VectorEntry.getOperand(0),
+ VectorEntry.getOperand(1),
+ VectorEntry.getOperand(2),
+ VectorEntry.getOperand(3)
+ };
+ bool isUnmovable[4] = { false, false, false, false };
+
+ for (unsigned i = 0; i < 4; i++) {
+ if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
+ ->getZExtValue();
+ if (!isUnmovable[Idx]) {
+ // Swap i and Idx
+ std::swap(NewBldVec[Idx], NewBldVec[i]);
+ RemapSwizzle[Idx] = i;
+ RemapSwizzle[i] = Idx;
+ }
+ isUnmovable[Idx] = true;
+ }
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
+ VectorEntry.getValueType(), NewBldVec, 4);
+}
+
+
+SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
+SDValue Swz[4], SelectionDAG &DAG) const {
+ assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
+ // Old -> New swizzle values
+ DenseMap<unsigned, unsigned> SwizzleRemap;
+
+ BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
+ for (unsigned i = 0; i < 4; i++) {
+ unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
+ if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
+ Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
+ }
+
+ SwizzleRemap.clear();
+ BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
+ for (unsigned i = 0; i < 4; i++) {
+ unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
+ if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
+ Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
+ }
+
+ return BuildVector;
+}
+
+
//===----------------------------------------------------------------------===//
// Custom DAG Optimizations
//===----------------------------------------------------------------------===//
@@ -973,7 +1321,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::FP_ROUND: {
SDValue Arg = N->getOperand(0);
if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
- return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), N->getValueType(0),
+ return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Arg.getOperand(0));
}
break;
@@ -998,7 +1346,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
return SDValue();
}
- return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N->getValueType(0),
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
SelectCC.getOperand(0), // LHS
SelectCC.getOperand(1), // RHS
DAG.getConstant(-1, MVT::i32), // True
@@ -1021,7 +1369,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
unsigned Element = Const->getZExtValue();
- return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getVTList(),
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Arg->getOperand(0).getOperand(Element));
}
}
@@ -1056,7 +1404,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
LHSCC = ISD::getSetCCInverse(LHSCC,
LHS.getOperand(0).getValueType().isInteger());
- return DAG.getSelectCC(N->getDebugLoc(),
+ return DAG.getSelectCC(SDLoc(N),
LHS.getOperand(0),
LHS.getOperand(1),
LHS.getOperand(2),
@@ -1069,12 +1417,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
SDValue Arg = N->getOperand(1);
if (Arg.getOpcode() != ISD::BUILD_VECTOR)
break;
- SDValue NewBldVec[4] = {
- DAG.getUNDEF(MVT::f32),
- DAG.getUNDEF(MVT::f32),
- DAG.getUNDEF(MVT::f32),
- DAG.getUNDEF(MVT::f32)
- };
+
SDValue NewArgs[8] = {
N->getOperand(0), // Chain
SDValue(),
@@ -1085,23 +1428,40 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
N->getOperand(6), // SWZ_Z
N->getOperand(7) // SWZ_W
};
- for (unsigned i = 0; i < Arg.getNumOperands(); i++) {
- if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Arg.getOperand(i))) {
- if (C->isZero()) {
- NewArgs[4 + i] = DAG.getConstant(4, MVT::i32); // SEL_0
- } else if (C->isExactlyValue(1.0)) {
- NewArgs[4 + i] = DAG.getConstant(5, MVT::i32); // SEL_0
- } else {
- NewBldVec[i] = Arg.getOperand(i);
- }
- } else {
- NewBldVec[i] = Arg.getOperand(i);
- }
- }
- DebugLoc DL = N->getDebugLoc();
- NewArgs[1] = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4f32, NewBldVec, 4);
+ SDLoc DL(N);
+ NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
}
+ case AMDGPUISD::TEXTURE_FETCH: {
+ SDValue Arg = N->getOperand(1);
+ if (Arg.getOpcode() != ISD::BUILD_VECTOR)
+ break;
+
+ SDValue NewArgs[19] = {
+ N->getOperand(0),
+ N->getOperand(1),
+ N->getOperand(2),
+ N->getOperand(3),
+ N->getOperand(4),
+ N->getOperand(5),
+ N->getOperand(6),
+ N->getOperand(7),
+ N->getOperand(8),
+ N->getOperand(9),
+ N->getOperand(10),
+ N->getOperand(11),
+ N->getOperand(12),
+ N->getOperand(13),
+ N->getOperand(14),
+ N->getOperand(15),
+ N->getOperand(16),
+ N->getOperand(17),
+ N->getOperand(18),
+ };
+ NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
+ return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
+ NewArgs, 19);
+ }
}
return SDValue();
}