aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target/R600/SIISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/R600/SIISelLowering.cpp')
-rw-r--r-- lib/Target/R600/SIISelLowering.cpp 307
1 files changed, 210 insertions, 97 deletions
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 0b55411..c9e247c 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -29,22 +29,21 @@ using namespace llvm;
SITargetLowering::SITargetLowering(TargetMachine &TM) :
AMDGPUTargetLowering(TM) {
- addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass);
- addRegisterClass(MVT::i64, &AMDGPU::VSrc_64RegClass);
+ addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass);
+ addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass);
addRegisterClass(MVT::v64i8, &AMDGPU::SReg_512RegClass);
- addRegisterClass(MVT::i32, &AMDGPU::VSrc_32RegClass);
- addRegisterClass(MVT::f32, &AMDGPU::VSrc_32RegClass);
+ addRegisterClass(MVT::i32, &AMDGPU::SReg_32RegClass);
+ addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass);
- addRegisterClass(MVT::f64, &AMDGPU::VSrc_64RegClass);
- addRegisterClass(MVT::v2i32, &AMDGPU::VSrc_64RegClass);
- addRegisterClass(MVT::v2f32, &AMDGPU::VSrc_64RegClass);
+ addRegisterClass(MVT::f64, &AMDGPU::VReg_64RegClass);
+ addRegisterClass(MVT::v2i32, &AMDGPU::SReg_64RegClass);
+ addRegisterClass(MVT::v2f32, &AMDGPU::VReg_64RegClass);
- addRegisterClass(MVT::v4i32, &AMDGPU::VReg_128RegClass);
+ addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass);
addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
- addRegisterClass(MVT::i128, &AMDGPU::SReg_128RegClass);
addRegisterClass(MVT::v8i32, &AMDGPU::VReg_256RegClass);
addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);
@@ -78,8 +77,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::ADDC, MVT::i32, Legal);
setOperationAction(ISD::ADDE, MVT::i32, Legal);
- setOperationAction(ISD::BITCAST, MVT::i128, Legal);
-
// We need to custom lower vector stores from local memory
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
@@ -99,10 +96,11 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::STORE, MVT::i1, Custom);
setOperationAction(ISD::STORE, MVT::i32, Custom);
setOperationAction(ISD::STORE, MVT::i64, Custom);
- setOperationAction(ISD::STORE, MVT::i128, Custom);
setOperationAction(ISD::STORE, MVT::v2i32, Custom);
setOperationAction(ISD::STORE, MVT::v4i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f32, Promote);
+ AddPromotedToType(ISD::SELECT, MVT::f32, MVT::i32);
setOperationAction(ISD::SELECT, MVT::i64, Custom);
setOperationAction(ISD::SELECT, MVT::f64, Promote);
AddPromotedToType(ISD::SELECT, MVT::f64, MVT::i64);
@@ -119,6 +117,22 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Custom);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Custom);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Custom);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Custom);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Custom);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Custom);
+
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v16i8, Custom);
@@ -126,39 +140,48 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand);
setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, Expand);
setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Expand);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
setLoadExtAction(ISD::EXTLOAD, MVT::i32, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+
setTruncStoreAction(MVT::i32, MVT::i8, Custom);
setTruncStoreAction(MVT::i32, MVT::i16, Custom);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::i64, MVT::i32, Expand);
- setTruncStoreAction(MVT::i128, MVT::i64, Expand);
setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);
+ setOperationAction(ISD::LOAD, MVT::i1, Custom);
+
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
+ // These should use UDIVREM, so set them to expand
+ setOperationAction(ISD::UDIV, MVT::i64, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+
// We only support LOAD/STORE and vector manipulation ops for vectors
// with > 4 elements.
MVT VecTypes[] = {
MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32
};
- const size_t NumVecTypes = array_lengthof(VecTypes);
- for (unsigned Type = 0; Type < NumVecTypes; ++Type) {
+ for (MVT VT : VecTypes) {
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
switch(Op) {
case ISD::LOAD:
@@ -172,7 +195,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
case ISD::EXTRACT_SUBVECTOR:
break;
default:
- setOperationAction(Op, VecTypes[Type], Expand);
+ setOperationAction(Op, VT, Expand);
break;
}
}
@@ -189,6 +212,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::f64, Legal);
}
setTargetDAGCombine(ISD::SELECT_CC);
@@ -204,10 +228,40 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
unsigned AddrSpace,
bool *IsFast) const {
+ if (IsFast)
+ *IsFast = false;
+
// XXX: This depends on the address space and also we may want to revisit
// the alignment values we specify in the DataLayout.
+
+ // TODO: I think v3i32 should allow unaligned accesses on CI with DS_READ_B96,
+ // which isn't a simple VT.
if (!VT.isSimple() || VT == MVT::Other)
return false;
+
+ // XXX - CI changes say "Support for unaligned memory accesses" but I don't
+ // see what for specifically. The wording everywhere else seems to be the
+ // same.
+
+ // 3.6.4 - Operations using pairs of VGPRs (for example: double-floats) have
+ // no alignment restrictions.
+ if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) {
+ // Using any pair of GPRs should be the same as any other pair.
+ if (IsFast)
+ *IsFast = true;
+ return VT.bitsGE(MVT::i64);
+ }
+
+ // XXX - The only mention I see of this in the ISA manual is for LDS direct
+ // reads the "byte address and must be dword aligned". Is it also true for the
+ // normal loads and stores?
+ if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS)
+ return false;
+
+ // 8.1.6 - For Dword or larger reads or writes, the two LSBs of the
+ // byte-address are ignored, thus forcing Dword alignment.
+ if (IsFast)
+ *IsFast = true;
return VT.bitsGT(MVT::i32);
}
@@ -224,7 +278,7 @@ bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
SDLoc DL, SDValue Chain,
- unsigned Offset) const {
+ unsigned Offset, bool Signed) const {
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
AMDGPUAS::CONSTANT_ADDRESS);
@@ -232,7 +286,7 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
MRI.getLiveInVirtReg(AMDGPU::SGPR0_SGPR1), MVT::i64);
SDValue Ptr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
DAG.getConstant(Offset, MVT::i64));
- return DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, Chain, Ptr,
+ return DAG.getExtLoad(Signed ? ISD::SEXTLOAD : ISD::ZEXTLOAD, DL, VT, Chain, Ptr,
MachinePointerInfo(UndefValue::get(PtrTy)), MemVT,
false, false, MemVT.getSizeInBits() >> 3);
@@ -340,7 +394,8 @@ SDValue SITargetLowering::LowerFormalArguments(
// The first 36 bytes of the input buffer contains information about
// thread group and global sizes.
SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, DAG.getRoot(),
- 36 + VA.getLocMemOffset());
+ 36 + VA.getLocMemOffset(),
+ Ins[i].Flags.isSExt());
InVals.push_back(Arg);
continue;
}
@@ -381,8 +436,7 @@ SDValue SITargetLowering::LowerFormalArguments(
for (unsigned j = 0; j != NumElements; ++j)
Regs.push_back(DAG.getUNDEF(VT));
- InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, DL, Arg.VT,
- Regs.data(), Regs.size()));
+ InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, DL, Arg.VT, Regs));
continue;
}
@@ -395,15 +449,15 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
MachineInstr * MI, MachineBasicBlock * BB) const {
MachineBasicBlock::iterator I = *MI;
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
switch (MI->getOpcode()) {
default:
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
case AMDGPU::BRANCH: return BB;
case AMDGPU::SI_ADDR64_RSRC: {
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
unsigned SuperReg = MI->getOperand(0).getReg();
unsigned SubRegLo = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
unsigned SubRegHi = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
@@ -428,9 +482,7 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
MI->eraseFromParent();
break;
}
- case AMDGPU::V_SUB_F64: {
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+ case AMDGPU::V_SUB_F64:
BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F64),
MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
@@ -442,11 +494,9 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
.addImm(2); /* NEG */
MI->eraseFromParent();
break;
- }
+
case AMDGPU::SI_RegisterStorePseudo: {
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
MachineInstrBuilder MIB =
BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::SI_RegisterStore),
@@ -455,6 +505,50 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
MIB.addOperand(MI->getOperand(i));
MI->eraseFromParent();
+ break;
+ }
+ case AMDGPU::FABS_SI: {
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+ unsigned Reg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+ BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32),
+ Reg)
+ .addImm(0x7fffffff);
+ BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_AND_B32_e32),
+ MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addReg(Reg);
+ MI->eraseFromParent();
+ break;
+ }
+ case AMDGPU::FNEG_SI: {
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+ unsigned Reg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+ BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32),
+ Reg)
+ .addImm(0x80000000);
+ BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_XOR_B32_e32),
+ MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addReg(Reg);
+ MI->eraseFromParent();
+ break;
+ }
+ case AMDGPU::FCLAMP_SI: {
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+ BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F32_e64),
+ MI->getOperand(0).getReg())
+ .addImm(0) // SRC0 modifiers
+ .addOperand(MI->getOperand(1))
+ .addImm(0) // SRC1 modifiers
+ .addImm(0) // SRC1
+ .addImm(1) // CLAMP
+ .addImm(0); // OMOD
+ MI->eraseFromParent();
}
}
return BB;
@@ -510,7 +604,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
SplitVectorLoad(Op, DAG),
Load->getChain()
};
- return DAG.getMergeValues(MergedValues, 2, SDLoc(Op));
+ return DAG.getMergeValues(MergedValues, SDLoc(Op));
} else {
return LowerLOAD(Op, DAG);
}
@@ -533,23 +627,23 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (IntrinsicID) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
case Intrinsic::r600_read_ngroups_x:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 0);
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 0, false);
case Intrinsic::r600_read_ngroups_y:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 4);
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 4, false);
case Intrinsic::r600_read_ngroups_z:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 8);
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 8, false);
case Intrinsic::r600_read_global_size_x:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 12);
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 12, false);
case Intrinsic::r600_read_global_size_y:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 16);
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 16, false);
case Intrinsic::r600_read_global_size_z:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 20);
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 20, false);
case Intrinsic::r600_read_local_size_x:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 24);
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 24, false);
case Intrinsic::r600_read_local_size_y:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 28);
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 28, false);
case Intrinsic::r600_read_local_size_z:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 32);
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 32, false);
case Intrinsic::r600_read_tgid_x:
return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
AMDGPU::SReg_32RegClass.getRegister(NumUserSGPRs + 0), VT);
@@ -570,7 +664,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
AMDGPU::VGPR2, VT);
case AMDGPUIntrinsic::SI_load_const: {
SDValue Ops [] = {
- ResourceDescriptorToi128(Op.getOperand(1), DAG),
+ Op.getOperand(1),
Op.getOperand(2)
};
@@ -579,7 +673,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant,
VT.getSizeInBits() / 8, 4);
return DAG.getMemIntrinsicNode(AMDGPUISD::LOAD_CONSTANT, DL,
- Op->getVTList(), Ops, 2, VT, MMO);
+ Op->getVTList(), Ops, VT, MMO);
}
case AMDGPUIntrinsic::SI_sample:
return LowerSampleIntrinsic(AMDGPUISD::SAMPLE, Op, DAG);
@@ -591,7 +685,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerSampleIntrinsic(AMDGPUISD::SAMPLEL, Op, DAG);
case AMDGPUIntrinsic::SI_vs_load_input:
return DAG.getNode(AMDGPUISD::LOAD_INPUT, DL, VT,
- ResourceDescriptorToi128(Op.getOperand(1), DAG),
+ Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
}
@@ -606,7 +700,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
SDValue Ops [] = {
Chain,
- ResourceDescriptorToi128(Op.getOperand(2), DAG),
+ Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(4),
Op.getOperand(5),
@@ -627,8 +721,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
MachineMemOperand::MOStore,
VT.getSizeInBits() / 8, 4);
return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL,
- Op->getVTList(), Ops,
- sizeof(Ops)/sizeof(Ops[0]), VT, MMO);
+ Op->getVTList(), Ops, VT, MMO);
}
default:
break;
@@ -650,7 +743,7 @@ static SDNode *findUser(SDValue Value, unsigned Opcode) {
if (I->getOpcode() == Opcode)
return *I;
}
- return 0;
+ return nullptr;
}
/// This transforms the control flow intrinsics to get the branch destination as
@@ -662,7 +755,7 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
SDNode *Intr = BRCOND.getOperand(1).getNode();
SDValue Target = BRCOND.getOperand(2);
- SDNode *BR = 0;
+ SDNode *BR = nullptr;
if (Intr->getOpcode() == ISD::SETCC) {
// As long as we negate the condition everything is fine
@@ -695,7 +788,7 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
// build the new intrinsic call
SDNode *Result = DAG.getNode(
Res.size() > 1 ? ISD::INTRINSIC_W_CHAIN : ISD::INTRINSIC_VOID, DL,
- DAG.getVTList(Res.data(), Res.size()), Ops.data(), Ops.size()).getNode();
+ DAG.getVTList(Res), Ops).getNode();
if (BR) {
// Give the branch instruction our target
@@ -703,7 +796,7 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
BR->getOperand(0),
BRCOND.getOperand(2)
};
- DAG.MorphNodeTo(BR, ISD::BR, BR->getVTList(), Ops, 2);
+ DAG.MorphNodeTo(BR, ISD::BR, BR->getVTList(), Ops);
}
SDValue Chain = SDValue(Result, Result->getNumValues() - 1);
@@ -739,7 +832,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
MergedValues[1] = Load->getChain();
if (Ret.getNode()) {
MergedValues[0] = Ret;
- return DAG.getMergeValues(MergedValues, 2, DL);
+ return DAG.getMergeValues(MergedValues, DL);
}
if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
@@ -770,30 +863,16 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
}
MergedValues[0] = Ret;
- return DAG.getMergeValues(MergedValues, 2, DL);
+ return DAG.getMergeValues(MergedValues, DL);
}
-SDValue SITargetLowering::ResourceDescriptorToi128(SDValue Op,
- SelectionDAG &DAG) const {
-
- if (Op.getValueType() == MVT::i128) {
- return Op;
- }
-
- assert(Op.getOpcode() == ISD::UNDEF);
-
- return DAG.getNode(ISD::BUILD_PAIR, SDLoc(Op), MVT::i128,
- DAG.getConstant(0, MVT::i64),
- DAG.getConstant(0, MVT::i64));
-}
-
SDValue SITargetLowering::LowerSampleIntrinsic(unsigned Opcode,
const SDValue &Op,
SelectionDAG &DAG) const {
return DAG.getNode(Opcode, SDLoc(Op), Op.getValueType(), Op.getOperand(1),
Op.getOperand(2),
- ResourceDescriptorToi128(Op.getOperand(3), DAG),
+ Op.getOperand(3),
Op.getOperand(4));
}
@@ -833,12 +912,6 @@ SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc DL(Op);
- // Possible Min/Max pattern
- SDValue MinMax = LowerMinMax(Op, DAG);
- if (MinMax.getNode()) {
- return MinMax;
- }
-
SDValue Cond = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, CC);
return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
}
@@ -948,8 +1021,12 @@ SDValue SITargetLowering::LowerZERO_EXTEND(SDValue Op,
return SDValue();
}
- return DAG.getNode(ISD::BUILD_PAIR, DL, VT, Op.getOperand(0),
- DAG.getConstant(0, MVT::i32));
+ SDValue Src = Op.getOperand(0);
+ if (Src.getValueType() != MVT::i32)
+ Src = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Src);
+
+ SDValue Zero = DAG.getConstant(0, MVT::i32);
+ return DAG.getNode(ISD::BUILD_PAIR, DL, VT, Src, Zero);
}
//===----------------------------------------------------------------------===//
@@ -963,7 +1040,7 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
EVT VT = N->getValueType(0);
switch (N->getOpcode()) {
- default: break;
+ default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
case ISD::SELECT_CC: {
ConstantSDNode *True, *False;
// i1 selectcc(l, r, -1, 0, cc) -> i1 setcc(l, r, cc)
@@ -982,7 +1059,7 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
SDValue Arg0 = N->getOperand(0);
SDValue Arg1 = N->getOperand(1);
SDValue CC = N->getOperand(2);
- ConstantSDNode * C = NULL;
+ ConstantSDNode * C = nullptr;
ISD::CondCode CCOp = dyn_cast<CondCodeSDNode>(CC)->get();
// i1 setcc (sext(i1), 0, setne) -> i1 setcc(i1, 0, setne)
@@ -998,7 +1075,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
break;
}
}
- return SDValue();
+
+ return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
/// \brief Test if RegClass is one of the VSrc classes
@@ -1029,9 +1107,11 @@ int32_t SITargetLowering::analyzeImmediate(const SDNode *N) const {
return -1;
}
Imm.I = Node->getSExtValue();
- } else if (const ConstantFPSDNode *Node = dyn_cast<ConstantFPSDNode>(N))
+ } else if (const ConstantFPSDNode *Node = dyn_cast<ConstantFPSDNode>(N)) {
+ if (N->getValueType(0) != MVT::f32)
+ return -1;
Imm.F = Node->getValueAPF().convertToFloat();
- else
+ } else
return -1; // It isn't an immediate
if ((Imm.I >= -16 && Imm.I <= 64) ||
@@ -1051,7 +1131,7 @@ bool SITargetLowering::foldImm(SDValue &Operand, int32_t &Immediate,
MachineSDNode *Mov = dyn_cast<MachineSDNode>(Operand);
const SIInstrInfo *TII =
static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
- if (Mov == 0 || !TII->isMov(Mov->getMachineOpcode()))
+ if (!Mov || !TII->isMov(Mov->getMachineOpcode()))
return false;
const SDValue &Op = Mov->getOperand(0);
@@ -1098,7 +1178,7 @@ const TargetRegisterClass *SITargetLowering::getRegClassForNode(
}
return TRI.getPhysRegClass(Reg);
}
- default: return NULL;
+ default: return nullptr;
}
}
const MCInstrDesc &Desc = TII->get(Op->getMachineOpcode());
@@ -1202,17 +1282,17 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
// Commuted opcode if available
int OpcodeRev = Desc->isCommutable() ? TII->commuteOpcode(Opcode) : -1;
- const MCInstrDesc *DescRev = OpcodeRev == -1 ? 0 : &TII->get(OpcodeRev);
+ const MCInstrDesc *DescRev = OpcodeRev == -1 ? nullptr : &TII->get(OpcodeRev);
assert(!DescRev || DescRev->getNumDefs() == NumDefs);
assert(!DescRev || DescRev->getNumOperands() == NumOps);
// e64 version if available, -1 otherwise
int OpcodeE64 = AMDGPU::getVOPe64(Opcode);
- const MCInstrDesc *DescE64 = OpcodeE64 == -1 ? 0 : &TII->get(OpcodeE64);
+ const MCInstrDesc *DescE64 = OpcodeE64 == -1 ? nullptr : &TII->get(OpcodeE64);
+ int InputModifiers[3] = {0};
assert(!DescE64 || DescE64->getNumDefs() == NumDefs);
- assert(!DescE64 || DescE64->getNumOperands() == (NumOps + 4));
int32_t Immediate = Desc->getSize() == 4 ? 0 : -1;
bool HaveVSrc = false, HaveSSrc = false;
@@ -1279,17 +1359,18 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
fitsRegClass(DAG, Ops[1], OtherRegClass))) {
// Swap commutable operands
- SDValue Tmp = Ops[1];
- Ops[1] = Ops[0];
- Ops[0] = Tmp;
+ std::swap(Ops[0], Ops[1]);
Desc = DescRev;
- DescRev = 0;
+ DescRev = nullptr;
continue;
}
}
- if (DescE64 && !Immediate) {
+ if (Immediate)
+ continue;
+
+ if (DescE64) {
// Test if it makes sense to switch to e64 encoding
unsigned OtherRegClass = DescE64->OpInfo[Op].RegClass;
@@ -1305,14 +1386,46 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
Immediate = -1;
Promote2e64 = true;
Desc = DescE64;
- DescE64 = 0;
+ DescE64 = nullptr;
}
}
+
+ if (!DescE64 && !Promote2e64)
+ continue;
+ if (!Operand.isMachineOpcode())
+ continue;
+ if (Operand.getMachineOpcode() == AMDGPU::FNEG_SI) {
+ Ops.pop_back();
+ Ops.push_back(Operand.getOperand(0));
+ InputModifiers[i] = 1;
+ Promote2e64 = true;
+ if (!DescE64)
+ continue;
+ Desc = DescE64;
+ DescE64 = 0;
+ }
+ else if (Operand.getMachineOpcode() == AMDGPU::FABS_SI) {
+ Ops.pop_back();
+ Ops.push_back(Operand.getOperand(0));
+ InputModifiers[i] = 2;
+ Promote2e64 = true;
+ if (!DescE64)
+ continue;
+ Desc = DescE64;
+ DescE64 = 0;
+ }
}
if (Promote2e64) {
+ std::vector<SDValue> OldOps(Ops);
+ Ops.clear();
+ for (unsigned i = 0; i < OldOps.size(); ++i) {
+ // src_modifier
+ Ops.push_back(DAG.getTargetConstant(InputModifiers[i], MVT::i32));
+ Ops.push_back(OldOps[i]);
+ }
// Add the modifier flags while promoting
- for (unsigned i = 0; i < 4; ++i)
+ for (unsigned i = 0; i < 2; ++i)
Ops.push_back(DAG.getTargetConstant(0, MVT::i32));
}
@@ -1390,7 +1503,7 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
Ops.push_back(DAG.getTargetConstant(NewDmask, MVT::i32));
for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i)
Ops.push_back(Node->getOperand(i));
- Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size());
+ Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops);
// If we only got one lane, replace it with a copy
// (if NewDmask has only one bit set...)