Diffstat (limited to 'lib/Target/R600/R600InstrInfo.cpp')
-rw-r--r--  lib/Target/R600/R600InstrInfo.cpp  | 328
1 file changed, 294 insertions(+), 34 deletions(-)
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
index 8fd8385..4f5cfcd 100644
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -19,8 +19,8 @@
#include "R600Defines.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#define GET_INSTRINFO_CTOR
@@ -30,7 +30,7 @@ using namespace llvm;
R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
: AMDGPUInstrInfo(tm),
- RI(tm, *this),
+ RI(tm),
ST(tm.getSubtarget<AMDGPUSubtarget>())
{ }
@@ -116,9 +116,6 @@ bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const {
bool R600InstrInfo::isReductionOp(unsigned Opcode) const {
switch(Opcode) {
default: return false;
- case AMDGPU::DOT4_r600_pseudo:
- case AMDGPU::DOT4_eg_pseudo:
- return true;
}
}
@@ -150,7 +147,7 @@ bool R600InstrInfo::isTransOnly(const MachineInstr *MI) const {
}
bool R600InstrInfo::usesVertexCache(unsigned Opcode) const {
- return ST.hasVertexCache() && get(Opcode).TSFlags & R600_InstFlag::VTX_INST;
+ return ST.hasVertexCache() && IS_VTX(get(Opcode));
}
bool R600InstrInfo::usesVertexCache(const MachineInstr *MI) const {
@@ -159,8 +156,7 @@ bool R600InstrInfo::usesVertexCache(const MachineInstr *MI) const {
}
bool R600InstrInfo::usesTextureCache(unsigned Opcode) const {
- return (!ST.hasVertexCache() && get(Opcode).TSFlags & R600_InstFlag::VTX_INST) ||
- (get(Opcode).TSFlags & R600_InstFlag::TEX_INST);
+ return (!ST.hasVertexCache() && IS_VTX(get(Opcode))) || IS_TEX(get(Opcode));
}
bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const {
@@ -169,6 +165,181 @@ bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const {
usesTextureCache(MI->getOpcode());
}
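+// Return the (operand, selector/literal) pairs for all sources of an ALU
+// instruction: the constant-buffer selector for ALU_CONST sources, the
+// immediate for ALU_LITERAL_X sources, and 0 for plain register sources.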
+SmallVector<std::pair<MachineOperand *, int64_t>, 3>
+R600InstrInfo::getSrcs(MachineInstr *MI) const {
+ SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;
+
+ if (MI->getOpcode() == AMDGPU::DOT_4) {
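+    // A DOT_4 reads two four-channel vector sources, so there are eight
+    // (src, src_sel) operand pairs: one per channel of src0 and src1.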
+ static const R600Operands::VecOps OpTable[8][2] = {
+ {R600Operands::SRC0_X, R600Operands::SRC0_SEL_X},
+ {R600Operands::SRC0_Y, R600Operands::SRC0_SEL_Y},
+ {R600Operands::SRC0_Z, R600Operands::SRC0_SEL_Z},
+ {R600Operands::SRC0_W, R600Operands::SRC0_SEL_W},
+ {R600Operands::SRC1_X, R600Operands::SRC1_SEL_X},
+ {R600Operands::SRC1_Y, R600Operands::SRC1_SEL_Y},
+ {R600Operands::SRC1_Z, R600Operands::SRC1_SEL_Z},
+ {R600Operands::SRC1_W, R600Operands::SRC1_SEL_W},
+ };
+
+ for (unsigned j = 0; j < 8; j++) {
+ MachineOperand &MO = MI->getOperand(OpTable[j][0] + 1);
+ unsigned Reg = MO.getReg();
+ if (Reg == AMDGPU::ALU_CONST) {
+ unsigned Sel = MI->getOperand(OpTable[j][1] + 1).getImm();
+ Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
+ continue;
+ }
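+      // Note: non-constant DOT_4 sources are not recorded here.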
+
+    }
+ return Result;
+ }
+
+ static const R600Operands::Ops OpTable[3][2] = {
+ {R600Operands::SRC0, R600Operands::SRC0_SEL},
+ {R600Operands::SRC1, R600Operands::SRC1_SEL},
+ {R600Operands::SRC2, R600Operands::SRC2_SEL},
+ };
+
+ for (unsigned j = 0; j < 3; j++) {
+ int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]);
+ if (SrcIdx < 0)
+ break;
+ MachineOperand &MO = MI->getOperand(SrcIdx);
+    unsigned Reg = MO.getReg();
+ if (Reg == AMDGPU::ALU_CONST) {
+ unsigned Sel = MI->getOperand(
+ getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
+ Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
+ continue;
+ }
+ if (Reg == AMDGPU::ALU_LITERAL_X) {
+ unsigned Imm = MI->getOperand(
+ getOperandIdx(MI->getOpcode(), R600Operands::IMM)).getImm();
+ Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Imm));
+ continue;
+ }
+ Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, 0));
+ }
+ return Result;
+}
+
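+// Flatten the sources of MI into (register index, channel) pairs for the
+// bank swizzle checks below. Sources that do not come from the register
+// file (encoding index > 127) or that are produced within the current
+// instruction group (listed in PV) become the placeholder (-1, 0), and the
+// result is padded with placeholders up to three entries.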
+std::vector<std::pair<int, unsigned> >
+R600InstrInfo::ExtractSrcs(MachineInstr *MI,
+ const DenseMap<unsigned, unsigned> &PV)
+ const {
+ const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = getSrcs(MI);
+ const std::pair<int, unsigned> DummyPair(-1, 0);
+ std::vector<std::pair<int, unsigned> > Result;
+ unsigned i = 0;
+ for (unsigned n = Srcs.size(); i < n; ++i) {
+ unsigned Reg = Srcs[i].first->getReg();
+ unsigned Index = RI.getEncodingValue(Reg) & 0xff;
+ unsigned Chan = RI.getHWRegChan(Reg);
+ if (Index > 127) {
+ Result.push_back(DummyPair);
+ continue;
+ }
+ if (PV.find(Index) != PV.end()) {
+ Result.push_back(DummyPair);
+ continue;
+ }
+ Result.push_back(std::pair<int, unsigned>(Index, Chan));
+ }
+ for (; i < 3; ++i)
+ Result.push_back(DummyPair);
+ return Result;
+}
+
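+// Return a copy of the three source slots permuted according to the given
+// bank swizzle; each swizzle assigns the sources to different read cycles.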
+static std::vector<std::pair<int, unsigned> >
+Swizzle(std::vector<std::pair<int, unsigned> > Src,
+ R600InstrInfo::BankSwizzle Swz) {
+ switch (Swz) {
+ case R600InstrInfo::ALU_VEC_012:
+ break;
+ case R600InstrInfo::ALU_VEC_021:
+ std::swap(Src[1], Src[2]);
+ break;
+ case R600InstrInfo::ALU_VEC_102:
+ std::swap(Src[0], Src[1]);
+ break;
+ case R600InstrInfo::ALU_VEC_120:
+ std::swap(Src[0], Src[1]);
+ std::swap(Src[0], Src[2]);
+ break;
+ case R600InstrInfo::ALU_VEC_201:
+ std::swap(Src[0], Src[2]);
+ std::swap(Src[0], Src[1]);
+ break;
+ case R600InstrInfo::ALU_VEC_210:
+ std::swap(Src[0], Src[2]);
+ break;
+ }
+ return Src;
+}
+
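+// Check that the first CheckedSize instructions of the group, using the
+// swizzles chosen so far, never require two different register indices on
+// the same channel in the same read cycle: Vector[chan][cycle] records the
+// single index that may be read there.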
+static bool
+isLegal(const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
+ const std::vector<R600InstrInfo::BankSwizzle> &Swz,
+ unsigned CheckedSize) {
+ int Vector[4][3];
+ memset(Vector, -1, sizeof(Vector));
+ for (unsigned i = 0; i < CheckedSize; i++) {
+ const std::vector<std::pair<int, unsigned> > &Srcs =
+ Swizzle(IGSrcs[i], Swz[i]);
+ for (unsigned j = 0; j < 3; j++) {
+ const std::pair<int, unsigned> &Src = Srcs[j];
+ if (Src.first < 0)
+ continue;
+ if (Vector[Src.second][j] < 0)
+ Vector[Src.second][j] = Src.first;
+ if (Vector[Src.second][j] != Src.first)
+ return false;
+ }
+ }
+ return true;
+}
+
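+// Backtracking search over bank swizzle assignments: starting from the
+// swizzles already in SwzCandidate, try each swizzle for the instruction at
+// Depth and recurse while the prefix stays legal. On failure the slot is
+// reset to ALU_VEC_012.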
+static bool recursiveFitsFPLimitation(
+    const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
+    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
+    unsigned Depth = 0) {
+ if (!isLegal(IGSrcs, SwzCandidate, Depth))
+ return false;
+ if (IGSrcs.size() == Depth)
+ return true;
+ unsigned i = SwzCandidate[Depth];
+ for (; i < 6; i++) {
+ SwzCandidate[Depth] = (R600InstrInfo::BankSwizzle) i;
+ if (recursiveFitsFPLimitation(IGSrcs, SwzCandidate, Depth + 1))
+ return true;
+ }
+ SwzCandidate[Depth] = R600InstrInfo::ALU_VEC_012;
+ return false;
+}
+
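+// Returns true if some bank swizzle assignment lets every instruction of
+// the group IG read its sources. The chosen swizzles are returned through
+// ValidSwizzle, seeded from each instruction's current BANK_SWIZZLE operand.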
+bool
+R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
+ const DenseMap<unsigned, unsigned> &PV,
+ std::vector<BankSwizzle> &ValidSwizzle)
+ const {
+  // TODO: Support a source operand shared between src0 and src1.
+
+ std::vector<std::vector<std::pair<int, unsigned> > > IGSrcs;
+ ValidSwizzle.clear();
+ for (unsigned i = 0, e = IG.size(); i < e; ++i) {
+ IGSrcs.push_back(ExtractSrcs(IG[i], PV));
+ unsigned Op = getOperandIdx(IG[i]->getOpcode(),
+ R600Operands::BANK_SWIZZLE);
+    ValidSwizzle.push_back((R600InstrInfo::BankSwizzle)
+        IG[i]->getOperand(Op).getImm());
+ }
+  return recursiveFitsFPLimitation(IGSrcs, ValidSwizzle);
+}
+
+
bool
R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
const {
@@ -198,34 +369,22 @@ bool
R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) const {
std::vector<unsigned> Consts;
for (unsigned i = 0, n = MIs.size(); i < n; i++) {
- const MachineInstr *MI = MIs[i];
-
- const R600Operands::Ops OpTable[3][2] = {
- {R600Operands::SRC0, R600Operands::SRC0_SEL},
- {R600Operands::SRC1, R600Operands::SRC1_SEL},
- {R600Operands::SRC2, R600Operands::SRC2_SEL},
- };
-
+ MachineInstr *MI = MIs[i];
if (!isALUInstr(MI->getOpcode()))
continue;
- for (unsigned j = 0; j < 3; j++) {
- int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]);
- if (SrcIdx < 0)
- break;
- unsigned Reg = MI->getOperand(SrcIdx).getReg();
- if (Reg == AMDGPU::ALU_CONST) {
- unsigned Const = MI->getOperand(
- getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
- Consts.push_back(Const);
- continue;
- }
- if (AMDGPU::R600_KC0RegClass.contains(Reg) ||
- AMDGPU::R600_KC1RegClass.contains(Reg)) {
- unsigned Index = RI.getEncodingValue(Reg) & 0xff;
- unsigned Chan = RI.getHWRegChan(Reg);
+ const SmallVector<std::pair<MachineOperand *, int64_t>, 3> &Srcs =
+ getSrcs(MI);
+
+ for (unsigned j = 0, e = Srcs.size(); j < e; j++) {
+      const std::pair<MachineOperand *, int64_t> &Src = Srcs[j];
+ if (Src.first->getReg() == AMDGPU::ALU_CONST)
+ Consts.push_back(Src.second);
+ if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) ||
+ AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())) {
+ unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff;
+ unsigned Chan = RI.getHWRegChan(Src.first->getReg());
Consts.push_back((Index << 2) | Chan);
- continue;
}
}
}
@@ -657,7 +816,8 @@ MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
AddrReg, ValueReg)
- .addReg(AMDGPU::AR_X, RegState::Implicit);
+ .addReg(AMDGPU::AR_X,
+ RegState::Implicit | RegState::Kill);
setImmOperand(Mov, R600Operands::DST_REL, 1);
return Mov;
}
@@ -674,7 +834,8 @@ MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
ValueReg,
AddrReg)
- .addReg(AMDGPU::AR_X, RegState::Implicit);
+ .addReg(AMDGPU::AR_X,
+ RegState::Implicit | RegState::Kill);
setImmOperand(Mov, R600Operands::SRC0_REL, 1);
return Mov;
@@ -729,6 +890,95 @@ MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MB
return MIB;
}
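+// OPERAND_CASE expands a scalar ALU operand into its four per-slot DOT_4
+// variants (e.g. SRC0 -> SRC0_X/_Y/_Z/_W), selected by Slot.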
+#define OPERAND_CASE(Label) \
+ case Label: { \
+ static const R600Operands::VecOps Ops[] = \
+ { \
+ Label##_X, \
+ Label##_Y, \
+ Label##_Z, \
+ Label##_W \
+ }; \
+ return Ops[Slot]; \
+ }
+
+static R600Operands::VecOps
+getSlotedOps(R600Operands::Ops Op, unsigned Slot) {
+ switch (Op) {
+ OPERAND_CASE(R600Operands::UPDATE_EXEC_MASK)
+ OPERAND_CASE(R600Operands::UPDATE_PREDICATE)
+ OPERAND_CASE(R600Operands::WRITE)
+ OPERAND_CASE(R600Operands::OMOD)
+ OPERAND_CASE(R600Operands::DST_REL)
+ OPERAND_CASE(R600Operands::CLAMP)
+ OPERAND_CASE(R600Operands::SRC0)
+ OPERAND_CASE(R600Operands::SRC0_NEG)
+ OPERAND_CASE(R600Operands::SRC0_REL)
+ OPERAND_CASE(R600Operands::SRC0_ABS)
+ OPERAND_CASE(R600Operands::SRC0_SEL)
+ OPERAND_CASE(R600Operands::SRC1)
+ OPERAND_CASE(R600Operands::SRC1_NEG)
+ OPERAND_CASE(R600Operands::SRC1_REL)
+ OPERAND_CASE(R600Operands::SRC1_ABS)
+ OPERAND_CASE(R600Operands::SRC1_SEL)
+ OPERAND_CASE(R600Operands::PRED_SEL)
+ default:
+ llvm_unreachable("Wrong Operand");
+ }
+}
+
+#undef OPERAND_CASE
+
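+// DOT_4 vector operands are laid out contiguously right after the
+// destination, so the operand index is simply the enum value plus one.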
+static int
+getVecOperandIdx(R600Operands::VecOps Op) {
+ return 1 + Op;
+}
+
+
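+// Lower one slot (Slot 0..3 for X..W) of a DOT_4 pseudo into a native DOT4
+// instruction, copying that slot's flags and selectors onto the new
+// instruction.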
+MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
+ MachineBasicBlock &MBB, MachineInstr *MI, unsigned Slot, unsigned DstReg)
+ const {
+  assert(MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented");
+ unsigned Opcode;
+ const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+ if (ST.getGeneration() <= AMDGPUSubtarget::R700)
+ Opcode = AMDGPU::DOT4_r600;
+ else
+ Opcode = AMDGPU::DOT4_eg;
+ MachineBasicBlock::iterator I = MI;
+ MachineOperand &Src0 = MI->getOperand(
+ getVecOperandIdx(getSlotedOps(R600Operands::SRC0, Slot)));
+ MachineOperand &Src1 = MI->getOperand(
+ getVecOperandIdx(getSlotedOps(R600Operands::SRC1, Slot)));
+ MachineInstr *MIB = buildDefaultInstruction(
+ MBB, I, Opcode, DstReg, Src0.getReg(), Src1.getReg());
+ static const R600Operands::Ops Operands[14] = {
+ R600Operands::UPDATE_EXEC_MASK,
+ R600Operands::UPDATE_PREDICATE,
+ R600Operands::WRITE,
+ R600Operands::OMOD,
+ R600Operands::DST_REL,
+ R600Operands::CLAMP,
+ R600Operands::SRC0_NEG,
+ R600Operands::SRC0_REL,
+ R600Operands::SRC0_ABS,
+ R600Operands::SRC0_SEL,
+ R600Operands::SRC1_NEG,
+ R600Operands::SRC1_REL,
+ R600Operands::SRC1_ABS,
+ R600Operands::SRC1_SEL,
+ };
+
+ for (unsigned i = 0; i < 14; i++) {
+ MachineOperand &MO = MI->getOperand(
+ getVecOperandIdx(getSlotedOps(Operands[i], Slot)));
+    assert(MO.isImm());
+ setImmOperand(MIB, Operands[i], MO.getImm());
+ }
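+  // The per-slot fields above were copied from the DOT_4 pseudo; the
+  // trailing immediate at operand index 20 (presumably the bank_swizzle
+  // field in this operand layout) is cleared.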
+ MIB->getOperand(20).setImm(0);
+ return MIB;
+}
+
MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
MachineBasicBlock::iterator I,
unsigned DstReg,
@@ -744,6 +994,11 @@ int R600InstrInfo::getOperandIdx(const MachineInstr &MI,
return getOperandIdx(MI.getOpcode(), Op);
}
+int R600InstrInfo::getOperandIdx(const MachineInstr &MI,
+ R600Operands::VecOps Op) const {
+ return getOperandIdx(MI.getOpcode(), Op);
+}
+
int R600InstrInfo::getOperandIdx(unsigned Opcode,
R600Operands::Ops Op) const {
unsigned TargetFlags = get(Opcode).TSFlags;
@@ -774,6 +1029,11 @@ int R600InstrInfo::getOperandIdx(unsigned Opcode,
return R600Operands::ALUOpTable[OpTableIdx][Op];
}
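+// DOT_4 layout: operand 0 is the destination and the VecOps values map
+// directly onto the operands that follow it (see getVecOperandIdx above).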
+int R600InstrInfo::getOperandIdx(unsigned Opcode,
+ R600Operands::VecOps Op) const {
+ return Op + 1;
+}
+
void R600InstrInfo::setImmOperand(MachineInstr *MI, R600Operands::Ops Op,
int64_t Imm) const {
int Idx = getOperandIdx(*MI, Op);