Diffstat (limited to 'lib/Target/R600/R600InstrInfo.cpp')
-rw-r--r-- | lib/Target/R600/R600InstrInfo.cpp | 328
1 file changed, 294 insertions(+), 34 deletions(-)
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
index 8fd8385..4f5cfcd 100644
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -19,8 +19,8 @@
 #include "R600Defines.h"
 #include "R600MachineFunctionInfo.h"
 #include "R600RegisterInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 
 #define GET_INSTRINFO_CTOR
@@ -30,7 +30,7 @@ using namespace llvm;
 
 R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
   : AMDGPUInstrInfo(tm),
-    RI(tm, *this),
+    RI(tm),
     ST(tm.getSubtarget<AMDGPUSubtarget>())
   { }
 
@@ -116,9 +116,6 @@ bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const {
 bool R600InstrInfo::isReductionOp(unsigned Opcode) const {
   switch(Opcode) {
   default: return false;
-  case AMDGPU::DOT4_r600_pseudo:
-  case AMDGPU::DOT4_eg_pseudo:
-    return true;
   }
 }
 
@@ -150,7 +147,7 @@ bool R600InstrInfo::isTransOnly(const MachineInstr *MI) const {
 }
 
 bool R600InstrInfo::usesVertexCache(unsigned Opcode) const {
-  return ST.hasVertexCache() && get(Opcode).TSFlags & R600_InstFlag::VTX_INST;
+  return ST.hasVertexCache() && IS_VTX(get(Opcode));
 }
 
 bool R600InstrInfo::usesVertexCache(const MachineInstr *MI) const {
@@ -159,8 +156,7 @@ bool R600InstrInfo::usesVertexCache(const MachineInstr *MI) const {
 }
 
 bool R600InstrInfo::usesTextureCache(unsigned Opcode) const {
-  return (!ST.hasVertexCache() && get(Opcode).TSFlags & R600_InstFlag::VTX_INST) ||
-         (get(Opcode).TSFlags & R600_InstFlag::TEX_INST);
+  return (!ST.hasVertexCache() && IS_VTX(get(Opcode))) || IS_TEX(get(Opcode));
 }
 
 bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const {
@@ -169,6 +165,181 @@ bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const {
     usesTextureCache(MI->getOpcode());
 }
 
+SmallVector<std::pair<MachineOperand *, int64_t>, 3>
+R600InstrInfo::getSrcs(MachineInstr *MI) const {
+  SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;
+
+  if (MI->getOpcode() == AMDGPU::DOT_4) {
+    static const R600Operands::VecOps OpTable[8][2] = {
+      {R600Operands::SRC0_X, R600Operands::SRC0_SEL_X},
+      {R600Operands::SRC0_Y, R600Operands::SRC0_SEL_Y},
+      {R600Operands::SRC0_Z, R600Operands::SRC0_SEL_Z},
+      {R600Operands::SRC0_W, R600Operands::SRC0_SEL_W},
+      {R600Operands::SRC1_X, R600Operands::SRC1_SEL_X},
+      {R600Operands::SRC1_Y, R600Operands::SRC1_SEL_Y},
+      {R600Operands::SRC1_Z, R600Operands::SRC1_SEL_Z},
+      {R600Operands::SRC1_W, R600Operands::SRC1_SEL_W},
+    };
+
+    for (unsigned j = 0; j < 8; j++) {
+      MachineOperand &MO = MI->getOperand(OpTable[j][0] + 1);
+      unsigned Reg = MO.getReg();
+      if (Reg == AMDGPU::ALU_CONST) {
+        unsigned Sel = MI->getOperand(OpTable[j][1] + 1).getImm();
+        Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
+        continue;
+      }
+
+    }
+    return Result;
+  }
+
+  static const R600Operands::Ops OpTable[3][2] = {
+    {R600Operands::SRC0, R600Operands::SRC0_SEL},
+    {R600Operands::SRC1, R600Operands::SRC1_SEL},
+    {R600Operands::SRC2, R600Operands::SRC2_SEL},
+  };
+
+  for (unsigned j = 0; j < 3; j++) {
+    int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]);
+    if (SrcIdx < 0)
+      break;
+    MachineOperand &MO = MI->getOperand(SrcIdx);
+    unsigned Reg = MI->getOperand(SrcIdx).getReg();
+    if (Reg == AMDGPU::ALU_CONST) {
+      unsigned Sel = MI->getOperand(
+          getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
+      Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
+      continue;
+    }
+    if (Reg == AMDGPU::ALU_LITERAL_X) {
+      unsigned Imm = MI->getOperand(
+          getOperandIdx(MI->getOpcode(), R600Operands::IMM)).getImm();
+      Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Imm));
+      continue;
+    }
+    Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, 0));
+  }
+  return Result;
+}
+
+std::vector<std::pair<int, unsigned> >
+R600InstrInfo::ExtractSrcs(MachineInstr *MI,
+                           const DenseMap<unsigned, unsigned> &PV)
+    const {
+  const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = getSrcs(MI);
+  const std::pair<int, unsigned> DummyPair(-1, 0);
+  std::vector<std::pair<int, unsigned> > Result;
+  unsigned i = 0;
+  for (unsigned n = Srcs.size(); i < n; ++i) {
+    unsigned Reg = Srcs[i].first->getReg();
+    unsigned Index = RI.getEncodingValue(Reg) & 0xff;
+    unsigned Chan = RI.getHWRegChan(Reg);
+    if (Index > 127) {
+      Result.push_back(DummyPair);
+      continue;
+    }
+    if (PV.find(Index) != PV.end()) {
+      Result.push_back(DummyPair);
+      continue;
+    }
+    Result.push_back(std::pair<int, unsigned>(Index, Chan));
+  }
+  for (; i < 3; ++i)
+    Result.push_back(DummyPair);
+  return Result;
+}
+
+static std::vector<std::pair<int, unsigned> >
+Swizzle(std::vector<std::pair<int, unsigned> > Src,
+        R600InstrInfo::BankSwizzle Swz) {
+  switch (Swz) {
+  case R600InstrInfo::ALU_VEC_012:
+    break;
+  case R600InstrInfo::ALU_VEC_021:
+    std::swap(Src[1], Src[2]);
+    break;
+  case R600InstrInfo::ALU_VEC_102:
+    std::swap(Src[0], Src[1]);
+    break;
+  case R600InstrInfo::ALU_VEC_120:
+    std::swap(Src[0], Src[1]);
+    std::swap(Src[0], Src[2]);
+    break;
+  case R600InstrInfo::ALU_VEC_201:
+    std::swap(Src[0], Src[2]);
+    std::swap(Src[0], Src[1]);
+    break;
+  case R600InstrInfo::ALU_VEC_210:
+    std::swap(Src[0], Src[2]);
+    break;
+  }
+  return Src;
+}
+
+static bool
+isLegal(const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
+    const std::vector<R600InstrInfo::BankSwizzle> &Swz,
+    unsigned CheckedSize) {
+  int Vector[4][3];
+  memset(Vector, -1, sizeof(Vector));
+  for (unsigned i = 0; i < CheckedSize; i++) {
+    const std::vector<std::pair<int, unsigned> > &Srcs =
+        Swizzle(IGSrcs[i], Swz[i]);
+    for (unsigned j = 0; j < 3; j++) {
+      const std::pair<int, unsigned> &Src = Srcs[j];
+      if (Src.first < 0)
+        continue;
+      if (Vector[Src.second][j] < 0)
+        Vector[Src.second][j] = Src.first;
+      if (Vector[Src.second][j] != Src.first)
+        return false;
+    }
+  }
+  return true;
+}
+
+static bool recursiveFitsFPLimitation(
+const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
+std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
+unsigned Depth = 0) {
+  if (!isLegal(IGSrcs, SwzCandidate, Depth))
+    return false;
+  if (IGSrcs.size() == Depth)
+    return true;
+  unsigned i = SwzCandidate[Depth];
+  for (; i < 6; i++) {
+    SwzCandidate[Depth] = (R600InstrInfo::BankSwizzle) i;
+    if (recursiveFitsFPLimitation(IGSrcs, SwzCandidate, Depth + 1))
+      return true;
+  }
+  SwzCandidate[Depth] = R600InstrInfo::ALU_VEC_012;
+  return false;
+}
+
+bool
+R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
+                                       const DenseMap<unsigned, unsigned> &PV,
+                                       std::vector<BankSwizzle> &ValidSwizzle)
+    const {
+  //Todo : support shared src0 - src1 operand
+
+  std::vector<std::vector<std::pair<int, unsigned> > > IGSrcs;
+  ValidSwizzle.clear();
+  for (unsigned i = 0, e = IG.size(); i < e; ++i) {
+    IGSrcs.push_back(ExtractSrcs(IG[i], PV));
+    unsigned Op = getOperandIdx(IG[i]->getOpcode(),
+        R600Operands::BANK_SWIZZLE);
+    ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle)
+        IG[i]->getOperand(Op).getImm());
+  }
+  bool Result = recursiveFitsFPLimitation(IGSrcs, ValidSwizzle);
+  if (!Result)
+    return false;
+  return true;
+}
+
+
 bool R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
     const {
@@ -198,34 +369,22 @@ bool R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) const {
   std::vector<unsigned> Consts;
   for (unsigned i = 0, n = MIs.size(); i < n; i++) {
-    const MachineInstr *MI = MIs[i];
-
-    const R600Operands::Ops OpTable[3][2] = {
-      {R600Operands::SRC0, R600Operands::SRC0_SEL},
-      {R600Operands::SRC1, R600Operands::SRC1_SEL},
-      {R600Operands::SRC2, R600Operands::SRC2_SEL},
-    };
-
+    MachineInstr *MI = MIs[i];
     if (!isALUInstr(MI->getOpcode()))
       continue;
 
-    for (unsigned j = 0; j < 3; j++) {
-      int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]);
-      if (SrcIdx < 0)
-        break;
-      unsigned Reg = MI->getOperand(SrcIdx).getReg();
-      if (Reg == AMDGPU::ALU_CONST) {
-        unsigned Const = MI->getOperand(
-            getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
-        Consts.push_back(Const);
-        continue;
-      }
-      if (AMDGPU::R600_KC0RegClass.contains(Reg) ||
-          AMDGPU::R600_KC1RegClass.contains(Reg)) {
-        unsigned Index = RI.getEncodingValue(Reg) & 0xff;
-        unsigned Chan = RI.getHWRegChan(Reg);
+    const SmallVector<std::pair<MachineOperand *, int64_t>, 3> &Srcs =
+        getSrcs(MI);
+
+    for (unsigned j = 0, e = Srcs.size(); j < e; j++) {
+      std::pair<MachineOperand *, unsigned> Src = Srcs[j];
+      if (Src.first->getReg() == AMDGPU::ALU_CONST)
+        Consts.push_back(Src.second);
+      if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) ||
+          AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())) {
+        unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff;
+        unsigned Chan = RI.getHWRegChan(Src.first->getReg());
         Consts.push_back((Index << 2) | Chan);
-        continue;
       }
     }
   }
@@ -657,7 +816,8 @@ MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
 
   MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
                                       AddrReg, ValueReg)
-                                      .addReg(AMDGPU::AR_X, RegState::Implicit);
+                                      .addReg(AMDGPU::AR_X,
+                                              RegState::Implicit | RegState::Kill);
   setImmOperand(Mov, R600Operands::DST_REL, 1);
   return Mov;
 }
@@ -674,7 +834,8 @@ MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
 
   MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
                                       ValueReg, AddrReg)
-                                      .addReg(AMDGPU::AR_X, RegState::Implicit);
+                                      .addReg(AMDGPU::AR_X,
+                                              RegState::Implicit | RegState::Kill);
   setImmOperand(Mov, R600Operands::SRC0_REL, 1);
 
   return Mov;
@@ -729,6 +890,95 @@ MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MB
   return MIB;
 }
 
+#define OPERAND_CASE(Label) \
+  case Label: { \
+    static const R600Operands::VecOps Ops[] = \
+    { \
+      Label##_X, \
+      Label##_Y, \
+      Label##_Z, \
+      Label##_W \
+    }; \
+    return Ops[Slot]; \
+  }
+
+static R600Operands::VecOps
+getSlotedOps(R600Operands::Ops Op, unsigned Slot) {
+  switch (Op) {
+  OPERAND_CASE(R600Operands::UPDATE_EXEC_MASK)
+  OPERAND_CASE(R600Operands::UPDATE_PREDICATE)
+  OPERAND_CASE(R600Operands::WRITE)
+  OPERAND_CASE(R600Operands::OMOD)
+  OPERAND_CASE(R600Operands::DST_REL)
+  OPERAND_CASE(R600Operands::CLAMP)
+  OPERAND_CASE(R600Operands::SRC0)
+  OPERAND_CASE(R600Operands::SRC0_NEG)
+  OPERAND_CASE(R600Operands::SRC0_REL)
+  OPERAND_CASE(R600Operands::SRC0_ABS)
+  OPERAND_CASE(R600Operands::SRC0_SEL)
+  OPERAND_CASE(R600Operands::SRC1)
+  OPERAND_CASE(R600Operands::SRC1_NEG)
+  OPERAND_CASE(R600Operands::SRC1_REL)
+  OPERAND_CASE(R600Operands::SRC1_ABS)
+  OPERAND_CASE(R600Operands::SRC1_SEL)
+  OPERAND_CASE(R600Operands::PRED_SEL)
+  default:
+    llvm_unreachable("Wrong Operand");
+  }
+}
+
+#undef OPERAND_CASE
+
+static int
+getVecOperandIdx(R600Operands::VecOps Op) {
+  return 1 + Op;
+}
+
+
+MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
+    MachineBasicBlock &MBB, MachineInstr *MI, unsigned Slot, unsigned DstReg)
+    const {
+  assert (MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented");
+  unsigned Opcode;
+  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+  if (ST.getGeneration() <= AMDGPUSubtarget::R700)
+    Opcode = AMDGPU::DOT4_r600;
+  else
+    Opcode = AMDGPU::DOT4_eg;
+  MachineBasicBlock::iterator I = MI;
+  MachineOperand &Src0 = MI->getOperand(
+      getVecOperandIdx(getSlotedOps(R600Operands::SRC0, Slot)));
+  MachineOperand &Src1 = MI->getOperand(
+      getVecOperandIdx(getSlotedOps(R600Operands::SRC1, Slot)));
+  MachineInstr *MIB = buildDefaultInstruction(
+      MBB, I, Opcode, DstReg, Src0.getReg(), Src1.getReg());
+  static const R600Operands::Ops Operands[14] = {
+    R600Operands::UPDATE_EXEC_MASK,
+    R600Operands::UPDATE_PREDICATE,
+    R600Operands::WRITE,
+    R600Operands::OMOD,
+    R600Operands::DST_REL,
+    R600Operands::CLAMP,
+    R600Operands::SRC0_NEG,
+    R600Operands::SRC0_REL,
+    R600Operands::SRC0_ABS,
+    R600Operands::SRC0_SEL,
+    R600Operands::SRC1_NEG,
+    R600Operands::SRC1_REL,
+    R600Operands::SRC1_ABS,
+    R600Operands::SRC1_SEL,
+  };
+
+  for (unsigned i = 0; i < 14; i++) {
+    MachineOperand &MO = MI->getOperand(
+        getVecOperandIdx(getSlotedOps(Operands[i], Slot)));
+    assert (MO.isImm());
+    setImmOperand(MIB, Operands[i], MO.getImm());
+  }
+  MIB->getOperand(20).setImm(0);
+  return MIB;
+}
+
 MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
                                          MachineBasicBlock::iterator I,
                                          unsigned DstReg,
@@ -744,6 +994,11 @@ int R600InstrInfo::getOperandIdx(const MachineInstr &MI,
   return getOperandIdx(MI.getOpcode(), Op);
 }
 
+int R600InstrInfo::getOperandIdx(const MachineInstr &MI,
+                                 R600Operands::VecOps Op) const {
+  return getOperandIdx(MI.getOpcode(), Op);
+}
+
 int R600InstrInfo::getOperandIdx(unsigned Opcode,
                                  R600Operands::Ops Op) const {
   unsigned TargetFlags = get(Opcode).TSFlags;
@@ -774,6 +1029,11 @@ int R600InstrInfo::getOperandIdx(unsigned Opcode,
   return R600Operands::ALUOpTable[OpTableIdx][Op];
 }
 
+int R600InstrInfo::getOperandIdx(unsigned Opcode,
+                                 R600Operands::VecOps Op) const {
+  return Op + 1;
+}
+
 void R600InstrInfo::setImmOperand(MachineInstr *MI, R600Operands::Ops Op,
                                   int64_t Imm) const {
   int Idx = getOperandIdx(*MI, Op);
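
Note on the core of this change, the bank swizzle search: isLegal encodes the constraint that, across an instruction group, GPR read port j may fetch at most one register index per channel (X/Y/Z/W), and each instruction may permute its up-to-three sources over the ports with one of the six ALU_VEC_* swizzles. recursiveFitsFPLimitation backtracks over those permutations until the whole group fits. Below is a minimal standalone C++ sketch of the same scheme on toy data; the SrcReg typedef, the applySwizzle/findSwizzles names, and the main() driver are illustrative assumptions, not code from the patch (which also resumes the search from each instruction's current swizzle instead of from 0).

// Standalone sketch of the patch's swizzle search, outside LLVM.
// Names and toy data here are assumptions for illustration only.
#include <algorithm>
#include <cstring>
#include <iostream>
#include <utility>
#include <vector>

enum BankSwizzle {
  ALU_VEC_012, ALU_VEC_021, ALU_VEC_102,
  ALU_VEC_120, ALU_VEC_201, ALU_VEC_210
};

typedef std::pair<int, unsigned> SrcReg; // (register index, channel); -1 = unused

// Reorder the three sources of one instruction according to a swizzle.
static std::vector<SrcReg> applySwizzle(std::vector<SrcReg> Src, BankSwizzle Swz) {
  switch (Swz) {
  case ALU_VEC_012: break;
  case ALU_VEC_021: std::swap(Src[1], Src[2]); break;
  case ALU_VEC_102: std::swap(Src[0], Src[1]); break;
  case ALU_VEC_120: std::swap(Src[0], Src[1]); std::swap(Src[0], Src[2]); break;
  case ALU_VEC_201: std::swap(Src[0], Src[2]); std::swap(Src[0], Src[1]); break;
  case ALU_VEC_210: std::swap(Src[0], Src[2]); break;
  }
  return Src;
}

// Vector[chan][port] records which register index each read port has claimed
// on each channel so far; a port may serve only one index per channel.
static bool isLegal(const std::vector<std::vector<SrcReg> > &IGSrcs,
                    const std::vector<BankSwizzle> &Swz, unsigned CheckedSize) {
  int Vector[4][3];
  std::memset(Vector, -1, sizeof(Vector));
  for (unsigned i = 0; i < CheckedSize; i++) {
    std::vector<SrcReg> Srcs = applySwizzle(IGSrcs[i], Swz[i]);
    for (unsigned j = 0; j < 3; j++) {
      if (Srcs[j].first < 0)
        continue;
      if (Vector[Srcs[j].second][j] < 0)
        Vector[Srcs[j].second][j] = Srcs[j].first;
      if (Vector[Srcs[j].second][j] != Srcs[j].first)
        return false; // port already fetches a different index on this channel
    }
  }
  return true;
}

// Depth-first search over the six swizzles per instruction, pruning any
// prefix that already violates the read-port constraints.
static bool findSwizzles(const std::vector<std::vector<SrcReg> > &IGSrcs,
                         std::vector<BankSwizzle> &Swz, unsigned Depth = 0) {
  if (!isLegal(IGSrcs, Swz, Depth))
    return false;
  if (Depth == IGSrcs.size())
    return true;
  for (unsigned i = 0; i < 6; i++) {
    Swz[Depth] = (BankSwizzle)i;
    if (findSwizzles(IGSrcs, Swz, Depth + 1))
      return true;
  }
  Swz[Depth] = ALU_VEC_012;
  return false;
}

int main() {
  // Two toy instructions whose first sources collide on channel 0, port 0
  // unless one of them is swizzled.
  std::vector<std::vector<SrcReg> > IG;
  std::vector<SrcReg> A, B;
  A.push_back(SrcReg(1, 0)); A.push_back(SrcReg(2, 1)); A.push_back(SrcReg(-1, 0));
  B.push_back(SrcReg(3, 0)); B.push_back(SrcReg(1, 0)); B.push_back(SrcReg(-1, 0));
  IG.push_back(A); IG.push_back(B);
  std::vector<BankSwizzle> Swz(IG.size(), ALU_VEC_012);
  std::cout << (findSwizzles(IG, Swz) ? "fits" : "does not fit") << "\n";
  return 0;
}

With these toy operands the sketch prints "fits": the second instruction is rotated to ALU_VEC_102, so its read of register 3 moves to port 1 while its read of register 1 shares port 0 with the first instruction.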
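A second note, on the OPERAND_CASE macro behind getSlotedOps: Label##_X pastes the slot suffix onto the last token of the macro argument, so a single case expands into the per-slot X/Y/Z/W operand table. Here is a compilable miniature of the same token-pasting trick; the MockOps enum is an illustrative stand-in, not the real R600Operands definitions.

// Miniature of the OPERAND_CASE token-pasting trick, with a mock enum.
#include <cassert>
#include <iostream>

namespace MockOps {
  enum Ops { SRC0, SRC1 };
  // Per-slot variants, laid out X, Y, Z, W for each vector operand.
  enum VecOps { SRC0_X, SRC0_Y, SRC0_Z, SRC0_W,
                SRC1_X, SRC1_Y, SRC1_Z, SRC1_W };
}

// Label##_X pastes "_X" onto the last token of the argument, so
// OPERAND_CASE(MockOps::SRC0) builds {MockOps::SRC0_X, ..., MockOps::SRC0_W}.
#define OPERAND_CASE(Label) \
  case Label: { \
    static const MockOps::VecOps Ops[] = { \
      Label##_X, Label##_Y, Label##_Z, Label##_W \
    }; \
    return Ops[Slot]; \
  }

static MockOps::VecOps getSlotedOps(MockOps::Ops Op, unsigned Slot) {
  assert(Slot < 4 && "slot selects one of the X/Y/Z/W variants");
  switch (Op) {
  OPERAND_CASE(MockOps::SRC0)
  OPERAND_CASE(MockOps::SRC1)
  }
  assert(0 && "unhandled operand");
  return MockOps::SRC0_X;
}
#undef OPERAND_CASE

int main() {
  // Slot 2 of SRC1 resolves to SRC1_Z (value 6 in the mock enum).
  std::cout << getSlotedOps(MockOps::SRC1, 2) << "\n";
  return 0;
}

Printing getSlotedOps(MockOps::SRC1, 2) yields 6, i.e. SRC1_Z, which is the same (operand, slot) to vector-variant mapping the patch uses when buildSlotOfVectorInstruction peels one slot out of a DOT_4.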