diff options
Diffstat (limited to 'lib/Target/R600/R600ExpandSpecialInstrs.cpp')
| -rw-r--r-- | lib/Target/R600/R600ExpandSpecialInstrs.cpp | 73 |
1 files changed, 46 insertions, 27 deletions
diff --git a/lib/Target/R600/R600ExpandSpecialInstrs.cpp b/lib/Target/R600/R600ExpandSpecialInstrs.cpp index f8c900f..67b42d7 100644 --- a/lib/Target/R600/R600ExpandSpecialInstrs.cpp +++ b/lib/Target/R600/R600ExpandSpecialInstrs.cpp @@ -38,7 +38,7 @@ private: public: R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID), - TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { } + TII(0) { } virtual bool runOnMachineFunction(MachineFunction &MF); @@ -56,6 +56,7 @@ FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) { } bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { + TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo()); const R600RegisterInfo &TRI = TII->getRegisterInfo(); @@ -81,28 +82,13 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { AMDGPU::ZERO); // src1 TII->addFlag(PredSet, 0, MO_FLAG_MASK); if (Flags & MO_FLAG_PUSH) { - TII->setImmOperand(PredSet, R600Operands::UPDATE_EXEC_MASK, 1); + TII->setImmOperand(PredSet, AMDGPU::OpName::update_exec_mask, 1); } else { - TII->setImmOperand(PredSet, R600Operands::UPDATE_PREDICATE, 1); + TII->setImmOperand(PredSet, AMDGPU::OpName::update_pred, 1); } MI.eraseFromParent(); continue; } - case AMDGPU::BREAK: { - MachineInstr *PredSet = TII->buildDefaultInstruction(MBB, I, - AMDGPU::PRED_SETE_INT, - AMDGPU::PREDICATE_BIT, - AMDGPU::ZERO, - AMDGPU::ZERO); - TII->addFlag(PredSet, 0, MO_FLAG_MASK); - TII->setImmOperand(PredSet, R600Operands::UPDATE_EXEC_MASK, 1); - - BuildMI(MBB, I, MBB.findDebugLoc(I), - TII->get(AMDGPU::PREDICATED_BREAK)) - .addReg(AMDGPU::PREDICATE_BIT); - MI.eraseFromParent(); - continue; - } case AMDGPU::INTERP_PAIR_XY: { MachineInstr *BMI; @@ -182,6 +168,45 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { MI.eraseFromParent(); continue; } + case AMDGPU::DOT_4: { + + const R600RegisterInfo &TRI = TII->getRegisterInfo(); + + unsigned DstReg = MI.getOperand(0).getReg(); + unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK; + + for (unsigned Chan = 0; Chan < 4; ++Chan) { + bool Mask = (Chan != TRI.getHWRegChan(DstReg)); + unsigned SubDstReg = + AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan); + MachineInstr *BMI = + TII->buildSlotOfVectorInstruction(MBB, &MI, Chan, SubDstReg); + if (Chan > 0) { + BMI->bundleWithPred(); + } + if (Mask) { + TII->addFlag(BMI, 0, MO_FLAG_MASK); + } + if (Chan != 3) + TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST); + unsigned Opcode = BMI->getOpcode(); + // While not strictly necessary from hw point of view, we force + // all src operands of a dot4 inst to belong to the same slot. + unsigned Src0 = BMI->getOperand( + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0)) + .getReg(); + unsigned Src1 = BMI->getOperand( + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1)) + .getReg(); + (void) Src0; + (void) Src1; + if ((TRI.getEncodingValue(Src0) & 0xff) < 127 && + (TRI.getEncodingValue(Src1) & 0xff) < 127) + assert(TRI.getHWRegChan(Src0) == TRI.getHWRegChan(Src1)); + } + MI.eraseFromParent(); + continue; + } } bool IsReduction = TII->isReductionOp(MI.getOpcode()); @@ -218,14 +243,14 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { // T0_W = CUBE T1_Y, T1_Z for (unsigned Chan = 0; Chan < 4; Chan++) { unsigned DstReg = MI.getOperand( - TII->getOperandIdx(MI, R600Operands::DST)).getReg(); + TII->getOperandIdx(MI, AMDGPU::OpName::dst)).getReg(); unsigned Src0 = MI.getOperand( - TII->getOperandIdx(MI, R600Operands::SRC0)).getReg(); + TII->getOperandIdx(MI, AMDGPU::OpName::src0)).getReg(); unsigned Src1 = 0; // Determine the correct source registers if (!IsCube) { - int Src1Idx = TII->getOperandIdx(MI, R600Operands::SRC1); + int Src1Idx = TII->getOperandIdx(MI, AMDGPU::OpName::src1); if (Src1Idx != -1) { Src1 = MI.getOperand(Src1Idx).getReg(); } @@ -268,12 +293,6 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { case AMDGPU::CUBE_eg_pseudo: Opcode = AMDGPU::CUBE_eg_real; break; - case AMDGPU::DOT4_r600_pseudo: - Opcode = AMDGPU::DOT4_r600_real; - break; - case AMDGPU::DOT4_eg_pseudo: - Opcode = AMDGPU::DOT4_eg_real; - break; default: break; } |
