diff options
-rw-r--r-- | lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 30 | ||||
-rw-r--r-- | lib/Target/R600/R600ControlFlowFinalizer.cpp | 84 | ||||
-rw-r--r-- | lib/Target/R600/R600Instructions.td | 198 |
3 files changed, 240 insertions, 72 deletions
diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp index 927bcbd..469a8ad 100644 --- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp @@ -266,17 +266,27 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, Emit(Inst, OS); break; } - case AMDGPU::CF_TC: - case AMDGPU::CF_VC: - case AMDGPU::CF_CALL_FS: + case AMDGPU::CF_TC_EG: + case AMDGPU::CF_VC_EG: + case AMDGPU::CF_CALL_FS_EG: + case AMDGPU::CF_TC_R600: + case AMDGPU::CF_VC_R600: + case AMDGPU::CF_CALL_FS_R600: return; - case AMDGPU::WHILE_LOOP: - case AMDGPU::END_LOOP: - case AMDGPU::LOOP_BREAK: - case AMDGPU::CF_CONTINUE: - case AMDGPU::CF_JUMP: - case AMDGPU::CF_ELSE: - case AMDGPU::POP: { + case AMDGPU::WHILE_LOOP_EG: + case AMDGPU::END_LOOP_EG: + case AMDGPU::LOOP_BREAK_EG: + case AMDGPU::CF_CONTINUE_EG: + case AMDGPU::CF_JUMP_EG: + case AMDGPU::CF_ELSE_EG: + case AMDGPU::POP_EG: + case AMDGPU::WHILE_LOOP_R600: + case AMDGPU::END_LOOP_R600: + case AMDGPU::LOOP_BREAK_R600: + case AMDGPU::CF_CONTINUE_R600: + case AMDGPU::CF_JUMP_R600: + case AMDGPU::CF_ELSE_R600: + case AMDGPU::POP_R600: { uint64_t Inst = getBinaryCodeForInstr(MI, Fixups); EmitByte(INSTR_NATIVE, OS); Emit(Inst, OS); diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp index 3a6c7ea..cfaa36e 100644 --- a/lib/Target/R600/R600ControlFlowFinalizer.cpp +++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp @@ -30,9 +30,22 @@ namespace llvm { class R600ControlFlowFinalizer : public MachineFunctionPass { private: + enum ControlFlowInstruction { + CF_TC, + CF_CALL_FS, + CF_WHILE_LOOP, + CF_END_LOOP, + CF_LOOP_BREAK, + CF_LOOP_CONTINUE, + CF_JUMP, + CF_ELSE, + CF_POP + }; + static char ID; const R600InstrInfo *TII; unsigned MaxFetchInst; + const AMDGPUSubtarget &ST; bool isFetch(const MachineInstr *MI) const { switch (MI->getOpcode()) { @@ -70,6 +83,52 @@ private: } } + const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const { + if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX) { + switch (CFI) { + case CF_TC: + return TII->get(AMDGPU::CF_TC_R600); + case CF_CALL_FS: + return TII->get(AMDGPU::CF_CALL_FS_R600); + case CF_WHILE_LOOP: + return TII->get(AMDGPU::WHILE_LOOP_R600); + case CF_END_LOOP: + return TII->get(AMDGPU::END_LOOP_R600); + case CF_LOOP_BREAK: + return TII->get(AMDGPU::LOOP_BREAK_R600); + case CF_LOOP_CONTINUE: + return TII->get(AMDGPU::CF_CONTINUE_R600); + case CF_JUMP: + return TII->get(AMDGPU::CF_JUMP_R600); + case CF_ELSE: + return TII->get(AMDGPU::CF_ELSE_R600); + case CF_POP: + return TII->get(AMDGPU::POP_R600); + } + } else { + switch (CFI) { + case CF_TC: + return TII->get(AMDGPU::CF_TC_EG); + case CF_CALL_FS: + return TII->get(AMDGPU::CF_CALL_FS_EG); + case CF_WHILE_LOOP: + return TII->get(AMDGPU::WHILE_LOOP_EG); + case CF_END_LOOP: + return TII->get(AMDGPU::END_LOOP_EG); + case CF_LOOP_BREAK: + return TII->get(AMDGPU::LOOP_BREAK_EG); + case CF_LOOP_CONTINUE: + return TII->get(AMDGPU::CF_CONTINUE_EG); + case CF_JUMP: + return TII->get(AMDGPU::CF_JUMP_EG); + case CF_ELSE: + return TII->get(AMDGPU::CF_ELSE_EG); + case CF_POP: + return TII->get(AMDGPU::POP_EG); + } + } + } + MachineBasicBlock::iterator MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned CfAddress) const { @@ -85,7 +144,7 @@ private: break; } BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), - TII->get(AMDGPU::CF_TC)) + getHWInstrDesc(CF_TC)) .addImm(CfAddress) // ADDR .addImm(AluInstCount); // COUNT return I; @@ -104,7 +163,8 @@ private: public: R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID), - TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { + TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())), + ST(tm.getSubtarget<AMDGPUSubtarget>()) { const AMDGPUSubtarget &ST = tm.getSubtarget<AMDGPUSubtarget>(); if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX) MaxFetchInst = 8; @@ -124,7 +184,7 @@ public: R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); if (MFI->ShaderType == 1) { BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()), - TII->get(AMDGPU::CF_CALL_FS)); + getHWInstrDesc(CF_CALL_FS)); CfCount++; } for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); @@ -154,7 +214,7 @@ public: CurrentStack++; MaxStack = std::max(MaxStack, CurrentStack); MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), - TII->get(AMDGPU::WHILE_LOOP)) + getHWInstrDesc(CF_WHILE_LOOP)) .addImm(2); std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount, std::set<MachineInstr *>()); @@ -170,7 +230,7 @@ public: LoopStack.back(); LoopStack.pop_back(); CounterPropagateAddr(Pair.second, CfCount); - BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::END_LOOP)) + BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP)) .addImm(Pair.first + 1); MI->eraseFromParent(); CfCount++; @@ -178,7 +238,7 @@ public: } case AMDGPU::IF_PREDICATE_SET: { MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), - TII->get(AMDGPU::CF_JUMP)) + getHWInstrDesc(CF_JUMP)) .addImm(0) .addImm(0); IfThenElseStack.push_back(MIb); @@ -192,7 +252,7 @@ public: IfThenElseStack.pop_back(); CounterPropagateAddr(JumpInst, CfCount); MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), - TII->get(AMDGPU::CF_ELSE)) + getHWInstrDesc(CF_ELSE)) .addImm(0) .addImm(1); DEBUG(dbgs() << CfCount << ":"; MIb->dump();); @@ -207,7 +267,7 @@ public: IfThenElseStack.pop_back(); CounterPropagateAddr(IfOrElseInst, CfCount + 1); MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), - TII->get(AMDGPU::POP)) + getHWInstrDesc(CF_POP)) .addImm(CfCount + 1) .addImm(1); DEBUG(dbgs() << CfCount << ":"; MIb->dump();); @@ -218,13 +278,13 @@ public: case AMDGPU::PREDICATED_BREAK: { CurrentStack--; CfCount += 3; - BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_JUMP)) + BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_JUMP)) .addImm(CfCount) .addImm(1); MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), - TII->get(AMDGPU::LOOP_BREAK)) + getHWInstrDesc(CF_LOOP_BREAK)) .addImm(0); - BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::POP)) + BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_POP)) .addImm(CfCount) .addImm(1); LoopStack.back().second.insert(MIb); @@ -233,7 +293,7 @@ public: } case AMDGPU::CONTINUE: { MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), - TII->get(AMDGPU::CF_CONTINUE)) + getHWInstrDesc(CF_LOOP_CONTINUE)) .addImm(0); LoopStack.back().second.insert(MIb); MI->eraseFromParent(); diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 663b41a..b4c45e1 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -823,97 +823,103 @@ i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1, i32imm:$COUNT), let Inst{63-32} = Word1; } -class CF_WORD0 { +class CF_WORD0_R600 { field bits<32> Word0; - bits<24> ADDR; - bits<3> JUMPTABLE_SEL; + bits<32> ADDR; - let Word0{23-0} = ADDR; - let Word0{26-24} = JUMPTABLE_SEL; + let Word0 = ADDR; } -class CF_WORD1 { +class CF_WORD1_R600 { field bits<32> Word1; bits<3> POP_COUNT; bits<5> CF_CONST; bits<2> COND; - bits<6> COUNT; + bits<3> COUNT; + bits<6> CALL_COUNT; + bits<1> COUNT_3; + bits<1> END_OF_PROGRAM; bits<1> VALID_PIXEL_MODE; - bits<8> CF_INST; + bits<7> CF_INST; + bits<1> WHOLE_QUAD_MODE; bits<1> BARRIER; let Word1{2-0} = POP_COUNT; let Word1{7-3} = CF_CONST; let Word1{9-8} = COND; - let Word1{15-10} = COUNT; - let Word1{20} = VALID_PIXEL_MODE; - let Word1{29-22} = CF_INST; + let Word1{12-10} = COUNT; + let Word1{18-13} = CALL_COUNT; + let Word1{19} = COUNT_3; + let Word1{21} = END_OF_PROGRAM; + let Word1{22} = VALID_PIXEL_MODE; + let Word1{29-23} = CF_INST; + let Word1{30} = WHOLE_QUAD_MODE; let Word1{31} = BARRIER; } -class CF_CLAUSE <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs), -ins, AsmPrint, [] >, CF_WORD0, CF_WORD1 { +class CF_CLAUSE_R600 <bits<7> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs), +ins, AsmPrint, [] >, CF_WORD0_R600, CF_WORD1_R600 { field bits<64> Inst; let CF_INST = inst; let BARRIER = 1; - let JUMPTABLE_SEL = 0; let CF_CONST = 0; let VALID_PIXEL_MODE = 0; let COND = 0; + let CALL_COUNT = 0; + let COUNT_3 = 0; + let END_OF_PROGRAM = 0; + let WHOLE_QUAD_MODE = 0; let Inst{31-0} = Word0; let Inst{63-32} = Word1; } -def CF_TC : CF_CLAUSE<1, (ins i32imm:$ADDR, i32imm:$COUNT), -"TEX $COUNT @$ADDR"> { - let POP_COUNT = 0; -} - -def CF_VC : CF_CLAUSE<2, (ins i32imm:$ADDR, i32imm:$COUNT), -"VTX $COUNT @$ADDR"> { - let POP_COUNT = 0; -} +class CF_WORD0_EG { + field bits<32> Word0; -def WHILE_LOOP : CF_CLAUSE<6, (ins i32imm:$ADDR), "LOOP_START_DX10 @$ADDR"> { - let POP_COUNT = 0; - let COUNT = 0; -} + bits<24> ADDR; + bits<3> JUMPTABLE_SEL; -def END_LOOP : CF_CLAUSE<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> { - let POP_COUNT = 0; - let COUNT = 0; + let Word0{23-0} = ADDR; + let Word0{26-24} = JUMPTABLE_SEL; } -def LOOP_BREAK : CF_CLAUSE<9, (ins i32imm:$ADDR), "LOOP_BREAK @$ADDR"> { - let POP_COUNT = 0; - let COUNT = 0; -} +class CF_WORD1_EG { + field bits<32> Word1; -def CF_CONTINUE : CF_CLAUSE<8, (ins i32imm:$ADDR), "CONTINUE @$ADDR"> { - let POP_COUNT = 0; - let COUNT = 0; -} + bits<3> POP_COUNT; + bits<5> CF_CONST; + bits<2> COND; + bits<6> COUNT; + bits<1> VALID_PIXEL_MODE; + bits<8> CF_INST; + bits<1> BARRIER; -def CF_JUMP : CF_CLAUSE<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "JUMP @$ADDR POP:$POP_COUNT"> { - let COUNT = 0; + let Word1{2-0} = POP_COUNT; + let Word1{7-3} = CF_CONST; + let Word1{9-8} = COND; + let Word1{15-10} = COUNT; + let Word1{20} = VALID_PIXEL_MODE; + let Word1{29-22} = CF_INST; + let Word1{31} = BARRIER; } -def CF_ELSE : CF_CLAUSE<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "ELSE @$ADDR POP:$POP_COUNT"> { - let COUNT = 0; -} +class CF_CLAUSE_EG <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs), +ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG { + field bits<64> Inst; -def CF_CALL_FS : CF_CLAUSE<19, (ins), "CALL_FS"> { - let ADDR = 0; - let COUNT = 0; - let POP_COUNT = 0; -} + let CF_INST = inst; + let BARRIER = 1; + let JUMPTABLE_SEL = 0; + let CF_CONST = 0; + let VALID_PIXEL_MODE = 0; + let COND = 0; -def POP : CF_CLAUSE<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "POP @$ADDR POP:$POP_COUNT"> { - let COUNT = 0; + let Inst{31-0} = Word0; + let Inst{63-32} = Word1; } def CF_ALU : ALU_CLAUSE<8, "ALU">; @@ -1433,6 +1439,52 @@ let Predicates = [isR600] in { let Word1{31} = 1; // BARRIER } defm : SteamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>; + + def CF_TC_R600 : CF_CLAUSE_R600<1, (ins i32imm:$ADDR, i32imm:$COUNT), + "TEX $COUNT @$ADDR"> { + let POP_COUNT = 0; + } + def CF_VC_R600 : CF_CLAUSE_R600<2, (ins i32imm:$ADDR, i32imm:$COUNT), + "VTX $COUNT @$ADDR"> { + let POP_COUNT = 0; + } + def WHILE_LOOP_R600 : CF_CLAUSE_R600<6, (ins i32imm:$ADDR), + "LOOP_START_DX10 @$ADDR"> { + let POP_COUNT = 0; + let COUNT = 0; + } + def END_LOOP_R600 : CF_CLAUSE_R600<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> { + let POP_COUNT = 0; + let COUNT = 0; + } + def LOOP_BREAK_R600 : CF_CLAUSE_R600<9, (ins i32imm:$ADDR), + "LOOP_BREAK @$ADDR"> { + let POP_COUNT = 0; + let COUNT = 0; + } + def CF_CONTINUE_R600 : CF_CLAUSE_R600<8, (ins i32imm:$ADDR), + "CONTINUE @$ADDR"> { + let POP_COUNT = 0; + let COUNT = 0; + } + def CF_JUMP_R600 : CF_CLAUSE_R600<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT), + "JUMP @$ADDR POP:$POP_COUNT"> { + let COUNT = 0; + } + def CF_ELSE_R600 : CF_CLAUSE_R600<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT), + "ELSE @$ADDR POP:$POP_COUNT"> { + let COUNT = 0; + } + def CF_CALL_FS_R600 : CF_CLAUSE_R600<19, (ins), "CALL_FS"> { + let ADDR = 0; + let COUNT = 0; + let POP_COUNT = 0; + } + def POP_R600 : CF_CLAUSE_R600<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT), + "POP @$ADDR POP:$POP_COUNT"> { + let COUNT = 0; + } + } // Helper pattern for normalizing inputs to triginomic instructions for R700+ @@ -1589,6 +1641,52 @@ let hasSideEffects = 1 in { } defm : SteamOutputExportPattern<EG_ExportBuf, 0x40, 0x41, 0x42, 0x43>; + def CF_TC_EG : CF_CLAUSE_EG<1, (ins i32imm:$ADDR, i32imm:$COUNT), + "TEX $COUNT @$ADDR"> { + let POP_COUNT = 0; + } + def CF_VC_EG : CF_CLAUSE_EG<2, (ins i32imm:$ADDR, i32imm:$COUNT), + "VTX $COUNT @$ADDR"> { + let POP_COUNT = 0; + } + def WHILE_LOOP_EG : CF_CLAUSE_EG<6, (ins i32imm:$ADDR), + "LOOP_START_DX10 @$ADDR"> { + let POP_COUNT = 0; + let COUNT = 0; + } + def END_LOOP_EG : CF_CLAUSE_EG<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> { + let POP_COUNT = 0; + let COUNT = 0; + } + def LOOP_BREAK_EG : CF_CLAUSE_EG<9, (ins i32imm:$ADDR), + "LOOP_BREAK @$ADDR"> { + let POP_COUNT = 0; + let COUNT = 0; + } + def CF_CONTINUE_EG : CF_CLAUSE_EG<8, (ins i32imm:$ADDR), + "CONTINUE @$ADDR"> { + let POP_COUNT = 0; + let COUNT = 0; + } + def CF_JUMP_EG : CF_CLAUSE_EG<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT), + "JUMP @$ADDR POP:$POP_COUNT"> { + let COUNT = 0; + } + def CF_ELSE_EG : CF_CLAUSE_EG<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT), + "ELSE @$ADDR POP:$POP_COUNT"> { + let COUNT = 0; + } + def CF_CALL_FS_EG : CF_CLAUSE_EG<19, (ins), "CALL_FS"> { + let ADDR = 0; + let COUNT = 0; + let POP_COUNT = 0; + } + def POP_EG : CF_CLAUSE_EG<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT), + "POP @$ADDR POP:$POP_COUNT"> { + let COUNT = 0; + } + + //===----------------------------------------------------------------------===// // Memory read/write instructions //===----------------------------------------------------------------------===// |