diff options
Diffstat (limited to 'lib/Target/R600')
-rw-r--r-- | lib/Target/R600/SIISelLowering.cpp | 14 | ||||
-rw-r--r-- | lib/Target/R600/SIISelLowering.h | 2 | ||||
-rw-r--r-- | lib/Target/R600/SIInstructions.td | 24 | ||||
-rw-r--r-- | lib/Target/R600/SILowerControlFlow.cpp | 127 |
4 files changed, 96 insertions, 71 deletions
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 4c672ca..18fa908 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -131,9 +131,6 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( case AMDGPU::SI_INTERP_CONST: LowerSI_INTERP_CONST(MI, *BB, I, MRI); break; - case AMDGPU::SI_KIL: - LowerSI_KIL(MI, *BB, I, MRI); - break; case AMDGPU::SI_WQM: LowerSI_WQM(MI, *BB, I, MRI); break; @@ -211,17 +208,6 @@ void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI, MI->eraseFromParent(); } -void SITargetLowering::LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB, - MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const { - // Clear this pixel from the exec mask if the operand is negative - BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CMPX_LE_F32_e32), - AMDGPU::VCC) - .addReg(AMDGPU::SREG_LIT_0) - .addOperand(MI->getOperand(0)); - - MI->eraseFromParent(); -} - void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB, MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const { unsigned VCC = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index c088112..db36eef 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -34,8 +34,6 @@ class SITargetLowering : public AMDGPUTargetLowering { MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB, MachineBasicBlock::iterator I, MachineRegisterInfo &MRI) const; - void LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB, - MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB, MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB, diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 005be96..cac42da 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1080,13 +1080,6 @@ def SI_INTERP_CONST : InstSI < imm:$attr, SReg_32:$params))] >; -def SI_KIL : InstSI < - (outs), - (ins VReg_32:$src), - "SI_KIL $src", - [(int_AMDGPU_kill VReg_32:$src)] ->; - def SI_WQM : InstSI < (outs), (ins), @@ -1157,11 +1150,23 @@ def SI_END_CF : InstSI < [(int_SI_end_cf SReg_64:$saved)] >; +def SI_KILL : InstSI < + (outs), + (ins VReg_32:$src), + "SI_KIL $src", + [(int_AMDGPU_kill VReg_32:$src)] +>; + } // end mayLoad = 1, mayStore = 1, hasSideEffects = 1 // Uses = [EXEC], Defs = [EXEC] } // end IsCodeGenOnly, isPseudo +def : Pat < + (int_AMDGPU_kilp), + (SI_KILL (V_MOV_IMM_I32 0xbf800000)) +>; + /* int_SI_vs_load_input */ def : Pat< (int_SI_vs_load_input SReg_128:$tlst, IMM12bit:$attr_offset, @@ -1315,11 +1320,6 @@ def : Pat< >; def : Pat < - (int_AMDGPU_kilp), - (SI_KIL (V_MOV_IMM_I32 0xbf800000)) ->; - -def : Pat < (int_AMDGPU_cube VReg_128:$src), (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), (V_CUBETC_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x), diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp index 3fbe653..3780e40 100644 --- a/lib/Target/R600/SILowerControlFlow.cpp +++ b/lib/Target/R600/SILowerControlFlow.cpp @@ -68,7 +68,10 @@ private: static char ID; const TargetInstrInfo *TII; - void Skip(MachineInstr &MI, MachineOperand &To); + bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To); + + void Skip(MachineInstr &From, MachineOperand &To); + void SkipIfDead(MachineInstr &MI); void If(MachineInstr &MI); void Else(MachineInstr &MI); @@ -78,6 +81,7 @@ private: void Loop(MachineInstr &MI); void EndCf(MachineInstr &MI); + void Kill(MachineInstr &MI); void Branch(MachineInstr &MI); public: @@ -100,22 +104,29 @@ FunctionPass *llvm::createSILowerControlFlowPass(TargetMachine &tm) { return new SILowerControlFlowPass(tm); } -void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) { +bool SILowerControlFlowPass::shouldSkip(MachineBasicBlock *From, + MachineBasicBlock *To) { + unsigned NumInstr = 0; - for (MachineBasicBlock *MBB = *From.getParent()->succ_begin(); - NumInstr < SkipThreshold && MBB != To.getMBB() && !MBB->succ_empty(); + for (MachineBasicBlock *MBB = From; MBB != To && !MBB->succ_empty(); MBB = *MBB->succ_begin()) { for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); NumInstr < SkipThreshold && I != E; ++I) { if (I->isBundle() || !I->isBundled()) - ++NumInstr; + if (++NumInstr >= SkipThreshold) + return true; } } - if (NumInstr < SkipThreshold) + return false; +} + +void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) { + + if (!shouldSkip(*From.getParent()->succ_begin(), To.getMBB())) return; DebugLoc DL = From.getDebugLoc(); @@ -124,6 +135,38 @@ void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) { .addReg(AMDGPU::EXEC); } +void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) { + + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + + if (!shouldSkip(&MBB, &MBB.getParent()->back())) + return; + + MachineBasicBlock::iterator Insert = &MI; + ++Insert; + + // If the exec mask is non-zero, skip the next two instructions + BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) + .addImm(3) + .addReg(AMDGPU::EXEC); + + // Exec mask is zero: Export to NULL target... + BuildMI(MBB, Insert, DL, TII->get(AMDGPU::EXP)) + .addImm(0) + .addImm(0x09) // V_008DFC_SQ_EXP_NULL + .addImm(0) + .addImm(1) + .addImm(1) + .addReg(AMDGPU::SREG_LIT_0) + .addReg(AMDGPU::SREG_LIT_0) + .addReg(AMDGPU::SREG_LIT_0) + .addReg(AMDGPU::SREG_LIT_0); + + // ... and terminate wavefront + BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM)); +} + void SILowerControlFlowPass::If(MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = MI.getDebugLoc(); @@ -242,8 +285,28 @@ void SILowerControlFlowPass::Branch(MachineInstr &MI) { assert(0); } +void SILowerControlFlowPass::Kill(MachineInstr &MI) { + + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + + // Kill is only allowed in pixel shaders + MachineFunction &MF = *MBB.getParent(); + SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); + assert(Info->ShaderType == ShaderType::PIXEL); + + // Clear this pixel from the exec mask if the operand is negative + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC) + .addReg(AMDGPU::SREG_LIT_0) + .addOperand(MI.getOperand(0)); + + MI.eraseFromParent(); +} + bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { - bool HaveCf = false; + + bool HaveKill = false; + unsigned Depth = 0; for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) { @@ -257,6 +320,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { switch (MI.getOpcode()) { default: break; case AMDGPU::SI_IF: + ++Depth; If(MI); break; @@ -277,14 +341,26 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { break; case AMDGPU::SI_LOOP: + ++Depth; Loop(MI); break; case AMDGPU::SI_END_CF: - HaveCf = true; + if (--Depth == 0 && HaveKill) { + SkipIfDead(MI); + HaveKill = false; + } EndCf(MI); break; + case AMDGPU::SI_KILL: + if (Depth == 0) + SkipIfDead(MI); + else + HaveKill = true; + Kill(MI); + break; + case AMDGPU::S_BRANCH: Branch(MI); break; @@ -292,40 +368,5 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { } } - // TODO: What is this good for? - unsigned ShaderType = MF.getInfo<SIMachineFunctionInfo>()->ShaderType; - if (HaveCf && ShaderType == ShaderType::PIXEL) { - for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); - BI != BE; ++BI) { - - MachineBasicBlock &MBB = *BI; - if (MBB.succ_empty()) { - - MachineInstr &MI = *MBB.getFirstNonPHI(); - DebugLoc DL = MI.getDebugLoc(); - - // If the exec mask is non-zero, skip the next two instructions - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) - .addImm(3) - .addReg(AMDGPU::EXEC); - - // Exec mask is zero: Export to NULL target... - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::EXP)) - .addImm(0) - .addImm(0x09) // V_008DFC_SQ_EXP_NULL - .addImm(0) - .addImm(1) - .addImm(1) - .addReg(AMDGPU::SREG_LIT_0) - .addReg(AMDGPU::SREG_LIT_0) - .addReg(AMDGPU::SREG_LIT_0) - .addReg(AMDGPU::SREG_LIT_0); - - // ... and terminate wavefront - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ENDPGM)); - } - } - } - return true; } |