diff options
-rw-r--r-- | lib/Target/R600/R600InstrInfo.cpp | 19 | ||||
-rw-r--r-- | test/CodeGen/R600/predicate-dp4.ll | 27 |
2 files changed, 46 insertions, 0 deletions
Review note: this patch makes PredicateInstruction set pred_sel_X/Y/Z/W of DOT_4 to Pred[2]'s register and add an implicit PREDICATE_BIT operand, and makes buildSlotOfVectorInstruction copy the slot's pred_sel register onto the new instruction. The new test's RUN line now pipes llc into FileCheck; without that the CHECK lines were never evaluated. The blob id on the test's index line predates this one-line change and will differ once the patch is regenerated.
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index 8436d5f..1f47416 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -1009,6 +1009,20 @@ R600InstrInfo::PredicateInstruction(MachineInstr *MI, return true; } + if (MI->getOpcode() == AMDGPU::DOT_4) { + MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_X)) + .setReg(Pred[2].getReg()); + MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Y)) + .setReg(Pred[2].getReg()); + MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Z)) + .setReg(Pred[2].getReg()); + MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_W)) + .setReg(Pred[2].getReg()); + MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI); + MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit); + return true; + } + + if (PIdx != -1) { MachineOperand &PMO = MI->getOperand(PIdx); PMO.setReg(Pred[2].getReg()); @@ -1217,6 +1231,11 @@ MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction( AMDGPU::OpName::src1_sel, }; + MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(), + getSlotedOps(AMDGPU::OpName::pred_sel, Slot))); + MIB->getOperand(getOperandIdx(Opcode, AMDGPU::OpName::pred_sel)) + .setReg(MO.getReg()); + for (unsigned i = 0; i < 14; i++) { MachineOperand &MO = MI->getOperand( getOperandIdx(MI->getOpcode(), getSlotedOps(Operands[i], Slot))); diff --git a/test/CodeGen/R600/predicate-dp4.ll b/test/CodeGen/R600/predicate-dp4.ll new file mode 100644 index 0000000..e48d6a7 --- /dev/null +++ b/test/CodeGen/R600/predicate-dp4.ll @@ -0,0 +1,27 @@ +;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s + +; CHECK-LABEL: @main +; CHECK: PRED_SETE_INT * Pred, +; CHECK: DOT4 T{{[0-9]+}}.X, T0.X, T0.X, Pred_sel_one +define void @main(<4 x float> inreg) #0 { +main_body: + %1 = extractelement <4 x float> %0, i32 0 + %2 = bitcast float %1 to i32 + %3 = icmp eq i32 %2, 0 + br i1 %3, label %IF, label %ENDIF + +IF: ; preds = %main_body + %4 = call float 
@llvm.AMDGPU.dp4(<4 x float> %0, <4 x float> %0) + br label %ENDIF + +ENDIF: ; preds = %IF, %main_body + %5 = phi float [%4, %IF], [0.000000e+00, %main_body] + %6 = insertelement <4 x float> undef, float %5, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %6, i32 0, i32 0) + ret void +} + +declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) +attributes #1 = { readnone } +attributes #0 = { "ShaderType"="0" } |