From fe7831861432d71de47ce502e799fb7264b9f24c Mon Sep 17 00:00:00 2001
From: Vincent Lejeune
Date: Thu, 12 Sep 2013 23:44:44 +0000
Subject: R600: Move fabs/fneg/sel folding logic into PostProcessIsel

This move makes it possible to correctly handle multiple instructions from a
single pattern.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190643 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 262 +++------------------------------
 lib/Target/R600/R600ISelLowering.cpp   | 179 ++++++++++++++++++++++
 lib/Target/R600/R600ISelLowering.h     |   1 +
 3 files changed, 197 insertions(+), 245 deletions(-)

diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
index 46e50bc..85e1422 100644
--- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -201,92 +201,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
   }
   switch (Opc) {
   default: break;
-  case AMDGPUISD::CONST_ADDRESS: {
-    for (SDNode::use_iterator I = N->use_begin(), Next = llvm::next(I);
-                              I != SDNode::use_end(); I = Next) {
-      Next = llvm::next(I);
-      if (!I->isMachineOpcode()) {
-        continue;
-      }
-      unsigned Opcode = I->getMachineOpcode();
-      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
-      int SrcIdx = I.getOperandNo();
-      int SelIdx;
-      // Unlike MachineInstrs, SDNodes do not have results in their operand
-      // list, so we need to increment the SrcIdx, since
-      // R600InstrInfo::getOperandIdx is based on the MachineInstr indices.
-      if (HasDst) {
-        SrcIdx++;
-      }
-
-      SelIdx = TII->getSelIdx(I->getMachineOpcode(), SrcIdx);
-      if (SelIdx < 0) {
-        continue;
-      }
-
-      SDValue CstOffset;
-      if (N->getValueType(0).isVector() ||
-          !SelectGlobalValueConstantOffset(N->getOperand(0), CstOffset))
-        continue;
-
-      // Gather constants values
-      int SrcIndices[] = {
-        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
-        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
-        TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
-        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
-        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
-        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
-        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
-        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
-        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
-        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
-        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
-      };
-      std::vector<unsigned> Consts;
-      for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
-        int OtherSrcIdx = SrcIndices[i];
-        int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
-        if (OtherSrcIdx < 0 || OtherSelIdx < 0) {
-          continue;
-        }
-        if (HasDst) {
-          OtherSrcIdx--;
-          OtherSelIdx--;
-        }
-        if (RegisterSDNode *Reg =
-              dyn_cast<RegisterSDNode>(I->getOperand(OtherSrcIdx))) {
-          if (Reg->getReg() == AMDGPU::ALU_CONST) {
-            ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(I->getOperand(OtherSelIdx));
-            Consts.push_back(Cst->getZExtValue());
-          }
-        }
-      }
-
-      ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
-      Consts.push_back(Cst->getZExtValue());
-      if (!TII->fitsConstReadLimitations(Consts))
-        continue;
-
-      // Convert back to SDNode indices
-      if (HasDst) {
-        SrcIdx--;
-        SelIdx--;
-      }
-      std::vector<SDValue> Ops;
-      for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
-        if (i == SrcIdx) {
-          Ops.push_back(CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32));
-        } else if (i == SelIdx) {
-          Ops.push_back(CstOffset);
-        } else {
-          Ops.push_back(I->getOperand(i));
-        }
-      }
-      CurDAG->UpdateNodeOperands(*I, Ops.data(), Ops.size());
-    }
-    break;
-  }
   case ISD::BUILD_VECTOR: {
     unsigned RegClassID;
     const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
@@ -508,38 +422,9 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
   if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
     const R600InstrInfo *TII =
         static_cast<const R600InstrInfo *>(TM.getInstrInfo());
-    if (Result && Result->isMachineOpcode() && Result->getMachineOpcode() == AMDGPU::DOT_4) {
-      bool IsModified = false;
-      do {
-        std::vector<SDValue> Ops;
-        for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
-            I != E; ++I)
-          Ops.push_back(*I);
-        IsModified = FoldDotOperands(Result->getMachineOpcode(), TII, Ops);
-        if (IsModified) {
-          Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
-        }
-      } while (IsModified);
-
-    }
     if (Result && Result->isMachineOpcode() &&
        !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR) &&
         TII->hasInstrModifiers(Result->getMachineOpcode())) {
-      // Fold FNEG/FABS
-      // TODO: Isel can generate multiple MachineInst, we need to recursively
-      // parse Result
-      bool IsModified = false;
-      do {
-        std::vector<SDValue> Ops;
-        for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
-            I != E; ++I)
-          Ops.push_back(*I);
-        IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops);
-        if (IsModified) {
-          Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
-        }
-      } while (IsModified);
-
       // If node has a single use which is CLAMP_R600, folds it
       if (Result->hasOneUse() && Result->isMachineOpcode()) {
         SDNode *PotentialClamp = *Result->use_begin();
@@ -564,120 +449,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
   return Result;
 }
 
-bool AMDGPUDAGToDAGISel::FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg,
-                                     SDValue &Abs, const R600InstrInfo *TII) {
-  switch (Src.getOpcode()) {
-  case ISD::FNEG:
-    Src = Src.getOperand(0);
-    Neg = CurDAG->getTargetConstant(1, MVT::i32);
-    return true;
-  case ISD::FABS:
-    if (!Abs.getNode())
-      return false;
-    Src = Src.getOperand(0);
-    Abs = CurDAG->getTargetConstant(1, MVT::i32);
-    return true;
-  case ISD::BITCAST:
-    Src = Src.getOperand(0);
-    return true;
-  default:
-    return false;
-  }
-}
-
-bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
-    const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
-  int OperandIdx[] = {
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
-  };
-  int SelIdx[] = {
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel),
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel),
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_sel)
-  };
-  int NegIdx[] = {
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
-  };
-  int AbsIdx[] = {
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
-    -1
-  };
-
-
-  for (unsigned i = 0; i < 3; i++) {
-    if (OperandIdx[i] < 0)
-      return false;
-    SDValue &Src = Ops[OperandIdx[i] - 1];
-    SDValue &Sel = Ops[SelIdx[i] - 1];
-    SDValue &Neg = Ops[NegIdx[i] - 1];
-    SDValue FakeAbs;
-    SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
-    if (FoldOperand(Src, Sel, Neg, Abs, TII))
-      return true;
-  }
-  return false;
-}
-
-bool AMDGPUDAGToDAGISel::FoldDotOperands(unsigned Opcode,
-    const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
-  int OperandIdx[] = {
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
-  };
-  int SelIdx[] = {
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_X),
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_Y),
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_Z),
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_W),
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_X),
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_Y),
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_Z),
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_W)
-  };
-  int NegIdx[] = {
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
-  };
-  int AbsIdx[] = {
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
-    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
-  };
-
-  for (unsigned i = 0; i < 8; i++) {
-    if (OperandIdx[i] < 0)
-      return false;
-    SDValue &Src = Ops[OperandIdx[i] - 1];
-    SDValue &Sel = Ops[SelIdx[i] - 1];
-    SDValue &Neg = Ops[NegIdx[i] - 1];
-    SDValue &Abs = Ops[AbsIdx[i] - 1];
-    if (FoldOperand(Src, Sel, Neg, Abs, TII))
-      return true;
-  }
-  return false;
-}
 
 bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
   if (!ptr) {
@@ -890,26 +661,27 @@ bool AMDGPUDAGToDAGISel::SelectU24(SDValue Op, SDValue &U24) {
 }
 
 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
-
-  if (Subtarget.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
-    return;
-  }
-
-  // Go over all selected nodes and try to fold them a bit more
   const AMDGPUTargetLowering& Lowering =
       (*(const AMDGPUTargetLowering*)getTargetLowering());
-  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
-      E = CurDAG->allnodes_end(); I != E; ++I) {
+  bool IsModified = false;
+  do {
+    IsModified = false;
+    // Go over all selected nodes and try to fold them a bit more
+    for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+         E = CurDAG->allnodes_end(); I != E; ++I) {
 
-    SDNode *Node = I;
+      SDNode *Node = I;
 
-    MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
-    if (!MachineNode)
-      continue;
+      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
+      if (!MachineNode)
+        continue;
 
-    SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
-    if (ResNode != Node) {
-      ReplaceUses(Node, ResNode);
+      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
+      if (ResNode != Node) {
+        ReplaceUses(Node, ResNode);
+        IsModified = true;
+      }
     }
-  }
+    CurDAG->RemoveDeadNodes();
+  } while (IsModified);
 }
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index ff9ba52..a7b7a84 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -1629,3 +1629,182 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
   }
   return SDValue();
 }
+
+static bool
+FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
+            SDValue &Abs, SDValue &Sel, SelectionDAG &DAG) {
+  const R600InstrInfo *TII =
+      static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
+  if (!Src.isMachineOpcode())
+    return false;
+  switch (Src.getMachineOpcode()) {
+  case AMDGPU::FNEG_R600:
+    if (!Neg.getNode())
+      return false;
+    Src = Src.getOperand(0);
+    Neg = DAG.getTargetConstant(1, MVT::i32);
+    return true;
+  case AMDGPU::FABS_R600:
+    if (!Abs.getNode())
+      return false;
+    Src = Src.getOperand(0);
+    Abs = DAG.getTargetConstant(1, MVT::i32);
+    return true;
+  case AMDGPU::CONST_COPY: {
+    unsigned Opcode = ParentNode->getMachineOpcode();
+    bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
+
+    if (!Sel.getNode())
+      return false;
+
+    SDValue CstOffset = Src.getOperand(0);
+    if (ParentNode->getValueType(0).isVector())
+      return false;
+
+    // Gather constants values
+    int SrcIndices[] = {
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
+    };
+    std::vector<unsigned> Consts;
+    for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
+      int OtherSrcIdx = SrcIndices[i];
+      int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
+      if (OtherSrcIdx < 0 || OtherSelIdx < 0)
+        continue;
+      if (HasDst) {
+        OtherSrcIdx--;
+        OtherSelIdx--;
+      }
+      if (RegisterSDNode *Reg =
+          dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
+        if (Reg->getReg() == AMDGPU::ALU_CONST) {
+          ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(
+              ParentNode->getOperand(OtherSelIdx));
+          Consts.push_back(Cst->getZExtValue());
+        }
+      }
+    }
+
+    ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
+    Consts.push_back(Cst->getZExtValue());
+    if (!TII->fitsConstReadLimitations(Consts)) {
+      return false;
+    }
+
+    Sel = CstOffset;
+    Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
+    return true;
+  }
+  default:
+    return false;
+  }
+}
+
+
+/// \brief Fold the instructions after selecting them
+SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
+                                            SelectionDAG &DAG) const {
+  const R600InstrInfo *TII =
+      static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
+  if (!Node->isMachineOpcode())
+    return Node;
+  unsigned Opcode = Node->getMachineOpcode();
+  SDValue FakeOp;
+
+  std::vector<SDValue> Ops;
+  for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
+      I != E; ++I)
+    Ops.push_back(*I);
+
+  if (Opcode == AMDGPU::DOT_4) {
+    int OperandIdx[] = {
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
+    };
+    int NegIdx[] = {
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
+    };
+    int AbsIdx[] = {
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
+    };
+    for (unsigned i = 0; i < 8; i++) {
+      if (OperandIdx[i] < 0)
+        return Node;
+      SDValue &Src = Ops[OperandIdx[i] - 1];
+      SDValue &Neg = Ops[NegIdx[i] - 1];
+      SDValue &Abs = Ops[AbsIdx[i] - 1];
+      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
+      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
+      if (HasDst)
+        SelIdx--;
+      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
+      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, DAG))
+        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
+    }
+  } else {
+    if (!TII->hasInstrModifiers(Opcode))
+      return Node;
+    int OperandIdx[] = {
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
+    };
+    int NegIdx[] = {
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
+    };
+    int AbsIdx[] = {
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
+      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
+      -1
+    };
+    for (unsigned i = 0; i < 3; i++) {
+      if (OperandIdx[i] < 0)
+        return Node;
+      SDValue &Src = Ops[OperandIdx[i] - 1];
+      SDValue &Neg = Ops[NegIdx[i] - 1];
+      SDValue FakeAbs;
+      SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
+      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
+      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
+      if (HasDst)
+        SelIdx--;
+      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
+      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, DAG))
+        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
+    }
+  }
+
+  return Node;
+}
diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h
index 811850d..6457ad4 100644
--- a/lib/Target/R600/R600ISelLowering.h
+++ b/lib/Target/R600/R600ISelLowering.h
@@ -67,6 +67,7 @@ private:
   void getStackAddress(unsigned StackWidth, unsigned ElemIdx, unsigned &Channel,
                        unsigned &PtrIncr) const;
   bool isZero(SDValue Op) const;
+  virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const;
 };
 
 } // End namespace llvm;
-- 
cgit v1.1