| author | Stephen Hines <srhines@google.com> | 2013-06-12 13:32:42 -0700 |
|---|---|---|
| committer | Stephen Hines <srhines@google.com> | 2013-06-12 13:32:42 -0700 |
| commit | 1878f9a7874b1ff569d745c0269f49d3daf7203d (patch) | |
| tree | 19a8dbaaedf6a056c617e87596b32d3f452af137 /lib/Target/R600/AMDILISelDAGToDAG.cpp | |
| parent | 7a57f27b857ec4b243d83d392a399f02fc196c0a (diff) | |
| parent | 100fbdd06be7590b23c4707a98cd605bdb519498 (diff) | |
| download | external_llvm-1878f9a7874b1ff569d745c0269f49d3daf7203d.zip external_llvm-1878f9a7874b1ff569d745c0269f49d3daf7203d.tar.gz external_llvm-1878f9a7874b1ff569d745c0269f49d3daf7203d.tar.bz2 | |
Merge commit '100fbdd06be7590b23c4707a98cd605bdb519498' into merge_20130612
Diffstat (limited to 'lib/Target/R600/AMDILISelDAGToDAG.cpp')
| -rw-r--r-- | lib/Target/R600/AMDILISelDAGToDAG.cpp | 241 |
1 file changed, 182 insertions, 59 deletions
diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp
index ba75a44..93432a2 100644
--- a/lib/Target/R600/AMDILISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp
@@ -14,14 +14,14 @@
 #include "AMDGPUInstrInfo.h"
 #include "AMDGPUISelLowering.h" // For AMDGPUISD
 #include "AMDGPURegisterInfo.h"
-#include "AMDILDevices.h"
 #include "R600InstrInfo.h"
 #include "SIISelLowering.h"
 #include "llvm/ADT/ValueMap.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/Support/Compiler.h"
-#include "llvm/CodeGen/SelectionDAG.h"
 #include <list>
 #include <queue>
@@ -48,7 +48,10 @@ public:
 
 private:
   inline SDValue getSmallIPtrImm(unsigned Imm);
+  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
+                   const R600InstrInfo *TII, std::vector<unsigned> Cst);
   bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
+  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
 
   // Complex pattern selectors
   bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
@@ -164,7 +167,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
   default: break;
   case ISD::BUILD_VECTOR: {
     const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
-    if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
+    if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
       break;
     }
     // BUILD_VECTOR is usually lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
@@ -194,7 +197,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
   case ISD::BUILD_PAIR: {
     SDValue RC, SubReg0, SubReg1;
     const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
-    if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
       break;
     }
     if (N->getValueType(0) == MVT::i128) {
@@ -211,7 +214,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
     const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                             N->getOperand(1), SubReg1 };
     return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
-                                  N->getDebugLoc(), N->getValueType(0), Ops);
+                                  SDLoc(N), N->getValueType(0), Ops);
   }
 
   case ISD::ConstantFP:
@@ -219,7 +222,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
     const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
     // XXX: Custom immediate lowering not implemented yet.  Instead we use
     // pseudo instructions defined in SIInstructions.td
-    if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
+    if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
       break;
     }
     const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(TM.getInstrInfo());
@@ -314,9 +317,23 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
 
   // Fold operands of selected node
   const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
-  if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+  if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
     const R600InstrInfo *TII =
         static_cast<const R600InstrInfo*>(TM.getInstrInfo());
+    if (Result && Result->isMachineOpcode() && Result->getMachineOpcode() == AMDGPU::DOT_4) {
+      bool IsModified = false;
+      do {
+        std::vector<SDValue> Ops;
+        for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
+            I != E; ++I)
+          Ops.push_back(*I);
+        IsModified = FoldDotOperands(Result->getMachineOpcode(), TII, Ops);
+        if (IsModified) {
+          Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
+        }
+      } while (IsModified);
+
+    }
     if (Result && Result->isMachineOpcode() &&
         !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR)
         && TII->isALUInstr(Result->getMachineOpcode())) {
@@ -359,6 +376,43 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
   return Result;
 }
 
+bool AMDGPUDAGToDAGISel::FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg,
+                                     SDValue &Abs, const R600InstrInfo *TII,
+                                     std::vector<unsigned> Consts) {
+  switch (Src.getOpcode()) {
+  case AMDGPUISD::CONST_ADDRESS: {
+    SDValue CstOffset;
+    if (Src.getValueType().isVector() ||
+        !SelectGlobalValueConstantOffset(Src.getOperand(0), CstOffset))
+      return false;
+
+    ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
+    Consts.push_back(Cst->getZExtValue());
+    if (!TII->fitsConstReadLimitations(Consts))
+      return false;
+
+    Src = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
+    Sel = CstOffset;
+    return true;
+  }
+  case ISD::FNEG:
+    Src = Src.getOperand(0);
+    Neg = CurDAG->getTargetConstant(1, MVT::i32);
+    return true;
+  case ISD::FABS:
+    if (!Abs.getNode())
+      return false;
+    Src = Src.getOperand(0);
+    Abs = CurDAG->getTargetConstant(1, MVT::i32);
+    return true;
+  case ISD::BITCAST:
+    Src = Src.getOperand(0);
+    return true;
+  default:
+    return false;
+  }
+}
+
 bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
     const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
   int OperandIdx[] = {
@@ -382,59 +436,101 @@ bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
     -1
   };
 
+  // Gather constants values
+  std::vector<unsigned> Consts;
+  for (unsigned j = 0; j < 3; j++) {
+    int SrcIdx = OperandIdx[j];
+    if (SrcIdx < 0)
+      break;
+    if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
+      if (Reg->getReg() == AMDGPU::ALU_CONST) {
+        ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
+        Consts.push_back(Cst->getZExtValue());
+      }
+    }
+  }
+
   for (unsigned i = 0; i < 3; i++) {
     if (OperandIdx[i] < 0)
       return false;
-    SDValue Operand = Ops[OperandIdx[i] - 1];
-    switch (Operand.getOpcode()) {
-    case AMDGPUISD::CONST_ADDRESS: {
-      SDValue CstOffset;
-      if (Operand.getValueType().isVector() ||
-          !SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset))
-        break;
-
-      // Gather others constants values
-      std::vector<unsigned> Consts;
-      for (unsigned j = 0; j < 3; j++) {
-        int SrcIdx = OperandIdx[j];
-        if (SrcIdx < 0)
-          break;
-        if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
-          if (Reg->getReg() == AMDGPU::ALU_CONST) {
-            ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
-            Consts.push_back(Cst->getZExtValue());
-          }
-        }
-      }
+    SDValue &Src = Ops[OperandIdx[i] - 1];
+    SDValue &Sel = Ops[SelIdx[i] - 1];
+    SDValue &Neg = Ops[NegIdx[i] - 1];
+    SDValue FakeAbs;
+    SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
+    if (FoldOperand(Src, Sel, Neg, Abs, TII, Consts))
+      return true;
+  }
+  return false;
+}
 
-      ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
-      Consts.push_back(Cst->getZExtValue());
-      if (!TII->fitsConstReadLimitations(Consts))
-        break;
+bool AMDGPUDAGToDAGISel::FoldDotOperands(unsigned Opcode,
+    const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
+  int OperandIdx[] = {
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_X),
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_Y),
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_Z),
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_W),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_X),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_Y),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_Z),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_W)
+  };
+  int SelIdx[] = {
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL_X),
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL_Y),
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL_Z),
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL_W),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL_X),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL_Y),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL_Z),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL_W)
+  };
+  int NegIdx[] = {
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG_X),
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG_Y),
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG_Z),
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG_W),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG_X),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG_Y),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG_Z),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG_W)
+  };
+  int AbsIdx[] = {
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS_X),
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS_Y),
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS_Z),
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS_W),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS_X),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS_Y),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS_Z),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS_W)
+  };
 
-      Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
-      Ops[SelIdx[i] - 1] = CstOffset;
-      return true;
-    }
-    case ISD::FNEG:
-      if (NegIdx[i] < 0)
-        break;
-      Ops[OperandIdx[i] - 1] = Operand.getOperand(0);
-      Ops[NegIdx[i] - 1] = CurDAG->getTargetConstant(1, MVT::i32);
-      return true;
-    case ISD::FABS:
-      if (AbsIdx[i] < 0)
-        break;
-      Ops[OperandIdx[i] - 1] = Operand.getOperand(0);
-      Ops[AbsIdx[i] - 1] = CurDAG->getTargetConstant(1, MVT::i32);
-      return true;
-    case ISD::BITCAST:
-      Ops[OperandIdx[i] - 1] = Operand.getOperand(0);
-      return true;
-    default:
+  // Gather constants values
+  std::vector<unsigned> Consts;
+  for (unsigned j = 0; j < 8; j++) {
+    int SrcIdx = OperandIdx[j];
+    if (SrcIdx < 0)
       break;
+    if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
+      if (Reg->getReg() == AMDGPU::ALU_CONST) {
+        ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
+        Consts.push_back(Cst->getZExtValue());
+      }
     }
   }
+
+  for (unsigned i = 0; i < 8; i++) {
+    if (OperandIdx[i] < 0)
+      return false;
+    SDValue &Src = Ops[OperandIdx[i] - 1];
+    SDValue &Sel = Ops[SelIdx[i] - 1];
+    SDValue &Neg = Ops[NegIdx[i] - 1];
+    SDValue &Abs = Ops[AbsIdx[i] - 1];
+    if (FoldOperand(Src, Sel, Neg, Abs, TII, Consts))
+      return true;
+  }
   return false;
 }
@@ -616,7 +712,7 @@ bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
   } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr)) &&
              isInt<16>(IMMOffset->getZExtValue())) {
     Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
-                                  CurDAG->getEntryNode().getDebugLoc(),
+                                  SDLoc(CurDAG->getEntryNode()),
                                   AMDGPU::ZERO, MVT::i32);
     Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
     return true;
@@ -649,18 +745,45 @@ bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
 
 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
 
+  if (Subtarget.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+    return;
+  }
+
   // Go over all selected nodes and try to fold them a bit more
-  const AMDGPUTargetLowering& Lowering = ((const AMDGPUTargetLowering&)TLI);
+  const AMDGPUTargetLowering& Lowering = (*(const AMDGPUTargetLowering*)TLI);
   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
        E = CurDAG->allnodes_end(); I != E; ++I) {
 
-    MachineSDNode *Node = dyn_cast<MachineSDNode>(I);
-    if (!Node)
+    SDNode *Node = I;
+    switch (Node->getOpcode()) {
+    // Fix the register class in copy to CopyToReg nodes - ISel will always
+    // use SReg classes for 64-bit copies, but this is not always what we want.
+    case ISD::CopyToReg: {
+      unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+      SDValue Val = Node->getOperand(2);
+      const TargetRegisterClass *RC = RegInfo->getRegClass(Reg);
+      if (RC != &AMDGPU::SReg_64RegClass) {
+        continue;
+      }
+
+      if (!Val.getNode()->isMachineOpcode()) {
+        continue;
+      }
+
+      const MCInstrDesc Desc = TM.getInstrInfo()->get(Val.getNode()->getMachineOpcode());
+      const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+      RegInfo->setRegClass(Reg, TRI->getRegClass(Desc.OpInfo[0].RegClass));
       continue;
+    }
+    }
 
-    SDNode *ResNode = Lowering.PostISelFolding(Node, *CurDAG);
-    if (ResNode != Node)
+    MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
+    if (!MachineNode)
+      continue;
+
+    SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
+    if (ResNode != Node) {
       ReplaceUses(Node, ResNode);
+    }
   }
 }
-
