diff options
Diffstat (limited to 'lib/Target/R600/R600ISelLowering.cpp')
| -rw-r--r-- | lib/Target/R600/R600ISelLowering.cpp | 55 |
1 files changed, 39 insertions, 16 deletions
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 0fcb488..6405a82 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -207,7 +207,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( case AMDGPU::RAT_WRITE_CACHELESS_32_eg: case AMDGPU::RAT_WRITE_CACHELESS_64_eg: case AMDGPU::RAT_WRITE_CACHELESS_128_eg: { - unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0; + unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0; BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode())) .addOperand(MI->getOperand(0)) @@ -457,9 +457,9 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( // Instruction is left unmodified if its not the last one of its type bool isLastInstructionOfItsType = true; unsigned InstExportType = MI->getOperand(1).getImm(); - for (MachineBasicBlock::iterator NextExportInst = llvm::next(I), + for (MachineBasicBlock::iterator NextExportInst = std::next(I), EndBlock = BB->end(); NextExportInst != EndBlock; - NextExportInst = llvm::next(NextExportInst)) { + NextExportInst = std::next(NextExportInst)) { if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz || NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) { unsigned CurrentInstExportType = NextExportInst->getOperand(1) @@ -470,7 +470,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( } } } - bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0; + bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0; if (!EOP && !isLastInstructionOfItsType) return BB; unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40; @@ -762,7 +762,9 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { switch (N->getOpcode()) { - default: return; + default: + AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG); + return; case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG)); return; case ISD::LOAD: { @@ -977,7 +979,7 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const HWFalse = DAG.getConstant(0, CompareVT); } else { - assert(!"Unhandled value type in LowerSELECT_CC"); + llvm_unreachable("Unhandled value type in LowerSELECT_CC"); } // Lower this unsupported SELECT_CC into a combination of two supported @@ -990,7 +992,7 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const DAG.getCondCode(ISD::SETNE)); } -/// LLVM generates byte-addresed pointers. For indirect addressing, we need to +/// LLVM generates byte-addressed pointers. For indirect addressing, we need to /// convert these pointers to a register index. Each register holds /// 16 bytes, (4 x 32bit sub-register), but we need to take into account the /// \p StackWidth, which tells us how many of the 4 sub-registrers will be used @@ -1099,7 +1101,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { Ptr, DAG.getConstant(2, MVT::i32))); if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) { - assert(!"Truncated and indexed stores not supported yet"); + llvm_unreachable("Truncated and indexed stores not supported yet"); } else { Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand()); } @@ -1113,6 +1115,10 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } + SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG); + if (Ret.getNode()) { + return Ret; + } // Lowering for indirect addressing const MachineFunction &MF = DAG.getMachineFunction(); @@ -1204,6 +1210,15 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const SDValue Ptr = Op.getOperand(1); SDValue LoweredLoad; + SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG); + if (Ret.getNode()) { + SDValue Ops[2]; + Ops[0] = Ret; + Ops[1] = Chain; + return DAG.getMergeValues(Ops, 2, DL); + } + + if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) { SDValue MergedValues[2] = { SplitVectorLoad(Op, DAG), @@ -1239,7 +1254,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const } Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT, Slots, NumElements); } else { - // non constant ptr cant be folded, keeps it as a v4f32 load + // non-constant ptr can't be folded, keeps it as a v4f32 load Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32, DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)), DAG.getConstant(LoadNode->getAddressSpace() - @@ -1370,14 +1385,19 @@ SDValue R600TargetLowering::LowerFormalArguments( PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), AMDGPUAS::CONSTANT_BUFFER_0); + // i64 isn't a legal type, so the register type used ends up as i32, which + // isn't expected here. It attempts to create this sextload, but it ends up + // being invalid. Somehow this seems to work with i64 arguments, but breaks + // for <1 x i64>. + // The first 36 bytes of the input buffer contains information about // thread group and global sizes. SDValue Arg = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, Chain, DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32), MachinePointerInfo(UndefValue::get(PtrTy)), MemVT, false, false, 4); - // 4 is the prefered alignment for - // the CONSTANT memory space. + // 4 is the preferred alignment for + // the CONSTANT memory space. InVals.push_back(Arg); } return Chain; @@ -1442,17 +1462,20 @@ static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry, VectorEntry.getOperand(3) }; bool isUnmovable[4] = { false, false, false, false }; - for (unsigned i = 0; i < 4; i++) + for (unsigned i = 0; i < 4; i++) { RemapSwizzle[i] = i; + if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) { + unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1)) + ->getZExtValue(); + if (i == Idx) + isUnmovable[Idx] = true; + } + } for (unsigned i = 0; i < 4; i++) { if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) { unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1)) ->getZExtValue(); - if (i == Idx) { - isUnmovable[Idx] = true; - continue; - } if (isUnmovable[Idx]) continue; // Swap i and Idx |
