diff options
author | Justin Holewinski <justin.holewinski@gmail.com> | 2011-09-26 18:57:27 +0000 |
---|---|---|
committer | Justin Holewinski <justin.holewinski@gmail.com> | 2011-09-26 18:57:27 +0000 |
commit | bc97f4402bbc4a419a8398036ff180dce03e9c72 (patch) | |
tree | d5530a2c8796ad6000e450514709aa92df6933ee /lib/Target/PTX/PTXSelectionDAGInfo.cpp | |
parent | 63bce90c0dd68bba47c2d997c63fe6951eb2dfbf (diff) | |
download | external_llvm-bc97f4402bbc4a419a8398036ff180dce03e9c72.zip external_llvm-bc97f4402bbc4a419a8398036ff180dce03e9c72.tar.gz external_llvm-bc97f4402bbc4a419a8398036ff180dce03e9c72.tar.bz2 |
PTX: Implement PTXSelectionDAGInfo
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@140549 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/PTX/PTXSelectionDAGInfo.cpp')
-rw-r--r-- | lib/Target/PTX/PTXSelectionDAGInfo.cpp | 148 |
1 files changed, 148 insertions, 0 deletions
diff --git a/lib/Target/PTX/PTXSelectionDAGInfo.cpp b/lib/Target/PTX/PTXSelectionDAGInfo.cpp new file mode 100644 index 0000000..e333183 --- /dev/null +++ b/lib/Target/PTX/PTXSelectionDAGInfo.cpp @@ -0,0 +1,148 @@ +//===-- PTXSelectionDAGInfo.cpp - PTX SelectionDAG Info -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the PTXSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ptx-selectiondag-info" +#include "PTXTargetMachine.h" +#include "llvm/DerivedTypes.h" +#include "llvm/CodeGen/SelectionDAG.h" +using namespace llvm; + +PTXSelectionDAGInfo::PTXSelectionDAGInfo(const TargetMachine &TM) + : TargetSelectionDAGInfo(TM), + Subtarget(&TM.getSubtarget<PTXSubtarget>()) { +} + +PTXSelectionDAGInfo::~PTXSelectionDAGInfo() { +} + +SDValue +PTXSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const { + // Do repeated 4-byte loads and stores. To be improved. + // This requires 4-byte alignment. + if ((Align & 3) != 0) + return SDValue(); + // This requires the copy size to be a constant, preferably + // within a subtarget-specific limit. + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (!ConstantSize) + return SDValue(); + uint64_t SizeVal = ConstantSize->getZExtValue(); + // Always inline memcpys. In PTX, we do not have a C library that provides + // a memcpy function. + //if (!AlwaysInline) + // return SDValue(); + + unsigned BytesLeft = SizeVal & 3; + unsigned NumMemOps = SizeVal >> 2; + unsigned EmittedNumMemOps = 0; + EVT VT = MVT::i32; + unsigned VTSize = 4; + unsigned i = 0; + const unsigned MAX_LOADS_IN_LDM = 6; + SDValue TFOps[MAX_LOADS_IN_LDM]; + SDValue Loads[MAX_LOADS_IN_LDM]; + uint64_t SrcOff = 0, DstOff = 0; + + // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the + // same number of stores. The loads and stores will get combined into + // ldm/stm later on. + while (EmittedNumMemOps < NumMemOps) { + for (i = 0; + i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { + Loads[i] = DAG.getLoad(VT, dl, Chain, + DAG.getNode(ISD::ADD, dl, MVT::i32, Src, + DAG.getConstant(SrcOff, MVT::i32)), + SrcPtrInfo.getWithOffset(SrcOff), isVolatile, + false, 0); + TFOps[i] = Loads[i].getValue(1); + SrcOff += VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + + for (i = 0; + i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { + TFOps[i] = DAG.getStore(Chain, dl, Loads[i], + DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, + DAG.getConstant(DstOff, MVT::i32)), + DstPtrInfo.getWithOffset(DstOff), + isVolatile, false, 0); + DstOff += VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + + EmittedNumMemOps += i; + } + + if (BytesLeft == 0) + return Chain; + + // Issue loads / stores for the trailing (1 - 3) bytes. + unsigned BytesLeftSave = BytesLeft; + i = 0; + while (BytesLeft) { + if (BytesLeft >= 2) { + VT = MVT::i16; + VTSize = 2; + } else { + VT = MVT::i8; + VTSize = 1; + } + + Loads[i] = DAG.getLoad(VT, dl, Chain, + DAG.getNode(ISD::ADD, dl, MVT::i32, Src, + DAG.getConstant(SrcOff, MVT::i32)), + SrcPtrInfo.getWithOffset(SrcOff), false, false, 0); + TFOps[i] = Loads[i].getValue(1); + ++i; + SrcOff += VTSize; + BytesLeft -= VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + + i = 0; + BytesLeft = BytesLeftSave; + while (BytesLeft) { + if (BytesLeft >= 2) { + VT = MVT::i16; + VTSize = 2; + } else { + VT = MVT::i8; + VTSize = 1; + } + + TFOps[i] = DAG.getStore(Chain, dl, Loads[i], + DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, + DAG.getConstant(DstOff, MVT::i32)), + DstPtrInfo.getWithOffset(DstOff), false, false, 0); + ++i; + DstOff += VTSize; + BytesLeft -= VTSize; + } + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); +} + +SDValue PTXSelectionDAGInfo:: +EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, SDValue Dst, + SDValue Src, SDValue Size, + unsigned Align, bool isVolatile, + MachinePointerInfo DstPtrInfo) const { + llvm_unreachable("memset lowering not implemented for PTX yet"); +} + |