diff options
author | Justin Holewinski <jholewinski@nvidia.com> | 2013-07-01 12:58:52 +0000 |
---|---|---|
committer | Justin Holewinski <jholewinski@nvidia.com> | 2013-07-01 12:58:52 +0000 |
commit | 9bc8feeb4fd15883949900194c93fd1704c404b4 (patch) | |
tree | ac26a33d687daf0c98ed011d3c7aa15a38f686a5 /lib | |
parent | 30b13ebd0b3d29d5f2d3dcbccee31d3a55917277 (diff) | |
download | external_llvm-9bc8feeb4fd15883949900194c93fd1704c404b4.zip external_llvm-9bc8feeb4fd15883949900194c93fd1704c404b4.tar.gz external_llvm-9bc8feeb4fd15883949900194c93fd1704c404b4.tar.bz2 |
[NVPTX] Add isel patterns for [reg+offset] form of ldg/ldu.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185329 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 476 | ||||
-rw-r--r-- | lib/Target/NVPTX/NVPTXIntrinsics.td | 66 |
2 files changed, 430 insertions, 112 deletions
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 4457ec3..03a3aa4 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -787,194 +787,476 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { unsigned Opcode; SDLoc DL(N); SDNode *LD; - MemSDNode *Mem = cast<MemSDNode>(N); + SDValue Base, Offset, Addr; - EVT RetVT = Mem->getMemoryVT().getVectorElementType(); + EVT EltVT = Mem->getMemoryVT().getVectorElementType(); - // Select opcode - if (Subtarget.is64Bit()) { + if (SelectDirectAddr(Op1, Addr)) { switch (N->getOpcode()) { default: return NULL; case NVPTXISD::LDGV2: - switch (RetVT.getSimpleVT().SimpleTy) { + switch (EltVT.getSimpleVT().SimpleTy) { default: return NULL; case MVT::i8: - Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_64; + Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar; break; case MVT::i16: - Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_64; + Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar; break; case MVT::i32: - Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_64; + Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar; break; case MVT::i64: - Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_64; + Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar; break; case MVT::f32: - Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_64; + Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar; break; case MVT::f64: - Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_64; + Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar; break; } break; - case NVPTXISD::LDGV4: - switch (RetVT.getSimpleVT().SimpleTy) { + case NVPTXISD::LDUV2: + switch (EltVT.getSimpleVT().SimpleTy) { default: return NULL; case MVT::i8: - Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_64; + Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar; break; case MVT::i16: - Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_64; + Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar; break; case MVT::i32: - Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_64; + Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar; break; case MVT::f32: - Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_64; + Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar; break; } break; - case NVPTXISD::LDUV2: - switch (RetVT.getSimpleVT().SimpleTy) { + case NVPTXISD::LDGV4: + switch (EltVT.getSimpleVT().SimpleTy) { default: return NULL; case MVT::i8: - Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_64; + Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar; break; case MVT::i16: - Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_64; + Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar; break; case MVT::i32: - Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_64; - break; - case MVT::i64: - Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_64; + Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar; break; case MVT::f32: - Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_64; - break; - case MVT::f64: - Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_64; + Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar; break; } break; case NVPTXISD::LDUV4: - switch (RetVT.getSimpleVT().SimpleTy) { + switch (EltVT.getSimpleVT().SimpleTy) { default: return NULL; case MVT::i8: - Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_64; + Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar; break; case MVT::i16: - Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_64; + Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar; break; case MVT::i32: - Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_64; + Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar; break; case MVT::f32: - Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_64; + Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar; break; } break; } - } else { - switch (N->getOpcode()) { - default: - return NULL; - case NVPTXISD::LDGV2: - switch (RetVT.getSimpleVT().SimpleTy) { + + SDValue Ops[] = { Addr, Chain }; + LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), + ArrayRef<SDValue>(Ops, 2)); + } else if (Subtarget.is64Bit() + ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset) + : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) { + if (Subtarget.is64Bit()) { + switch (N->getOpcode()) { default: return NULL; - case MVT::i8: - Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_32; - break; - case MVT::i16: - Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_32; - break; - case MVT::i32: - Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_32; + case NVPTXISD::LDGV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64; + break; + } break; - case MVT::i64: - Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_32; + case NVPTXISD::LDUV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64; + break; + } break; - case MVT::f32: - Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_32; + case NVPTXISD::LDGV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64; + break; + } break; - case MVT::f64: - Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_32; + case NVPTXISD::LDUV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64; + break; + } break; } - break; - case NVPTXISD::LDGV4: - switch (RetVT.getSimpleVT().SimpleTy) { + } else { + switch (N->getOpcode()) { default: return NULL; - case MVT::i8: - Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_32; + case NVPTXISD::LDGV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32; + break; + } break; - case MVT::i16: - Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_32; + case NVPTXISD::LDUV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32; + break; + } break; - case MVT::i32: - Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_32; + case NVPTXISD::LDGV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32; + break; + } break; - case MVT::f32: - Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_32; + case NVPTXISD::LDUV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32; + break; + } break; } - break; - case NVPTXISD::LDUV2: - switch (RetVT.getSimpleVT().SimpleTy) { + } + + SDValue Ops[] = { Base, Offset, Chain }; + + LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), + ArrayRef<SDValue>(Ops, 3)); + } else { + if (Subtarget.is64Bit()) { + switch (N->getOpcode()) { default: return NULL; - case MVT::i8: - Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_32; - break; - case MVT::i16: - Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_32; - break; - case MVT::i32: - Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_32; + case NVPTXISD::LDGV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64; + break; + } break; - case MVT::i64: - Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_32; + case NVPTXISD::LDUV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64; + break; + } break; - case MVT::f32: - Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_32; + case NVPTXISD::LDGV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64; + break; + } break; - case MVT::f64: - Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_32; + case NVPTXISD::LDUV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64; + break; + } break; } - break; - case NVPTXISD::LDUV4: - switch (RetVT.getSimpleVT().SimpleTy) { + } else { + switch (N->getOpcode()) { default: return NULL; - case MVT::i8: - Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_32; + case NVPTXISD::LDGV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32; + break; + } break; - case MVT::i16: - Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_32; + case NVPTXISD::LDUV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32; + break; + } break; - case MVT::i32: - Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_32; + case NVPTXISD::LDGV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32; + break; + } break; - case MVT::f32: - Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_32; + case NVPTXISD::LDUV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32; + break; + } break; } - break; } - } - SDValue Ops[] = { Op1, Chain }; - LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); + SDValue Ops[] = { Op1, Chain }; + LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), + ArrayRef<SDValue>(Ops, 2)); + } MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td index 93cdfef..14049b1 100644 --- a/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -1342,20 +1342,38 @@ int_nvvm_ldu_global_p>; // Elementized vector ldu multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> { - def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), - (ins Int32Regs:$src), + def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins Int32Regs:$src), + !strconcat("ldu.global.", TyStr), []>; + def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins Int64Regs:$src), + !strconcat("ldu.global.", TyStr), []>; + def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins MEMri:$src), !strconcat("ldu.global.", TyStr), []>; - def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), - (ins Int64Regs:$src), + def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins MEMri64:$src), + !strconcat("ldu.global.", TyStr), []>; + def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins imemAny:$src), !strconcat("ldu.global.", TyStr), []>; } -multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> { - def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, - regclass:$dst4), (ins Int32Regs:$src), +multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> { + def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), (ins Int32Regs:$src), + !strconcat("ldu.global.", TyStr), []>; + def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), (ins Int64Regs:$src), + !strconcat("ldu.global.", TyStr), []>; + def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), (ins MEMri:$src), !strconcat("ldu.global.", TyStr), []>; - def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, - regclass:$dst4), (ins Int64Regs:$src), + def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), (ins MEMri64:$src), + !strconcat("ldu.global.", TyStr), []>; + def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), (ins imemAny:$src), !strconcat("ldu.global.", TyStr), []>; } @@ -1452,20 +1470,38 @@ defm INT_PTX_LDG_GLOBAL_p64 // Elementized vector ldg multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> { - def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins Int32Regs:$src), !strconcat("ld.global.nc.", TyStr), []>; - def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins Int64Regs:$src), !strconcat("ld.global.nc.", TyStr), []>; + def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins MEMri:$src), + !strconcat("ld.global.nc.", TyStr), []>; + def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins MEMri64:$src), + !strconcat("ld.global.nc.", TyStr), []>; + def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins imemAny:$src), + !strconcat("ld.global.nc.", TyStr), []>; } multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> { - def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, - regclass:$dst3, regclass:$dst4), (ins Int32Regs:$src), + def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), (ins Int32Regs:$src), + !strconcat("ld.global.nc.", TyStr), []>; + def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), (ins Int64Regs:$src), + !strconcat("ld.global.nc.", TyStr), []>; + def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), (ins MEMri:$src), + !strconcat("ld.global.nc.", TyStr), []>; + def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), (ins MEMri64:$src), !strconcat("ld.global.nc.", TyStr), []>; - def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, - regclass:$dst3, regclass:$dst4), (ins Int64Regs:$src), + def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), (ins imemAny:$src), !strconcat("ld.global.nc.", TyStr), []>; } |