| author    | Bob Wilson <bob.wilson@apple.com> | 2009-10-14 16:19:03 +0000 |
| committer | Bob Wilson <bob.wilson@apple.com> | 2009-10-14 16:19:03 +0000 |
| commit    | a7c397c9c30df38901751abdcfa2c1c5e310d2e5 (patch) | |
| tree      | ac9a0903317e03b2bd761a41b54c159cfdca8063 /lib/Target | |
| parent    | b01bbdcc1af27bd90b552bb1b62b48916e0d4be3 (diff) | |
Refactor code for selecting NEON load lane intrinsics.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@84109 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target')
-rw-r--r-- | lib/Target/ARM/ARMISelDAGToDAG.cpp | 333 |
1 file changed, 122 insertions, 211 deletions
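This change folds the three nearly identical vld2lane/vld3lane/vld4lane selection blocks into a single SelectVLDLane helper; each intrinsic case now only supplies tables of lane-load opcodes. As a quick orientation before the diff, here is the new shape of the two-vector case, lifted from the change below (the comments are added here for explanation; operand layout and opcode names are those of this revision of the tree):

```cpp
// Sketch of one NEON load-lane case in ARMDAGToDAGISel::Select after the
// refactoring.  DOpcodes is indexed by element size (8/16/32-bit) for 64-bit
// D-register destinations; QOpcodes0 and QOpcodes1 (16/32-bit only) handle
// the even and odd D subregs of 128-bit Q destinations.  SelectVLDLane does
// the subreg extract, the load, and the subreg re-insert.
case Intrinsic::arm_neon_vld2lane: {
  unsigned DOpcodes[]  = { ARM::VLD2LNd8, ARM::VLD2LNd16, ARM::VLD2LNd32 };
  unsigned QOpcodes0[] = { ARM::VLD2LNq16a, ARM::VLD2LNq32a };
  unsigned QOpcodes1[] = { ARM::VLD2LNq16b, ARM::VLD2LNq32b };
  return SelectVLDLane(Op, 2, DOpcodes, QOpcodes0, QOpcodes1);
}
```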
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 180b559..058361b 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -126,6 +126,13 @@ private:
   /// SelectDYN_ALLOC - Select dynamic alloc for Thumb.
   SDNode *SelectDYN_ALLOC(SDValue Op);
 
+  /// SelectVLDLane - Select NEON load structure to one lane. NumVecs should
+  /// be 2, 3 or 4. The opcode arrays specify the instructions used for
+  /// loading D registers and even subregs and odd subregs of Q registers.
+  SDNode *SelectVLDLane(SDValue Op, unsigned NumVecs,
+                        unsigned *DOpcodes, unsigned *QOpcodes0,
+                        unsigned *QOpcodes1);
+
   /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM.
   SDNode *SelectV6T2BitfieldExtractOp(SDValue Op, unsigned Opc);
 
@@ -970,6 +977,109 @@ SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) {
                                 VT, SDValue(Pair, 0), V1, SubReg1);
 }
 
+/// GetNEONSubregVT - Given a type for a 128-bit NEON vector, return the type
+/// for a 64-bit subregister of the vector.
+static EVT GetNEONSubregVT(EVT VT) {
+  switch (VT.getSimpleVT().SimpleTy) {
+  default: llvm_unreachable("unhandled NEON type");
+  case MVT::v16i8: return MVT::v8i8;
+  case MVT::v8i16: return MVT::v4i16;
+  case MVT::v4f32: return MVT::v2f32;
+  case MVT::v4i32: return MVT::v2i32;
+  case MVT::v2i64: return MVT::v1i64;
+  }
+}
+
+SDNode *ARMDAGToDAGISel::SelectVLDLane(SDValue Op, unsigned NumVecs,
+                                       unsigned *DOpcodes, unsigned *QOpcodes0,
+                                       unsigned *QOpcodes1) {
+  assert(NumVecs >=2 && NumVecs <= 4 && "VLDLane NumVecs out-of-range");
+  SDNode *N = Op.getNode();
+  DebugLoc dl = N->getDebugLoc();
+
+  SDValue MemAddr, MemUpdate, MemOpc;
+  if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
+    return NULL;
+
+  SDValue Chain = N->getOperand(0);
+  unsigned Lane =
+    cast<ConstantSDNode>(N->getOperand(NumVecs+3))->getZExtValue();
+  EVT VT = N->getValueType(0);
+  bool is64BitVector = VT.is64BitVector();
+
+  // Quad registers are handled by extracting subregs, doing the load,
+  // and then inserting the results as subregs.  Find the subreg info.
+  unsigned NumElts = 0;
+  int SubregIdx = 0;
+  EVT RegVT = VT;
+  if (!is64BitVector) {
+    RegVT = GetNEONSubregVT(VT);
+    NumElts = RegVT.getVectorNumElements();
+    SubregIdx = (Lane < NumElts) ? ARM::DSUBREG_0 : ARM::DSUBREG_1;
+  }
+
+  unsigned OpcodeIndex;
+  switch (VT.getSimpleVT().SimpleTy) {
+  default: llvm_unreachable("unhandled vld lane type");
+    // Double-register operations:
+  case MVT::v8i8:  OpcodeIndex = 0; break;
+  case MVT::v4i16: OpcodeIndex = 1; break;
+  case MVT::v2f32:
+  case MVT::v2i32: OpcodeIndex = 2; break;
+    // Quad-register operations:
+  case MVT::v8i16: OpcodeIndex = 0; break;
+  case MVT::v4f32:
+  case MVT::v4i32: OpcodeIndex = 1; break;
+  }
+
+  SmallVector<SDValue, 9> Ops;
+  Ops.push_back(MemAddr);
+  Ops.push_back(MemUpdate);
+  Ops.push_back(MemOpc);
+
+  unsigned Opc = 0;
+  if (is64BitVector) {
+    Opc = DOpcodes[OpcodeIndex];
+    for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+      Ops.push_back(N->getOperand(Vec+3));
+  } else {
+    // Check if this is loading the even or odd subreg of a Q register.
+    if (Lane < NumElts) {
+      Opc = QOpcodes0[OpcodeIndex];
+    } else {
+      Lane -= NumElts;
+      Opc = QOpcodes1[OpcodeIndex];
+    }
+    // Extract the subregs of the input vector.
+    for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+      Ops.push_back(CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT,
+                                                   N->getOperand(Vec+3)));
+  }
+  Ops.push_back(getI32Imm(Lane));
+  Ops.push_back(Chain);
+
+  std::vector<EVT> ResTys(NumVecs, RegVT);
+  ResTys.push_back(MVT::Other);
+  SDNode *VLdLn =
+    CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), NumVecs+5);
+  // For a 64-bit vector load to D registers, nothing more needs to be done.
+  if (is64BitVector)
+    return VLdLn;
+
+  // For 128-bit vectors, take the 64-bit results of the load and insert them
+  // as subregs into the result.
+  for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+    SDValue QuadVec = CurDAG->getTargetInsertSubreg(SubregIdx, dl, VT,
+                                                    N->getOperand(Vec+3),
+                                                    SDValue(VLdLn, Vec));
+    ReplaceUses(SDValue(N, Vec), QuadVec);
+  }
+
+  Chain = SDValue(VLdLn, NumVecs);
+  ReplaceUses(SDValue(N, NumVecs), Chain);
+  return NULL;
+}
+
 SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDValue Op,
                                                      unsigned Opc) {
   if (!Subtarget->hasV6T2Ops())
@@ -1568,223 +1678,24 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
     }
 
     case Intrinsic::arm_neon_vld2lane: {
-      SDValue MemAddr, MemUpdate, MemOpc;
-      if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
-        return NULL;
-      SDValue Chain = N->getOperand(0);
-      unsigned Lane = cast<ConstantSDNode>(N->getOperand(5))->getZExtValue();
-      if (VT.is64BitVector()) {
-        switch (VT.getSimpleVT().SimpleTy) {
-        default: llvm_unreachable("unhandled vld2lane type");
-        case MVT::v8i8:  Opc = ARM::VLD2LNd8; break;
-        case MVT::v4i16: Opc = ARM::VLD2LNd16; break;
-        case MVT::v2f32:
-        case MVT::v2i32: Opc = ARM::VLD2LNd32; break;
-        }
-        const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc,
-                                N->getOperand(3), N->getOperand(4),
-                                getI32Imm(Lane), Chain };
-        return CurDAG->getMachineNode(Opc, dl, VT, VT, MVT::Other, Ops, 7);
-      }
-      // Quad registers are handled by extracting subregs, doing the load,
-      // and then inserting the results as subregs.
-      EVT RegVT;
-      unsigned Opc2 = 0;
-      switch (VT.getSimpleVT().SimpleTy) {
-      default: llvm_unreachable("unhandled vld2lane type");
-      case MVT::v8i16:
-        Opc = ARM::VLD2LNq16a;
-        Opc2 = ARM::VLD2LNq16b;
-        RegVT = MVT::v4i16;
-        break;
-      case MVT::v4f32:
-        Opc = ARM::VLD2LNq32a;
-        Opc2 = ARM::VLD2LNq32b;
-        RegVT = MVT::v2f32;
-        break;
-      case MVT::v4i32:
-        Opc = ARM::VLD2LNq32a;
-        Opc2 = ARM::VLD2LNq32b;
-        RegVT = MVT::v2i32;
-        break;
-      }
-      unsigned NumElts = RegVT.getVectorNumElements();
-      int SubregIdx = (Lane < NumElts) ? ARM::DSUBREG_0 : ARM::DSUBREG_1;
-
-      SDValue D0 = CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT,
-                                                  N->getOperand(3));
-      SDValue D1 = CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT,
-                                                  N->getOperand(4));
-      const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, D0, D1,
-                              getI32Imm(Lane % NumElts), Chain };
-      SDNode *VLdLn = CurDAG->getMachineNode((Lane < NumElts) ? Opc : Opc2,
-                                             dl, RegVT, RegVT, MVT::Other,
-                                             Ops, 7);
-      SDValue Q0 = CurDAG->getTargetInsertSubreg(SubregIdx, dl, VT,
-                                                 N->getOperand(3),
-                                                 SDValue(VLdLn, 0));
-      SDValue Q1 = CurDAG->getTargetInsertSubreg(SubregIdx, dl, VT,
-                                                 N->getOperand(4),
-                                                 SDValue(VLdLn, 1));
-      Chain = SDValue(VLdLn, 2);
-      ReplaceUses(SDValue(N, 0), Q0);
-      ReplaceUses(SDValue(N, 1), Q1);
-      ReplaceUses(SDValue(N, 2), Chain);
-      return NULL;
+      unsigned DOpcodes[] = { ARM::VLD2LNd8, ARM::VLD2LNd16, ARM::VLD2LNd32 };
+      unsigned QOpcodes0[] = { ARM::VLD2LNq16a, ARM::VLD2LNq32a };
+      unsigned QOpcodes1[] = { ARM::VLD2LNq16b, ARM::VLD2LNq32b };
+      return SelectVLDLane(Op, 2, DOpcodes, QOpcodes0, QOpcodes1);
     }
 
     case Intrinsic::arm_neon_vld3lane: {
-      SDValue MemAddr, MemUpdate, MemOpc;
-      if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
-        return NULL;
-      SDValue Chain = N->getOperand(0);
-      unsigned Lane = cast<ConstantSDNode>(N->getOperand(6))->getZExtValue();
-      if (VT.is64BitVector()) {
-        switch (VT.getSimpleVT().SimpleTy) {
-        default: llvm_unreachable("unhandled vld3lane type");
-        case MVT::v8i8:  Opc = ARM::VLD3LNd8; break;
-        case MVT::v4i16: Opc = ARM::VLD3LNd16; break;
-        case MVT::v2f32:
-        case MVT::v2i32: Opc = ARM::VLD3LNd32; break;
-        }
-        const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc,
-                                N->getOperand(3), N->getOperand(4),
-                                N->getOperand(5), getI32Imm(Lane), Chain };
-        return CurDAG->getMachineNode(Opc, dl, VT, VT, VT, MVT::Other, Ops, 8);
-      }
-      // Quad registers are handled by extracting subregs, doing the load,
-      // and then inserting the results as subregs.
-      EVT RegVT;
-      unsigned Opc2 = 0;
-      switch (VT.getSimpleVT().SimpleTy) {
-      default: llvm_unreachable("unhandled vld3lane type");
-      case MVT::v8i16:
-        Opc = ARM::VLD3LNq16a;
-        Opc2 = ARM::VLD3LNq16b;
-        RegVT = MVT::v4i16;
-        break;
-      case MVT::v4f32:
-        Opc = ARM::VLD3LNq32a;
-        Opc2 = ARM::VLD3LNq32b;
-        RegVT = MVT::v2f32;
-        break;
-      case MVT::v4i32:
-        Opc = ARM::VLD3LNq32a;
-        Opc2 = ARM::VLD3LNq32b;
-        RegVT = MVT::v2i32;
-        break;
-      }
-      unsigned NumElts = RegVT.getVectorNumElements();
-      int SubregIdx = (Lane < NumElts) ? ARM::DSUBREG_0 : ARM::DSUBREG_1;
-
-      SDValue D0 = CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT,
-                                                  N->getOperand(3));
-      SDValue D1 = CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT,
-                                                  N->getOperand(4));
-      SDValue D2 = CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT,
-                                                  N->getOperand(5));
-      const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, D0, D1, D2,
-                              getI32Imm(Lane % NumElts), Chain };
-      SDNode *VLdLn = CurDAG->getMachineNode((Lane < NumElts) ? Opc : Opc2,
-                                             dl, RegVT, RegVT, RegVT,
-                                             MVT::Other, Ops, 8);
-      SDValue Q0 = CurDAG->getTargetInsertSubreg(SubregIdx, dl, VT,
-                                                 N->getOperand(3),
-                                                 SDValue(VLdLn, 0));
-      SDValue Q1 = CurDAG->getTargetInsertSubreg(SubregIdx, dl, VT,
-                                                 N->getOperand(4),
-                                                 SDValue(VLdLn, 1));
-      SDValue Q2 = CurDAG->getTargetInsertSubreg(SubregIdx, dl, VT,
-                                                 N->getOperand(5),
-                                                 SDValue(VLdLn, 2));
-      Chain = SDValue(VLdLn, 3);
-      ReplaceUses(SDValue(N, 0), Q0);
-      ReplaceUses(SDValue(N, 1), Q1);
-      ReplaceUses(SDValue(N, 2), Q2);
-      ReplaceUses(SDValue(N, 3), Chain);
-      return NULL;
+      unsigned DOpcodes[] = { ARM::VLD3LNd8, ARM::VLD3LNd16, ARM::VLD3LNd32 };
+      unsigned QOpcodes0[] = { ARM::VLD3LNq16a, ARM::VLD3LNq32a };
+      unsigned QOpcodes1[] = { ARM::VLD3LNq16b, ARM::VLD3LNq32b };
+      return SelectVLDLane(Op, 3, DOpcodes, QOpcodes0, QOpcodes1);
    }
 
    case Intrinsic::arm_neon_vld4lane: {
-      SDValue MemAddr, MemUpdate, MemOpc;
-      if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
-        return NULL;
-      SDValue Chain = N->getOperand(0);
-      unsigned Lane = cast<ConstantSDNode>(N->getOperand(7))->getZExtValue();
-      if (VT.is64BitVector()) {
-        switch (VT.getSimpleVT().SimpleTy) {
-        default: llvm_unreachable("unhandled vld4lane type");
-        case MVT::v8i8:  Opc = ARM::VLD4LNd8; break;
-        case MVT::v4i16: Opc = ARM::VLD4LNd16; break;
-        case MVT::v2f32:
-        case MVT::v2i32: Opc = ARM::VLD4LNd32; break;
-        }
-        const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc,
-                                N->getOperand(3), N->getOperand(4),
-                                N->getOperand(5), N->getOperand(6),
-                                getI32Imm(Lane), Chain };
-        std::vector<EVT> ResTys(4, VT);
-        ResTys.push_back(MVT::Other);
-        return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 9);
-      }
-      // Quad registers are handled by extracting subregs, doing the load,
-      // and then inserting the results as subregs.
-      EVT RegVT;
-      unsigned Opc2 = 0;
-      switch (VT.getSimpleVT().SimpleTy) {
-      default: llvm_unreachable("unhandled vld4lane type");
-      case MVT::v8i16:
-        Opc = ARM::VLD4LNq16a;
-        Opc2 = ARM::VLD4LNq16b;
-        RegVT = MVT::v4i16;
-        break;
-      case MVT::v4f32:
-        Opc = ARM::VLD4LNq32a;
-        Opc2 = ARM::VLD4LNq32b;
-        RegVT = MVT::v2f32;
-        break;
-      case MVT::v4i32:
-        Opc = ARM::VLD4LNq32a;
-        Opc2 = ARM::VLD4LNq32b;
-        RegVT = MVT::v2i32;
-        break;
-      }
-      unsigned NumElts = RegVT.getVectorNumElements();
-      int SubregIdx = (Lane < NumElts) ? ARM::DSUBREG_0 : ARM::DSUBREG_1;
-
-      SDValue D0 = CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT,
-                                                  N->getOperand(3));
-      SDValue D1 = CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT,
-                                                  N->getOperand(4));
-      SDValue D2 = CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT,
-                                                  N->getOperand(5));
-      SDValue D3 = CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT,
-                                                  N->getOperand(6));
-      const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, D0, D1, D2, D3,
-                              getI32Imm(Lane % NumElts), Chain };
-      std::vector<EVT> ResTys(4, RegVT);
-      ResTys.push_back(MVT::Other);
-      SDNode *VLdLn = CurDAG->getMachineNode((Lane < NumElts) ? Opc : Opc2,
-                                             dl, ResTys, Ops, 9);
-      SDValue Q0 = CurDAG->getTargetInsertSubreg(SubregIdx, dl, VT,
-                                                 N->getOperand(3),
-                                                 SDValue(VLdLn, 0));
-      SDValue Q1 = CurDAG->getTargetInsertSubreg(SubregIdx, dl, VT,
-                                                 N->getOperand(4),
-                                                 SDValue(VLdLn, 1));
-      SDValue Q2 = CurDAG->getTargetInsertSubreg(SubregIdx, dl, VT,
-                                                 N->getOperand(5),
-                                                 SDValue(VLdLn, 2));
-      SDValue Q3 = CurDAG->getTargetInsertSubreg(SubregIdx, dl, VT,
-                                                 N->getOperand(6),
-                                                 SDValue(VLdLn, 3));
-      Chain = SDValue(VLdLn, 4);
-      ReplaceUses(SDValue(N, 0), Q0);
-      ReplaceUses(SDValue(N, 1), Q1);
-      ReplaceUses(SDValue(N, 2), Q2);
-      ReplaceUses(SDValue(N, 3), Q3);
-      ReplaceUses(SDValue(N, 4), Chain);
-      return NULL;
+      unsigned DOpcodes[] = { ARM::VLD4LNd8, ARM::VLD4LNd16, ARM::VLD4LNd32 };
+      unsigned QOpcodes0[] = { ARM::VLD4LNq16a, ARM::VLD4LNq32a };
+      unsigned QOpcodes1[] = { ARM::VLD4LNq16b, ARM::VLD4LNq32b };
+      return SelectVLDLane(Op, 4, DOpcodes, QOpcodes0, QOpcodes1);
    }
 
    case Intrinsic::arm_neon_vst2: {