diff options
author | Hao Liu <Hao.Liu@arm.com> | 2013-11-05 03:39:32 +0000 |
---|---|---|
committer | Hao Liu <Hao.Liu@arm.com> | 2013-11-05 03:39:32 +0000 |
commit | 591c2f738a3e12026ff5504a486d54fc21fb3049 (patch) | |
tree | 6909db3910a1e52022a3166d0b2d7648269f5bc3 /lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | |
parent | 8263dcdf23bc534405745959c97cbfd562362458 (diff) | |
download | external_llvm-591c2f738a3e12026ff5504a486d54fc21fb3049.zip external_llvm-591c2f738a3e12026ff5504a486d54fc21fb3049.tar.gz external_llvm-591c2f738a3e12026ff5504a486d54fc21fb3049.tar.bz2 |
Implement AArch64 post-index vector load/store multiple N-element structure class SIMD(lselem-post).
Including following 14 instructions:
4 ld1 insts: post-index load multiple 1-element structure to sequential 1/2/3/4 registers.
ld2/ld3/ld4: post-index load multiple N-element structure to sequential N registers (N=2,3,4).
4 st1 insts: post-index store multiple 1-element structure from sequential 1/2/3/4 registers.
st2/st3/st4: post-index store multiple N-element structure from sequential N registers (N = 2,3,4).
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194043 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/AArch64/AArch64ISelDAGToDAG.cpp')
-rw-r--r-- | lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 204 |
1 files changed, 191 insertions, 13 deletions
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 3b0dd64..fb4e19c 100644 --- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -110,10 +110,12 @@ public: SDNode* Select(SDNode*); private: /// Select NEON load intrinsics. NumVecs should be 1, 2, 3 or 4. - SDNode *SelectVLD(SDNode *N, unsigned NumVecs, const uint16_t *Opcode); + SDNode *SelectVLD(SDNode *N, unsigned NumVecs, bool isUpdating, + const uint16_t *Opcode); /// Select NEON store intrinsics. NumVecs should be 1, 2, 3 or 4. - SDNode *SelectVST(SDNode *N, unsigned NumVecs, const uint16_t *Opcodes); + SDNode *SelectVST(SDNode *N, unsigned NumVecs, bool isUpdating, + const uint16_t *Opcodes); // Form pairs of consecutive 64-bit/128-bit registers. SDNode *createDPairNode(SDValue V0, SDValue V1); @@ -485,7 +487,88 @@ SDNode *AArch64DAGToDAGISel::createQQuadNode(SDValue V0, SDValue V1, SDValue V2, Ops); } +// Get the register stride update opcode of a VLD/VST instruction that +// is otherwise equivalent to the given fixed stride updating instruction. +static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { + switch (Opc) { + default: break; + case AArch64::LD1WB_8B_fixed: return AArch64::LD1WB_8B_register; + case AArch64::LD1WB_4H_fixed: return AArch64::LD1WB_4H_register; + case AArch64::LD1WB_2S_fixed: return AArch64::LD1WB_2S_register; + case AArch64::LD1WB_1D_fixed: return AArch64::LD1WB_1D_register; + case AArch64::LD1WB_16B_fixed: return AArch64::LD1WB_16B_register; + case AArch64::LD1WB_8H_fixed: return AArch64::LD1WB_8H_register; + case AArch64::LD1WB_4S_fixed: return AArch64::LD1WB_4S_register; + case AArch64::LD1WB_2D_fixed: return AArch64::LD1WB_2D_register; + + case AArch64::LD2WB_8B_fixed: return AArch64::LD2WB_8B_register; + case AArch64::LD2WB_4H_fixed: return AArch64::LD2WB_4H_register; + case AArch64::LD2WB_2S_fixed: return AArch64::LD2WB_2S_register; + case AArch64::LD1WB2V_1D_fixed: return AArch64::LD1WB2V_1D_register; + case AArch64::LD2WB_16B_fixed: return AArch64::LD2WB_16B_register; + case AArch64::LD2WB_8H_fixed: return AArch64::LD2WB_8H_register; + case AArch64::LD2WB_4S_fixed: return AArch64::LD2WB_4S_register; + case AArch64::LD2WB_2D_fixed: return AArch64::LD2WB_2D_register; + + case AArch64::LD3WB_8B_fixed: return AArch64::LD3WB_8B_register; + case AArch64::LD3WB_4H_fixed: return AArch64::LD3WB_4H_register; + case AArch64::LD3WB_2S_fixed: return AArch64::LD3WB_2S_register; + case AArch64::LD1WB3V_1D_fixed: return AArch64::LD1WB3V_1D_register; + case AArch64::LD3WB_16B_fixed: return AArch64::LD3WB_16B_register; + case AArch64::LD3WB_8H_fixed: return AArch64::LD3WB_8H_register; + case AArch64::LD3WB_4S_fixed: return AArch64::LD3WB_4S_register; + case AArch64::LD3WB_2D_fixed: return AArch64::LD3WB_2D_register; + + case AArch64::LD4WB_8B_fixed: return AArch64::LD4WB_8B_register; + case AArch64::LD4WB_4H_fixed: return AArch64::LD4WB_4H_register; + case AArch64::LD4WB_2S_fixed: return AArch64::LD4WB_2S_register; + case AArch64::LD1WB4V_1D_fixed: return AArch64::LD1WB4V_1D_register; + case AArch64::LD4WB_16B_fixed: return AArch64::LD4WB_16B_register; + case AArch64::LD4WB_8H_fixed: return AArch64::LD4WB_8H_register; + case AArch64::LD4WB_4S_fixed: return AArch64::LD4WB_4S_register; + case AArch64::LD4WB_2D_fixed: return AArch64::LD4WB_2D_register; + + case AArch64::ST1WB_8B_fixed: return AArch64::ST1WB_8B_register; + case AArch64::ST1WB_4H_fixed: return AArch64::ST1WB_4H_register; + case AArch64::ST1WB_2S_fixed: return AArch64::ST1WB_2S_register; + case AArch64::ST1WB_1D_fixed: return AArch64::ST1WB_1D_register; + case AArch64::ST1WB_16B_fixed: return AArch64::ST1WB_16B_register; + case AArch64::ST1WB_8H_fixed: return AArch64::ST1WB_8H_register; + case AArch64::ST1WB_4S_fixed: return AArch64::ST1WB_4S_register; + case AArch64::ST1WB_2D_fixed: return AArch64::ST1WB_2D_register; + + case AArch64::ST2WB_8B_fixed: return AArch64::ST2WB_8B_register; + case AArch64::ST2WB_4H_fixed: return AArch64::ST2WB_4H_register; + case AArch64::ST2WB_2S_fixed: return AArch64::ST2WB_2S_register; + case AArch64::ST1WB2V_1D_fixed: return AArch64::ST1WB2V_1D_register; + case AArch64::ST2WB_16B_fixed: return AArch64::ST2WB_16B_register; + case AArch64::ST2WB_8H_fixed: return AArch64::ST2WB_8H_register; + case AArch64::ST2WB_4S_fixed: return AArch64::ST2WB_4S_register; + case AArch64::ST2WB_2D_fixed: return AArch64::ST2WB_2D_register; + + case AArch64::ST3WB_8B_fixed: return AArch64::ST3WB_8B_register; + case AArch64::ST3WB_4H_fixed: return AArch64::ST3WB_4H_register; + case AArch64::ST3WB_2S_fixed: return AArch64::ST3WB_2S_register; + case AArch64::ST1WB3V_1D_fixed: return AArch64::ST1WB3V_1D_register; + case AArch64::ST3WB_16B_fixed: return AArch64::ST3WB_16B_register; + case AArch64::ST3WB_8H_fixed: return AArch64::ST3WB_8H_register; + case AArch64::ST3WB_4S_fixed: return AArch64::ST3WB_4S_register; + case AArch64::ST3WB_2D_fixed: return AArch64::ST3WB_2D_register; + + case AArch64::ST4WB_8B_fixed: return AArch64::ST4WB_8B_register; + case AArch64::ST4WB_4H_fixed: return AArch64::ST4WB_4H_register; + case AArch64::ST4WB_2S_fixed: return AArch64::ST4WB_2S_register; + case AArch64::ST1WB4V_1D_fixed: return AArch64::ST1WB4V_1D_register; + case AArch64::ST4WB_16B_fixed: return AArch64::ST4WB_16B_register; + case AArch64::ST4WB_8H_fixed: return AArch64::ST4WB_8H_register; + case AArch64::ST4WB_4S_fixed: return AArch64::ST4WB_4S_register; + case AArch64::ST4WB_2D_fixed: return AArch64::ST4WB_2D_register; + } + return Opc; // If not one we handle, return it unchanged. +} + SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, + bool isUpdating, const uint16_t *Opcodes) { assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range"); @@ -510,7 +593,16 @@ SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, unsigned Opc = Opcodes[OpcodeIndex]; SmallVector<SDValue, 2> Ops; - Ops.push_back(N->getOperand(2)); // Push back the Memory Address + unsigned AddrOpIdx = isUpdating ? 1 : 2; + Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address + + if (isUpdating) { + SDValue Inc = N->getOperand(AddrOpIdx + 1); + if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register + Opc = getVLDSTRegisterUpdateOpcode(Opc); + Ops.push_back(Inc); + } + Ops.push_back(N->getOperand(0)); // Push back the Chain std::vector<EVT> ResTys; @@ -526,6 +618,8 @@ SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, ResTys.push_back(ResTy); } + if (isUpdating) + ResTys.push_back(MVT::i64); // Type of the updated register ResTys.push_back(MVT::Other); // Type of the Chain SDLoc dl(N); SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); @@ -548,11 +642,14 @@ SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); // Update users of the Chain ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1)); + if (isUpdating) + ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2)); return NULL; } SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, + bool isUpdating, const uint16_t *Opcodes) { assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range"); SDLoc dl(N); @@ -560,7 +657,8 @@ SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); - unsigned Vec0Idx = 3; + unsigned AddrOpIdx = isUpdating ? 1 : 2; + unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1) EVT VT = N->getOperand(Vec0Idx).getValueType(); unsigned OpcodeIndex; switch (VT.getSimpleVT().SimpleTy) { @@ -582,11 +680,19 @@ SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, unsigned Opc = Opcodes[OpcodeIndex]; std::vector<EVT> ResTys; + if (isUpdating) + ResTys.push_back(MVT::i64); ResTys.push_back(MVT::Other); // Type for the Chain SmallVector<SDValue, 6> Ops; - Ops.push_back(N->getOperand(2)); // Push back the Memory Address + Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address + if (isUpdating) { + SDValue Inc = N->getOperand(AddrOpIdx + 1); + if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register + Opc = getVLDSTRegisterUpdateOpcode(Opc); + Ops.push_back(Inc); + } bool is64BitVector = VT.is64BitVector(); SDValue V0 = N->getOperand(Vec0Idx + 0); @@ -768,6 +874,78 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { Node = ResNode; break; } + case AArch64ISD::NEON_LD1_UPD: { + static const uint16_t Opcodes[] = { + AArch64::LD1WB_8B_fixed, AArch64::LD1WB_4H_fixed, + AArch64::LD1WB_2S_fixed, AArch64::LD1WB_1D_fixed, + AArch64::LD1WB_16B_fixed, AArch64::LD1WB_8H_fixed, + AArch64::LD1WB_4S_fixed, AArch64::LD1WB_2D_fixed + }; + return SelectVLD(Node, 1, true, Opcodes); + } + case AArch64ISD::NEON_LD2_UPD: { + static const uint16_t Opcodes[] = { + AArch64::LD2WB_8B_fixed, AArch64::LD2WB_4H_fixed, + AArch64::LD2WB_2S_fixed, AArch64::LD1WB2V_1D_fixed, + AArch64::LD2WB_16B_fixed, AArch64::LD2WB_8H_fixed, + AArch64::LD2WB_4S_fixed, AArch64::LD2WB_2D_fixed + }; + return SelectVLD(Node, 2, true, Opcodes); + } + case AArch64ISD::NEON_LD3_UPD: { + static const uint16_t Opcodes[] = { + AArch64::LD3WB_8B_fixed, AArch64::LD3WB_4H_fixed, + AArch64::LD3WB_2S_fixed, AArch64::LD1WB3V_1D_fixed, + AArch64::LD3WB_16B_fixed, AArch64::LD3WB_8H_fixed, + AArch64::LD3WB_4S_fixed, AArch64::LD3WB_2D_fixed + }; + return SelectVLD(Node, 3, true, Opcodes); + } + case AArch64ISD::NEON_LD4_UPD: { + static const uint16_t Opcodes[] = { + AArch64::LD4WB_8B_fixed, AArch64::LD4WB_4H_fixed, + AArch64::LD4WB_2S_fixed, AArch64::LD1WB4V_1D_fixed, + AArch64::LD4WB_16B_fixed, AArch64::LD4WB_8H_fixed, + AArch64::LD4WB_4S_fixed, AArch64::LD4WB_2D_fixed + }; + return SelectVLD(Node, 4, true, Opcodes); + } + case AArch64ISD::NEON_ST1_UPD: { + static const uint16_t Opcodes[] = { + AArch64::ST1WB_8B_fixed, AArch64::ST1WB_4H_fixed, + AArch64::ST1WB_2S_fixed, AArch64::ST1WB_1D_fixed, + AArch64::ST1WB_16B_fixed, AArch64::ST1WB_8H_fixed, + AArch64::ST1WB_4S_fixed, AArch64::ST1WB_2D_fixed + }; + return SelectVST(Node, 1, true, Opcodes); + } + case AArch64ISD::NEON_ST2_UPD: { + static const uint16_t Opcodes[] = { + AArch64::ST2WB_8B_fixed, AArch64::ST2WB_4H_fixed, + AArch64::ST2WB_2S_fixed, AArch64::ST1WB2V_1D_fixed, + AArch64::ST2WB_16B_fixed, AArch64::ST2WB_8H_fixed, + AArch64::ST2WB_4S_fixed, AArch64::ST2WB_2D_fixed + }; + return SelectVST(Node, 2, true, Opcodes); + } + case AArch64ISD::NEON_ST3_UPD: { + static const uint16_t Opcodes[] = { + AArch64::ST3WB_8B_fixed, AArch64::ST3WB_4H_fixed, + AArch64::ST3WB_2S_fixed, AArch64::ST1WB3V_1D_fixed, + AArch64::ST3WB_16B_fixed, AArch64::ST3WB_8H_fixed, + AArch64::ST3WB_4S_fixed, AArch64::ST3WB_2D_fixed + }; + return SelectVST(Node, 3, true, Opcodes); + } + case AArch64ISD::NEON_ST4_UPD: { + static const uint16_t Opcodes[] = { + AArch64::ST4WB_8B_fixed, AArch64::ST4WB_4H_fixed, + AArch64::ST4WB_2S_fixed, AArch64::ST1WB4V_1D_fixed, + AArch64::ST4WB_16B_fixed, AArch64::ST4WB_8H_fixed, + AArch64::ST4WB_4S_fixed, AArch64::ST4WB_2D_fixed + }; + return SelectVST(Node, 4, true, Opcodes); + } case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: { unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); @@ -780,56 +958,56 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { AArch64::LD1_2S, AArch64::LD1_1D, AArch64::LD1_16B, AArch64::LD1_8H, AArch64::LD1_4S, AArch64::LD1_2D }; - return SelectVLD(Node, 1, Opcodes); + return SelectVLD(Node, 1, false, Opcodes); } case Intrinsic::arm_neon_vld2: { static const uint16_t Opcodes[] = { AArch64::LD2_8B, AArch64::LD2_4H, AArch64::LD2_2S, AArch64::LD1_2V_1D, AArch64::LD2_16B, AArch64::LD2_8H, AArch64::LD2_4S, AArch64::LD2_2D }; - return SelectVLD(Node, 2, Opcodes); + return SelectVLD(Node, 2, false, Opcodes); } case Intrinsic::arm_neon_vld3: { static const uint16_t Opcodes[] = { AArch64::LD3_8B, AArch64::LD3_4H, AArch64::LD3_2S, AArch64::LD1_3V_1D, AArch64::LD3_16B, AArch64::LD3_8H, AArch64::LD3_4S, AArch64::LD3_2D }; - return SelectVLD(Node, 3, Opcodes); + return SelectVLD(Node, 3, false, Opcodes); } case Intrinsic::arm_neon_vld4: { static const uint16_t Opcodes[] = { AArch64::LD4_8B, AArch64::LD4_4H, AArch64::LD4_2S, AArch64::LD1_4V_1D, AArch64::LD4_16B, AArch64::LD4_8H, AArch64::LD4_4S, AArch64::LD4_2D }; - return SelectVLD(Node, 4, Opcodes); + return SelectVLD(Node, 4, false, Opcodes); } case Intrinsic::arm_neon_vst1: { static const uint16_t Opcodes[] = { AArch64::ST1_8B, AArch64::ST1_4H, AArch64::ST1_2S, AArch64::ST1_1D, AArch64::ST1_16B, AArch64::ST1_8H, AArch64::ST1_4S, AArch64::ST1_2D }; - return SelectVST(Node, 1, Opcodes); + return SelectVST(Node, 1, false, Opcodes); } case Intrinsic::arm_neon_vst2: { static const uint16_t Opcodes[] = { AArch64::ST2_8B, AArch64::ST2_4H, AArch64::ST2_2S, AArch64::ST1_2V_1D, AArch64::ST2_16B, AArch64::ST2_8H, AArch64::ST2_4S, AArch64::ST2_2D }; - return SelectVST(Node, 2, Opcodes); + return SelectVST(Node, 2, false, Opcodes); } case Intrinsic::arm_neon_vst3: { static const uint16_t Opcodes[] = { AArch64::ST3_8B, AArch64::ST3_4H, AArch64::ST3_2S, AArch64::ST1_3V_1D, AArch64::ST3_16B, AArch64::ST3_8H, AArch64::ST3_4S, AArch64::ST3_2D }; - return SelectVST(Node, 3, Opcodes); + return SelectVST(Node, 3, false, Opcodes); } case Intrinsic::arm_neon_vst4: { static const uint16_t Opcodes[] = { AArch64::ST4_8B, AArch64::ST4_4H, AArch64::ST4_2S, AArch64::ST1_4V_1D, AArch64::ST4_16B, AArch64::ST4_8H, AArch64::ST4_4S, AArch64::ST4_2D }; - return SelectVST(Node, 4, Opcodes); + return SelectVST(Node, 4, false, Opcodes); } } break; |