aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
diff options
context:
space:
mode:
authorHao Liu <Hao.Liu@arm.com>2013-11-05 03:39:32 +0000
committerHao Liu <Hao.Liu@arm.com>2013-11-05 03:39:32 +0000
commit591c2f738a3e12026ff5504a486d54fc21fb3049 (patch)
tree6909db3910a1e52022a3166d0b2d7648269f5bc3 /lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
parent8263dcdf23bc534405745959c97cbfd562362458 (diff)
downloadexternal_llvm-591c2f738a3e12026ff5504a486d54fc21fb3049.zip
external_llvm-591c2f738a3e12026ff5504a486d54fc21fb3049.tar.gz
external_llvm-591c2f738a3e12026ff5504a486d54fc21fb3049.tar.bz2
Implement AArch64 post-index vector load/store multiple N-element structure class SIMD(lselem-post).
Including following 14 instructions: 4 ld1 insts: post-index load multiple 1-element structure to sequential 1/2/3/4 registers. ld2/ld3/ld4: post-index load multiple N-element structure to sequential N registers (N=2,3,4). 4 st1 insts: post-index store multiple 1-element structure from sequential 1/2/3/4 registers. st2/st3/st4: post-index store multiple N-element structure from sequential N registers (N = 2,3,4). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194043 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/AArch64/AArch64ISelDAGToDAG.cpp')
-rw-r--r--lib/Target/AArch64/AArch64ISelDAGToDAG.cpp204
1 files changed, 191 insertions, 13 deletions
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 3b0dd64..fb4e19c 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -110,10 +110,12 @@ public:
SDNode* Select(SDNode*);
private:
/// Select NEON load intrinsics. NumVecs should be 1, 2, 3 or 4.
- SDNode *SelectVLD(SDNode *N, unsigned NumVecs, const uint16_t *Opcode);
+ SDNode *SelectVLD(SDNode *N, unsigned NumVecs, bool isUpdating,
+ const uint16_t *Opcode);
/// Select NEON store intrinsics. NumVecs should be 1, 2, 3 or 4.
- SDNode *SelectVST(SDNode *N, unsigned NumVecs, const uint16_t *Opcodes);
+ SDNode *SelectVST(SDNode *N, unsigned NumVecs, bool isUpdating,
+ const uint16_t *Opcodes);
// Form pairs of consecutive 64-bit/128-bit registers.
SDNode *createDPairNode(SDValue V0, SDValue V1);
@@ -485,7 +487,88 @@ SDNode *AArch64DAGToDAGISel::createQQuadNode(SDValue V0, SDValue V1, SDValue V2,
Ops);
}
+// Get the register stride update opcode of a VLD/VST instruction that
+// is otherwise equivalent to the given fixed stride updating instruction.
+static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
+ switch (Opc) {
+ default: break;
+ case AArch64::LD1WB_8B_fixed: return AArch64::LD1WB_8B_register;
+ case AArch64::LD1WB_4H_fixed: return AArch64::LD1WB_4H_register;
+ case AArch64::LD1WB_2S_fixed: return AArch64::LD1WB_2S_register;
+ case AArch64::LD1WB_1D_fixed: return AArch64::LD1WB_1D_register;
+ case AArch64::LD1WB_16B_fixed: return AArch64::LD1WB_16B_register;
+ case AArch64::LD1WB_8H_fixed: return AArch64::LD1WB_8H_register;
+ case AArch64::LD1WB_4S_fixed: return AArch64::LD1WB_4S_register;
+ case AArch64::LD1WB_2D_fixed: return AArch64::LD1WB_2D_register;
+
+ case AArch64::LD2WB_8B_fixed: return AArch64::LD2WB_8B_register;
+ case AArch64::LD2WB_4H_fixed: return AArch64::LD2WB_4H_register;
+ case AArch64::LD2WB_2S_fixed: return AArch64::LD2WB_2S_register;
+ case AArch64::LD1WB2V_1D_fixed: return AArch64::LD1WB2V_1D_register;
+ case AArch64::LD2WB_16B_fixed: return AArch64::LD2WB_16B_register;
+ case AArch64::LD2WB_8H_fixed: return AArch64::LD2WB_8H_register;
+ case AArch64::LD2WB_4S_fixed: return AArch64::LD2WB_4S_register;
+ case AArch64::LD2WB_2D_fixed: return AArch64::LD2WB_2D_register;
+
+ case AArch64::LD3WB_8B_fixed: return AArch64::LD3WB_8B_register;
+ case AArch64::LD3WB_4H_fixed: return AArch64::LD3WB_4H_register;
+ case AArch64::LD3WB_2S_fixed: return AArch64::LD3WB_2S_register;
+ case AArch64::LD1WB3V_1D_fixed: return AArch64::LD1WB3V_1D_register;
+ case AArch64::LD3WB_16B_fixed: return AArch64::LD3WB_16B_register;
+ case AArch64::LD3WB_8H_fixed: return AArch64::LD3WB_8H_register;
+ case AArch64::LD3WB_4S_fixed: return AArch64::LD3WB_4S_register;
+ case AArch64::LD3WB_2D_fixed: return AArch64::LD3WB_2D_register;
+
+ case AArch64::LD4WB_8B_fixed: return AArch64::LD4WB_8B_register;
+ case AArch64::LD4WB_4H_fixed: return AArch64::LD4WB_4H_register;
+ case AArch64::LD4WB_2S_fixed: return AArch64::LD4WB_2S_register;
+ case AArch64::LD1WB4V_1D_fixed: return AArch64::LD1WB4V_1D_register;
+ case AArch64::LD4WB_16B_fixed: return AArch64::LD4WB_16B_register;
+ case AArch64::LD4WB_8H_fixed: return AArch64::LD4WB_8H_register;
+ case AArch64::LD4WB_4S_fixed: return AArch64::LD4WB_4S_register;
+ case AArch64::LD4WB_2D_fixed: return AArch64::LD4WB_2D_register;
+
+ case AArch64::ST1WB_8B_fixed: return AArch64::ST1WB_8B_register;
+ case AArch64::ST1WB_4H_fixed: return AArch64::ST1WB_4H_register;
+ case AArch64::ST1WB_2S_fixed: return AArch64::ST1WB_2S_register;
+ case AArch64::ST1WB_1D_fixed: return AArch64::ST1WB_1D_register;
+ case AArch64::ST1WB_16B_fixed: return AArch64::ST1WB_16B_register;
+ case AArch64::ST1WB_8H_fixed: return AArch64::ST1WB_8H_register;
+ case AArch64::ST1WB_4S_fixed: return AArch64::ST1WB_4S_register;
+ case AArch64::ST1WB_2D_fixed: return AArch64::ST1WB_2D_register;
+
+ case AArch64::ST2WB_8B_fixed: return AArch64::ST2WB_8B_register;
+ case AArch64::ST2WB_4H_fixed: return AArch64::ST2WB_4H_register;
+ case AArch64::ST2WB_2S_fixed: return AArch64::ST2WB_2S_register;
+ case AArch64::ST1WB2V_1D_fixed: return AArch64::ST1WB2V_1D_register;
+ case AArch64::ST2WB_16B_fixed: return AArch64::ST2WB_16B_register;
+ case AArch64::ST2WB_8H_fixed: return AArch64::ST2WB_8H_register;
+ case AArch64::ST2WB_4S_fixed: return AArch64::ST2WB_4S_register;
+ case AArch64::ST2WB_2D_fixed: return AArch64::ST2WB_2D_register;
+
+ case AArch64::ST3WB_8B_fixed: return AArch64::ST3WB_8B_register;
+ case AArch64::ST3WB_4H_fixed: return AArch64::ST3WB_4H_register;
+ case AArch64::ST3WB_2S_fixed: return AArch64::ST3WB_2S_register;
+ case AArch64::ST1WB3V_1D_fixed: return AArch64::ST1WB3V_1D_register;
+ case AArch64::ST3WB_16B_fixed: return AArch64::ST3WB_16B_register;
+ case AArch64::ST3WB_8H_fixed: return AArch64::ST3WB_8H_register;
+ case AArch64::ST3WB_4S_fixed: return AArch64::ST3WB_4S_register;
+ case AArch64::ST3WB_2D_fixed: return AArch64::ST3WB_2D_register;
+
+ case AArch64::ST4WB_8B_fixed: return AArch64::ST4WB_8B_register;
+ case AArch64::ST4WB_4H_fixed: return AArch64::ST4WB_4H_register;
+ case AArch64::ST4WB_2S_fixed: return AArch64::ST4WB_2S_register;
+ case AArch64::ST1WB4V_1D_fixed: return AArch64::ST1WB4V_1D_register;
+ case AArch64::ST4WB_16B_fixed: return AArch64::ST4WB_16B_register;
+ case AArch64::ST4WB_8H_fixed: return AArch64::ST4WB_8H_register;
+ case AArch64::ST4WB_4S_fixed: return AArch64::ST4WB_4S_register;
+ case AArch64::ST4WB_2D_fixed: return AArch64::ST4WB_2D_register;
+ }
+ return Opc; // If not one we handle, return it unchanged.
+}
+
SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
+ bool isUpdating,
const uint16_t *Opcodes) {
assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
@@ -510,7 +593,16 @@ SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
unsigned Opc = Opcodes[OpcodeIndex];
SmallVector<SDValue, 2> Ops;
- Ops.push_back(N->getOperand(2)); // Push back the Memory Address
+ unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address
+
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
+ Opc = getVLDSTRegisterUpdateOpcode(Opc);
+ Ops.push_back(Inc);
+ }
+
Ops.push_back(N->getOperand(0)); // Push back the Chain
std::vector<EVT> ResTys;
@@ -526,6 +618,8 @@ SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
ResTys.push_back(ResTy);
}
+ if (isUpdating)
+ ResTys.push_back(MVT::i64); // Type of the updated register
ResTys.push_back(MVT::Other); // Type of the Chain
SDLoc dl(N);
SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
@@ -548,11 +642,14 @@ SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
// Update users of the Chain
ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
+ if (isUpdating)
+ ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
return NULL;
}
SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
+ bool isUpdating,
const uint16_t *Opcodes) {
assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
SDLoc dl(N);
@@ -560,7 +657,8 @@ SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- unsigned Vec0Idx = 3;
+ unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
EVT VT = N->getOperand(Vec0Idx).getValueType();
unsigned OpcodeIndex;
switch (VT.getSimpleVT().SimpleTy) {
@@ -582,11 +680,19 @@ SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
unsigned Opc = Opcodes[OpcodeIndex];
std::vector<EVT> ResTys;
+ if (isUpdating)
+ ResTys.push_back(MVT::i64);
ResTys.push_back(MVT::Other); // Type for the Chain
SmallVector<SDValue, 6> Ops;
- Ops.push_back(N->getOperand(2)); // Push back the Memory Address
+ Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
+ Opc = getVLDSTRegisterUpdateOpcode(Opc);
+ Ops.push_back(Inc);
+ }
bool is64BitVector = VT.is64BitVector();
SDValue V0 = N->getOperand(Vec0Idx + 0);
@@ -768,6 +874,78 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
Node = ResNode;
break;
}
+ case AArch64ISD::NEON_LD1_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD1WB_8B_fixed, AArch64::LD1WB_4H_fixed,
+ AArch64::LD1WB_2S_fixed, AArch64::LD1WB_1D_fixed,
+ AArch64::LD1WB_16B_fixed, AArch64::LD1WB_8H_fixed,
+ AArch64::LD1WB_4S_fixed, AArch64::LD1WB_2D_fixed
+ };
+ return SelectVLD(Node, 1, true, Opcodes);
+ }
+ case AArch64ISD::NEON_LD2_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD2WB_8B_fixed, AArch64::LD2WB_4H_fixed,
+ AArch64::LD2WB_2S_fixed, AArch64::LD1WB2V_1D_fixed,
+ AArch64::LD2WB_16B_fixed, AArch64::LD2WB_8H_fixed,
+ AArch64::LD2WB_4S_fixed, AArch64::LD2WB_2D_fixed
+ };
+ return SelectVLD(Node, 2, true, Opcodes);
+ }
+ case AArch64ISD::NEON_LD3_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD3WB_8B_fixed, AArch64::LD3WB_4H_fixed,
+ AArch64::LD3WB_2S_fixed, AArch64::LD1WB3V_1D_fixed,
+ AArch64::LD3WB_16B_fixed, AArch64::LD3WB_8H_fixed,
+ AArch64::LD3WB_4S_fixed, AArch64::LD3WB_2D_fixed
+ };
+ return SelectVLD(Node, 3, true, Opcodes);
+ }
+ case AArch64ISD::NEON_LD4_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD4WB_8B_fixed, AArch64::LD4WB_4H_fixed,
+ AArch64::LD4WB_2S_fixed, AArch64::LD1WB4V_1D_fixed,
+ AArch64::LD4WB_16B_fixed, AArch64::LD4WB_8H_fixed,
+ AArch64::LD4WB_4S_fixed, AArch64::LD4WB_2D_fixed
+ };
+ return SelectVLD(Node, 4, true, Opcodes);
+ }
+ case AArch64ISD::NEON_ST1_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST1WB_8B_fixed, AArch64::ST1WB_4H_fixed,
+ AArch64::ST1WB_2S_fixed, AArch64::ST1WB_1D_fixed,
+ AArch64::ST1WB_16B_fixed, AArch64::ST1WB_8H_fixed,
+ AArch64::ST1WB_4S_fixed, AArch64::ST1WB_2D_fixed
+ };
+ return SelectVST(Node, 1, true, Opcodes);
+ }
+ case AArch64ISD::NEON_ST2_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST2WB_8B_fixed, AArch64::ST2WB_4H_fixed,
+ AArch64::ST2WB_2S_fixed, AArch64::ST1WB2V_1D_fixed,
+ AArch64::ST2WB_16B_fixed, AArch64::ST2WB_8H_fixed,
+ AArch64::ST2WB_4S_fixed, AArch64::ST2WB_2D_fixed
+ };
+ return SelectVST(Node, 2, true, Opcodes);
+ }
+ case AArch64ISD::NEON_ST3_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST3WB_8B_fixed, AArch64::ST3WB_4H_fixed,
+ AArch64::ST3WB_2S_fixed, AArch64::ST1WB3V_1D_fixed,
+ AArch64::ST3WB_16B_fixed, AArch64::ST3WB_8H_fixed,
+ AArch64::ST3WB_4S_fixed, AArch64::ST3WB_2D_fixed
+ };
+ return SelectVST(Node, 3, true, Opcodes);
+ }
+ case AArch64ISD::NEON_ST4_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST4WB_8B_fixed, AArch64::ST4WB_4H_fixed,
+ AArch64::ST4WB_2S_fixed, AArch64::ST1WB4V_1D_fixed,
+ AArch64::ST4WB_16B_fixed, AArch64::ST4WB_8H_fixed,
+ AArch64::ST4WB_4S_fixed, AArch64::ST4WB_2D_fixed
+ };
+ return SelectVST(Node, 4, true, Opcodes);
+ }
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN: {
unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
@@ -780,56 +958,56 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
AArch64::LD1_2S, AArch64::LD1_1D,
AArch64::LD1_16B, AArch64::LD1_8H,
AArch64::LD1_4S, AArch64::LD1_2D };
- return SelectVLD(Node, 1, Opcodes);
+ return SelectVLD(Node, 1, false, Opcodes);
}
case Intrinsic::arm_neon_vld2: {
static const uint16_t Opcodes[] = { AArch64::LD2_8B, AArch64::LD2_4H,
AArch64::LD2_2S, AArch64::LD1_2V_1D,
AArch64::LD2_16B, AArch64::LD2_8H,
AArch64::LD2_4S, AArch64::LD2_2D };
- return SelectVLD(Node, 2, Opcodes);
+ return SelectVLD(Node, 2, false, Opcodes);
}
case Intrinsic::arm_neon_vld3: {
static const uint16_t Opcodes[] = { AArch64::LD3_8B, AArch64::LD3_4H,
AArch64::LD3_2S, AArch64::LD1_3V_1D,
AArch64::LD3_16B, AArch64::LD3_8H,
AArch64::LD3_4S, AArch64::LD3_2D };
- return SelectVLD(Node, 3, Opcodes);
+ return SelectVLD(Node, 3, false, Opcodes);
}
case Intrinsic::arm_neon_vld4: {
static const uint16_t Opcodes[] = { AArch64::LD4_8B, AArch64::LD4_4H,
AArch64::LD4_2S, AArch64::LD1_4V_1D,
AArch64::LD4_16B, AArch64::LD4_8H,
AArch64::LD4_4S, AArch64::LD4_2D };
- return SelectVLD(Node, 4, Opcodes);
+ return SelectVLD(Node, 4, false, Opcodes);
}
case Intrinsic::arm_neon_vst1: {
static const uint16_t Opcodes[] = { AArch64::ST1_8B, AArch64::ST1_4H,
AArch64::ST1_2S, AArch64::ST1_1D,
AArch64::ST1_16B, AArch64::ST1_8H,
AArch64::ST1_4S, AArch64::ST1_2D };
- return SelectVST(Node, 1, Opcodes);
+ return SelectVST(Node, 1, false, Opcodes);
}
case Intrinsic::arm_neon_vst2: {
static const uint16_t Opcodes[] = { AArch64::ST2_8B, AArch64::ST2_4H,
AArch64::ST2_2S, AArch64::ST1_2V_1D,
AArch64::ST2_16B, AArch64::ST2_8H,
AArch64::ST2_4S, AArch64::ST2_2D };
- return SelectVST(Node, 2, Opcodes);
+ return SelectVST(Node, 2, false, Opcodes);
}
case Intrinsic::arm_neon_vst3: {
static const uint16_t Opcodes[] = { AArch64::ST3_8B, AArch64::ST3_4H,
AArch64::ST3_2S, AArch64::ST1_3V_1D,
AArch64::ST3_16B, AArch64::ST3_8H,
AArch64::ST3_4S, AArch64::ST3_2D };
- return SelectVST(Node, 3, Opcodes);
+ return SelectVST(Node, 3, false, Opcodes);
}
case Intrinsic::arm_neon_vst4: {
static const uint16_t Opcodes[] = { AArch64::ST4_8B, AArch64::ST4_4H,
AArch64::ST4_2S, AArch64::ST1_4V_1D,
AArch64::ST4_16B, AArch64::ST4_8H,
AArch64::ST4_4S, AArch64::ST4_2D };
- return SelectVST(Node, 4, Opcodes);
+ return SelectVST(Node, 4, false, Opcodes);
}
}
break;