aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AArch64/AArch64ISelDAGToDAG.cpp')
-rw-r--r--lib/Target/AArch64/AArch64ISelDAGToDAG.cpp298
1 files changed, 298 insertions, 0 deletions
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index a865564..3b0dd64 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -109,6 +109,23 @@ public:
SDNode* Select(SDNode*);
private:
+ /// Select NEON load intrinsics. NumVecs should be 1, 2, 3 or 4.
+ SDNode *SelectVLD(SDNode *N, unsigned NumVecs, const uint16_t *Opcode);
+
+ /// Select NEON store intrinsics. NumVecs should be 1, 2, 3 or 4.
+ SDNode *SelectVST(SDNode *N, unsigned NumVecs, const uint16_t *Opcodes);
+
+ // Form pairs of consecutive 64-bit/128-bit registers.
+ SDNode *createDPairNode(SDValue V0, SDValue V1);
+ SDNode *createQPairNode(SDValue V0, SDValue V1);
+
+ // Form sequences of 3 consecutive 64-bit/128-bit registers.
+ SDNode *createDTripleNode(SDValue V0, SDValue V1, SDValue V2);
+ SDNode *createQTripleNode(SDValue V0, SDValue V1, SDValue V2);
+
+ // Form sequences of 4 consecutive 64-bit/128-bit registers.
+ SDNode *createDQuadNode(SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+ SDNode *createQQuadNode(SDValue V0, SDValue V1, SDValue V2, SDValue V3);
};
}
@@ -390,6 +407,221 @@ SDNode *AArch64DAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8,
&Ops[0], Ops.size());
}
+SDNode *AArch64DAGToDAGISel::createDPairNode(SDValue V0, SDValue V1) {
+ SDLoc dl(V0.getNode());
+ SDValue RegClass =
+ CurDAG->getTargetConstant(AArch64::DPairRegClassID, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(AArch64::dsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(AArch64::dsub_1, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::v2i64,
+ Ops);
+}
+
+SDNode *AArch64DAGToDAGISel::createQPairNode(SDValue V0, SDValue V1) {
+ SDLoc dl(V0.getNode());
+ SDValue RegClass =
+ CurDAG->getTargetConstant(AArch64::QPairRegClassID, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(AArch64::qsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(AArch64::qsub_1, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::v4i64,
+ Ops);
+}
+
+SDNode *AArch64DAGToDAGISel::createDTripleNode(SDValue V0, SDValue V1,
+ SDValue V2) {
+ SDLoc dl(V0.getNode());
+ SDValue RegClass =
+ CurDAG->getTargetConstant(AArch64::DTripleRegClassID, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(AArch64::dsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(AArch64::dsub_1, MVT::i32);
+ SDValue SubReg2 = CurDAG->getTargetConstant(AArch64::dsub_2, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, V2, SubReg2 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped,
+ Ops);
+}
+
+SDNode *AArch64DAGToDAGISel::createQTripleNode(SDValue V0, SDValue V1,
+ SDValue V2) {
+ SDLoc dl(V0.getNode());
+ SDValue RegClass =
+ CurDAG->getTargetConstant(AArch64::QTripleRegClassID, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(AArch64::qsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(AArch64::qsub_1, MVT::i32);
+ SDValue SubReg2 = CurDAG->getTargetConstant(AArch64::qsub_2, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, V2, SubReg2 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped,
+ Ops);
+}
+
+SDNode *AArch64DAGToDAGISel::createDQuadNode(SDValue V0, SDValue V1, SDValue V2,
+ SDValue V3) {
+ SDLoc dl(V0.getNode());
+ SDValue RegClass =
+ CurDAG->getTargetConstant(AArch64::DQuadRegClassID, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(AArch64::dsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(AArch64::dsub_1, MVT::i32);
+ SDValue SubReg2 = CurDAG->getTargetConstant(AArch64::dsub_2, MVT::i32);
+ SDValue SubReg3 = CurDAG->getTargetConstant(AArch64::dsub_3, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, V2, SubReg2, V3,
+ SubReg3 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::v4i64,
+ Ops);
+}
+
+SDNode *AArch64DAGToDAGISel::createQQuadNode(SDValue V0, SDValue V1, SDValue V2,
+ SDValue V3) {
+ SDLoc dl(V0.getNode());
+ SDValue RegClass =
+ CurDAG->getTargetConstant(AArch64::QQuadRegClassID, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(AArch64::qsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(AArch64::qsub_1, MVT::i32);
+ SDValue SubReg2 = CurDAG->getTargetConstant(AArch64::qsub_2, MVT::i32);
+ SDValue SubReg3 = CurDAG->getTargetConstant(AArch64::qsub_3, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, V2, SubReg2, V3,
+ SubReg3 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::v8i64,
+ Ops);
+}
+
+SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
+ const uint16_t *Opcodes) {
+ assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
+
+ EVT VT = N->getValueType(0);
+ unsigned OpcodeIndex;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vector load type");
+ case MVT::v8i8: OpcodeIndex = 0; break;
+ case MVT::v4i16: OpcodeIndex = 1; break;
+ case MVT::v2f32:
+ case MVT::v2i32: OpcodeIndex = 2; break;
+ case MVT::v1f64:
+ case MVT::v1i64: OpcodeIndex = 3; break;
+ case MVT::v16i8: OpcodeIndex = 4; break;
+ case MVT::v8f16:
+ case MVT::v8i16: OpcodeIndex = 5; break;
+ case MVT::v4f32:
+ case MVT::v4i32: OpcodeIndex = 6; break;
+ case MVT::v2f64:
+ case MVT::v2i64: OpcodeIndex = 7; break;
+ }
+ unsigned Opc = Opcodes[OpcodeIndex];
+
+ SmallVector<SDValue, 2> Ops;
+ Ops.push_back(N->getOperand(2)); // Push back the Memory Address
+ Ops.push_back(N->getOperand(0)); // Push back the Chain
+
+ std::vector<EVT> ResTys;
+ bool is64BitVector = VT.is64BitVector();
+
+ if (NumVecs == 1)
+ ResTys.push_back(VT);
+ else if (NumVecs == 3)
+ ResTys.push_back(MVT::Untyped);
+ else {
+ EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,
+ is64BitVector ? NumVecs : NumVecs * 2);
+ ResTys.push_back(ResTy);
+ }
+
+ ResTys.push_back(MVT::Other); // Type of the Chain
+ SDLoc dl(N);
+ SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
+
+ // Transfer memoperands.
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
+
+ if (NumVecs == 1)
+ return VLd;
+
+ // If NumVecs > 1, the return result is a super register containing 2-4
+ // consecutive vector registers.
+ SDValue SuperReg = SDValue(VLd, 0);
+
+ unsigned Sub0 = is64BitVector ? AArch64::dsub_0 : AArch64::qsub_0;
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ ReplaceUses(SDValue(N, Vec),
+ CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
+ // Update users of the Chain
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
+
+ return NULL;
+}
+
+SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
+ const uint16_t *Opcodes) {
+ assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
+ SDLoc dl(N);
+
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+
+ unsigned Vec0Idx = 3;
+ EVT VT = N->getOperand(Vec0Idx).getValueType();
+ unsigned OpcodeIndex;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vector store type");
+ case MVT::v8i8: OpcodeIndex = 0; break;
+ case MVT::v4i16: OpcodeIndex = 1; break;
+ case MVT::v2f32:
+ case MVT::v2i32: OpcodeIndex = 2; break;
+ case MVT::v1f64:
+ case MVT::v1i64: OpcodeIndex = 3; break;
+ case MVT::v16i8: OpcodeIndex = 4; break;
+ case MVT::v8f16:
+ case MVT::v8i16: OpcodeIndex = 5; break;
+ case MVT::v4f32:
+ case MVT::v4i32: OpcodeIndex = 6; break;
+ case MVT::v2f64:
+ case MVT::v2i64: OpcodeIndex = 7; break;
+ }
+ unsigned Opc = Opcodes[OpcodeIndex];
+
+ std::vector<EVT> ResTys;
+ ResTys.push_back(MVT::Other); // Type for the Chain
+
+ SmallVector<SDValue, 6> Ops;
+ Ops.push_back(N->getOperand(2)); // Push back the Memory Address
+
+ bool is64BitVector = VT.is64BitVector();
+
+ SDValue V0 = N->getOperand(Vec0Idx + 0);
+ SDValue SrcReg;
+ if (NumVecs == 1)
+ SrcReg = V0;
+ else {
+ SDValue V1 = N->getOperand(Vec0Idx + 1);
+ if (NumVecs == 2)
+ SrcReg = is64BitVector ? SDValue(createDPairNode(V0, V1), 0)
+ : SDValue(createQPairNode(V0, V1), 0);
+ else {
+ SDValue V2 = N->getOperand(Vec0Idx + 2);
+ if (NumVecs == 3)
+ SrcReg = is64BitVector ? SDValue(createDTripleNode(V0, V1, V2), 0)
+ : SDValue(createQTripleNode(V0, V1, V2), 0);
+ else {
+ SDValue V3 = N->getOperand(Vec0Idx + 3);
+ SrcReg = is64BitVector ? SDValue(createDQuadNode(V0, V1, V2, V3), 0)
+ : SDValue(createQQuadNode(V0, V1, V2, V3), 0);
+ }
+ }
+ }
+ Ops.push_back(SrcReg);
+
+ // Push back the Chain
+ Ops.push_back(N->getOperand(0));
+
+ // Transfer memoperands.
+ SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
+ cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
+
+ return VSt;
+}
+
SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
// Dump information about the Node being selected
DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n");
@@ -536,6 +768,72 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
Node = ResNode;
break;
}
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ switch (IntNo) {
+ default:
+ break;
+
+ case Intrinsic::arm_neon_vld1: {
+ static const uint16_t Opcodes[] = { AArch64::LD1_8B, AArch64::LD1_4H,
+ AArch64::LD1_2S, AArch64::LD1_1D,
+ AArch64::LD1_16B, AArch64::LD1_8H,
+ AArch64::LD1_4S, AArch64::LD1_2D };
+ return SelectVLD(Node, 1, Opcodes);
+ }
+ case Intrinsic::arm_neon_vld2: {
+ static const uint16_t Opcodes[] = { AArch64::LD2_8B, AArch64::LD2_4H,
+ AArch64::LD2_2S, AArch64::LD1_2V_1D,
+ AArch64::LD2_16B, AArch64::LD2_8H,
+ AArch64::LD2_4S, AArch64::LD2_2D };
+ return SelectVLD(Node, 2, Opcodes);
+ }
+ case Intrinsic::arm_neon_vld3: {
+ static const uint16_t Opcodes[] = { AArch64::LD3_8B, AArch64::LD3_4H,
+ AArch64::LD3_2S, AArch64::LD1_3V_1D,
+ AArch64::LD3_16B, AArch64::LD3_8H,
+ AArch64::LD3_4S, AArch64::LD3_2D };
+ return SelectVLD(Node, 3, Opcodes);
+ }
+ case Intrinsic::arm_neon_vld4: {
+ static const uint16_t Opcodes[] = { AArch64::LD4_8B, AArch64::LD4_4H,
+ AArch64::LD4_2S, AArch64::LD1_4V_1D,
+ AArch64::LD4_16B, AArch64::LD4_8H,
+ AArch64::LD4_4S, AArch64::LD4_2D };
+ return SelectVLD(Node, 4, Opcodes);
+ }
+ case Intrinsic::arm_neon_vst1: {
+ static const uint16_t Opcodes[] = { AArch64::ST1_8B, AArch64::ST1_4H,
+ AArch64::ST1_2S, AArch64::ST1_1D,
+ AArch64::ST1_16B, AArch64::ST1_8H,
+ AArch64::ST1_4S, AArch64::ST1_2D };
+ return SelectVST(Node, 1, Opcodes);
+ }
+ case Intrinsic::arm_neon_vst2: {
+ static const uint16_t Opcodes[] = { AArch64::ST2_8B, AArch64::ST2_4H,
+ AArch64::ST2_2S, AArch64::ST1_2V_1D,
+ AArch64::ST2_16B, AArch64::ST2_8H,
+ AArch64::ST2_4S, AArch64::ST2_2D };
+ return SelectVST(Node, 2, Opcodes);
+ }
+ case Intrinsic::arm_neon_vst3: {
+ static const uint16_t Opcodes[] = { AArch64::ST3_8B, AArch64::ST3_4H,
+ AArch64::ST3_2S, AArch64::ST1_3V_1D,
+ AArch64::ST3_16B, AArch64::ST3_8H,
+ AArch64::ST3_4S, AArch64::ST3_2D };
+ return SelectVST(Node, 3, Opcodes);
+ }
+ case Intrinsic::arm_neon_vst4: {
+ static const uint16_t Opcodes[] = { AArch64::ST4_8B, AArch64::ST4_4H,
+ AArch64::ST4_2S, AArch64::ST1_4V_1D,
+ AArch64::ST4_16B, AArch64::ST4_8H,
+ AArch64::ST4_4S, AArch64::ST4_2D };
+ return SelectVST(Node, 4, Opcodes);
+ }
+ }
+ break;
+ }
default:
break; // Let generic code handle it
}