diff options
author | Bob Wilson <bob.wilson@apple.com> | 2009-10-07 17:24:55 +0000 |
---|---|---|
committer | Bob Wilson <bob.wilson@apple.com> | 2009-10-07 17:24:55 +0000 |
commit | ff8952e8a9dc01bba9605e90bd3b823d3cf43619 (patch) | |
tree | ed72b51350200df9cfb08c2584e8f637c45cd666 /lib | |
parent | 228c08b8ddff75a9d5d617ab12eb683a25fe17a8 (diff) | |
download | external_llvm-ff8952e8a9dc01bba9605e90bd3b823d3cf43619.zip external_llvm-ff8952e8a9dc01bba9605e90bd3b823d3cf43619.tar.gz external_llvm-ff8952e8a9dc01bba9605e90bd3b823d3cf43619.tar.bz2 |
Add codegen support for NEON vld3 intrinsics with 128-bit vectors.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@83471 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/ARM/ARMISelDAGToDAG.cpp | 55 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrNEON.td | 15 | ||||
-rw-r--r-- | lib/Target/ARM/NEONPreAllocPass.cpp | 35 |
3 files changed, 92 insertions, 13 deletions
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index ebff3e1..bdc2940 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -444,7 +444,7 @@ bool ARMDAGToDAGISel::SelectAddrMode6(SDValue Op, SDValue N, SDValue &Addr, SDValue &Update, SDValue &Opc) { Addr = N; - // The optional writeback is handled in ARMLoadStoreOpt. + // Default to no writeback. Update = CurDAG->getRegister(0, MVT::i32); Opc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(false), MVT::i32); return true; @@ -1388,16 +1388,57 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { SDValue MemAddr, MemUpdate, MemOpc; if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc)) return NULL; + if (VT.is64BitVector()) { + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("unhandled vld3 type"); + case MVT::v8i8: Opc = ARM::VLD3d8; break; + case MVT::v4i16: Opc = ARM::VLD3d16; break; + case MVT::v2f32: + case MVT::v2i32: Opc = ARM::VLD3d32; break; + } + SDValue Chain = N->getOperand(0); + const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain }; + return CurDAG->getMachineNode(Opc, dl, VT, VT, VT, MVT::Other, Ops, 4); + } + // Quad registers are loaded with two separate instructions, where one + // loads the even registers and the other loads the odd registers. + EVT RegVT = VT; + unsigned Opc2 = 0; switch (VT.getSimpleVT().SimpleTy) { default: llvm_unreachable("unhandled vld3 type"); - case MVT::v8i8: Opc = ARM::VLD3d8; break; - case MVT::v4i16: Opc = ARM::VLD3d16; break; - case MVT::v2f32: - case MVT::v2i32: Opc = ARM::VLD3d32; break; + case MVT::v16i8: + Opc = ARM::VLD3q8a; Opc2 = ARM::VLD3q8b; RegVT = MVT::v8i8; break; + case MVT::v8i16: + Opc = ARM::VLD3q16a; Opc2 = ARM::VLD3q16b; RegVT = MVT::v4i16; break; + case MVT::v4f32: + Opc = ARM::VLD3q32a; Opc2 = ARM::VLD3q32b; RegVT = MVT::v2f32; break; + case MVT::v4i32: + Opc = ARM::VLD3q32a; Opc2 = ARM::VLD3q32b; RegVT = MVT::v2i32; break; } SDValue Chain = N->getOperand(0); - const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain }; - return CurDAG->getMachineNode(Opc, dl, VT, VT, VT, MVT::Other, Ops, 4); + // Enable writeback to the address register. + MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32); + + std::vector<EVT> ResTys(3, RegVT); + ResTys.push_back(MemAddr.getValueType()); + ResTys.push_back(MVT::Other); + + const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Chain }; + SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 4); + Chain = SDValue(VLdA, 4); + + const SDValue OpsB[] = { SDValue(VLdA, 3), MemUpdate, MemOpc, Chain }; + SDNode *VLdB = CurDAG->getMachineNode(Opc2, dl, ResTys, OpsB, 4); + Chain = SDValue(VLdB, 4); + + SDNode *Q0 = PairDRegs(VT, SDValue(VLdA, 0), SDValue(VLdB, 0)); + SDNode *Q1 = PairDRegs(VT, SDValue(VLdA, 1), SDValue(VLdB, 1)); + SDNode *Q2 = PairDRegs(VT, SDValue(VLdA, 2), SDValue(VLdB, 2)); + ReplaceUses(SDValue(N, 0), SDValue(Q0, 0)); + ReplaceUses(SDValue(N, 1), SDValue(Q1, 0)); + ReplaceUses(SDValue(N, 2), SDValue(Q2, 0)); + ReplaceUses(SDValue(N, 3), Chain); + return NULL; } case Intrinsic::arm_neon_vld4: { diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 3f07d30..c7ff523 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -201,11 +201,26 @@ class VLD3D<string OpcodeStr> : NLdSt<(outs DPR:$dst1, DPR:$dst2, DPR:$dst3), (ins addrmode6:$addr), IIC_VLD3, !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3\\}, $addr"), "", []>; +class VLD3WB<string OpcodeStr> + : NLdSt<(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), + (ins addrmode6:$addr), IIC_VLD3, + !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3\\}, $addr"), + "$addr.addr = $wb", []>; def VLD3d8 : VLD3D<"vld3.8">; def VLD3d16 : VLD3D<"vld3.16">; def VLD3d32 : VLD3D<"vld3.32">; +// vld3 to double-spaced even registers. +def VLD3q8a : VLD3WB<"vld3.8">; +def VLD3q16a : VLD3WB<"vld3.16">; +def VLD3q32a : VLD3WB<"vld3.32">; + +// vld3 to double-spaced odd registers. +def VLD3q8b : VLD3WB<"vld3.8">; +def VLD3q16b : VLD3WB<"vld3.16">; +def VLD3q32b : VLD3WB<"vld3.32">; + // VLD4 : Vector Load (multiple 4-element structures) class VLD4D<string OpcodeStr> : NLdSt<(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp index da1c662..fab62f6 100644 --- a/lib/Target/ARM/NEONPreAllocPass.cpp +++ b/lib/Target/ARM/NEONPreAllocPass.cpp @@ -36,8 +36,12 @@ namespace { char NEONPreAllocPass::ID = 0; } -static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, - unsigned &NumRegs) { +static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, + unsigned &Offset, unsigned &Stride) { + // Default to unit stride with no offset. + Stride = 1; + Offset = 0; + switch (Opcode) { default: break; @@ -69,6 +73,24 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, NumRegs = 3; return true; + case ARM::VLD3q8a: + case ARM::VLD3q16a: + case ARM::VLD3q32a: + FirstOpnd = 0; + NumRegs = 3; + Offset = 0; + Stride = 2; + return true; + + case ARM::VLD3q8b: + case ARM::VLD3q16b: + case ARM::VLD3q32b: + FirstOpnd = 0; + NumRegs = 3; + Offset = 1; + Stride = 2; + return true; + case ARM::VLD4d8: case ARM::VLD4d16: case ARM::VLD4d32: @@ -149,8 +171,8 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) { MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); for (; MBBI != E; ++MBBI) { MachineInstr *MI = &*MBBI; - unsigned FirstOpnd, NumRegs; - if (!isNEONMultiRegOp(MI->getOpcode(), FirstOpnd, NumRegs)) + unsigned FirstOpnd, NumRegs, Offset, Stride; + if (!isNEONMultiRegOp(MI->getOpcode(), FirstOpnd, NumRegs, Offset, Stride)) continue; MachineBasicBlock::iterator NextI = next(MBBI); @@ -164,9 +186,10 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) { // For now, just assign a fixed set of adjacent registers. // This leaves plenty of room for future improvements. static const unsigned NEONDRegs[] = { - ARM::D0, ARM::D1, ARM::D2, ARM::D3 + ARM::D0, ARM::D1, ARM::D2, ARM::D3, + ARM::D4, ARM::D5, ARM::D6, ARM::D7 }; - MO.setReg(NEONDRegs[R]); + MO.setReg(NEONDRegs[Offset + R * Stride]); if (MO.isUse()) { // Insert a copy from VirtReg. |