diff options
-rw-r--r-- | lib/Target/ARM/ARMExpandPseudoInsts.cpp | 79 | ||||
-rw-r--r-- | lib/Target/ARM/ARMISelDAGToDAG.cpp | 18 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrNEON.td | 21 | ||||
-rw-r--r-- | lib/Target/ARM/NEONPreAllocPass.cpp | 22 |
4 files changed, 81 insertions, 59 deletions
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 56c6468..9b0a91b 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -48,8 +48,8 @@ namespace { void TransferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI); bool ExpandMBB(MachineBasicBlock &MBB); - void ExpandVST4(MachineBasicBlock::iterator &MBBI, unsigned Opc, - bool hasWriteBack, NEONRegSpacing RegSpc); + void ExpandVST(MachineBasicBlock::iterator &MBBI, unsigned Opc, + bool hasWriteBack, NEONRegSpacing RegSpc, unsigned NumRegs); }; char ARMExpandPseudo::ID = 0; } @@ -72,11 +72,11 @@ void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI, } } -/// ExpandVST4 - Translate VST4 pseudo instructions with QQ or QQQQ register -/// operands to real VST4 instructions with 4 D register operands. -void ARMExpandPseudo::ExpandVST4(MachineBasicBlock::iterator &MBBI, - unsigned Opc, bool hasWriteBack, - NEONRegSpacing RegSpc) { +/// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register +/// operands to real VST instructions with D register operands. +void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI, + unsigned Opc, bool hasWriteBack, + NEONRegSpacing RegSpc, unsigned NumRegs) { MachineInstr &MI = *MBBI; MachineBasicBlock &MBB = *MI.getParent(); @@ -111,7 +111,7 @@ void ARMExpandPseudo::ExpandVST4(MachineBasicBlock::iterator &MBBI, D2 = TRI->getSubReg(SrcReg, ARM::dsub_4); D3 = TRI->getSubReg(SrcReg, ARM::dsub_6); } else { - assert(RegSpc == OddDblSpc && "unknown register spacing for VST4"); + assert(RegSpc == OddDblSpc && "unknown register spacing for VST"); D0 = TRI->getSubReg(SrcReg, ARM::dsub_1); D1 = TRI->getSubReg(SrcReg, ARM::dsub_3); D2 = TRI->getSubReg(SrcReg, ARM::dsub_5); @@ -120,8 +120,9 @@ void ARMExpandPseudo::ExpandVST4(MachineBasicBlock::iterator &MBBI, MIB.addReg(D0, getKillRegState(SrcIsKill)) .addReg(D1, getKillRegState(SrcIsKill)) - .addReg(D2, getKillRegState(SrcIsKill)) - .addReg(D3, getKillRegState(SrcIsKill)); + .addReg(D2, getKillRegState(SrcIsKill)); + if (NumRegs > 3) + MIB.addReg(D3, getKillRegState(SrcIsKill)); MIB = AddDefaultPred(MIB); TransferImpOps(MI, MIB, MIB); MI.eraseFromParent(); @@ -223,35 +224,63 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { MI.eraseFromParent(); } + case ARM::VST3d8Pseudo: + ExpandVST(MBBI, ARM::VST3d8, false, SingleSpc, 3); break; + case ARM::VST3d16Pseudo: + ExpandVST(MBBI, ARM::VST3d16, false, SingleSpc, 3); break; + case ARM::VST3d32Pseudo: + ExpandVST(MBBI, ARM::VST3d32, false, SingleSpc, 3); break; + case ARM::VST1d64TPseudo: + ExpandVST(MBBI, ARM::VST1d64T, false, SingleSpc, 3); break; + case ARM::VST3d8Pseudo_UPD: + ExpandVST(MBBI, ARM::VST3d8_UPD, true, SingleSpc, 3); break; + case ARM::VST3d16Pseudo_UPD: + ExpandVST(MBBI, ARM::VST3d16_UPD, true, SingleSpc, 3); break; + case ARM::VST3d32Pseudo_UPD: + ExpandVST(MBBI, ARM::VST3d32_UPD, true, SingleSpc, 3); break; + case ARM::VST1d64TPseudo_UPD: + ExpandVST(MBBI, ARM::VST1d64T_UPD, true, SingleSpc, 3); break; + case ARM::VST3q8Pseudo_UPD: + ExpandVST(MBBI, ARM::VST3q8_UPD, true, EvenDblSpc, 3); break; + case ARM::VST3q16Pseudo_UPD: + ExpandVST(MBBI, ARM::VST3q16_UPD, true, EvenDblSpc, 3); break; + case ARM::VST3q32Pseudo_UPD: + ExpandVST(MBBI, ARM::VST3q32_UPD, true, EvenDblSpc, 3); break; + case ARM::VST3q8oddPseudo_UPD: + ExpandVST(MBBI, ARM::VST3q8_UPD, true, OddDblSpc, 3); break; + case ARM::VST3q16oddPseudo_UPD: + ExpandVST(MBBI, ARM::VST3q16_UPD, true, OddDblSpc, 3); break; + case ARM::VST3q32oddPseudo_UPD: + ExpandVST(MBBI, ARM::VST3q32_UPD, true, OddDblSpc, 3); break; + case ARM::VST4d8Pseudo: - ExpandVST4(MBBI, ARM::VST4d8, false, SingleSpc); break; + ExpandVST(MBBI, ARM::VST4d8, false, SingleSpc, 4); break; case ARM::VST4d16Pseudo: - ExpandVST4(MBBI, ARM::VST4d16, false, SingleSpc); break; + ExpandVST(MBBI, ARM::VST4d16, false, SingleSpc, 4); break; case ARM::VST4d32Pseudo: - ExpandVST4(MBBI, ARM::VST4d32, false, SingleSpc); break; + ExpandVST(MBBI, ARM::VST4d32, false, SingleSpc, 4); break; case ARM::VST1d64QPseudo: - ExpandVST4(MBBI, ARM::VST1d64Q, false, SingleSpc); break; + ExpandVST(MBBI, ARM::VST1d64Q, false, SingleSpc, 4); break; case ARM::VST4d8Pseudo_UPD: - ExpandVST4(MBBI, ARM::VST4d8_UPD, true, SingleSpc); break; + ExpandVST(MBBI, ARM::VST4d8_UPD, true, SingleSpc, 4); break; case ARM::VST4d16Pseudo_UPD: - ExpandVST4(MBBI, ARM::VST4d16_UPD, true, SingleSpc); break; + ExpandVST(MBBI, ARM::VST4d16_UPD, true, SingleSpc, 4); break; case ARM::VST4d32Pseudo_UPD: - ExpandVST4(MBBI, ARM::VST4d32_UPD, true, SingleSpc); break; + ExpandVST(MBBI, ARM::VST4d32_UPD, true, SingleSpc, 4); break; case ARM::VST1d64QPseudo_UPD: - ExpandVST4(MBBI, ARM::VST1d64Q_UPD, true, SingleSpc); break; + ExpandVST(MBBI, ARM::VST1d64Q_UPD, true, SingleSpc, 4); break; case ARM::VST4q8Pseudo_UPD: - ExpandVST4(MBBI, ARM::VST4q8_UPD, true, EvenDblSpc); break; + ExpandVST(MBBI, ARM::VST4q8_UPD, true, EvenDblSpc, 4); break; case ARM::VST4q16Pseudo_UPD: - ExpandVST4(MBBI, ARM::VST4q16_UPD, true, EvenDblSpc); break; + ExpandVST(MBBI, ARM::VST4q16_UPD, true, EvenDblSpc, 4); break; case ARM::VST4q32Pseudo_UPD: - ExpandVST4(MBBI, ARM::VST4q32_UPD, true, EvenDblSpc); break; + ExpandVST(MBBI, ARM::VST4q32_UPD, true, EvenDblSpc, 4); break; case ARM::VST4q8oddPseudo_UPD: - ExpandVST4(MBBI, ARM::VST4q8_UPD, true, OddDblSpc); break; + ExpandVST(MBBI, ARM::VST4q8_UPD, true, OddDblSpc, 4); break; case ARM::VST4q16oddPseudo_UPD: - ExpandVST4(MBBI, ARM::VST4q16_UPD, true, OddDblSpc); break; + ExpandVST(MBBI, ARM::VST4q16_UPD, true, OddDblSpc, 4); break; case ARM::VST4q32oddPseudo_UPD: - ExpandVST4(MBBI, ARM::VST4q32_UPD, true, OddDblSpc); break; - break; + ExpandVST(MBBI, ARM::VST4q32_UPD, true, OddDblSpc, 4); break; } if (ModifiedOp) diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 3619084..6ba13a7 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -1262,7 +1262,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, // FIXME: This is a temporary flag to distinguish VSTs that have been // converted to pseudo instructions. - bool usePseudoInstrs = (NumVecs == 4); + bool usePseudoInstrs = (NumVecs >= 3); if (is64BitVector) { if (NumVecs >= 2) { @@ -2317,14 +2317,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } case Intrinsic::arm_neon_vst3: { - unsigned DOpcodes[] = { ARM::VST3d8, ARM::VST3d16, - ARM::VST3d32, ARM::VST1d64T }; - unsigned QOpcodes0[] = { ARM::VST3q8_UPD, - ARM::VST3q16_UPD, - ARM::VST3q32_UPD }; - unsigned QOpcodes1[] = { ARM::VST3q8odd_UPD, - ARM::VST3q16odd_UPD, - ARM::VST3q32odd_UPD }; + unsigned DOpcodes[] = { ARM::VST3d8Pseudo, ARM::VST3d16Pseudo, + ARM::VST3d32Pseudo, ARM::VST1d64TPseudo }; + unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, + ARM::VST3q16Pseudo_UPD, + ARM::VST3q32Pseudo_UPD }; + unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD, + ARM::VST3q16oddPseudo_UPD, + ARM::VST3q32oddPseudo_UPD }; return SelectVST(N, 3, DOpcodes, QOpcodes0, QOpcodes1); } diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index cf697b2..057d9bf 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -560,6 +560,9 @@ def VST1d16T_UPD : VST1D3WB<0b0100, "16">; def VST1d32T_UPD : VST1D3WB<0b1000, "32">; def VST1d64T_UPD : VST1D3WB<0b1100, "64">; +def VST1d64TPseudo : VSTQQPseudo; +def VST1d64TPseudo_UPD : VSTQQWBPseudo; + // ...with 4 registers (some of these are only for the disassembler): class VST1D4<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0010, op7_4, (outs), @@ -644,6 +647,10 @@ def VST3d8 : VST3D<0b0100, 0b0000, "8">; def VST3d16 : VST3D<0b0100, 0b0100, "16">; def VST3d32 : VST3D<0b0100, 0b1000, "32">; +def VST3d8Pseudo : VSTQQPseudo; +def VST3d16Pseudo : VSTQQPseudo; +def VST3d32Pseudo : VSTQQPseudo; + // ...with address register writeback: class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), @@ -656,6 +663,10 @@ def VST3d8_UPD : VST3DWB<0b0100, 0b0000, "8">; def VST3d16_UPD : VST3DWB<0b0100, 0b0100, "16">; def VST3d32_UPD : VST3DWB<0b0100, 0b1000, "32">; +def VST3d8Pseudo_UPD : VSTQQWBPseudo; +def VST3d16Pseudo_UPD : VSTQQWBPseudo; +def VST3d32Pseudo_UPD : VSTQQWBPseudo; + // ...with double-spaced registers (non-updating versions for disassembly only): def VST3q8 : VST3D<0b0101, 0b0000, "8">; def VST3q16 : VST3D<0b0101, 0b0100, "16">; @@ -664,10 +675,14 @@ def VST3q8_UPD : VST3DWB<0b0101, 0b0000, "8">; def VST3q16_UPD : VST3DWB<0b0101, 0b0100, "16">; def VST3q32_UPD : VST3DWB<0b0101, 0b1000, "32">; +def VST3q8Pseudo_UPD : VSTQQQQWBPseudo; +def VST3q16Pseudo_UPD : VSTQQQQWBPseudo; +def VST3q32Pseudo_UPD : VSTQQQQWBPseudo; + // ...alternate versions to be allocated odd register numbers: -def VST3q8odd_UPD : VST3DWB<0b0101, 0b0000, "8">; -def VST3q16odd_UPD : VST3DWB<0b0101, 0b0100, "16">; -def VST3q32odd_UPD : VST3DWB<0b0101, 0b1000, "32">; +def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo; +def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo; +def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo; // VST4 : Vector Store (multiple 4-element structures) class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt> diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp index 1439995..254ac23 100644 --- a/lib/Target/ARM/NEONPreAllocPass.cpp +++ b/lib/Target/ARM/NEONPreAllocPass.cpp @@ -215,10 +215,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, Stride = 2; return true; - case ARM::VST3d8: - case ARM::VST3d16: - case ARM::VST3d32: - case ARM::VST1d64T: case ARM::VST3LNd8: case ARM::VST3LNd16: case ARM::VST3LNd32: @@ -226,24 +222,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, NumRegs = 3; return true; - case ARM::VST3q8_UPD: - case ARM::VST3q16_UPD: - case ARM::VST3q32_UPD: - FirstOpnd = 4; - NumRegs = 3; - Offset = 0; - Stride = 2; - return true; - - case ARM::VST3q8odd_UPD: - case ARM::VST3q16odd_UPD: - case ARM::VST3q32odd_UPD: - FirstOpnd = 4; - NumRegs = 3; - Offset = 1; - Stride = 2; - return true; - case ARM::VST3LNq16: case ARM::VST3LNq32: FirstOpnd = 2; |