diff options
Diffstat (limited to 'lib/Target/ARM/ARMInstrNEON.td')
-rw-r--r-- | lib/Target/ARM/ARMInstrNEON.td | 41 |
1 files changed, 41 insertions, 0 deletions
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 4e4aa19..fd9b823 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -854,6 +854,47 @@ def VLD1DUPq16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>; def VLD1DUPq32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>; // VLD2DUP : Vector Load (single 2-element structure to all lanes) +class VLD2DUP<bits<4> op7_4, string Dt> + : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2), + (ins addrmode6:$Rn), IIC_VLD2dup, + "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> { + let Rm = 0b1111; + let Inst{4} = Rn{4}; +} + +def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8">; +def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16">; +def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32">; + +def VLD2DUPd8Pseudo : VLDQPseudo<IIC_VLD2dup>; +def VLD2DUPd16Pseudo : VLDQPseudo<IIC_VLD2dup>; +def VLD2DUPd32Pseudo : VLDQPseudo<IIC_VLD2dup>; + +// ...with double-spaced registers (not used for codegen): +def VLD2DUPd8Q : VLD2DUP<{0,0,1,?}, "8">; +def VLD2DUPd16Q : VLD2DUP<{0,1,1,?}, "16">; +def VLD2DUPd32Q : VLD2DUP<{1,0,1,?}, "32">; + +// ...with address register writeback: +class VLD2DUPWB<bits<4> op7_4, string Dt> + : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2dupu, + "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; +} + +def VLD2DUPd8_UPD : VLD2DUPWB<{0,0,0,0}, "8">; +def VLD2DUPd16_UPD : VLD2DUPWB<{0,1,0,?}, "16">; +def VLD2DUPd32_UPD : VLD2DUPWB<{1,0,0,?}, "32">; + +def VLD2DUPd8Q_UPD : VLD2DUPWB<{0,0,1,0}, "8">; +def VLD2DUPd16Q_UPD : VLD2DUPWB<{0,1,1,?}, "16">; +def VLD2DUPd32Q_UPD : VLD2DUPWB<{1,0,1,?}, "32">; + +def VLD2DUPd8Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>; +def VLD2DUPd16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>; +def VLD2DUPd32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>; + // VLD3DUP : Vector Load (single 3-element structure to all lanes) // VLD4DUP : Vector Load (single 4-element structure to all lanes) // FIXME: Not yet implemented. |