diff options
author | Hao Liu <Hao.Liu@arm.com> | 2013-10-10 17:00:52 +0000 |
---|---|---|
committer | Hao Liu <Hao.Liu@arm.com> | 2013-10-10 17:00:52 +0000 |
commit | 6a5a667517160ca1b557002a29d08868ae029451 (patch) | |
tree | ab7bac232ae99d6b321cad35b0d0a5d8fbd39fcf /lib/Target/AArch64/AArch64InstrNEON.td | |
parent | 812ddcc50f8bc3ec6ce115863ff2263815906aaf (diff) | |
download | external_llvm-6a5a667517160ca1b557002a29d08868ae029451.zip external_llvm-6a5a667517160ca1b557002a29d08868ae029451.tar.gz external_llvm-6a5a667517160ca1b557002a29d08868ae029451.tar.bz2 |
Implement AArch64 vector load/store multiple N-element structure class SIMD(lselem).
This includes the following 14 instructions:
4 ld1 insts: load multiple 1-element structure to sequential 1/2/3/4 registers.
ld2/ld3/ld4: load multiple N-element structure to sequential N registers (N=2,3,4).
4 st1 insts: store multiple 1-element structure from sequential 1/2/3/4 registers.
st2/st3/st4: store multiple N-element structure from sequential N registers (N = 2,3,4).
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192361 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/AArch64/AArch64InstrNEON.td')
-rw-r--r-- | lib/Target/AArch64/AArch64InstrNEON.td | 126 |
1 files changed, 126 insertions, 0 deletions
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index a9f6061..355de53 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -2982,6 +2982,132 @@ defm PMULL2vvv : NeonI_3VDL2_2Op_mull_v3<0b0, 0b1110, "pmull2", // End of implementation for instruction class (3V Diff) +// The following are vector load/store multiple N-element structure +// instructions (class SIMD lselem). + +// ld1: load multiple 1-element structure to 1/2/3/4 registers. +// ld2/ld3/ld4: load multiple N-element structure to N registers (N = 2, 3, 4). +// The structure consists of a sequence of sets of N values. +// The first element of the structure is placed in the first lane +// of the first vector, the second element in the first lane +// of the second vector, and so on. +// E.g. LD1_3V_2S will load 32-bit elements {A, B, C, D, E, F} sequentially into +// the list of three 64-bit vectors {BA, DC, FE}. +// E.g. LD3_2S will load 32-bit elements {A, B, C, D, E, F} into the list of +// three 64-bit vectors {DA, EB, FC}. +// Store instructions store multiple structures from N registers like load. 
+ + +class NeonI_LDVList<bit q, bits<4> opcode, bits<2> size, + RegisterOperand VecList, string asmop> + : NeonI_LdStMult<q, 1, opcode, size, + (outs VecList:$Rt), (ins GPR64xsp:$Rn), + asmop # "\t$Rt, [$Rn]", + [], + NoItinerary> { + let mayLoad = 1; + let neverHasSideEffects = 1; +} + +multiclass LDVList_BHSD<bits<4> opcode, string List, string asmop> { + def _8B : NeonI_LDVList<0, opcode, 0b00, + !cast<RegisterOperand>(List # "8B_operand"), asmop>; + + def _4H : NeonI_LDVList<0, opcode, 0b01, + !cast<RegisterOperand>(List # "4H_operand"), asmop>; + + def _2S : NeonI_LDVList<0, opcode, 0b10, + !cast<RegisterOperand>(List # "2S_operand"), asmop>; + + def _16B : NeonI_LDVList<1, opcode, 0b00, + !cast<RegisterOperand>(List # "16B_operand"), asmop>; + + def _8H : NeonI_LDVList<1, opcode, 0b01, + !cast<RegisterOperand>(List # "8H_operand"), asmop>; + + def _4S : NeonI_LDVList<1, opcode, 0b10, + !cast<RegisterOperand>(List # "4S_operand"), asmop>; + + def _2D : NeonI_LDVList<1, opcode, 0b11, + !cast<RegisterOperand>(List # "2D_operand"), asmop>; +} + +// Load multiple N-element structure to N consecutive registers (N = 1,2,3,4) +defm LD1 : LDVList_BHSD<0b0111, "VOne", "ld1">; +def LD1_1D : NeonI_LDVList<0, 0b0111, 0b11, VOne1D_operand, "ld1">; + +defm LD2 : LDVList_BHSD<0b1000, "VPair", "ld2">; + +defm LD3 : LDVList_BHSD<0b0100, "VTriple", "ld3">; + +defm LD4 : LDVList_BHSD<0b0000, "VQuad", "ld4">; + +// Load multiple 1-element structure to N consecutive registers (N = 2,3,4) +defm LD1_2V : LDVList_BHSD<0b1010, "VPair", "ld1">; +def LD1_2V_1D : NeonI_LDVList<0, 0b1010, 0b11, VPair1D_operand, "ld1">; + +defm LD1_3V : LDVList_BHSD<0b0110, "VTriple", "ld1">; +def LD1_3V_1D : NeonI_LDVList<0, 0b0110, 0b11, VTriple1D_operand, "ld1">; + +defm LD1_4V : LDVList_BHSD<0b0010, "VQuad", "ld1">; +def LD1_4V_1D : NeonI_LDVList<0, 0b0010, 0b11, VQuad1D_operand, "ld1">; + +class NeonI_STVList<bit q, bits<4> opcode, bits<2> size, + RegisterOperand VecList, string asmop> + : 
NeonI_LdStMult<q, 0, opcode, size, + (outs), (ins GPR64xsp:$Rn, VecList:$Rt), + asmop # "\t$Rt, [$Rn]", + [], + NoItinerary> { + let mayStore = 1; + let neverHasSideEffects = 1; +} + +multiclass STVList_BHSD<bits<4> opcode, string List, string asmop> { + def _8B : NeonI_STVList<0, opcode, 0b00, + !cast<RegisterOperand>(List # "8B_operand"), asmop>; + + def _4H : NeonI_STVList<0, opcode, 0b01, + !cast<RegisterOperand>(List # "4H_operand"), asmop>; + + def _2S : NeonI_STVList<0, opcode, 0b10, + !cast<RegisterOperand>(List # "2S_operand"), asmop>; + + def _16B : NeonI_STVList<1, opcode, 0b00, + !cast<RegisterOperand>(List # "16B_operand"), asmop>; + + def _8H : NeonI_STVList<1, opcode, 0b01, + !cast<RegisterOperand>(List # "8H_operand"), asmop>; + + def _4S : NeonI_STVList<1, opcode, 0b10, + !cast<RegisterOperand>(List # "4S_operand"), asmop>; + + def _2D : NeonI_STVList<1, opcode, 0b11, + !cast<RegisterOperand>(List # "2D_operand"), asmop>; +} + +// Store multiple N-element structures from N registers (N = 1,2,3,4) +defm ST1 : STVList_BHSD<0b0111, "VOne", "st1">; +def ST1_1D : NeonI_STVList<0, 0b0111, 0b11, VOne1D_operand, "st1">; + +defm ST2 : STVList_BHSD<0b1000, "VPair", "st2">; + +defm ST3 : STVList_BHSD<0b0100, "VTriple", "st3">; + +defm ST4 : STVList_BHSD<0b0000, "VQuad", "st4">; + +// Store multiple 1-element structures from N consecutive registers (N = 2,3,4) +defm ST1_2V : STVList_BHSD<0b1010, "VPair", "st1">; +def ST1_2V_1D : NeonI_STVList<0, 0b1010, 0b11, VPair1D_operand, "st1">; + +defm ST1_3V : STVList_BHSD<0b0110, "VTriple", "st1">; +def ST1_3V_1D : NeonI_STVList<0, 0b0110, 0b11, VTriple1D_operand, "st1">; + +defm ST1_4V : STVList_BHSD<0b0010, "VQuad", "st1">; +def ST1_4V_1D : NeonI_STVList<0, 0b0010, 0b11, VQuad1D_operand, "st1">; + +// End of vector load/store multiple N-element structure(class SIMD lselem) + // Scalar Arithmetic class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop> |