diff options
Diffstat (limited to 'lib/Target/ARM/ARMInstrNEON.td')
-rw-r--r-- | lib/Target/ARM/ARMInstrNEON.td | 165 |
1 files changed, 145 insertions, 20 deletions
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index b32b5d2..c02bb3b 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -6372,6 +6372,32 @@ multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy, dsub_0)>; } +// The following class definition is basically a copy of the +// Lengthen_HalfSingle definition above, however with an additional parameter +// "RevLanes" to select the correct VREV32dXX instruction. This is to convert +// data loaded by VLD1LN into proper vector format in big endian mode. +multiclass Lengthen_HalfSingle_Big_Endian<string DestLanes, string DestTy, string SrcTy, + string InsnLanes, string InsnTy, string RevLanes> { + def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)), + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy) + (!cast<Instruction>("VREV32d" # RevLanes) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), + dsub_0)>; + def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)), + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy) + (!cast<Instruction>("VREV32d" # RevLanes) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), + dsub_0)>; + def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)), + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy) + (!cast<Instruction>("VREV32d" # RevLanes) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), + dsub_0)>; +} + // extload, zextload and sextload for a lengthening load followed by another // lengthening load, to quadruple the initial length. // @@ -6406,6 +6432,36 @@ multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy, dsub_0))>; } +// The following class definition is basically a copy of the +// Lengthen_Double definition above, however with an additional parameter +// "RevLanes" to select the correct VREV32dXX instruction. This is to convert +// data loaded by VLD1LN into proper vector format in big endian mode. +multiclass Lengthen_Double_Big_Endian<string DestLanes, string DestTy, string SrcTy, + string Insn1Lanes, string Insn1Ty, string Insn2Lanes, + string Insn2Ty, string RevLanes> { + def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)), + (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) + (!cast<Instruction>("VREV32d" # RevLanes) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), + dsub_0))>; + def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)), + (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) + (!cast<Instruction>("VREV32d" # RevLanes) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), + dsub_0))>; + def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)), + (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty) + (!cast<Instruction>("VREV32d" # RevLanes) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), + dsub_0))>; +} + // extload, zextload and sextload for a lengthening load followed by another // lengthening load, to quadruple the initial length, but which ends up only // requiring half the available lanes (a 64-bit outcome instead of a 128-bit). @@ -6443,33 +6499,102 @@ multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy, dsub_0)>; } +// The following class definition is basically a copy of the +// Lengthen_HalfDouble definition above, however with an additional VREV16d8 +// instruction to convert data loaded by VLD1LN into proper vector format +// in big endian mode. +multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy, string SrcTy, + string Insn1Lanes, string Insn1Ty, string Insn2Lanes, + string Insn2Ty> { + def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)), + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) + (!cast<Instruction>("VREV16d8") + (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), + dsub_0)), + dsub_0)>; + def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)), + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) + (!cast<Instruction>("VREV16d8") + (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), + dsub_0)), + dsub_0)>; + def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)), + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty) + (!cast<Instruction>("VREV16d8") + (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), + dsub_0)), + dsub_0)>; +} + defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16 defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32 defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64 -defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16 -defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32 +let Predicates = [IsLE] in { + defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16 + defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32 -// Double lengthening - v4i8 -> v4i16 -> v4i32 -defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">; -// v2i8 -> v2i16 -> v2i32 -defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">; -// v2i16 -> v2i32 -> v2i64 -defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">; + // Double lengthening - v4i8 -> v4i16 -> v4i32 + defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">; + // v2i8 -> v2i16 -> v2i32 + defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">; + // v2i16 -> v2i32 -> v2i64 + defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">; +} + +let Predicates = [IsBE] in { + defm : Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">; // v4i8 -> v4i16 + defm : Lengthen_HalfSingle_Big_Endian<"2", "i32", "i16", "4", "i32", "16">; // v2i16 -> v2i32 + + // Double lengthening - v4i8 -> v4i16 -> v4i32 + defm : Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4", "i32", "8">; + // v2i8 -> v2i16 -> v2i32 + defm : Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16", "4", "i32">; + // v2i16 -> v2i32 -> v2i64 + defm : Lengthen_Double_Big_Endian<"2", "i64", "i16", "4", "i32", "2", "i64", "16">; +} // Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64 -def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)), - (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 - (VLD1LNd16 addrmode6:$addr, - (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; -def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)), - (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 - (VLD1LNd16 addrmode6:$addr, - (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; -def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)), - (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16 - (VLD1LNd16 addrmode6:$addr, - (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; +let Predicates = [IsLE] in { + def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)), + (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 + (VLD1LNd16 addrmode6:$addr, + (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; + def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)), + (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 + (VLD1LNd16 addrmode6:$addr, + (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; + def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)), + (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16 + (VLD1LNd16 addrmode6:$addr, + (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; +} +// The following patterns are basically a copy of the patterns above, +// however with an additional VREV16d instruction to convert data +// loaded by VLD1LN into proper vector format in big endian mode. +let Predicates = [IsBE] in { + def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)), + (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 + (!cast<Instruction>("VREV16d8") + (VLD1LNd16 addrmode6:$addr, + (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>; + def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)), + (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 + (!cast<Instruction>("VREV16d8") + (VLD1LNd16 addrmode6:$addr, + (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>; + def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)), + (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16 + (!cast<Instruction>("VREV16d8") + (VLD1LNd16 addrmode6:$addr, + (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>; +} //===----------------------------------------------------------------------===// // Assembler aliases |