diff options
author | Bill Wendling <isanbard@gmail.com> | 2013-12-03 07:38:30 +0000 |
---|---|---|
committer | Bill Wendling <isanbard@gmail.com> | 2013-12-03 07:38:30 +0000 |
commit | 38348240d179131d9292c28c7540ced97b29ed8b (patch) | |
tree | e5fe674886165856075581c496cdf11268468ad9 /lib | |
parent | cdf67d5791d044a5f217114e18eb8d6242222b98 (diff) | |
download | external_llvm-38348240d179131d9292c28c7540ced97b29ed8b.zip external_llvm-38348240d179131d9292c28c7540ced97b29ed8b.tar.gz external_llvm-38348240d179131d9292c28c7540ced97b29ed8b.tar.bz2 |
Merging r196151:
------------------------------------------------------------------------
r196151 | mcrosier | 2013-12-02 13:05:16 -0800 (Mon, 02 Dec 2013) | 2 lines
[AArch64] Implemented vcopy_lane patterns using scalar DUP instruction.
Patch by Ana Pazos!
------------------------------------------------------------------------
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@196230 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/AArch64/AArch64InstrNEON.td | 125 |
1 files changed, 97 insertions, 28 deletions
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index 6a339c8..581ebae 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -5731,28 +5731,13 @@ multiclass NeonI_Scalar_DUP_Elt_pattern<Instruction DUPI, ValueType ResTy, OpNImm:$Imm))>; } -multiclass NeonI_SDUP<PatFrag GetLow, PatFrag GetHigh, - ValueType ResTy, ValueType OpTy> { - def : Pat<(ResTy (GetLow VPR128:$Rn)), - (ResTy (EXTRACT_SUBREG (OpTy VPR128:$Rn), sub_64))>; - def : Pat<(ResTy (GetHigh VPR128:$Rn)), - (ResTy (DUPdv_D (OpTy VPR128:$Rn), 1))>; -} - -defm : NeonI_SDUP<Neon_Low16B, Neon_High16B, v8i8, v16i8>; -defm : NeonI_SDUP<Neon_Low8H, Neon_High8H, v4i16, v8i16>; -defm : NeonI_SDUP<Neon_Low4S, Neon_High4S, v2i32, v4i32>; -defm : NeonI_SDUP<Neon_Low2D, Neon_High2D, v1i64, v2i64>; -defm : NeonI_SDUP<Neon_Low4float, Neon_High4float, v2f32, v4f32>; -defm : NeonI_SDUP<Neon_Low2double, Neon_High2double, v1f64, v2f64>; - // Patterns for vector extract of FP data using scalar DUP instructions defm : NeonI_Scalar_DUP_Elt_pattern<DUPsv_S, f32, v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>; defm : NeonI_Scalar_DUP_Elt_pattern<DUPdv_D, f64, v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>; -multiclass NeonI_Scalar_DUP_Vec_pattern<Instruction DUPI, +multiclass NeonI_Scalar_DUP_Ext_Vec_pattern<Instruction DUPI, ValueType ResTy, ValueType OpTy,Operand OpLImm, ValueType NOpTy, ValueType ExTy, Operand OpNImm> { @@ -5764,14 +5749,87 @@ multiclass NeonI_Scalar_DUP_Vec_pattern<Instruction DUPI, (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), OpNImm:$Imm))>; } -// Patterns for extract subvectors of v1ix data using scalar DUP instructions -defm : NeonI_Scalar_DUP_Vec_pattern<DUPbv_B, - v1i8, v16i8, neon_uimm4_bare, v8i8, v16i8, neon_uimm3_bare>; -defm : NeonI_Scalar_DUP_Vec_pattern<DUPhv_H, - v1i16, v8i16, neon_uimm3_bare, v4i16, v8i16, neon_uimm2_bare>; -defm : NeonI_Scalar_DUP_Vec_pattern<DUPsv_S, - v1i32, v4i32, neon_uimm2_bare, v2i32, v4i32, neon_uimm1_bare>; +// Patterns for extract subvectors of v1ix data using scalar DUP instructions. +defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPbv_B, v1i8, v16i8, neon_uimm4_bare, + v8i8, v16i8, neon_uimm3_bare>; +defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPhv_H, v1i16, v8i16, neon_uimm3_bare, + v4i16, v8i16, neon_uimm2_bare>; +defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPsv_S, v1i32, v4i32, neon_uimm2_bare, + v2i32, v4i32, neon_uimm1_bare>; + +multiclass NeonI_Scalar_DUP_Copy_pattern1<Instruction DUPI, ValueType ResTy, + ValueType OpTy, ValueType ElemTy, + Operand OpImm, ValueType OpNTy, + ValueType ExTy, Operand OpNImm> { + + def : Pat<(ResTy (vector_insert (ResTy undef), + (ElemTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)), + (neon_uimm0_bare:$Imm))), + (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>; + + def : Pat<(ResTy (vector_insert (ResTy undef), + (ElemTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)), + (OpNImm:$Imm))), + (ResTy (DUPI + (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), + OpNImm:$Imm))>; +} + +multiclass NeonI_Scalar_DUP_Copy_pattern2<Instruction DUPI, ValueType ResTy, + ValueType OpTy, ValueType ElemTy, + Operand OpImm, ValueType OpNTy, + ValueType ExTy, Operand OpNImm> { + + def : Pat<(ResTy (scalar_to_vector + (ElemTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)))), + (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>; + + def : Pat<(ResTy (scalar_to_vector + (ElemTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)))), + (ResTy (DUPI + (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), + OpNImm:$Imm))>; +} + +// Patterns for vector copy to v1ix and v1fx vectors using scalar DUP +// instructions. +defm : NeonI_Scalar_DUP_Copy_pattern1<DUPdv_D, + v1i64, v2i64, i64, neon_uimm1_bare, + v1i64, v2i64, neon_uimm0_bare>; +defm : NeonI_Scalar_DUP_Copy_pattern1<DUPsv_S, + v1i32, v4i32, i32, neon_uimm2_bare, + v2i32, v4i32, neon_uimm1_bare>; +defm : NeonI_Scalar_DUP_Copy_pattern1<DUPhv_H, + v1i16, v8i16, i32, neon_uimm3_bare, + v4i16, v8i16, neon_uimm2_bare>; +defm : NeonI_Scalar_DUP_Copy_pattern1<DUPbv_B, + v1i8, v16i8, i32, neon_uimm4_bare, + v8i8, v16i8, neon_uimm3_bare>; +defm : NeonI_Scalar_DUP_Copy_pattern1<DUPdv_D, + v1f64, v2f64, f64, neon_uimm1_bare, + v1f64, v2f64, neon_uimm0_bare>; +defm : NeonI_Scalar_DUP_Copy_pattern1<DUPsv_S, + v1f32, v4f32, f32, neon_uimm2_bare, + v2f32, v4f32, neon_uimm1_bare>; +defm : NeonI_Scalar_DUP_Copy_pattern2<DUPdv_D, + v1i64, v2i64, i64, neon_uimm1_bare, + v1i64, v2i64, neon_uimm0_bare>; +defm : NeonI_Scalar_DUP_Copy_pattern2<DUPsv_S, + v1i32, v4i32, i32, neon_uimm2_bare, + v2i32, v4i32, neon_uimm1_bare>; +defm : NeonI_Scalar_DUP_Copy_pattern2<DUPhv_H, + v1i16, v8i16, i32, neon_uimm3_bare, + v4i16, v8i16, neon_uimm2_bare>; +defm : NeonI_Scalar_DUP_Copy_pattern2<DUPbv_B, + v1i8, v16i8, i32, neon_uimm4_bare, + v8i8, v16i8, neon_uimm3_bare>; +defm : NeonI_Scalar_DUP_Copy_pattern2<DUPdv_D, + v1f64, v2f64, f64, neon_uimm1_bare, + v1f64, v2f64, neon_uimm0_bare>; +defm : NeonI_Scalar_DUP_Copy_pattern2<DUPsv_S, + v1f32, v4f32, f32, neon_uimm2_bare, + v2f32, v4f32, neon_uimm1_bare>; multiclass NeonI_Scalar_DUP_alias<string asmop, string asmlane, Instruction DUPI, Operand OpImm, @@ -5788,6 +5846,20 @@ defm : NeonI_Scalar_DUP_alias<"mov", ".h", DUPhv_H, neon_uimm3_bare, FPR16>; defm : NeonI_Scalar_DUP_alias<"mov", ".s", DUPsv_S, neon_uimm2_bare, FPR32>; defm : NeonI_Scalar_DUP_alias<"mov", ".d", DUPdv_D, neon_uimm1_bare, FPR64>; +multiclass NeonI_SDUP<PatFrag GetLow, PatFrag GetHigh, ValueType ResTy, + ValueType OpTy> { + def : Pat<(ResTy (GetLow VPR128:$Rn)), + (ResTy (EXTRACT_SUBREG (OpTy VPR128:$Rn), sub_64))>; + def : Pat<(ResTy (GetHigh VPR128:$Rn)), + (ResTy (DUPdv_D (OpTy VPR128:$Rn), 1))>; +} + +defm : NeonI_SDUP<Neon_Low16B, Neon_High16B, v8i8, v16i8>; +defm : NeonI_SDUP<Neon_Low8H, Neon_High8H, v4i16, v8i16>; +defm : NeonI_SDUP<Neon_Low4S, Neon_High4S, v2i32, v4i32>; +defm : NeonI_SDUP<Neon_Low2D, Neon_High2D, v1i64, v2i64>; +defm : NeonI_SDUP<Neon_Low4float, Neon_High4float, v2f32, v4f32>; +defm : NeonI_SDUP<Neon_Low2double, Neon_High2double, v1f64, v2f64>; //===----------------------------------------------------------------------===// // Non-Instruction Patterns @@ -7110,13 +7182,10 @@ def : Pat<(v1i32 (scalar_to_vector GPR32:$src)), def : Pat<(v1i64 (scalar_to_vector GPR64:$src)), (FMOVdx $src)>; -def : Pat<(v1f32 (scalar_to_vector (f32 FPR32:$Rn))), - (v1f32 FPR32:$Rn)>; -def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Rn))), - (v1f64 FPR64:$Rn)>; - def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$src))), (FMOVdd $src)>; +def : Pat<(v1f32 (scalar_to_vector (f32 FPR32:$src))), + (FMOVss $src)>; def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$src))), (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), |