diff options
author | Tim Northover <Tim.Northover@arm.com> | 2012-04-26 08:46:29 +0000 |
---|---|---|
committer | Tim Northover <Tim.Northover@arm.com> | 2012-04-26 08:46:29 +0000 |
commit | 37abe8df4a4e74513e2daa0cdf0b801bec94dade (patch) | |
tree | 3781eee52c4e726f27b66a97ce219ab6df307549 | |
parent | e38993f89288f8dd96451fe3ba514950520757ad (diff) | |
download | external_llvm-37abe8df4a4e74513e2daa0cdf0b801bec94dade.zip external_llvm-37abe8df4a4e74513e2daa0cdf0b801bec94dade.tar.gz external_llvm-37abe8df4a4e74513e2daa0cdf0b801bec94dade.tar.bz2 |
Use VLD1 in NEON extenting-load patterns instead of VLDR.
On some cores it's a bad idea for performance to mix VFP and NEON instructions
and since these patterns are NEON anyway, the NEON load should be used.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@155630 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/ARM/ARMInstrNEON.td | 115 | ||||
-rw-r--r-- | test/CodeGen/ARM/vector-extend-narrow.ll | 8 |
2 files changed, 65 insertions, 58 deletions
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index f3cdb92..31dd843 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -5595,47 +5595,51 @@ def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>; // Vector lengthening move with load, matching extending loads. // extload, zextload and sextload for a standard lengthening load. Example: -// Lengthen_Single<"8", "i16", "i8"> = Pat<(v8i16 (extloadvi8 addrmode5:$addr)) -// (VMOVLuv8i16 (VLDRD addrmode5:$addr))>; +// Lengthen_Single<"8", "i16", "i8"> = +// Pat<(v8i16 (extloadvi8 addrmode6oneL32:$addr)) +// (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr, +// (f64 (IMPLICIT_DEF)), (i32 0)))>; multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> { def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) - (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)), + (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)), (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy) - (VLDRD addrmode5:$addr))>; + (VLD1LNd32 addrmode6oneL32:$addr, + (f64 (IMPLICIT_DEF)), (i32 0)))>; def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) - (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)), + (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)), (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy) - (VLDRD addrmode5:$addr))>; + (VLD1LNd32 addrmode6oneL32:$addr, + (f64 (IMPLICIT_DEF)), (i32 0)))>; def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) - (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)), + (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)), (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy) - (VLDRD addrmode5:$addr))>; + (VLD1LNd32 addrmode6oneL32:$addr, + (f64 (IMPLICIT_DEF)), (i32 0)))>; } // extload, zextload and sextload for a lengthening load which only uses // half the lanes available. Example: // Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> = -// Pat<(v4i16 (extloadvi8 addrmode5:$addr)) -// (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), -// (VLDRS addrmode5:$addr), -// ssub_0)), +// Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)), +// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr, +// (f64 (IMPLICIT_DEF)), (i32 0))), // dsub_0)>; multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy, string InsnLanes, string InsnTy> { def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) - (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)), + (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)), (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy) - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)), + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)>; def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) - (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)), + (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)), (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy) - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)), + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)>; def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) - (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)), + (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)), (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy) - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)), + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)>; } @@ -5645,32 +5649,32 @@ multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy, // Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32", qsub_0> = // Pat<(v4i32 (extloadvi8 addrmode5:$addr)) // (EXTRACT_SUBREG (VMOVLuv4i32 -// (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), -// (VLDRS addrmode5:$addr), -// ssub_0)), +// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr, +// (f64 (IMPLICIT_DEF)), +// (i32 0))), // dsub_0)), -// qsub_0)>; +// dsub_0)>; multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy, string Insn1Lanes, string Insn1Ty, string Insn2Lanes, string Insn2Ty> { def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) - (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)), + (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)), (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), - ssub_0)), dsub_0))>; + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0))>; def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) - (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)), + (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)), (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), - ssub_0)), dsub_0))>; + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0))>; def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) - (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)), + (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)), (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty) (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty) - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), - ssub_0)), dsub_0))>; + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0))>; } // extload, zextload and sextload for a lengthening load followed by another @@ -5678,36 +5682,35 @@ multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy, // requiring half the available lanes (a 64-bit outcome instead of a 128-bit). // // Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> = -// Pat<(v4i32 (extloadvi8 addrmode5:$addr)) -// (EXTRACT_SUBREG (VMOVLuv4i32 -// (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), -// (VLDRS addrmode5:$addr), -// ssub_0)), -// dsub_0)), -// dsub_0)>; +// Pat<(v4i32 (extloadvi8 addrmode5:$addr)) +// (EXTRACT_SUBREG (VMOVLuv4i32 +// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr, +// (f64 (IMPLICIT_DEF)), (i32 0))), +// dsub_0)), +// dsub_0)>; multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy, string Insn1Lanes, string Insn1Ty, string Insn2Lanes, string Insn2Ty> { def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) - (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)), + (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)), (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), - ssub_0)), dsub_0)), + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0)), dsub_0)>; def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) - (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)), + (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)), (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), - ssub_0)), dsub_0)), + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0)), dsub_0)>; def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) - (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)), + (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)), (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty) (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty) - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), - ssub_0)), dsub_0)), + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0)), dsub_0)>; } @@ -5727,18 +5730,18 @@ defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">; defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">; // Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64 -def : Pat<(v2i64 (extloadvi8 addrmode5:$addr)), +def : Pat<(v2i64 (extloadvi8 addrmode6oneL32:$addr)), (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)), - dsub_0)), dsub_0))>; -def : Pat<(v2i64 (zextloadvi8 addrmode5:$addr)), + (VLD1LNd32 addrmode6oneL32:$addr, + (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; +def : Pat<(v2i64 (zextloadvi8 addrmode6oneL32:$addr)), (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)), - dsub_0)), dsub_0))>; -def : Pat<(v2i64 (sextloadvi8 addrmode5:$addr)), + (VLD1LNd32 addrmode6oneL32:$addr, + (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; +def : Pat<(v2i64 (sextloadvi8 addrmode6oneL32:$addr)), (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16 - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)), - dsub_0)), dsub_0))>; + (VLD1LNd32 addrmode6oneL32:$addr, + (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; //===----------------------------------------------------------------------===// // Assembler aliases diff --git a/test/CodeGen/ARM/vector-extend-narrow.ll b/test/CodeGen/ARM/vector-extend-narrow.ll index 1ec36da..8fd3db2 100644 --- a/test/CodeGen/ARM/vector-extend-narrow.ll +++ b/test/CodeGen/ARM/vector-extend-narrow.ll @@ -20,7 +20,9 @@ define float @f(<4 x i16>* nocapture %in) { ; CHECK: g: define float @g(<4 x i8>* nocapture %in) { - ; CHECK: vldr +; Note: vld1 here is reasonably important. Mixing VFP and NEON +; instructions is bad on some cores + ; CHECK: vld1 ; CHECK: vmovl.u8 ; CHECK: vmovl.u16 %1 = load <4 x i8>* %in @@ -47,7 +49,9 @@ define <4 x i8> @h(<4 x float> %v) { ; CHECK: i: define <4 x i8> @i(<4 x i8>* %x) { - ; CHECK: vldr +; Note: vld1 here is reasonably important. Mixing VFP and NEON +; instructions is bad on some cores + ; CHECK: vld1 ; CHECK: vmovl.s8 ; CHECK: vmovl.s16 ; CHECK: vrecpe |