diff options
author | Bruno Cardoso Lopes <bruno.cardoso@gmail.com> | 2011-08-09 01:43:09 +0000 |
---|---|---|
committer | Bruno Cardoso Lopes <bruno.cardoso@gmail.com> | 2011-08-09 01:43:09 +0000 |
commit | e5118ab7bb4fe184c2c183002158609e5e812791 (patch) | |
tree | 19376b90284d611e5f3bdc4a8ee77d5ce773df6d | |
parent | 8d676c2199f21c5bd07aedd338dd3d228c502d5c (diff) | |
download | external_llvm-e5118ab7bb4fe184c2c183002158609e5e812791.zip external_llvm-e5118ab7bb4fe184c2c183002158609e5e812791.tar.gz external_llvm-e5118ab7bb4fe184c2c183002158609e5e812791.tar.bz2 |
Add two patterns to match special vmovss and vmovsd cases. Also fix
the patterns already there to be more strict regarding the predicate.
This fixes PR10558
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@137100 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 55 | ||||
-rw-r--r-- | test/CodeGen/X86/avx-load-store.ll | 18 |
2 files changed, 63 insertions, 10 deletions
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 79ae90e..2983187 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -186,26 +186,61 @@ def : Pat<(v4f64 (scalar_to_vector FR64:$src)), (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>; let AddedComplexity = 20 in { +let Predicates = [HasSSE1] in { + // MOVSSrm zeros the high parts of the register; represent this + // with SUBREG_TO_REG. + def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), + (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; + def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), + (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; + def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), + (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; +} +let Predicates = [HasSSE2] in { + // MOVSDrm zeros the high parts of the register; represent this + // with SUBREG_TO_REG. + def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + def : Pat<(v2f64 (X86vzload addr:$src)), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; +} +} + +let AddedComplexity = 20, Predicates = [HasAVX] in { // MOVSSrm zeros the high parts of the register; represent this -// with SUBREG_TO_REG. +// with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), - (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; + (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>; def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), - (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; + (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>; def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), - (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; + (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>; // MOVSDrm zeros the high parts of the register; represent this -// with SUBREG_TO_REG. +// with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; def : Pat<(v2f64 (X86vzload addr:$src)), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; +// Represent the same patterns above but in the form they appear for +// 256-bit types +def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, + (v4f32 (scalar_to_vector (loadf32 addr:$src))), (i32 0)))), + (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>; +def : Pat<(v4f64 (X86vzmovl (insert_subvector undef, + (v2f64 (scalar_to_vector (loadf64 addr:$src))), (i32 0)))), + (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_sd)>; } // Store scalar value to memory. diff --git a/test/CodeGen/X86/avx-load-store.ll b/test/CodeGen/X86/avx-load-store.ll index 5196089..0d591ec 100644 --- a/test/CodeGen/X86/avx-load-store.ll +++ b/test/CodeGen/X86/avx-load-store.ll @@ -22,3 +22,21 @@ entry: declare void @dummy(<4 x double>, <8 x float>, <4 x i64>) +;; +;; The two tests below check that we must fold load + scalar_to_vector +;; + ins_subvec+ zext into only a single vmovss or vmovsd + +; CHECK: vmovss (% +define <8 x float> @mov00(<8 x float> %v, float * %ptr) nounwind { + %val = load float* %ptr + %i0 = insertelement <8 x float> zeroinitializer, float %val, i32 0 + ret <8 x float> %i0 +} + +; CHECK: vmovsd (% +define <4 x double> @mov01(<4 x double> %v, double * %ptr) nounwind { + %val = load double* %ptr + %i0 = insertelement <4 x double> zeroinitializer, double %val, i32 0 + ret <4 x double> %i0 +} + |