SLPVectorizer: Sort PHINodes based on their opcode

Before this patch, we relied on the order of phi nodes when looking for phi nodes of the same type. This could prevent vectorization in cases where a phi node of a different type sat in between phi nodes of the same type. This is important for vectorization of an internal graphics kernel. On the test suite + external on x86_64 (and on a run on armv7s), it showed no impact on either performance or compile time.

radar://15024459

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192537 91177308-0d34-0410-b5e6-96231b3b80d8
author: Arnold Schwaighofer <aschwaighofer@apple.com> 2013-10-12 18:56:27 +0000
committer: Arnold Schwaighofer <aschwaighofer@apple.com> 2013-10-12 18:56:27 +0000
commit: 24732c3363a9a442c14cf236c3de1086cdee6000 (patch)
tree: bb13ff4e0f982feedf44dc9915df59f86a6ba4a0 /test
parent: fe82a3e360fc850e7551f2f1f32b58e539182c68 (diff)
download: external_llvm-24732c3363a9a442c14cf236c3de1086cdee6000.zip external_llvm-24732c3363a9a442c14cf236c3de1086cdee6000.tar.gz external_llvm-24732c3363a9a442c14cf236c3de1086cdee6000.tar.bz2
1 files changed, 34 insertions, 2 deletions
diff --git a/test/Transforms/SLPVectorizer/X86/phi.ll b/test/Transforms/SLPVectorizer/X86/phi.ll
index 9cc4891..964e0e4 100644
--- a/test/Transforms/SLPVectorizer/X86/phi.ll
+++ b/test/Transforms/SLPVectorizer/X86/phi.ll
@@ -135,14 +135,14 @@ entry:
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %entry
-  %5 = phi float [ %1, %entry ], [ %11, %for.body ]
-  %6 = phi float [ %0, %entry ], [ %9, %for.body ]
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %P.056 = phi float [ %4, %entry ], [ %add26, %for.body ]
   %Y.055 = phi float [ %3, %entry ], [ %add21, %for.body ]
   %B.054 = phi float [ %2, %entry ], [ %add16, %for.body ]
   %G.053 = phi float [ %1, %entry ], [ %add11, %for.body ]
   %R.052 = phi float [ %0, %entry ], [ %add6, %for.body ]
+  %5 = phi float [ %1, %entry ], [ %11, %for.body ]
+  %6 = phi float [ %0, %entry ], [ %9, %for.body ]
   %mul = fmul float %6, 7.000000e+00
   %add6 = fadd float %R.052, %mul
   %mul10 = fmul float %5, 8.000000e+00
@@ -174,6 +174,38 @@ for.end:                                          ; preds = %for.body
   ret float %add31
 }
 
+; Make sure the order of phi nodes of different types does not prevent
+; vectorization of same typed phi nodes.
+; CHECK-LABEL: sort_phi_type
+; CHECK: phi <4 x float>
+; CHECK: fmul <4 x float>
+
+define float @sort_phi_type(float* nocapture readonly %A) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %Y = phi float [ 1.000000e+01, %entry ], [ %mul10, %for.body ]
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %B = phi float [ 1.000000e+01, %entry ], [ %mul15, %for.body ]
+  %G = phi float [ 1.000000e+01, %entry ], [ %mul20, %for.body ]
+  %R = phi float [ 1.000000e+01, %entry ], [ %mul25, %for.body ]
+  %mul10 = fmul float %Y, 8.000000e+00
+  %mul15 = fmul float %B, 9.000000e+00
+  %mul20 = fmul float %R, 10.000000e+01
+  %mul25 = fmul float %G, 11.100000e+01
+  %indvars.iv.next = add nsw i64 %indvars.iv, 4
+  %cmp = icmp slt i64 %indvars.iv.next, 128
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %add28 = fadd float 1.000000e+01, %mul10
+  %add29 = fadd float %mul10, %mul15
+  %add30 = fadd float %add29, %mul20
+  %add31 = fadd float %add30, %mul25
+  ret float %add31
+}
+
 define void @test(x86_fp80* %i1, x86_fp80* %i2, x86_fp80* %o) {
 ; CHECK-LABEL: @test(
 ;
author	Arnold Schwaighofer <aschwaighofer@apple.com>	2013-10-12 18:56:27 +0000
committer	Arnold Schwaighofer <aschwaighofer@apple.com>	2013-10-12 18:56:27 +0000
commit	24732c3363a9a442c14cf236c3de1086cdee6000 (patch)
tree	bb13ff4e0f982feedf44dc9915df59f86a6ba4a0 /test
parent	fe82a3e360fc850e7551f2f1f32b58e539182c68 (diff)
download	external_llvm-24732c3363a9a442c14cf236c3de1086cdee6000.zip external_llvm-24732c3363a9a442c14cf236c3de1086cdee6000.tar.gz external_llvm-24732c3363a9a442c14cf236c3de1086cdee6000.tar.bz2