SLP Vectorizer: Implement multi-block slp-vectorization.

Rewrote the SLP vectorizer as a whole-function vectorization pass. It is now able to vectorize chains across multiple basic blocks. It still does not vectorize PHIs, but this should be easy to do now that we scan the entire function. I removed the support for extracting values from trees. We are now able to vectorize more programs, but there are some serious regressions in many workloads (such as flops-6 and mandel-2).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184647 91177308-0d34-0410-b5e6-96231b3b80d8
author: Nadav Rotem <nrotem@apple.com> 2013-06-22 21:34:10 +0000
committer: Nadav Rotem <nrotem@apple.com> 2013-06-22 21:34:10 +0000
commit: 53a0552b06cb8288004f7698f6e4640fe2a74f61 (patch)
tree: e79c3c8bd0d330a982737744359053feb8251c92 /test
parent: 71f28bf6fb996d5c71671641e5fc8794c7820f3e (diff)
download: external_llvm-53a0552b06cb8288004f7698f6e4640fe2a74f61.zip
external_llvm-53a0552b06cb8288004f7698f6e4640fe2a74f61.tar.gz
external_llvm-53a0552b06cb8288004f7698f6e4640fe2a74f61.tar.bz2
3 files changed, 60 insertions, 5 deletions
diff --git a/test/Transforms/SLPVectorizer/X86/diamond.ll b/test/Transforms/SLPVectorizer/X86/diamond.ll
index 8959b0d..008f09d 100644
--- a/test/Transforms/SLPVectorizer/X86/diamond.ll
+++ b/test/Transforms/SLPVectorizer/X86/diamond.ll
@@ -50,9 +50,9 @@ entry:
 ; }
 
 ; CHECK: @extr_user
+; CHECK: load i32*
 ; CHECK: store <4 x i32>
-; CHECK-NEXT: extractelement <4 x i32>
-; CHECK: ret
+; CHECK-NEXT: ret
 define i32 @extr_user(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) {
 entry:
   %0 = load i32* %A, align 4
@@ -79,9 +79,9 @@ entry:
 
 ; In this example we have an external user that is not the first element in the vector.
 ; CHECK: @extr_user1
+; CHECK: load i32*
 ; CHECK: store <4 x i32>
-; CHECK-NEXT: extractelement <4 x i32>
-; CHECK: ret
+; CHECK-NEXT: ret
 define i32 @extr_user1(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) {
 entry:
   %0 = load i32* %A, align 4
diff --git a/test/Transforms/SLPVectorizer/X86/multi_block.ll b/test/Transforms/SLPVectorizer/X86/multi_block.ll
new file mode 100644
index 0000000..eed3f37
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/multi_block.ll
@@ -0,0 +1,55 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.0"
+
+; int bar(double *A, int d) {
+;   double A0 = A[0];
+;   double A1 = A[1];
+;   float F0 = A0;
+;   float F1 = A1;
+;   if (d) foo(); <----- This splits the blocks
+;   F0+=4.0;
+;   F1+=5.0;
+;   A[8] = 9.0 + F0;
+;   A[9] = 5.0 + F1;
+; }
+
+
+;CHECK: @bar
+;CHECK: load <2 x double>
+;CHECK: fptrunc <2 x double>
+;CHECK: call i32
+;CHECK: fadd <2 x float>
+;CHECK: fpext <2 x float>
+;CHECK: store <2 x double>
+;CHECK: ret
+define i32 @bar(double* nocapture %A, i32 %d) {
+  %1 = load double* %A, align 8
+  %2 = getelementptr inbounds double* %A, i64 1
+  %3 = load double* %2, align 8
+  %4 = fptrunc double %1 to float
+  %5 = fptrunc double %3 to float
+  %6 = icmp eq i32 %d, 0
+  br i1 %6, label %9, label %7
+
+; <label>:7                                       ; preds = %0
+  %8 = tail call i32 (...)* @foo()
+  br label %9
+
+; <label>:9                                       ; preds = %0, %7
+  %10 = fadd float %4, 4.000000e+00
+  %11 = fadd float %5, 5.000000e+00
+  %12 = fpext float %10 to double
+  %13 = fadd double %12, 9.000000e+00
+  %14 = getelementptr inbounds double* %A, i64 8
+  store double %13, double* %14, align 8
+  %15 = fpext float %11 to double
+  %16 = fadd double %15, 5.000000e+00
+  %17 = getelementptr inbounds double* %A, i64 9
+  store double %16, double* %17, align 8
+  ret i32 undef
+}
+
+declare i32 @foo(...)
+
diff --git a/test/Transforms/SLPVectorizer/X86/multi_user.ll b/test/Transforms/SLPVectorizer/X86/multi_user.ll
index d4d4d28..aaa6063 100644
--- a/test/Transforms/SLPVectorizer/X86/multi_user.ll
+++ b/test/Transforms/SLPVectorizer/X86/multi_user.ll
@@ -12,8 +12,8 @@ target triple = "x86_64-apple-macosx10.7.0"
 ;}
 
 ;CHECK: @foo
-;CHECK: load <4 x i32>
 ;CHECK: insertelement <4 x i32>
+;CHECK: load <4 x i32>
 ;CHECK: add <4 x i32>
 ;CHECK: store <4 x i32>
 ;CHECK: ret
author	Nadav Rotem <nrotem@apple.com>	2013-06-22 21:34:10 +0000
committer	Nadav Rotem <nrotem@apple.com>	2013-06-22 21:34:10 +0000
commit	53a0552b06cb8288004f7698f6e4640fe2a74f61 (patch)
tree	e79c3c8bd0d330a982737744359053feb8251c92 /test
parent	71f28bf6fb996d5c71671641e5fc8794c7820f3e (diff)
download	external_llvm-53a0552b06cb8288004f7698f6e4640fe2a74f61.zip external_llvm-53a0552b06cb8288004f7698f6e4640fe2a74f61.tar.gz external_llvm-53a0552b06cb8288004f7698f6e4640fe2a74f61.tar.bz2