PR14448 - prevent the loop vectorizer from vectorizing the same loop twice.

The LoopVectorizer often runs multiple times on the same function due to inlining. When this happens, it frequently vectorizes the same loops more than once, increasing code size and adding unneeded branches. With this patch, the vectorizer attaches metadata to the scalar loops it leaves behind during vectorization, marking them as 'already vectorized' so that it knows to ignore them when it sees them a second time. PR14448. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176399 91177308-0d34-0410-b5e6-96231b3b80d8
author: Nadav Rotem <nrotem@apple.com> 2013-03-02 01:33:49 +0000
committer: Nadav Rotem <nrotem@apple.com> 2013-03-02 01:33:49 +0000
commit: 5290baacb8ca4fb75d798e873a441cad11cbfb2c (patch)
tree: 25909512c21f2b3608f62052422fd3a0fbd5d1a6 /test/Transforms/LoopVectorize/vectorize-once.ll
parent: 328d1b65002e68ae65ffef05eed19122cbf721f5 (diff)
download: external_llvm-5290baacb8ca4fb75d798e873a441cad11cbfb2c.zip
external_llvm-5290baacb8ca4fb75d798e873a441cad11cbfb2c.tar.gz
external_llvm-5290baacb8ca4fb75d798e873a441cad11cbfb2c.tar.bz2
1 files changed, 75 insertions, 0 deletions
diff --git a/test/Transforms/LoopVectorize/vectorize-once.ll b/test/Transforms/LoopVectorize/vectorize-once.ll
new file mode 100644
index 0000000..ac16948
--- /dev/null
+++ b/test/Transforms/LoopVectorize/vectorize-once.ll
@@ -0,0 +1,75 @@
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S -simplifycfg | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;
+; We want to make sure that we are vectorizing the scalar loop only once
+; even if the pass manager runs the vectorizer multiple times due to inlining.
+
+
+; This test checks that we add metadata to vectorized loops
+; CHECK: _Z4foo1Pii
+; CHECK: <4 x i32>
+; CHECK: llvm.vectorizer.already_vectorized
+; CHECK: ret
+
+; This test comes from the following source (the loop is inside std::accumulate):
+;
+;int foo (int *A, int n) {
+;  return std::accumulate(A, A + n, 0);
+;}
+define i32 @_Z4foo1Pii(i32* %A, i32 %n) #0 { ; input loop carries no vectorizer metadata; CHECK lines expect <4 x i32> code and the marker in the output
+entry:
+  %idx.ext = sext i32 %n to i64 ; sign-extend n so it can be used as a 64-bit GEP index
+  %add.ptr = getelementptr inbounds i32* %A, i64 %idx.ext ; end pointer, A + n
+  %cmp3.i = icmp eq i32 %n, 0
+  br i1 %cmp3.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i ; n == 0: bypass the loop
+
+for.body.i:                                       ; preds = %entry, %for.body.i
+  %__init.addr.05.i = phi i32 [ %add.i, %for.body.i ], [ 0, %entry ] ; running sum, starts at 0
+  %__first.addr.04.i = phi i32* [ %incdec.ptr.i, %for.body.i ], [ %A, %entry ] ; current element pointer, starts at A
+  %0 = load i32* %__first.addr.04.i, align 4, !tbaa !0
+  %add.i = add nsw i32 %0, %__init.addr.05.i ; sum += *ptr
+  %incdec.ptr.i = getelementptr inbounds i32* %__first.addr.04.i, i64 1 ; advance ptr by one i32
+  %cmp.i = icmp eq i32* %incdec.ptr.i, %add.ptr
+  br i1 %cmp.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i ; exit once ptr reaches A + n, otherwise take the back edge
+
+_ZSt10accumulateIPiiET0_T_S2_S1_.exit:            ; preds = %for.body.i, %entry
+  %__init.addr.0.lcssa.i = phi i32 [ 0, %entry ], [ %add.i, %for.body.i ] ; 0 if the loop was skipped, else the last computed sum
+  ret i32 %__init.addr.0.lcssa.i
+}
+
+; This test checks that we don't vectorize loops that are marked with the "already vectorized" metadata.
+; CHECK: _Z4foo2Pii
+; CHECK-NOT: <4 x i32>
+; CHECK: llvm.vectorizer.already_vectorized
+; CHECK: ret
+define i32 @_Z4foo2Pii(i32* %A, i32 %n) #0 { ; same body as @_Z4foo1Pii, but the loop's back-edge branch is pre-marked as already vectorized
+entry:
+  %idx.ext = sext i32 %n to i64 ; sign-extend n so it can be used as a 64-bit GEP index
+  %add.ptr = getelementptr inbounds i32* %A, i64 %idx.ext ; end pointer, A + n
+  %cmp3.i = icmp eq i32 %n, 0
+  br i1 %cmp3.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i ; n == 0: bypass the loop
+
+for.body.i:                                       ; preds = %entry, %for.body.i
+  %__init.addr.05.i = phi i32 [ %add.i, %for.body.i ], [ 0, %entry ] ; running sum, starts at 0
+  %__first.addr.04.i = phi i32* [ %incdec.ptr.i, %for.body.i ], [ %A, %entry ] ; current element pointer, starts at A
+  %0 = load i32* %__first.addr.04.i, align 4, !tbaa !0
+  %add.i = add nsw i32 %0, %__init.addr.05.i ; sum += *ptr
+  %incdec.ptr.i = getelementptr inbounds i32* %__first.addr.04.i, i64 1 ; advance ptr by one i32
+  %cmp.i = icmp eq i32* %incdec.ptr.i, %add.ptr
+  br i1 %cmp.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i, !llvm.vectorizer.already_vectorized !3 ; the marker under test: the CHECK-NOT line expects no <4 x i32> code for this loop
+
+_ZSt10accumulateIPiiET0_T_S2_S1_.exit:            ; preds = %for.body.i, %entry
+  %__init.addr.0.lcssa.i = phi i32 [ 0, %entry ], [ %add.i, %for.body.i ] ; 0 if the loop was skipped, else the last computed sum
+  ret i32 %__init.addr.0.lcssa.i
+}
+
+attributes #0 = { nounwind readonly ssp uwtable "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "realign-stack" "relocation-model"="pic" "ssp-buffers-size"="8" }
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{}
+
author	Nadav Rotem <nrotem@apple.com>	2013-03-02 01:33:49 +0000
committer	Nadav Rotem <nrotem@apple.com>	2013-03-02 01:33:49 +0000
commit	5290baacb8ca4fb75d798e873a441cad11cbfb2c (patch)
tree	25909512c21f2b3608f62052422fd3a0fbd5d1a6 /test/Transforms/LoopVectorize/vectorize-once.ll
parent	328d1b65002e68ae65ffef05eed19122cbf721f5 (diff)
download	external_llvm-5290baacb8ca4fb75d798e873a441cad11cbfb2c.zip external_llvm-5290baacb8ca4fb75d798e873a441cad11cbfb2c.tar.gz external_llvm-5290baacb8ca4fb75d798e873a441cad11cbfb2c.tar.bz2