aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Nadav Rotem <nrotem@apple.com> 2013-07-26 23:07:55 +0000
committer Nadav Rotem <nrotem@apple.com> 2013-07-26 23:07:55 +0000
commit 67a38a2875f05ea9c219ab73c4398ee675eb4292 (patch)
tree a5858fa365316a162134620715e4416ef74cb8dc
parent a629c3a4f05a8e7976142577872aef95f88c86a0 (diff)
download external_llvm-67a38a2875f05ea9c219ab73c4398ee675eb4292.zip
external_llvm-67a38a2875f05ea9c219ab73c4398ee675eb4292.tar.gz
external_llvm-67a38a2875f05ea9c219ab73c4398ee675eb4292.tar.bz2
SLP Vectorizer: Don't vectorize really short chains because they are already handled by the SelectionDAG store-vectorizer, which does a better job of deciding when to vectorize.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187267 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/Transforms/Vectorize/SLPVectorizer.cpp 8
-rw-r--r-- test/Transforms/SLPVectorizer/X86/reduction2.ll 4
2 files changed, 9 insertions, 3 deletions
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index c1accd3..50e37e9 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -898,8 +898,12 @@ int BoUpSLP::getTreeCost() {
DEBUG(dbgs() << "SLP: Calculating cost for tree of size " <<
VectorizableTree.size() << ".\n");
- if (!VectorizableTree.size()) {
- assert(!ExternalUses.size() && "We should not have any external users");
+ // Don't vectorize tiny trees. Small load/store chains or consecutive stores
+ // of constants will be vectorized in SelectionDAG in MergeConsecutiveStores.
+ if (VectorizableTree.size() < 3) {
+ if (!VectorizableTree.size()) {
+ assert(!ExternalUses.size() && "We should not have any external users");
+ }
return 0;
}
diff --git a/test/Transforms/SLPVectorizer/X86/reduction2.ll b/test/Transforms/SLPVectorizer/X86/reduction2.ll
index 1dc77d2..f21e86c 100644
--- a/test/Transforms/SLPVectorizer/X86/reduction2.ll
+++ b/test/Transforms/SLPVectorizer/X86/reduction2.ll
@@ -16,11 +16,13 @@ define double @foo(double* nocapture %D) {
%3 = getelementptr inbounds double* %D, i32 %2
%4 = load double* %3, align 4
%A4 = fmul double %4, %4
+ %A42 = fmul double %A4, %A4
%5 = or i32 %2, 1
%6 = getelementptr inbounds double* %D, i32 %5
%7 = load double* %6, align 4
%A7 = fmul double %7, %7
- %8 = fadd double %A4, %A7
+ %A72 = fmul double %A7, %A7
+ %8 = fadd double %A42, %A72
%9 = fadd double %sum.01, %8
%10 = add nsw i32 %i.02, 1
%exitcond = icmp eq i32 %10, 100