diff options
author | Hal Finkel <hfinkel@anl.gov> | 2012-04-13 20:45:45 +0000 |
---|---|---|
committer | Hal Finkel <hfinkel@anl.gov> | 2012-04-13 20:45:45 +0000 |
commit | fc3665c87519850f629c9565535e3be447e10add (patch) | |
tree | 1b11b5add5b97ad8f14fda8f200f22e2774d24f2 | |
parent | 8800cada206d6f2eda1d54b2a40f4403e9e522bc (diff) | |
download | external_llvm-fc3665c87519850f629c9565535e3be447e10add.zip external_llvm-fc3665c87519850f629c9565535e3be447e10add.tar.gz external_llvm-fc3665c87519850f629c9565535e3be447e10add.tar.bz2 |
Add support to BBVectorize for vectorizing selects.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@154700 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | include/llvm/Transforms/Vectorize.h | 3 | ||||
-rw-r--r-- | lib/Transforms/Vectorize/BBVectorize.cpp | 8 | ||||
-rw-r--r-- | test/Transforms/BBVectorize/simple-sel.ll | 30 |
3 files changed, 41 insertions, 0 deletions
diff --git a/include/llvm/Transforms/Vectorize.h b/include/llvm/Transforms/Vectorize.h index 7701ceb..776c005 100644 --- a/include/llvm/Transforms/Vectorize.h +++ b/include/llvm/Transforms/Vectorize.h @@ -43,6 +43,9 @@ struct VectorizeConfig { /// @brief Vectorize the fused-multiply-add intrinsic. bool VectorizeFMA; + /// @brief Vectorize select instructions. + bool VectorizeSelect; + /// @brief Vectorize loads and stores. bool VectorizeMemOps; diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index 286b54f..b2f8c8f 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -96,6 +96,10 @@ NoFMA("bb-vectorize-no-fma", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize the fused-multiply-add intrinsic")); static cl::opt<bool> +NoSelect("bb-vectorize-no-select", cl::init(false), cl::Hidden, + cl::desc("Don't try to vectorize select instructions")); + +static cl::opt<bool> NoMemOps("bb-vectorize-no-mem-ops", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize loads and stores")); @@ -552,6 +556,9 @@ namespace { Type *DestTy = C->getDestTy(); if (!DestTy->isSingleValueType() || DestTy->isPointerTy()) return false; + } else if (isa<SelectInst>(I)) { + if (!Config.VectorizeSelect) + return false; } else if (!(I->isBinaryOp() || isa<ShuffleVectorInst>(I) || isa<ExtractElementInst>(I) || isa<InsertElementInst>(I))) { return false; @@ -1894,6 +1901,7 @@ VectorizeConfig::VectorizeConfig() { VectorizeCasts = !::NoCasts; VectorizeMath = !::NoMath; VectorizeFMA = !::NoFMA; + VectorizeSelect = !::NoSelect; VectorizeMemOps = !::NoMemOps; AlignedOnly = ::AlignedOnly; ReqChainDepth= ::ReqChainDepth; diff --git a/test/Transforms/BBVectorize/simple-sel.ll b/test/Transforms/BBVectorize/simple-sel.ll new file mode 100644 index 0000000..4daa571 --- /dev/null +++ b/test/Transforms/BBVectorize/simple-sel.ll @@ -0,0 +1,30 @@ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s + +; Basic depth-3 chain with select +define double @test1(double %A1, double %A2, double %B1, double %B2, i1 %C1, i1 %C2) { +; CHECK: @test1 +; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 +; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 +; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 +; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 +; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 + %Y1 = fmul double %X1, %A1 + %Y2 = fmul double %X2, %A2 +; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2 + %Z1 = select i1 %C1, double %Y1, double %B1 + %Z2 = select i1 %C2, double %Y2, double %B2 +; CHECK: %Z1.v.i0.1 = insertelement <2 x i1> undef, i1 %C1, i32 0 +; CHECK: %Z1.v.i0.2 = insertelement <2 x i1> %Z1.v.i0.1, i1 %C2, i32 1 +; CHECK: %Z1 = select <2 x i1> %Z1.v.i0.2, <2 x double> %Y1, <2 x double> %X1.v.i1.2 + %R = fmul double %Z1, %Z2 +; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 +; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 +; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 + ret double %R +; CHECK: ret double %R +} + + |