diff options
author | Nadav Rotem <nrotem@apple.com> | 2013-01-07 21:54:51 +0000 |
---|---|---|
committer | Nadav Rotem <nrotem@apple.com> | 2013-01-07 21:54:51 +0000 |
commit | 9a6c6a373629fb5a3cc5afd08aafa51339df95df (patch) | |
tree | 1cbd326c4bbe4297f2e8f43f828f7e6acad6ee45 /lib | |
parent | 6c1d4972cf1cd6b6072e31c05f97abb1ed7a8497 (diff) | |
download | external_llvm-9a6c6a373629fb5a3cc5afd08aafa51339df95df.zip external_llvm-9a6c6a373629fb5a3cc5afd08aafa51339df95df.tar.gz external_llvm-9a6c6a373629fb5a3cc5afd08aafa51339df95df.tar.bz2 |
LoopVectorizer: When we vectorize and widen loops we process many elements at once. This is a good thing, except for
small loops. On small loops, the post-loop that handles scalars (and runs slower) can take more time to execute than the
rest of the loop. This patch disables widening of loops with a small static trip count.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171798 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Transforms/Vectorize/LoopVectorize.cpp | 18 |
1 files changed, 15 insertions, 3 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 2c1af1d..b266d9d 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -101,7 +101,13 @@ EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization.")); /// We don't vectorize loops with a known constant trip count below this number. -static const unsigned TinyTripCountThreshold = 16; +static const unsigned TinyTripCountVectorThreshold = 16; + +/// We don't unroll loops with a known constant trip count below this number. +static const unsigned TinyTripCountUnrollThreshold = 128; + +/// We don't unroll loops that are larget than this threshold. +static const unsigned MaxLoopSizeThreshold = 32; /// When performing a runtime memory check, do not check more than this /// number of pointers. Notice that the check is quadratic! @@ -2016,7 +2022,7 @@ bool LoopVectorizationLegality::canVectorize() { // Do not loop-vectorize loops with a tiny trip count. unsigned TC = SE->getSmallConstantTripCount(TheLoop, Latch); - if (TC > 0u && TC < TinyTripCountThreshold) { + if (TC > 0u && TC < TinyTripCountVectorThreshold) { DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " << "This loop is not worth vectorizing.\n"); return false; @@ -2678,6 +2684,12 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize, if (OptForSize) return 1; + // Do not unroll loops with a relatively small trip count. 
+ unsigned TC = SE->getSmallConstantTripCount(TheLoop, + TheLoop->getLoopLatch()); + if (TC > 1 && TC < TinyTripCountUnrollThreshold) + return 1; + unsigned TargetVectorRegisters = TTI.getNumberOfRegisters(true); DEBUG(dbgs() << "LV: The target has " << TargetVectorRegisters << " vector registers\n"); @@ -2698,7 +2710,7 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize, // We don't want to unroll the loops to the point where they do not fit into // the decoded cache. Assume that we only allow 32 IR instructions. - UF = std::min(UF, (32 / R.NumInstructions)); + UF = std::min(UF, (MaxLoopSizeThreshold / R.NumInstructions)); // Clamp the unroll factor ranges to reasonable factors. if (UF > MaxUnrollSize) |