LoopVectorizer: If dependency checks fail try runtime checks

When a dependence check fails we can still try to vectorize loops with runtime array bounds checks. This helps linpack to vectorize a loop in dgefa. And we are back to 2x of the scalar performance on a corei7-avx. radar://15339680 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193853 91177308-0d34-0410-b5e6-96231b3b80d8
author: Arnold Schwaighofer <aschwaighofer@apple.com> 2013-11-01 03:05:07 +0000
committer: Arnold Schwaighofer <aschwaighofer@apple.com> 2013-11-01 03:05:07 +0000
commit: 0097e155025767c11790912dcf780f82dffaffb1 (patch)
tree: bd5e660c6fe3dcd9d182c532fe9ff3e506d1851d /lib/Transforms/Vectorize
parent: d272a1223314a69e4678816feeff2cfb3e740f8f (diff)
download: external_llvm-0097e155025767c11790912dcf780f82dffaffb1.zip
external_llvm-0097e155025767c11790912dcf780f82dffaffb1.tar.gz
external_llvm-0097e155025767c11790912dcf780f82dffaffb1.tar.bz2
1 files changed, 47 insertions, 5 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index e972326..f18707c 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3061,7 +3061,7 @@ public:
   /// non-intersection.
   bool canCheckPtrAtRT(LoopVectorizationLegality::RuntimePointerCheck &RtCheck,
                        unsigned &NumComparisons, ScalarEvolution *SE,
-                       Loop *TheLoop);
+                       Loop *TheLoop, bool ShouldCheckStride = false);
 
   /// \brief Goes over all memory accesses, checks whether a RT check is needed
   /// and builds sets of dependent accesses.
@@ -3075,6 +3075,7 @@ public:
   bool isRTCheckNeeded() { return IsRTCheckNeeded; }
 
   bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }
+  void resetDepChecks() { CheckDeps.clear(); }
 
   MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; }
 
@@ -3129,10 +3130,15 @@ static bool hasComputableBounds(ScalarEvolution *SE, Value *Ptr) {
   return AR->isAffine();
 }
 
+/// \brief Check the stride of the pointer and ensure that it does not wrap in
+/// the address space.
+static int isStridedPtr(ScalarEvolution *SE, DataLayout *DL, Value *Ptr,
+                        const Loop *Lp);
+
 bool AccessAnalysis::canCheckPtrAtRT(
                        LoopVectorizationLegality::RuntimePointerCheck &RtCheck,
                         unsigned &NumComparisons, ScalarEvolution *SE,
-                        Loop *TheLoop) {
+                        Loop *TheLoop, bool ShouldCheckStride) {
   // Find pointers with computable bounds. We are going to use this information
   // to place a runtime bound check.
   unsigned NumReadPtrChecks = 0;
@@ -3160,7 +3166,10 @@ bool AccessAnalysis::canCheckPtrAtRT(
     else
       ++NumReadPtrChecks;
 
-    if (hasComputableBounds(SE, Ptr)) {
+    if (hasComputableBounds(SE, Ptr) &&
+        // When we run after a failing dependency check we have to make sure we
+        // don't have wrapping pointers.
+        (!ShouldCheckStride || isStridedPtr(SE, DL, Ptr, TheLoop) == 1)) {
       // The id of the dependence set.
       unsigned DepId;
 
@@ -3342,8 +3351,9 @@ public:
   typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
   typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
 
-  MemoryDepChecker(ScalarEvolution *Se, DataLayout *Dl, const Loop *L) :
-    SE(Se), DL(Dl), InnermostLoop(L), AccessIdx(0) {}
+  MemoryDepChecker(ScalarEvolution *Se, DataLayout *Dl, const Loop *L)
+      : SE(Se), DL(Dl), InnermostLoop(L), AccessIdx(0),
+        ShouldRetryWithRuntimeCheck(false) {}
 
   /// \brief Register the location (instructions are given increasing numbers)
   /// of a write access.
@@ -3373,6 +3383,10 @@ public:
   /// the accesses safely with.
   unsigned getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }
 
+  /// \brief In same cases when the dependency check fails we can still
+  /// vectorize the loop with a dynamic array access check.
+  bool shouldRetryWithRuntimeCheck() { return ShouldRetryWithRuntimeCheck; }
+
 private:
   ScalarEvolution *SE;
   DataLayout *DL;
@@ -3390,6 +3404,10 @@ private:
   // We can access this many bytes in parallel safely.
   unsigned MaxSafeDepDistBytes;
 
+  /// \brief If we see a non constant dependence distance we can still try to
+  /// vectorize this loop with runtime checks.
+  bool ShouldRetryWithRuntimeCheck;
+
   /// \brief Check whether there is a plausible dependence between the two
   /// accesses.
   ///
@@ -3587,6 +3605,7 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
   const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
   if (!C) {
     DEBUG(dbgs() << "LV: Dependence because of non constant distance\n");
+    ShouldRetryWithRuntimeCheck = true;
     return true;
   }
 
@@ -3876,6 +3895,29 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
     CanVecMem = DepChecker.areDepsSafe(DependentAccesses,
                                        Accesses.getDependenciesToCheck());
     MaxSafeDepDistBytes = DepChecker.getMaxSafeDepDistBytes();
+
+    if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) {
+      DEBUG(dbgs() << "LV: Retrying with memory checks\n");
+      NeedRTCheck = true;
+
+      // Clear the dependency checks. We assume they are not needed.
+      Accesses.resetDepChecks();
+
+      PtrRtCheck.reset();
+      PtrRtCheck.Need = true;
+
+      CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE,
+                                         TheLoop, true);
+      // Check that we did not collect too many pointers or found an unsizeable
+      // pointer.
+      if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) {
+        DEBUG(dbgs() << "LV: Can't vectorize with memory checks\n");
+        PtrRtCheck.reset();
+        return false;
+      }
+
+      CanVecMem = true;
+    }
   }
 
   DEBUG(dbgs() << "LV: We" << (NeedRTCheck ? "" : " don't") <<
author	Arnold Schwaighofer <aschwaighofer@apple.com>	2013-11-01 03:05:07 +0000
committer	Arnold Schwaighofer <aschwaighofer@apple.com>	2013-11-01 03:05:07 +0000
commit	0097e155025767c11790912dcf780f82dffaffb1 (patch)
tree	bd5e660c6fe3dcd9d182c532fe9ff3e506d1851d /lib/Transforms/Vectorize
parent	d272a1223314a69e4678816feeff2cfb3e740f8f (diff)
download	external_llvm-0097e155025767c11790912dcf780f82dffaffb1.zip external_llvm-0097e155025767c11790912dcf780f82dffaffb1.tar.gz external_llvm-0097e155025767c11790912dcf780f82dffaffb1.tar.bz2