diff options
-rw-r--r-- | lib/Transforms/Vectorize/LoopVectorize.cpp | 7 | ||||
-rw-r--r-- | test/Transforms/LoopVectorize/funcall.ll | 32 |
2 files changed, 39 insertions, 0 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index f41bd28..e6e0f6b 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3520,6 +3520,13 @@ bool LoopVectorizationLegality::canVectorizeMemory() { // but is not a load, then we quit. Notice that we don't handle function // calls that read or write. if (it->mayReadFromMemory()) { + // Many math library functions read the rounding mode. We will only + // vectorize a loop if it contains known function calls that don't set + // the flag. Therefore, it is safe to ignore this read from memory. + CallInst *Call = dyn_cast<CallInst>(it); + if (Call && getIntrinsicIDForCall(Call, TLI)) + continue; + LoadInst *Ld = dyn_cast<LoadInst>(it); if (!Ld) return false; if (!Ld->isSimple() && !IsAnnotatedParallel) { diff --git a/test/Transforms/LoopVectorize/funcall.ll b/test/Transforms/LoopVectorize/funcall.ll new file mode 100644 index 0000000..0fb929f --- /dev/null +++ b/test/Transforms/LoopVectorize/funcall.ll @@ -0,0 +1,32 @@ +; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +; Make sure we can vectorize loops with calls to math library functions. +; They might read the rounding mode but we are only vectorizing loops that +; contain a limited set of function calls and none of them sets the rounding
+; mode, so vectorizing them is safe.
+ +; CHECK: test +; CHECK: <2 x double> + +define void @test(double* %d, double %t) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds double* %d, i64 %indvars.iv + %0 = load double* %arrayidx, align 8 + %1 = tail call double @llvm.pow.f64(double %0, double %t) + store double %1, double* %arrayidx, align 8 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp ne i32 %lftr.wideiv, 128 + br i1 %exitcond, label %for.body, label %for.end + +for.end: + ret void +} + +declare double @llvm.pow.f64(double, double) |