about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Hal Finkel <hfinkel@anl.gov> 2012-12-25 23:21:29 +0000
committer Hal Finkel <hfinkel@anl.gov> 2012-12-25 23:21:29 +0000
commit 1d59f5fa53cac23b6debc1d7214451c65b0399a7 (patch)
tree e6fd24edccb715577c291c34bb801f4c63dffa44
parent 64a7a24edf719bb6ffacc030c23f4cd99312f3fb (diff)
download external_llvm-1d59f5fa53cac23b6debc1d7214451c65b0399a7.zip
external_llvm-1d59f5fa53cac23b6debc1d7214451c65b0399a7.tar.gz
external_llvm-1d59f5fa53cac23b6debc1d7214451c65b0399a7.tar.bz2
LoopVectorize: Enable vectorization of the fmuladd intrinsic
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171076 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/Transforms/Vectorize/LoopVectorize.cpp 1
-rw-r--r-- test/Transforms/LoopVectorize/intrinsic.ll 60
2 files changed, 61 insertions, 0 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index d571903..b8b934a 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -711,6 +711,7 @@ isTriviallyVectorizableIntrinsic(Instruction *Inst) {
case Intrinsic::nearbyint:
case Intrinsic::pow:
case Intrinsic::fma:
+ case Intrinsic::fmuladd:
return true;
default:
return false;
diff --git a/test/Transforms/LoopVectorize/intrinsic.ll b/test/Transforms/LoopVectorize/intrinsic.ll
index 54e3c69..e032041 100644
--- a/test/Transforms/LoopVectorize/intrinsic.ll
+++ b/test/Transforms/LoopVectorize/intrinsic.ll
@@ -788,6 +788,66 @@ for.end: ; preds = %for.body, %entry
declare double @llvm.fma.f64(double, double, double) nounwind readnone
+;CHECK: @fmuladd_f32
+;CHECK: llvm.fmuladd.v4f32
+;CHECK: ret void
+define void @fmuladd_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z, float* noalias %w) nounwind uwtable { ; scalar loop: x[i] = llvm.fmuladd(y[i], z[i], w[i]) for i = 0 .. n-1
+entry:
+ %cmp12 = icmp sgt i32 %n, 0 ; enter the loop only when n > 0
+ br i1 %cmp12, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] ; i64 loop counter i, 0 on first entry
+ %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+ %0 = load float* %arrayidx, align 4, !tbaa !0 ; %0 = y[i]
+ %arrayidx2 = getelementptr inbounds float* %w, i64 %indvars.iv
+ %1 = load float* %arrayidx2, align 4, !tbaa !0 ; %1 = w[i]
+ %arrayidx4 = getelementptr inbounds float* %z, i64 %indvars.iv
+ %2 = load float* %arrayidx4, align 4, !tbaa !0 ; %2 = z[i]
+ %3 = tail call float @llvm.fmuladd.f32(float %0, float %2, float %1) ; operand order is (y[i], z[i], w[i])
+ %arrayidx6 = getelementptr inbounds float* %x, i64 %indvars.iv
+ store float %3, float* %arrayidx6, align 4, !tbaa !0 ; x[i] = %3
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; narrow the counter so it can be compared with the i32 bound %n
+ %exitcond = icmp eq i32 %lftr.wideiv, %n ; leave the loop once i + 1 == n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone
+
+;CHECK: @fmuladd_f64
+;CHECK: llvm.fmuladd.v4f64
+;CHECK: ret void
+define void @fmuladd_f64(i32 %n, double* noalias %y, double* noalias %x, double* noalias %z, double* noalias %w) nounwind uwtable { ; scalar loop: x[i] = llvm.fmuladd(y[i], z[i], w[i]) for i = 0 .. n-1, on doubles
+entry:
+ %cmp12 = icmp sgt i32 %n, 0 ; enter the loop only when n > 0
+ br i1 %cmp12, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] ; i64 loop counter i, 0 on first entry
+ %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+ %0 = load double* %arrayidx, align 8, !tbaa !3 ; %0 = y[i]
+ %arrayidx2 = getelementptr inbounds double* %w, i64 %indvars.iv
+ %1 = load double* %arrayidx2, align 8, !tbaa !3 ; %1 = w[i]
+ %arrayidx4 = getelementptr inbounds double* %z, i64 %indvars.iv
+ %2 = load double* %arrayidx4, align 8, !tbaa !3 ; %2 = z[i]
+ %3 = tail call double @llvm.fmuladd.f64(double %0, double %2, double %1) ; operand order is (y[i], z[i], w[i])
+ %arrayidx6 = getelementptr inbounds double* %x, i64 %indvars.iv
+ store double %3, double* %arrayidx6, align 8, !tbaa !3 ; x[i] = %3
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; narrow the counter so it can be compared with the i32 bound %n
+ %exitcond = icmp eq i32 %lftr.wideiv, %n ; leave the loop once i + 1 == n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare double @llvm.fmuladd.f64(double, double, double) nounwind readnone
+
;CHECK: @pow_f32
;CHECK: llvm.pow.v4f32
;CHECK: ret void