about | summary | refs | log | tree | commit | diff | stats
path: root/test/Transforms/LoopVectorize/PowerPC
diff options
context:
space:
mode:
Diffstat (limited to 'test/Transforms/LoopVectorize/PowerPC')
-rw-r--r--test/Transforms/LoopVectorize/PowerPC/large-loop-rdx.ll73
-rw-r--r--test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll4
-rw-r--r--test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll16
3 files changed, 83 insertions, 10 deletions
diff --git a/test/Transforms/LoopVectorize/PowerPC/large-loop-rdx.ll b/test/Transforms/LoopVectorize/PowerPC/large-loop-rdx.ll
new file mode 100644
index 0000000..de6595f
--- /dev/null
+++ b/test/Transforms/LoopVectorize/PowerPC/large-loop-rdx.ll
@@ -0,0 +1,85 @@
+; RUN: opt < %s -loop-vectorize -S | FileCheck %s
+
+; The first fadd in the vectorized output must start a run of exactly twelve
+; consecutive fadd lines. FileCheck cannot combine the NEXT and NOT suffixes,
+; so "the line after the run is not an fadd" is expressed as three directives:
+; the next line contains '=', then '>' later on the same line, with no fadd
+; in between.
+
+; CHECK: fadd
+; CHECK-NEXT: fadd
+; CHECK-NEXT: fadd
+; CHECK-NEXT: fadd
+; CHECK-NEXT: fadd
+; CHECK-NEXT: fadd
+; CHECK-NEXT: fadd
+; CHECK-NEXT: fadd
+; CHECK-NEXT: fadd
+; CHECK-NEXT: fadd
+; CHECK-NEXT: fadd
+; CHECK-NEXT: fadd
+; CHECK-NEXT: =
+; CHECK-NOT: fadd
+; CHECK-SAME: >
+
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-ibm-linux-gnu"
+
+; For each of the %n outer iterations the loop body loads three
+; { float, float } pairs from %a, squares and adds both fields in float, and
+; accumulates the results into a double sum; the final sum is truncated to
+; float and stored to %r.
+define void @QLA_F3_r_veq_norm2_V(float* noalias nocapture %r, [3 x { float, float }]* noalias nocapture readonly %a, i32 signext %n) #0 {
+entry:
+ %cmp24 = icmp sgt i32 %n, 0
+ br i1 %cmp24, label %for.cond1.preheader.preheader, label %for.end13
+
+for.cond1.preheader.preheader: ; preds = %entry
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.cond1.preheader
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.cond1.preheader ], [ 0, %for.cond1.preheader.preheader ]
+ %sum.026 = phi double [ %add10.2, %for.cond1.preheader ], [ 0.000000e+00, %for.cond1.preheader.preheader ]
+ %arrayidx5.realp = getelementptr inbounds [3 x { float, float }], [3 x { float, float }]* %a, i64 %indvars.iv, i64 0, i32 0
+ %arrayidx5.real = load float, float* %arrayidx5.realp, align 8
+ %arrayidx5.imagp = getelementptr inbounds [3 x { float, float }], [3 x { float, float }]* %a, i64 %indvars.iv, i64 0, i32 1
+ %arrayidx5.imag = load float, float* %arrayidx5.imagp, align 8
+ %mul = fmul fast float %arrayidx5.real, %arrayidx5.real
+ %mul9 = fmul fast float %arrayidx5.imag, %arrayidx5.imag
+ %add = fadd fast float %mul9, %mul
+ %conv = fpext float %add to double
+ %add10 = fadd fast double %conv, %sum.026
+ %arrayidx5.realp.1 = getelementptr inbounds [3 x { float, float }], [3 x { float, float }]* %a, i64 %indvars.iv, i64 1, i32 0
+ %arrayidx5.real.1 = load float, float* %arrayidx5.realp.1, align 8
+ %arrayidx5.imagp.1 = getelementptr inbounds [3 x { float, float }], [3 x { float, float }]* %a, i64 %indvars.iv, i64 1, i32 1
+ %arrayidx5.imag.1 = load float, float* %arrayidx5.imagp.1, align 8
+ %mul.1 = fmul fast float %arrayidx5.real.1, %arrayidx5.real.1
+ %mul9.1 = fmul fast float %arrayidx5.imag.1, %arrayidx5.imag.1
+ %add.1 = fadd fast float %mul9.1, %mul.1
+ %conv.1 = fpext float %add.1 to double
+ %add10.1 = fadd fast double %conv.1, %add10
+ %arrayidx5.realp.2 = getelementptr inbounds [3 x { float, float }], [3 x { float, float }]* %a, i64 %indvars.iv, i64 2, i32 0
+ %arrayidx5.real.2 = load float, float* %arrayidx5.realp.2, align 8
+ %arrayidx5.imagp.2 = getelementptr inbounds [3 x { float, float }], [3 x { float, float }]* %a, i64 %indvars.iv, i64 2, i32 1
+ %arrayidx5.imag.2 = load float, float* %arrayidx5.imagp.2, align 8
+ %mul.2 = fmul fast float %arrayidx5.real.2, %arrayidx5.real.2
+ %mul9.2 = fmul fast float %arrayidx5.imag.2, %arrayidx5.imag.2
+ %add.2 = fadd fast float %mul9.2, %mul.2
+ %conv.2 = fpext float %add.2 to double
+ %add10.2 = fadd fast double %conv.2, %add10.1
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.cond.for.end13_crit_edge, label %for.cond1.preheader
+
+for.cond.for.end13_crit_edge: ; preds = %for.cond1.preheader
+ %add10.2.lcssa = phi double [ %add10.2, %for.cond1.preheader ]
+ %phitmp = fptrunc double %add10.2.lcssa to float
+ br label %for.end13
+
+for.end13: ; preds = %for.cond.for.end13_crit_edge, %entry
+ %sum.0.lcssa = phi float [ %phitmp, %for.cond.for.end13_crit_edge ], [ 0.000000e+00, %entry ]
+ store float %sum.0.lcssa, float* %r, align 4
+ ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll b/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll
index 25e7d24..2898af2 100644
--- a/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll
+++ b/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll
@@ -29,8 +29,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body, %for.body.lr.ph
%indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
%redx.05 = phi double [ 0.000000e+00, %for.body.lr.ph ], [ %add, %for.body ]
- %arrayidx = getelementptr inbounds double* %arr, i64 %indvars.iv
- %1 = load double* %arrayidx, align 8
+ %arrayidx = getelementptr inbounds double, double* %arr, i64 %indvars.iv
+ %1 = load double, double* %arrayidx, align 8
%add = fadd fast double %1, %redx.05
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv to i32
diff --git a/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll b/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll
index 6cd9c4d..65b3919 100644
--- a/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll
+++ b/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll
@@ -9,7 +9,7 @@ target triple = "powerpc64-unknown-linux-gnu"
define signext i32 @s173() #0 {
entry:
- %0 = load i32* @ntimes, align 4
+ %0 = load i32, i32* @ntimes, align 4
%cmp21 = icmp sgt i32 %0, 0
br i1 %cmp21, label %for.cond1.preheader, label %for.end12
@@ -19,13 +19,13 @@ for.cond1.preheader: ; preds = %for.end, %entry
for.body3: ; preds = %for.body3, %for.cond1.preheader
%indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
- %arrayidx = getelementptr inbounds %struct.GlobalData* @global_data, i64 0, i32 0, i64 %indvars.iv
- %1 = load float* %arrayidx, align 4
- %arrayidx5 = getelementptr inbounds %struct.GlobalData* @global_data, i64 0, i32 3, i64 %indvars.iv
- %2 = load float* %arrayidx5, align 4
+ %arrayidx = getelementptr inbounds %struct.GlobalData, %struct.GlobalData* @global_data, i64 0, i32 0, i64 %indvars.iv
+ %1 = load float, float* %arrayidx, align 4
+ %arrayidx5 = getelementptr inbounds %struct.GlobalData, %struct.GlobalData* @global_data, i64 0, i32 3, i64 %indvars.iv
+ %2 = load float, float* %arrayidx5, align 4
%add = fadd float %1, %2
%3 = add nsw i64 %indvars.iv, 16000
- %arrayidx8 = getelementptr inbounds %struct.GlobalData* @global_data, i64 0, i32 0, i64 %3
+ %arrayidx8 = getelementptr inbounds %struct.GlobalData, %struct.GlobalData* @global_data, i64 0, i32 0, i64 %3
store float %add, float* %arrayidx8, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 16000
@@ -33,7 +33,7 @@ for.body3: ; preds = %for.body3, %for.con
for.end: ; preds = %for.body3
%inc11 = add nsw i32 %nl.022, 1
- %4 = load i32* @ntimes, align 4
+ %4 = load i32, i32* @ntimes, align 4
%mul = mul nsw i32 %4, 10
%cmp = icmp slt i32 %inc11, %mul
br i1 %cmp, label %for.cond1.preheader, label %for.end12
@@ -42,7 +42,7 @@ for.end12: ; preds = %for.end, %entry
ret i32 0
; CHECK-LABEL: @s173
-; CHECK: load <4 x float>*
+; CHECK: load <4 x float>, <4 x float>*
; CHECK: add i64 %index, 16000
; CHECK: ret i32 0
}