aboutsummaryrefslogtreecommitdiffstats
path: root/test/Transforms/LoopVectorize/X86
diff options
context:
space:
mode:
Diffstat (limited to 'test/Transforms/LoopVectorize/X86')
-rw-r--r--test/Transforms/LoopVectorize/X86/already-vectorized.ll7
-rw-r--r--test/Transforms/LoopVectorize/X86/assume.ll20
-rw-r--r--test/Transforms/LoopVectorize/X86/avx1.ll8
-rw-r--r--test/Transforms/LoopVectorize/X86/avx512.ll2
-rw-r--r--test/Transforms/LoopVectorize/X86/constant-vector-operand.ll6
-rw-r--r--test/Transforms/LoopVectorize/X86/conversion-cost.ll4
-rw-r--r--test/Transforms/LoopVectorize/X86/cost-model.ll14
-rw-r--r--test/Transforms/LoopVectorize/X86/fp32_to_uint32-cost-model.ll6
-rw-r--r--test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll8
-rw-r--r--test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll6
-rw-r--r--test/Transforms/LoopVectorize/X86/gather-cost.ll28
-rw-r--r--test/Transforms/LoopVectorize/X86/gcc-examples.ll16
-rw-r--r--test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll10
-rw-r--r--test/Transforms/LoopVectorize/X86/masked_load_store.ll198
-rw-r--r--test/Transforms/LoopVectorize/X86/metadata-enable.ll24
-rw-r--r--test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll4
-rw-r--r--test/Transforms/LoopVectorize/X86/no-vector.ll4
-rw-r--r--test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll22
-rw-r--r--test/Transforms/LoopVectorize/X86/parallel-loops.ll42
-rw-r--r--test/Transforms/LoopVectorize/X86/powof2div.ll8
-rw-r--r--test/Transforms/LoopVectorize/X86/reduction-crash.ll8
-rw-r--r--test/Transforms/LoopVectorize/X86/small-size.ll46
-rw-r--r--test/Transforms/LoopVectorize/X86/struct-store.ll2
-rw-r--r--test/Transforms/LoopVectorize/X86/tripcount.ll4
-rw-r--r--test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll6
-rw-r--r--test/Transforms/LoopVectorize/X86/unroll-pm.ll4
-rw-r--r--test/Transforms/LoopVectorize/X86/unroll-small-loops.ll18
-rw-r--r--test/Transforms/LoopVectorize/X86/unroll_selection.ll8
-rw-r--r--test/Transforms/LoopVectorize/X86/veclib-calls.ll182
-rw-r--r--test/Transforms/LoopVectorize/X86/vect.omp.force.ll40
-rw-r--r--test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll16
-rw-r--r--test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll20
-rw-r--r--test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll20
-rw-r--r--test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll40
-rw-r--r--test/Transforms/LoopVectorize/X86/vectorization-remarks.ll26
-rw-r--r--test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll2
36 files changed, 527 insertions, 352 deletions
diff --git a/test/Transforms/LoopVectorize/X86/already-vectorized.ll b/test/Transforms/LoopVectorize/X86/already-vectorized.ll
index 29d74a0..c400c76 100644
--- a/test/Transforms/LoopVectorize/X86/already-vectorized.ll
+++ b/test/Transforms/LoopVectorize/X86/already-vectorized.ll
@@ -21,8 +21,8 @@ for.body: ; preds = %for.body, %entry
; CHECK: LV: We can vectorize this loop!
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%red.05 = phi i32 [ 0, %entry ], [ %add, %for.body ]
- %arrayidx = getelementptr inbounds [255 x i32]* @a, i64 0, i64 %indvars.iv
- %0 = load i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds [255 x i32], [255 x i32]* @a, i64 0, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %0, %red.05
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 255
@@ -42,5 +42,6 @@ for.end: ; preds = %for.body
; CHECK: [[vect]] = distinct !{[[vect]], [[width:![0-9]+]], [[unroll:![0-9]+]]}
; CHECK: [[width]] = !{!"llvm.loop.vectorize.width", i32 1}
; CHECK: [[unroll]] = !{!"llvm.loop.interleave.count", i32 1}
-; CHECK: [[scalar]] = distinct !{[[scalar]], [[width]], [[unroll]]}
+; CHECK: [[scalar]] = distinct !{[[scalar]], [[runtime_unroll:![0-9]+]], [[width]], [[unroll]]}
+; CHECK: [[runtime_unroll]] = !{!"llvm.loop.unroll.runtime.disable"}
diff --git a/test/Transforms/LoopVectorize/X86/assume.ll b/test/Transforms/LoopVectorize/X86/assume.ll
index a94e24d..4fd378d 100644
--- a/test/Transforms/LoopVectorize/X86/assume.ll
+++ b/test/Transforms/LoopVectorize/X86/assume.ll
@@ -22,12 +22,12 @@ entry:
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv
- %0 = load float* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
+ %0 = load float, float* %arrayidx, align 4
%cmp1 = fcmp ogt float %0, 1.000000e+02
tail call void @llvm.assume(i1 %cmp1)
%add = fadd float %0, 1.000000e+00
- %arrayidx5 = getelementptr inbounds float* %a, i64 %indvars.iv
+ %arrayidx5 = getelementptr inbounds float, float* %a, i64 %indvars.iv
store float %add, float* %arrayidx5, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv, 1599
@@ -48,13 +48,13 @@ attributes #1 = { nounwind }
; Function Attrs: nounwind uwtable
define void @test2(%struct.data* nocapture readonly %d) #0 {
entry:
- %b = getelementptr inbounds %struct.data* %d, i64 0, i32 1
- %0 = load float** %b, align 8
+ %b = getelementptr inbounds %struct.data, %struct.data* %d, i64 0, i32 1
+ %0 = load float*, float** %b, align 8
%ptrint = ptrtoint float* %0 to i64
%maskedptr = and i64 %ptrint, 31
%maskcond = icmp eq i64 %maskedptr, 0
- %a = getelementptr inbounds %struct.data* %d, i64 0, i32 0
- %1 = load float** %a, align 8
+ %a = getelementptr inbounds %struct.data, %struct.data* %d, i64 0, i32 0
+ %1 = load float*, float** %a, align 8
%ptrint2 = ptrtoint float* %1 to i64
%maskedptr3 = and i64 %ptrint2, 31
%maskcond4 = icmp eq i64 %maskedptr3, 0
@@ -84,11 +84,11 @@ entry:
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
tail call void @llvm.assume(i1 %maskcond)
- %arrayidx = getelementptr inbounds float* %0, i64 %indvars.iv
- %2 = load float* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds float, float* %0, i64 %indvars.iv
+ %2 = load float, float* %arrayidx, align 4
%add = fadd float %2, 1.000000e+00
tail call void @llvm.assume(i1 %maskcond4)
- %arrayidx5 = getelementptr inbounds float* %1, i64 %indvars.iv
+ %arrayidx5 = getelementptr inbounds float, float* %1, i64 %indvars.iv
store float %add, float* %arrayidx5, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv, 1599
diff --git a/test/Transforms/LoopVectorize/X86/avx1.ll b/test/Transforms/LoopVectorize/X86/avx1.ll
index 01c9125..37977c4 100644
--- a/test/Transforms/LoopVectorize/X86/avx1.ll
+++ b/test/Transforms/LoopVectorize/X86/avx1.ll
@@ -12,8 +12,8 @@ define i32 @read_mod_write_single_ptr(float* nocapture %a, i32 %n) nounwind uwta
.lr.ph: ; preds = %0, %.lr.ph
%indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
- %2 = getelementptr inbounds float* %a, i64 %indvars.iv
- %3 = load float* %2, align 4
+ %2 = getelementptr inbounds float, float* %a, i64 %indvars.iv
+ %3 = load float, float* %2, align 4
%4 = fmul float %3, 3.000000e+00
store float %4, float* %2, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
@@ -35,8 +35,8 @@ define i32 @read_mod_i64(i64* nocapture %a, i32 %n) nounwind uwtable ssp {
.lr.ph: ; preds = %0, %.lr.ph
%indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
- %2 = getelementptr inbounds i64* %a, i64 %indvars.iv
- %3 = load i64* %2, align 4
+ %2 = getelementptr inbounds i64, i64* %a, i64 %indvars.iv
+ %3 = load i64, i64* %2, align 4
%4 = add i64 %3, 3
store i64 %4, i64* %2, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
diff --git a/test/Transforms/LoopVectorize/X86/avx512.ll b/test/Transforms/LoopVectorize/X86/avx512.ll
index a220866..754e859 100644
--- a/test/Transforms/LoopVectorize/X86/avx512.ll
+++ b/test/Transforms/LoopVectorize/X86/avx512.ll
@@ -20,7 +20,7 @@ for.body.preheader: ; preds = %entry
for.body: ; preds = %for.body.preheader, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
- %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
store i32 %n, i32* %arrayidx, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
diff --git a/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll b/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll
index f4c07b4..d75b1d9 100644
--- a/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll
+++ b/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll
@@ -15,10 +15,10 @@ entry:
for.body:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %arrayidx = getelementptr inbounds [1024 x i32]* @B, i64 0, i64 %indvars.iv
- %0 = load i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
%shl = ashr i32 %0, 3
- %arrayidx2 = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+ %arrayidx2 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
store i32 %shl, i32* %arrayidx2, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
diff --git a/test/Transforms/LoopVectorize/X86/conversion-cost.ll b/test/Transforms/LoopVectorize/X86/conversion-cost.ll
index 0af562d..eb2a2a5 100644
--- a/test/Transforms/LoopVectorize/X86/conversion-cost.ll
+++ b/test/Transforms/LoopVectorize/X86/conversion-cost.ll
@@ -13,7 +13,7 @@ define i32 @conversion_cost1(i32 %n, i8* nocapture %A, float* nocapture %B) noun
.lr.ph: ; preds = %0, %.lr.ph
%indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 3, %0 ]
%2 = trunc i64 %indvars.iv to i8
- %3 = getelementptr inbounds i8* %A, i64 %indvars.iv
+ %3 = getelementptr inbounds i8, i8* %A, i64 %indvars.iv
store i8 %2, i8* %3, align 1
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -35,7 +35,7 @@ define i32 @conversion_cost2(i32 %n, i8* nocapture %A, float* nocapture %B) noun
%indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 9, %0 ]
%add = add nsw i64 %indvars.iv, 3
%tofp = sitofp i64 %add to float
- %gep = getelementptr inbounds float* %B, i64 %indvars.iv
+ %gep = getelementptr inbounds float, float* %B, i64 %indvars.iv
store float %tofp, float* %gep, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
diff --git a/test/Transforms/LoopVectorize/X86/cost-model.ll b/test/Transforms/LoopVectorize/X86/cost-model.ll
index 98718e1..0136571 100644
--- a/test/Transforms/LoopVectorize/X86/cost-model.ll
+++ b/test/Transforms/LoopVectorize/X86/cost-model.ll
@@ -21,15 +21,15 @@ entry:
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%0 = shl nsw i64 %indvars.iv, 1
- %arrayidx = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %0
- %1 = load i32* %arrayidx, align 8
+ %arrayidx = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %0
+ %1 = load i32, i32* %arrayidx, align 8
%idxprom1 = sext i32 %1 to i64
- %arrayidx2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %idxprom1
- %2 = load i32* %arrayidx2, align 4
- %arrayidx4 = getelementptr inbounds [2048 x i32]* @d, i64 0, i64 %indvars.iv
- %3 = load i32* %arrayidx4, align 4
+ %arrayidx2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %idxprom1
+ %2 = load i32, i32* %arrayidx2, align 4
+ %arrayidx4 = getelementptr inbounds [2048 x i32], [2048 x i32]* @d, i64 0, i64 %indvars.iv
+ %3 = load i32, i32* %arrayidx4, align 4
%idxprom5 = sext i32 %3 to i64
- %arrayidx6 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %idxprom5
+ %arrayidx6 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %idxprom5
store i32 %2, i32* %arrayidx6, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
diff --git a/test/Transforms/LoopVectorize/X86/fp32_to_uint32-cost-model.ll b/test/Transforms/LoopVectorize/X86/fp32_to_uint32-cost-model.ll
index 529ed88..4a56d6b 100644
--- a/test/Transforms/LoopVectorize/X86/fp32_to_uint32-cost-model.ll
+++ b/test/Transforms/LoopVectorize/X86/fp32_to_uint32-cost-model.ll
@@ -20,10 +20,10 @@ for.body.preheader: ; preds = %entry
for.body: ; preds = %for.body.preheader, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
- %arrayidx = getelementptr inbounds [10000 x float]* @float_array, i64 0, i64 %indvars.iv
- %1 = load float* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds [10000 x float], [10000 x float]* @float_array, i64 0, i64 %indvars.iv
+ %1 = load float, float* %arrayidx, align 4
%conv = fptoui float %1 to i32
- %arrayidx2 = getelementptr inbounds [10000 x i32]* @unsigned_array, i64 0, i64 %indvars.iv
+ %arrayidx2 = getelementptr inbounds [10000 x i32], [10000 x i32]* @unsigned_array, i64 0, i64 %indvars.iv
store i32 %conv, i32* %arrayidx2, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
diff --git a/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll b/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll
index ef3e3be..c066afc 100644
--- a/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll
+++ b/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll
@@ -13,7 +13,7 @@ target triple = "x86_64-apple-macosx"
define void @convert() {
entry:
- %0 = load i32* @n, align 4
+ %0 = load i32, i32* @n, align 4
%cmp4 = icmp eq i32 %0, 0
br i1 %cmp4, label %for.end, label %for.body.preheader
@@ -22,10 +22,10 @@ for.body.preheader: ; preds = %entry
for.body: ; preds = %for.body.preheader, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
- %arrayidx = getelementptr inbounds [10000 x double]* @double_array, i64 0, i64 %indvars.iv
- %1 = load double* %arrayidx, align 8
+ %arrayidx = getelementptr inbounds [10000 x double], [10000 x double]* @double_array, i64 0, i64 %indvars.iv
+ %1 = load double, double* %arrayidx, align 8
%conv = fptoui double %1 to i32
- %arrayidx2 = getelementptr inbounds [10000 x i32]* @unsigned_array, i64 0, i64 %indvars.iv
+ %arrayidx2 = getelementptr inbounds [10000 x i32], [10000 x i32]* @unsigned_array, i64 0, i64 %indvars.iv
store i32 %conv, i32* %arrayidx2, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%2 = trunc i64 %indvars.iv.next to i32
diff --git a/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll b/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll
index 23e6227..b3a0710 100644
--- a/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll
+++ b/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll
@@ -11,10 +11,10 @@ entry:
br label %for.body
for.body:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv
- %tmp = load float* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
+ %tmp = load float, float* %arrayidx, align 4
%conv = fptosi float %tmp to i8
- %arrayidx2 = getelementptr inbounds i8* %a, i64 %indvars.iv
+ %arrayidx2 = getelementptr inbounds i8, i8* %a, i64 %indvars.iv
store i8 %conv, i8* %arrayidx2, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 256
diff --git a/test/Transforms/LoopVectorize/X86/gather-cost.ll b/test/Transforms/LoopVectorize/X86/gather-cost.ll
index 09363d6..f0e6c8f 100644
--- a/test/Transforms/LoopVectorize/X86/gather-cost.ll
+++ b/test/Transforms/LoopVectorize/X86/gather-cost.ll
@@ -31,32 +31,32 @@ for.body:
%b.054 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add30, %for.body ]
%add = add i64 %v.055, %offset
%mul = mul i64 %add, 3
- %arrayidx = getelementptr inbounds [1536 x float]* @src_data, i64 0, i64 %mul
- %0 = load float* %arrayidx, align 4
- %arrayidx2 = getelementptr inbounds [512 x float]* @kernel, i64 0, i64 %v.055
- %1 = load float* %arrayidx2, align 4
+ %arrayidx = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i64 0, i64 %mul
+ %0 = load float, float* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds [512 x float], [512 x float]* @kernel, i64 0, i64 %v.055
+ %1 = load float, float* %arrayidx2, align 4
%mul3 = fmul fast float %0, %1
- %arrayidx4 = getelementptr inbounds [512 x float]* @kernel2, i64 0, i64 %v.055
- %2 = load float* %arrayidx4, align 4
+ %arrayidx4 = getelementptr inbounds [512 x float], [512 x float]* @kernel2, i64 0, i64 %v.055
+ %2 = load float, float* %arrayidx4, align 4
%mul5 = fmul fast float %mul3, %2
- %arrayidx6 = getelementptr inbounds [512 x float]* @kernel3, i64 0, i64 %v.055
- %3 = load float* %arrayidx6, align 4
+ %arrayidx6 = getelementptr inbounds [512 x float], [512 x float]* @kernel3, i64 0, i64 %v.055
+ %3 = load float, float* %arrayidx6, align 4
%mul7 = fmul fast float %mul5, %3
- %arrayidx8 = getelementptr inbounds [512 x float]* @kernel4, i64 0, i64 %v.055
- %4 = load float* %arrayidx8, align 4
+ %arrayidx8 = getelementptr inbounds [512 x float], [512 x float]* @kernel4, i64 0, i64 %v.055
+ %4 = load float, float* %arrayidx8, align 4
%mul9 = fmul fast float %mul7, %4
%add10 = fadd fast float %r.057, %mul9
%arrayidx.sum = add i64 %mul, 1
- %arrayidx11 = getelementptr inbounds [1536 x float]* @src_data, i64 0, i64 %arrayidx.sum
- %5 = load float* %arrayidx11, align 4
+ %arrayidx11 = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i64 0, i64 %arrayidx.sum
+ %5 = load float, float* %arrayidx11, align 4
%mul13 = fmul fast float %1, %5
%mul15 = fmul fast float %2, %mul13
%mul17 = fmul fast float %3, %mul15
%mul19 = fmul fast float %4, %mul17
%add20 = fadd fast float %g.056, %mul19
%arrayidx.sum52 = add i64 %mul, 2
- %arrayidx21 = getelementptr inbounds [1536 x float]* @src_data, i64 0, i64 %arrayidx.sum52
- %6 = load float* %arrayidx21, align 4
+ %arrayidx21 = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i64 0, i64 %arrayidx.sum52
+ %6 = load float, float* %arrayidx21, align 4
%mul23 = fmul fast float %1, %6
%mul25 = fmul fast float %2, %mul23
%mul27 = fmul fast float %3, %mul25
diff --git a/test/Transforms/LoopVectorize/X86/gcc-examples.ll b/test/Transforms/LoopVectorize/X86/gcc-examples.ll
index 05403cd..c581f4b 100644
--- a/test/Transforms/LoopVectorize/X86/gcc-examples.ll
+++ b/test/Transforms/LoopVectorize/X86/gcc-examples.ll
@@ -28,12 +28,12 @@ define void @example1() nounwind uwtable ssp {
; <label>:1 ; preds = %1, %0
%indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
- %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
- %3 = load i32* %2, align 4
- %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv
- %5 = load i32* %4, align 4
+ %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv
+ %3 = load i32, i32* %2, align 4
+ %4 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %indvars.iv
+ %5 = load i32, i32* %4, align 4
%6 = add nsw i32 %5, %3
- %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+ %7 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
store i32 %6, i32* %7, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -61,10 +61,10 @@ define void @example10b(i16* noalias nocapture %sa, i16* noalias nocapture %sb,
; <label>:1 ; preds = %1, %0
%indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
- %2 = getelementptr inbounds i16* %sb, i64 %indvars.iv
- %3 = load i16* %2, align 2
+ %2 = getelementptr inbounds i16, i16* %sb, i64 %indvars.iv
+ %3 = load i16, i16* %2, align 2
%4 = sext i16 %3 to i32
- %5 = getelementptr inbounds i32* %ia, i64 %indvars.iv
+ %5 = getelementptr inbounds i32, i32* %ia, i64 %indvars.iv
store i32 %4, i32* %5, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
diff --git a/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll b/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll
index 46efaf0..cbba530 100644
--- a/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll
+++ b/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll
@@ -14,8 +14,8 @@ entry:
br i1 %cmp27, label %for.body3.lr.ph.us, label %for.end15
for.end.us: ; preds = %for.body3.us
- %arrayidx9.us = getelementptr inbounds i32* %b, i64 %indvars.iv33
- %0 = load i32* %arrayidx9.us, align 4, !llvm.mem.parallel_loop_access !3
+ %arrayidx9.us = getelementptr inbounds i32, i32* %b, i64 %indvars.iv33
+ %0 = load i32, i32* %arrayidx9.us, align 4, !llvm.mem.parallel_loop_access !3
%add10.us = add nsw i32 %0, 3
store i32 %add10.us, i32* %arrayidx9.us, align 4, !llvm.mem.parallel_loop_access !3
%indvars.iv.next34 = add i64 %indvars.iv33, 1
@@ -28,8 +28,8 @@ for.body3.us: ; preds = %for.body3.us, %for.
%1 = trunc i64 %indvars.iv29 to i32
%add4.us = add i32 %add.us, %1
%idxprom.us = sext i32 %add4.us to i64
- %arrayidx.us = getelementptr inbounds i32* %a, i64 %idxprom.us
- %2 = load i32* %arrayidx.us, align 4, !llvm.mem.parallel_loop_access !3
+ %arrayidx.us = getelementptr inbounds i32, i32* %a, i64 %idxprom.us
+ %2 = load i32, i32* %arrayidx.us, align 4, !llvm.mem.parallel_loop_access !3
%add5.us = add nsw i32 %2, 1
store i32 %add5.us, i32* %arrayidx7.us, align 4, !llvm.mem.parallel_loop_access !3
%indvars.iv.next30 = add i64 %indvars.iv29, 1
@@ -41,7 +41,7 @@ for.body3.lr.ph.us: ; preds = %for.end.us, %entry
%indvars.iv33 = phi i64 [ %indvars.iv.next34, %for.end.us ], [ 0, %entry ]
%3 = trunc i64 %indvars.iv33 to i32
%add.us = add i32 %3, %k
- %arrayidx7.us = getelementptr inbounds i32* %a, i64 %indvars.iv33
+ %arrayidx7.us = getelementptr inbounds i32, i32* %a, i64 %indvars.iv33
br label %for.body3.us
for.end15: ; preds = %for.end.us, %entry
diff --git a/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/test/Transforms/LoopVectorize/X86/masked_load_store.ll
index 9e2de80..8c375cc 100644
--- a/test/Transforms/LoopVectorize/X86/masked_load_store.ll
+++ b/test/Transforms/LoopVectorize/X86/masked_load_store.ll
@@ -46,35 +46,35 @@ entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
- %0 = load i32* %i, align 4
+ %0 = load i32, i32* %i, align 4
%cmp = icmp slt i32 %0, 10000
br i1 %cmp, label %for.body, label %for.end
for.body: ; preds = %for.cond
- %1 = load i32* %i, align 4
+ %1 = load i32, i32* %i, align 4
%idxprom = sext i32 %1 to i64
- %2 = load i32** %trigger.addr, align 8
- %arrayidx = getelementptr inbounds i32* %2, i64 %idxprom
- %3 = load i32* %arrayidx, align 4
+ %2 = load i32*, i32** %trigger.addr, align 8
+ %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom
+ %3 = load i32, i32* %arrayidx, align 4
%cmp1 = icmp slt i32 %3, 100
br i1 %cmp1, label %if.then, label %if.end
if.then: ; preds = %for.body
- %4 = load i32* %i, align 4
+ %4 = load i32, i32* %i, align 4
%idxprom2 = sext i32 %4 to i64
- %5 = load i32** %B.addr, align 8
- %arrayidx3 = getelementptr inbounds i32* %5, i64 %idxprom2
- %6 = load i32* %arrayidx3, align 4
- %7 = load i32* %i, align 4
+ %5 = load i32*, i32** %B.addr, align 8
+ %arrayidx3 = getelementptr inbounds i32, i32* %5, i64 %idxprom2
+ %6 = load i32, i32* %arrayidx3, align 4
+ %7 = load i32, i32* %i, align 4
%idxprom4 = sext i32 %7 to i64
- %8 = load i32** %trigger.addr, align 8
- %arrayidx5 = getelementptr inbounds i32* %8, i64 %idxprom4
- %9 = load i32* %arrayidx5, align 4
+ %8 = load i32*, i32** %trigger.addr, align 8
+ %arrayidx5 = getelementptr inbounds i32, i32* %8, i64 %idxprom4
+ %9 = load i32, i32* %arrayidx5, align 4
%add = add nsw i32 %6, %9
- %10 = load i32* %i, align 4
+ %10 = load i32, i32* %i, align 4
%idxprom6 = sext i32 %10 to i64
- %11 = load i32** %A.addr, align 8
- %arrayidx7 = getelementptr inbounds i32* %11, i64 %idxprom6
+ %11 = load i32*, i32** %A.addr, align 8
+ %arrayidx7 = getelementptr inbounds i32, i32* %11, i64 %idxprom6
store i32 %add, i32* %arrayidx7, align 4
br label %if.end
@@ -82,7 +82,7 @@ if.end: ; preds = %if.then, %for.body
br label %for.inc
for.inc: ; preds = %if.end
- %12 = load i32* %i, align 4
+ %12 = load i32, i32* %i, align 4
%inc = add nsw i32 %12, 1
store i32 %inc, i32* %i, align 4
br label %for.cond
@@ -130,36 +130,36 @@ entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
- %0 = load i32* %i, align 4
+ %0 = load i32, i32* %i, align 4
%cmp = icmp slt i32 %0, 10000
br i1 %cmp, label %for.body, label %for.end
for.body: ; preds = %for.cond
- %1 = load i32* %i, align 4
+ %1 = load i32, i32* %i, align 4
%idxprom = sext i32 %1 to i64
- %2 = load i32** %trigger.addr, align 8
- %arrayidx = getelementptr inbounds i32* %2, i64 %idxprom
- %3 = load i32* %arrayidx, align 4
+ %2 = load i32*, i32** %trigger.addr, align 8
+ %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom
+ %3 = load i32, i32* %arrayidx, align 4
%cmp1 = icmp slt i32 %3, 100
br i1 %cmp1, label %if.then, label %if.end
if.then: ; preds = %for.body
- %4 = load i32* %i, align 4
+ %4 = load i32, i32* %i, align 4
%idxprom2 = sext i32 %4 to i64
- %5 = load float** %B.addr, align 8
- %arrayidx3 = getelementptr inbounds float* %5, i64 %idxprom2
- %6 = load float* %arrayidx3, align 4
- %7 = load i32* %i, align 4
+ %5 = load float*, float** %B.addr, align 8
+ %arrayidx3 = getelementptr inbounds float, float* %5, i64 %idxprom2
+ %6 = load float, float* %arrayidx3, align 4
+ %7 = load i32, i32* %i, align 4
%idxprom4 = sext i32 %7 to i64
- %8 = load i32** %trigger.addr, align 8
- %arrayidx5 = getelementptr inbounds i32* %8, i64 %idxprom4
- %9 = load i32* %arrayidx5, align 4
+ %8 = load i32*, i32** %trigger.addr, align 8
+ %arrayidx5 = getelementptr inbounds i32, i32* %8, i64 %idxprom4
+ %9 = load i32, i32* %arrayidx5, align 4
%conv = sitofp i32 %9 to float
%add = fadd float %6, %conv
- %10 = load i32* %i, align 4
+ %10 = load i32, i32* %i, align 4
%idxprom6 = sext i32 %10 to i64
- %11 = load float** %A.addr, align 8
- %arrayidx7 = getelementptr inbounds float* %11, i64 %idxprom6
+ %11 = load float*, float** %A.addr, align 8
+ %arrayidx7 = getelementptr inbounds float, float* %11, i64 %idxprom6
store float %add, float* %arrayidx7, align 4
br label %if.end
@@ -167,7 +167,7 @@ if.end: ; preds = %if.then, %for.body
br label %for.inc
for.inc: ; preds = %if.end
- %12 = load i32* %i, align 4
+ %12 = load i32, i32* %i, align 4
%inc = add nsw i32 %12, 1
store i32 %inc, i32* %i, align 4
br label %for.cond
@@ -218,36 +218,36 @@ entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
- %0 = load i32* %i, align 4
+ %0 = load i32, i32* %i, align 4
%cmp = icmp slt i32 %0, 10000
br i1 %cmp, label %for.body, label %for.end
for.body: ; preds = %for.cond
- %1 = load i32* %i, align 4
+ %1 = load i32, i32* %i, align 4
%idxprom = sext i32 %1 to i64
- %2 = load i32** %trigger.addr, align 8
- %arrayidx = getelementptr inbounds i32* %2, i64 %idxprom
- %3 = load i32* %arrayidx, align 4
+ %2 = load i32*, i32** %trigger.addr, align 8
+ %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom
+ %3 = load i32, i32* %arrayidx, align 4
%cmp1 = icmp slt i32 %3, 100
br i1 %cmp1, label %if.then, label %if.end
if.then: ; preds = %for.body
- %4 = load i32* %i, align 4
+ %4 = load i32, i32* %i, align 4
%idxprom2 = sext i32 %4 to i64
- %5 = load double** %B.addr, align 8
- %arrayidx3 = getelementptr inbounds double* %5, i64 %idxprom2
- %6 = load double* %arrayidx3, align 8
- %7 = load i32* %i, align 4
+ %5 = load double*, double** %B.addr, align 8
+ %arrayidx3 = getelementptr inbounds double, double* %5, i64 %idxprom2
+ %6 = load double, double* %arrayidx3, align 8
+ %7 = load i32, i32* %i, align 4
%idxprom4 = sext i32 %7 to i64
- %8 = load i32** %trigger.addr, align 8
- %arrayidx5 = getelementptr inbounds i32* %8, i64 %idxprom4
- %9 = load i32* %arrayidx5, align 4
+ %8 = load i32*, i32** %trigger.addr, align 8
+ %arrayidx5 = getelementptr inbounds i32, i32* %8, i64 %idxprom4
+ %9 = load i32, i32* %arrayidx5, align 4
%conv = sitofp i32 %9 to double
%add = fadd double %6, %conv
- %10 = load i32* %i, align 4
+ %10 = load i32, i32* %i, align 4
%idxprom6 = sext i32 %10 to i64
- %11 = load double** %A.addr, align 8
- %arrayidx7 = getelementptr inbounds double* %11, i64 %idxprom6
+ %11 = load double*, double** %A.addr, align 8
+ %arrayidx7 = getelementptr inbounds double, double* %11, i64 %idxprom6
store double %add, double* %arrayidx7, align 8
br label %if.end
@@ -255,7 +255,7 @@ if.end: ; preds = %if.then, %for.body
br label %for.inc
for.inc: ; preds = %if.end
- %12 = load i32* %i, align 4
+ %12 = load i32, i32* %i, align 4
%inc = add nsw i32 %12, 1
store i32 %inc, i32* %i, align 4
br label %for.cond
@@ -297,37 +297,37 @@ entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
- %0 = load i32* %i, align 4
+ %0 = load i32, i32* %i, align 4
%cmp = icmp slt i32 %0, 10000
br i1 %cmp, label %for.body, label %for.end
for.body: ; preds = %for.cond
- %1 = load i32* %i, align 4
+ %1 = load i32, i32* %i, align 4
%idxprom = sext i32 %1 to i64
- %2 = load i32** %trigger.addr, align 8
- %arrayidx = getelementptr inbounds i32* %2, i64 %idxprom
- %3 = load i32* %arrayidx, align 4
+ %2 = load i32*, i32** %trigger.addr, align 8
+ %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom
+ %3 = load i32, i32* %arrayidx, align 4
%cmp1 = icmp slt i32 %3, 100
br i1 %cmp1, label %if.then, label %if.end
if.then: ; preds = %for.body
- %4 = load i32* %i, align 4
+ %4 = load i32, i32* %i, align 4
%mul = mul nsw i32 %4, 2
%idxprom2 = sext i32 %mul to i64
- %5 = load double** %B.addr, align 8
- %arrayidx3 = getelementptr inbounds double* %5, i64 %idxprom2
- %6 = load double* %arrayidx3, align 8
- %7 = load i32* %i, align 4
+ %5 = load double*, double** %B.addr, align 8
+ %arrayidx3 = getelementptr inbounds double, double* %5, i64 %idxprom2
+ %6 = load double, double* %arrayidx3, align 8
+ %7 = load i32, i32* %i, align 4
%idxprom4 = sext i32 %7 to i64
- %8 = load i32** %trigger.addr, align 8
- %arrayidx5 = getelementptr inbounds i32* %8, i64 %idxprom4
- %9 = load i32* %arrayidx5, align 4
+ %8 = load i32*, i32** %trigger.addr, align 8
+ %arrayidx5 = getelementptr inbounds i32, i32* %8, i64 %idxprom4
+ %9 = load i32, i32* %arrayidx5, align 4
%conv = sitofp i32 %9 to double
%add = fadd double %6, %conv
- %10 = load i32* %i, align 4
+ %10 = load i32, i32* %i, align 4
%idxprom6 = sext i32 %10 to i64
- %11 = load double** %A.addr, align 8
- %arrayidx7 = getelementptr inbounds double* %11, i64 %idxprom6
+ %11 = load double*, double** %A.addr, align 8
+ %arrayidx7 = getelementptr inbounds double, double* %11, i64 %idxprom6
store double %add, double* %arrayidx7, align 8
br label %if.end
@@ -335,7 +335,7 @@ if.end: ; preds = %if.then, %for.body
br label %for.inc
for.inc: ; preds = %if.end
- %12 = load i32* %i, align 4
+ %12 = load i32, i32* %i, align 4
%inc = add nsw i32 %12, 1
store i32 %inc, i32* %i, align 4
br label %for.cond
@@ -373,43 +373,43 @@ entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
- %0 = load i32* %i, align 4
+ %0 = load i32, i32* %i, align 4
%cmp = icmp slt i32 %0, 10000
br i1 %cmp, label %for.body, label %for.end
for.body: ; preds = %for.cond
- %1 = load i32* %i, align 4
+ %1 = load i32, i32* %i, align 4
%idxprom = sext i32 %1 to i64
- %2 = load i32** %trigger.addr, align 8
- %arrayidx = getelementptr inbounds i32* %2, i64 %idxprom
- %3 = load i32* %arrayidx, align 4
+ %2 = load i32*, i32** %trigger.addr, align 8
+ %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom
+ %3 = load i32, i32* %arrayidx, align 4
%cmp1 = icmp slt i32 %3, 100
br i1 %cmp1, label %if.then, label %if.end
if.then: ; preds = %for.body
- %4 = load i32* %i, align 4
+ %4 = load i32, i32* %i, align 4
%idxprom2 = sext i32 %4 to i64
- %5 = load i32** %B.addr, align 8
- %arrayidx3 = getelementptr inbounds i32* %5, i64 %idxprom2
- %6 = load i32* %arrayidx3, align 4
- %7 = load i32* %i, align 4
+ %5 = load i32*, i32** %B.addr, align 8
+ %arrayidx3 = getelementptr inbounds i32, i32* %5, i64 %idxprom2
+ %6 = load i32, i32* %arrayidx3, align 4
+ %7 = load i32, i32* %i, align 4
%idxprom4 = sext i32 %7 to i64
- %8 = load i32** %trigger.addr, align 8
- %arrayidx5 = getelementptr inbounds i32* %8, i64 %idxprom4
- %9 = load i32* %arrayidx5, align 4
+ %8 = load i32*, i32** %trigger.addr, align 8
+ %arrayidx5 = getelementptr inbounds i32, i32* %8, i64 %idxprom4
+ %9 = load i32, i32* %arrayidx5, align 4
%add = add nsw i32 %6, %9
- %10 = load i32* %i, align 4
+ %10 = load i32, i32* %i, align 4
%idxprom6 = sext i32 %10 to i64
- %11 = load i32** %A.addr, align 8
- %arrayidx7 = getelementptr inbounds i32* %11, i64 %idxprom6
- store i32 sdiv (i32 1, i32 zext (i1 icmp eq (i32** getelementptr inbounds ([1 x i32*]* @a, i64 0, i64 1), i32** @c) to i32)), i32* %arrayidx7, align 4
+ %11 = load i32*, i32** %A.addr, align 8
+ %arrayidx7 = getelementptr inbounds i32, i32* %11, i64 %idxprom6
+ store i32 sdiv (i32 1, i32 zext (i1 icmp eq (i32** getelementptr inbounds ([1 x i32*], [1 x i32*]* @a, i64 0, i64 1), i32** @c) to i32)), i32* %arrayidx7, align 4
br label %if.end
if.end: ; preds = %if.then, %for.body
br label %for.inc
for.inc: ; preds = %if.end
- %12 = load i32* %i, align 4
+ %12 = load i32, i32* %i, align 4
%inc = add nsw i32 %12, 1
store i32 %inc, i32* %i, align 4
br label %for.cond
@@ -459,30 +459,30 @@ entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
- %0 = load i32* %i, align 4
+ %0 = load i32, i32* %i, align 4
%cmp = icmp sge i32 %0, 0
br i1 %cmp, label %for.body, label %for.end
for.body: ; preds = %for.cond
- %1 = load i32* %i, align 4
+ %1 = load i32, i32* %i, align 4
%idxprom = sext i32 %1 to i64
- %2 = load i32** %trigger.addr, align 8
- %arrayidx = getelementptr inbounds i32* %2, i64 %idxprom
- %3 = load i32* %arrayidx, align 4
+ %2 = load i32*, i32** %trigger.addr, align 8
+ %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom
+ %3 = load i32, i32* %arrayidx, align 4
%cmp1 = icmp sgt i32 %3, 0
br i1 %cmp1, label %if.then, label %if.end
if.then: ; preds = %for.body
- %4 = load i32* %i, align 4
+ %4 = load i32, i32* %i, align 4
%idxprom2 = sext i32 %4 to i64
- %5 = load double** %in.addr, align 8
- %arrayidx3 = getelementptr inbounds double* %5, i64 %idxprom2
- %6 = load double* %arrayidx3, align 8
+ %5 = load double*, double** %in.addr, align 8
+ %arrayidx3 = getelementptr inbounds double, double* %5, i64 %idxprom2
+ %6 = load double, double* %arrayidx3, align 8
%add = fadd double %6, 5.000000e-01
- %7 = load i32* %i, align 4
+ %7 = load i32, i32* %i, align 4
%idxprom4 = sext i32 %7 to i64
- %8 = load double** %out.addr, align 8
- %arrayidx5 = getelementptr inbounds double* %8, i64 %idxprom4
+ %8 = load double*, double** %out.addr, align 8
+ %arrayidx5 = getelementptr inbounds double, double* %8, i64 %idxprom4
store double %add, double* %arrayidx5, align 8
br label %if.end
@@ -490,7 +490,7 @@ if.end: ; preds = %if.then, %for.body
br label %for.inc
for.inc: ; preds = %if.end
- %9 = load i32* %i, align 4
+ %9 = load i32, i32* %i, align 4
%dec = add nsw i32 %9, -1
store i32 %dec, i32* %i, align 4
br label %for.cond
diff --git a/test/Transforms/LoopVectorize/X86/metadata-enable.ll b/test/Transforms/LoopVectorize/X86/metadata-enable.ll
index 7feb66c..ba8e11e 100644
--- a/test/Transforms/LoopVectorize/X86/metadata-enable.ll
+++ b/test/Transforms/LoopVectorize/X86/metadata-enable.ll
@@ -54,17 +54,17 @@ entry:
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv
- %0 = load i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %0, %N
- %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
store i32 %add, i32* %arrayidx2, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 32
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
for.end: ; preds = %for.body
- %1 = load i32* %a, align 4
+ %1 = load i32, i32* %a, align 4
ret i32 %1
}
@@ -105,17 +105,17 @@ entry:
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv
- %0 = load i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %0, %N
- %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
store i32 %add, i32* %arrayidx2, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 32
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
- %1 = load i32* %a, align 4
+ %1 = load i32, i32* %a, align 4
ret i32 %1
}
@@ -156,17 +156,17 @@ entry:
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv
- %0 = load i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %0, %N
- %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
store i32 %add, i32* %arrayidx2, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 32
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !2
for.end: ; preds = %for.body
- %1 = load i32* %a, align 4
+ %1 = load i32, i32* %a, align 4
ret i32 %1
}
diff --git a/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll b/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll
index fd69dc4..bb972c4 100644
--- a/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll
+++ b/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll
@@ -10,8 +10,8 @@ entry:
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %arrayidx = getelementptr inbounds float* %a, i64 %indvars.iv
- %0 = load float* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
+ %0 = load float, float* %arrayidx, align 4
%add = fadd float %0, 1.000000e+00
store float %add, float* %arrayidx, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
diff --git a/test/Transforms/LoopVectorize/X86/no-vector.ll b/test/Transforms/LoopVectorize/X86/no-vector.ll
index 692eec9..4b464b0 100644
--- a/test/Transforms/LoopVectorize/X86/no-vector.ll
+++ b/test/Transforms/LoopVectorize/X86/no-vector.ll
@@ -8,8 +8,8 @@ entry:
for.body: ; preds = %entry, %for.body
%i.06 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
%r.05 = phi i32 [ %xor, %for.body ], [ 0, %entry ]
- %arrayidx = getelementptr inbounds i8* %s, i32 %i.06
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %s, i32 %i.06
+ %0 = load i8, i8* %arrayidx, align 1
%conv = sext i8 %0 to i32
%xor = xor i32 %conv, %r.05
%inc = add nsw i32 %i.06, 1
diff --git a/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll b/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll
index ad01044..631361c 100644
--- a/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll
+++ b/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll
@@ -17,28 +17,28 @@ entry:
br label %for.body
for.body: ; preds = %for.body.for.body_crit_edge, %entry
- %indvars.iv.reload = load i64* %indvars.iv.reg2mem
- %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv.reload
- %0 = load i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3
- %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv.reload
- %1 = load i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3
+ %indvars.iv.reload = load i64, i64* %indvars.iv.reg2mem
+ %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.reload
+ %0 = load i32, i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3
+ %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.reload
+ %1 = load i32, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3
%idxprom3 = sext i32 %1 to i64
- %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3
+ %arrayidx4 = getelementptr inbounds i32, i32* %a, i64 %idxprom3
store i32 %0, i32* %arrayidx4, align 4, !llvm.mem.parallel_loop_access !3
%indvars.iv.next = add i64 %indvars.iv.reload, 1
; A new store without the parallel metadata here:
store i64 %indvars.iv.next, i64* %indvars.iv.next.reg2mem
- %indvars.iv.next.reload1 = load i64* %indvars.iv.next.reg2mem
- %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next.reload1
- %2 = load i32* %arrayidx6, align 4, !llvm.mem.parallel_loop_access !3
+ %indvars.iv.next.reload1 = load i64, i64* %indvars.iv.next.reg2mem
+ %arrayidx6 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.next.reload1
+ %2 = load i32, i32* %arrayidx6, align 4, !llvm.mem.parallel_loop_access !3
store i32 %2, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3
- %indvars.iv.next.reload = load i64* %indvars.iv.next.reg2mem
+ %indvars.iv.next.reload = load i64, i64* %indvars.iv.next.reg2mem
%lftr.wideiv = trunc i64 %indvars.iv.next.reload to i32
%exitcond = icmp eq i32 %lftr.wideiv, 512
br i1 %exitcond, label %for.end, label %for.body.for.body_crit_edge, !llvm.loop !3
for.body.for.body_crit_edge: ; preds = %for.body
- %indvars.iv.next.reload2 = load i64* %indvars.iv.next.reg2mem
+ %indvars.iv.next.reload2 = load i64, i64* %indvars.iv.next.reg2mem
store i64 %indvars.iv.next.reload2, i64* %indvars.iv.reg2mem
br label %for.body
diff --git a/test/Transforms/LoopVectorize/X86/parallel-loops.ll b/test/Transforms/LoopVectorize/X86/parallel-loops.ll
index 22ab521..53061ed 100644
--- a/test/Transforms/LoopVectorize/X86/parallel-loops.ll
+++ b/test/Transforms/LoopVectorize/X86/parallel-loops.ll
@@ -20,16 +20,16 @@ entry:
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv
- %0 = load i32* %arrayidx, align 4
- %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv
- %1 = load i32* %arrayidx2, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ %1 = load i32, i32* %arrayidx2, align 4
%idxprom3 = sext i32 %1 to i64
- %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3
+ %arrayidx4 = getelementptr inbounds i32, i32* %a, i64 %idxprom3
store i32 %0, i32* %arrayidx4, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
- %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next
- %2 = load i32* %arrayidx6, align 4
+ %arrayidx6 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.next
+ %2 = load i32, i32* %arrayidx6, align 4
store i32 %2, i32* %arrayidx2, align 4
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, 512
@@ -50,18 +50,18 @@ entry:
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv
- %0 = load i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3
- %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv
- %1 = load i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3
+ %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3
+ %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ %1 = load i32, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3
%idxprom3 = sext i32 %1 to i64
- %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3
+ %arrayidx4 = getelementptr inbounds i32, i32* %a, i64 %idxprom3
; This store might have originated from inlining a function with a parallel
; loop. Refers to a list with the "original loop reference" (!4) also included.
store i32 %0, i32* %arrayidx4, align 4, !llvm.mem.parallel_loop_access !5
%indvars.iv.next = add i64 %indvars.iv, 1
- %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next
- %2 = load i32* %arrayidx6, align 4, !llvm.mem.parallel_loop_access !3
+ %arrayidx6 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.next
+ %2 = load i32, i32* %arrayidx6, align 4, !llvm.mem.parallel_loop_access !3
store i32 %2, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, 512
@@ -83,18 +83,18 @@ entry:
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv
- %0 = load i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !6
- %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv
- %1 = load i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !6
+ %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !6
+ %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ %1 = load i32, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !6
%idxprom3 = sext i32 %1 to i64
- %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3
+ %arrayidx4 = getelementptr inbounds i32, i32* %a, i64 %idxprom3
; This refers to the loop marked with !7 which we are not in at the moment.
; It should prevent detecting as a parallel loop.
store i32 %0, i32* %arrayidx4, align 4, !llvm.mem.parallel_loop_access !7
%indvars.iv.next = add i64 %indvars.iv, 1
- %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next
- %2 = load i32* %arrayidx6, align 4, !llvm.mem.parallel_loop_access !6
+ %arrayidx6 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.next
+ %2 = load i32, i32* %arrayidx6, align 4, !llvm.mem.parallel_loop_access !6
store i32 %2, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !6
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, 512
diff --git a/test/Transforms/LoopVectorize/X86/powof2div.ll b/test/Transforms/LoopVectorize/X86/powof2div.ll
index 054da8e..6bc738a 100644
--- a/test/Transforms/LoopVectorize/X86/powof2div.ll
+++ b/test/Transforms/LoopVectorize/X86/powof2div.ll
@@ -7,7 +7,7 @@ target triple = "x86_64-unknown-linux-gnu"
@Foo = common global %struct.anon zeroinitializer, align 4
;CHECK-LABEL: @foo(
-;CHECK: load <4 x i32>*
+;CHECK: load <4 x i32>, <4 x i32>*
;CHECK: sdiv <4 x i32>
;CHECK: store <4 x i32>
@@ -17,10 +17,10 @@ entry:
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %arrayidx = getelementptr inbounds %struct.anon* @Foo, i64 0, i32 2, i64 %indvars.iv
- %0 = load i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds %struct.anon, %struct.anon* @Foo, i64 0, i32 2, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
%div = sdiv i32 %0, 2
- %arrayidx2 = getelementptr inbounds %struct.anon* @Foo, i64 0, i32 0, i64 %indvars.iv
+ %arrayidx2 = getelementptr inbounds %struct.anon, %struct.anon* @Foo, i64 0, i32 0, i64 %indvars.iv
store i32 %div, i32* %arrayidx2, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 100
diff --git a/test/Transforms/LoopVectorize/X86/reduction-crash.ll b/test/Transforms/LoopVectorize/X86/reduction-crash.ll
index 3957a55..3741b95 100644
--- a/test/Transforms/LoopVectorize/X86/reduction-crash.ll
+++ b/test/Transforms/LoopVectorize/X86/reduction-crash.ll
@@ -14,17 +14,17 @@ bb:
br label %bb2
bb2: ; preds = %bb
- %tmp = load double* null, align 8
+ %tmp = load double, double* null, align 8
br i1 undef, label %bb3, label %bb12
bb3: ; preds = %bb3, %bb2
%tmp4 = phi double [ %tmp9, %bb3 ], [ %tmp, %bb2 ]
%tmp5 = phi i32 [ %tmp8, %bb3 ], [ 0, %bb2 ]
- %tmp6 = getelementptr inbounds [16 x double]* undef, i32 0, i32 %tmp5
- %tmp7 = load double* %tmp6, align 4
+ %tmp6 = getelementptr inbounds [16 x double], [16 x double]* undef, i32 0, i32 %tmp5
+ %tmp7 = load double, double* %tmp6, align 4
%tmp8 = add nsw i32 %tmp5, 1
%tmp9 = fadd fast double %tmp4, undef
- %tmp10 = getelementptr inbounds float* %arg, i32 %tmp5
+ %tmp10 = getelementptr inbounds float, float* %arg, i32 %tmp5
store float undef, float* %tmp10, align 4
%tmp11 = icmp eq i32 %tmp8, %arg1
br i1 %tmp11, label %bb12, label %bb3
diff --git a/test/Transforms/LoopVectorize/X86/small-size.ll b/test/Transforms/LoopVectorize/X86/small-size.ll
index 8c7a881..47c262b 100644
--- a/test/Transforms/LoopVectorize/X86/small-size.ll
+++ b/test/Transforms/LoopVectorize/X86/small-size.ll
@@ -30,12 +30,12 @@ define void @example1() optsize {
; <label>:1 ; preds = %1, %0
%indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
- %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
- %3 = load i32* %2, align 4
- %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv
- %5 = load i32* %4, align 4
+ %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv
+ %3 = load i32, i32* %2, align 4
+ %4 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %indvars.iv
+ %5 = load i32, i32* %4, align 4
%6 = add nsw i32 %5, %3
- %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+ %7 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
store i32 %6, i32* %7, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -65,7 +65,7 @@ define void @example2(i32 %n, i32 %x) optsize {
.lr.ph5: ; preds = %0, %.lr.ph5
%indvars.iv6 = phi i64 [ %indvars.iv.next7, %.lr.ph5 ], [ 0, %0 ]
- %3 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv6
+ %3 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv6
store i32 %x, i32* %3, align 4
%indvars.iv.next7 = add i64 %indvars.iv6, 1
%lftr.wideiv = trunc i64 %indvars.iv.next7 to i32
@@ -76,12 +76,12 @@ define void @example2(i32 %n, i32 %x) optsize {
%indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ %i.0.lcssa, %.preheader ]
%.02 = phi i32 [ %4, %.lr.ph ], [ %n, %.preheader ]
%4 = add nsw i32 %.02, -1
- %5 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
- %6 = load i32* %5, align 4
- %7 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv
- %8 = load i32* %7, align 4
+ %5 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv
+ %6 = load i32, i32* %5, align 4
+ %7 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %indvars.iv
+ %8 = load i32, i32* %7, align 4
%9 = and i32 %8, %6
- %10 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+ %10 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
store i32 %9, i32* %10, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
%11 = icmp eq i32 %4, 0
@@ -104,9 +104,9 @@ define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture
%.014 = phi i32* [ %5, %.lr.ph ], [ %p, %0 ]
%.023 = phi i32* [ %3, %.lr.ph ], [ %q, %0 ]
%2 = add nsw i32 %.05, -1
- %3 = getelementptr inbounds i32* %.023, i64 1
- %4 = load i32* %.023, align 16
- %5 = getelementptr inbounds i32* %.014, i64 1
+ %3 = getelementptr inbounds i32, i32* %.023, i64 1
+ %4 = load i32, i32* %.023, align 16
+ %5 = getelementptr inbounds i32, i32* %.014, i64 1
store i32 %4, i32* %.014, align 16
%6 = icmp eq i32 %2, 0
br i1 %6, label %._crit_edge, label %.lr.ph
@@ -128,9 +128,9 @@ define void @example4(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture
%.014 = phi i32* [ %5, %.lr.ph ], [ %p, %0 ]
%.023 = phi i32* [ %3, %.lr.ph ], [ %q, %0 ]
%2 = add nsw i32 %.05, -1
- %3 = getelementptr inbounds i32* %.023, i64 1
- %4 = load i32* %.023, align 16
- %5 = getelementptr inbounds i32* %.014, i64 1
+ %3 = getelementptr inbounds i32, i32* %.023, i64 1
+ %4 = load i32, i32* %.023, align 16
+ %5 = getelementptr inbounds i32, i32* %.014, i64 1
store i32 %4, i32* %.014, align 16
%6 = icmp eq i32 %2, 0
br i1 %6, label %._crit_edge, label %.lr.ph
@@ -152,11 +152,11 @@ define void @example23(i16* nocapture %src, i32* nocapture %dst) optsize {
%.04 = phi i16* [ %src, %0 ], [ %2, %1 ]
%.013 = phi i32* [ %dst, %0 ], [ %6, %1 ]
%i.02 = phi i32 [ 0, %0 ], [ %7, %1 ]
- %2 = getelementptr inbounds i16* %.04, i64 1
- %3 = load i16* %.04, align 2
+ %2 = getelementptr inbounds i16, i16* %.04, i64 1
+ %3 = load i16, i16* %.04, align 2
%4 = zext i16 %3 to i32
%5 = shl nuw nsw i32 %4, 7
- %6 = getelementptr inbounds i32* %.013, i64 1
+ %6 = getelementptr inbounds i32, i32* %.013, i64 1
store i32 %5, i32* %.013, align 4
%7 = add nsw i32 %i.02, 1
%exitcond = icmp eq i32 %7, 256
@@ -178,11 +178,11 @@ define void @example23b(i16* noalias nocapture %src, i32* noalias nocapture %dst
%.04 = phi i16* [ %src, %0 ], [ %2, %1 ]
%.013 = phi i32* [ %dst, %0 ], [ %6, %1 ]
%i.02 = phi i32 [ 0, %0 ], [ %7, %1 ]
- %2 = getelementptr inbounds i16* %.04, i64 1
- %3 = load i16* %.04, align 2
+ %2 = getelementptr inbounds i16, i16* %.04, i64 1
+ %3 = load i16, i16* %.04, align 2
%4 = zext i16 %3 to i32
%5 = shl nuw nsw i32 %4, 7
- %6 = getelementptr inbounds i32* %.013, i64 1
+ %6 = getelementptr inbounds i32, i32* %.013, i64 1
store i32 %5, i32* %.013, align 4
%7 = add nsw i32 %i.02, 1
%exitcond = icmp eq i32 %7, 256
diff --git a/test/Transforms/LoopVectorize/X86/struct-store.ll b/test/Transforms/LoopVectorize/X86/struct-store.ll
index a995e43..4ff3b0e 100644
--- a/test/Transforms/LoopVectorize/X86/struct-store.ll
+++ b/test/Transforms/LoopVectorize/X86/struct-store.ll
@@ -15,7 +15,7 @@ entry:
loop:
%indvars.iv = phi i64 [ %indvars.iv.next, %loop ], [ 0, %entry ]
- %tmp = getelementptr inbounds [16 x { i64, i64 }]* @glbl, i64 0, i64 %indvars.iv
+ %tmp = getelementptr inbounds [16 x { i64, i64 }], [16 x { i64, i64 }]* @glbl, i64 0, i64 %indvars.iv
store { i64, i64 } { i64 ptrtoint (void ()* @fn to i64), i64 0 }, { i64, i64 }* %tmp, align 16
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
diff --git a/test/Transforms/LoopVectorize/X86/tripcount.ll b/test/Transforms/LoopVectorize/X86/tripcount.ll
index a4ec694..c0bbb92 100644
--- a/test/Transforms/LoopVectorize/X86/tripcount.ll
+++ b/test/Transforms/LoopVectorize/X86/tripcount.ll
@@ -22,8 +22,8 @@ for.body.preheader:
for.body:
%i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
- %arrayidx = getelementptr inbounds [0 x i32]* @big, i32 0, i32 %i.07
- %0 = load i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @big, i32 0, i32 %i.07
+ %0 = load i32, i32* %arrayidx, align 4
%neg = xor i32 %0, -1
store i32 %neg, i32* %arrayidx, align 4
%inc = add nsw i32 %i.07, 1
diff --git a/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll b/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll
index 86c32b2..38af11c 100644
--- a/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll
+++ b/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll
@@ -12,10 +12,10 @@ entry:
br label %for.body
for.body:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %arrayidx = getelementptr inbounds i64* %a, i64 %indvars.iv
- %tmp = load i64* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i64, i64* %a, i64 %indvars.iv
+ %tmp = load i64, i64* %arrayidx, align 4
%conv = uitofp i64 %tmp to double
- %arrayidx2 = getelementptr inbounds double* %b, i64 %indvars.iv
+ %arrayidx2 = getelementptr inbounds double, double* %b, i64 %indvars.iv
store double %conv, double* %arrayidx2, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 256
diff --git a/test/Transforms/LoopVectorize/X86/unroll-pm.ll b/test/Transforms/LoopVectorize/X86/unroll-pm.ll
index 5064fec..52914b6 100644
--- a/test/Transforms/LoopVectorize/X86/unroll-pm.ll
+++ b/test/Transforms/LoopVectorize/X86/unroll-pm.ll
@@ -17,8 +17,8 @@ define i32 @bar(i32* nocapture %A, i32 %n) nounwind uwtable ssp {
.lr.ph: ; preds = %0, %.lr.ph
%indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
- %2 = getelementptr inbounds i32* %A, i64 %indvars.iv
- %3 = load i32* %2, align 4
+ %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ %3 = load i32, i32* %2, align 4
%4 = add nsw i32 %3, 6
store i32 %4, i32* %2, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
diff --git a/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll b/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll
index 716dc08..4411da3 100644
--- a/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll
+++ b/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll
@@ -16,8 +16,8 @@ target triple = "x86_64-apple-macosx10.8.0"
; CHECK-VECTOR: ret
;
; CHECK-SCALAR-LABEL: @foo(
-; CHECK-SCALAR: load i32*
-; CHECK-SCALAR-NOT: load i32*
+; CHECK-SCALAR: load i32, i32*
+; CHECK-SCALAR-NOT: load i32, i32*
; CHECK-SCALAR: store i32
; CHECK-SCALAR-NOT: store i32
; CHECK-SCALAR: ret
@@ -26,8 +26,8 @@ define i32 @foo(i32* nocapture %A) nounwind uwtable ssp {
; <label>:1 ; preds = %1, %0
%indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
- %2 = getelementptr inbounds i32* %A, i64 %indvars.iv
- %3 = load i32* %2, align 4
+ %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ %3 = load i32, i32* %2, align 4
%4 = add nsw i32 %3, 6
store i32 %4, i32* %2, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
@@ -57,8 +57,8 @@ define i32 @bar(i32* nocapture %A, i32 %n) nounwind uwtable ssp {
.lr.ph: ; preds = %0, %.lr.ph
%indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
- %2 = getelementptr inbounds i32* %A, i64 %indvars.iv
- %3 = load i32* %2, align 4
+ %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ %3 = load i32, i32* %2, align 4
%4 = add nsw i32 %3, 6
store i32 %4, i32* %2, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
@@ -86,10 +86,10 @@ entry:
for.body:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %arrayidx = getelementptr inbounds float* %B, i64 %indvars.iv
- %0 = load float* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds float, float* %B, i64 %indvars.iv
+ %0 = load float, float* %arrayidx, align 4
%mul = fmul float %0, %N
- %arrayidx2 = getelementptr inbounds float* %A, i64 %indvars.iv
+ %arrayidx2 = getelementptr inbounds float, float* %A, i64 %indvars.iv
store float %mul, float* %arrayidx2, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 256
diff --git a/test/Transforms/LoopVectorize/X86/unroll_selection.ll b/test/Transforms/LoopVectorize/X86/unroll_selection.ll
index c684b4e..71b8290 100644
--- a/test/Transforms/LoopVectorize/X86/unroll_selection.ll
+++ b/test/Transforms/LoopVectorize/X86/unroll_selection.ll
@@ -16,8 +16,8 @@ define void @reg_pressure(double* nocapture %A, i32 %n) nounwind uwtable ssp {
; <label>:2 ; preds = %2, %0
%indvars.iv = phi i64 [ %indvars.iv.next, %2 ], [ %1, %0 ]
- %3 = getelementptr inbounds double* %A, i64 %indvars.iv
- %4 = load double* %3, align 8
+ %3 = getelementptr inbounds double, double* %A, i64 %indvars.iv
+ %4 = load double, double* %3, align 8
%5 = fadd double %4, 3.000000e+00
%6 = fmul double %4, 2.000000e+00
%7 = fadd double %5, %6
@@ -58,8 +58,8 @@ define void @small_loop(i16* nocapture %A, i64 %n) nounwind uwtable ssp {
.lr.ph: ; preds = %0, %.lr.ph
%i.01 = phi i64 [ %5, %.lr.ph ], [ 0, %0 ]
- %2 = getelementptr inbounds i16* %A, i64 %i.01
- %3 = load i16* %2, align 2
+ %2 = getelementptr inbounds i16, i16* %A, i64 %i.01
+ %3 = load i16, i16* %2, align 2
%4 = xor i16 %3, 3
store i16 %4, i16* %2, align 2
%5 = add i64 %i.01, 1
diff --git a/test/Transforms/LoopVectorize/X86/veclib-calls.ll b/test/Transforms/LoopVectorize/X86/veclib-calls.ll
new file mode 100644
index 0000000..62e0a44
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/veclib-calls.ll
@@ -0,0 +1,182 @@
+; RUN: opt < %s -vector-library=Accelerate -loop-vectorize -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+;CHECK-LABEL: @sqrt_f32(
+;CHECK: vsqrtf{{.*}}<4 x float>
+;CHECK: ret void
+declare float @sqrtf(float) nounwind readnone
+define void @sqrt_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
+ %0 = load float, float* %arrayidx, align 4
+ %call = tail call float @sqrtf(float %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
+ store float %call, float* %arrayidx2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+;CHECK-LABEL: @exp_f32(
+;CHECK: vexpf{{.*}}<4 x float>
+;CHECK: ret void
+declare float @expf(float) nounwind readnone
+define void @exp_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
+ %0 = load float, float* %arrayidx, align 4
+ %call = tail call float @expf(float %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
+ store float %call, float* %arrayidx2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+;CHECK-LABEL: @log_f32(
+;CHECK: vlogf{{.*}}<4 x float>
+;CHECK: ret void
+declare float @logf(float) nounwind readnone
+define void @log_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
+ %0 = load float, float* %arrayidx, align 4
+ %call = tail call float @logf(float %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
+ store float %call, float* %arrayidx2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+; For abs instruction we'll generate vector intrinsic, as it's cheaper than a lib call.
+;CHECK-LABEL: @fabs_f32(
+;CHECK: fabs{{.*}}<4 x float>
+;CHECK: ret void
+declare float @fabsf(float) nounwind readnone
+define void @fabs_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
+ %0 = load float, float* %arrayidx, align 4
+ %call = tail call float @fabsf(float %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
+ store float %call, float* %arrayidx2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+; Test that we can vectorize an intrinsic into a vector call.
+;CHECK-LABEL: @exp_f32_intrin(
+;CHECK: vexpf{{.*}}<4 x float>
+;CHECK: ret void
+declare float @llvm.exp.f32(float) nounwind readnone
+define void @exp_f32_intrin(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
+ %0 = load float, float* %arrayidx, align 4
+ %call = tail call float @llvm.exp.f32(float %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
+ store float %call, float* %arrayidx2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+; Test that we don't vectorize arbitrary functions.
+;CHECK-LABEL: @foo_f32(
+;CHECK-NOT: foo{{.*}}<4 x float>
+;CHECK: ret void
+declare float @foo(float) nounwind readnone
+define void @foo_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
+ %0 = load float, float* %arrayidx, align 4
+ %call = tail call float @foo(float %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
+ store float %call, float* %arrayidx2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+; Test that we don't vectorize calls with nobuiltin attribute.
+;CHECK-LABEL: @sqrt_f32_nobuiltin(
+;CHECK-NOT: vsqrtf{{.*}}<4 x float>
+;CHECK: ret void
+define void @sqrt_f32_nobuiltin(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
+ %0 = load float, float* %arrayidx, align 4
+ %call = tail call float @sqrtf(float %0) nounwind readnone nobuiltin
+ %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
+ store float %call, float* %arrayidx2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/X86/vect.omp.force.ll b/test/Transforms/LoopVectorize/X86/vect.omp.force.ll
index a781fbe..c2a0fed 100644
--- a/test/Transforms/LoopVectorize/X86/vect.omp.force.ll
+++ b/test/Transforms/LoopVectorize/X86/vect.omp.force.ll
@@ -15,9 +15,9 @@ target triple = "x86_64-apple-macosx10.8.0"
; The source code for the test:
;
; #include <math.h>
-; void foo(float* restrict A, float * restrict B, int size)
+; void foo(float* restrict A, float * restrict B)
; {
-; for (int i = 0; i < size; ++i) A[i] = sinf(B[i]);
+; for (int i = 0; i < 1000; i+=2) A[i] = sinf(B[i]);
; }
;
@@ -25,24 +25,20 @@ target triple = "x86_64-apple-macosx10.8.0"
; This loop will be vectorized, although the scalar cost is lower than any of vector costs, but vectorization is explicitly forced in metadata.
;
-define void @vectorized(float* noalias nocapture %A, float* noalias nocapture %B, i32 %size) {
+define void @vectorized(float* noalias nocapture %A, float* noalias nocapture %B) {
entry:
- %cmp6 = icmp sgt i32 %size, 0
- br i1 %cmp6, label %for.body.preheader, label %for.end
-
-for.body.preheader:
br label %for.body
for.body:
- %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
- %arrayidx = getelementptr inbounds float* %B, i64 %indvars.iv
- %0 = load float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !1
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float, float* %B, i64 %indvars.iv
+ %0 = load float, float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !1
%call = tail call float @llvm.sin.f32(float %0)
- %arrayidx2 = getelementptr inbounds float* %A, i64 %indvars.iv
+ %arrayidx2 = getelementptr inbounds float, float* %A, i64 %indvars.iv
store float %call, float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !1
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
- %exitcond = icmp eq i32 %lftr.wideiv, %size
+ %exitcond = icmp eq i32 %lftr.wideiv, 1000
br i1 %exitcond, label %for.end.loopexit, label %for.body, !llvm.loop !1
for.end.loopexit:
@@ -59,24 +55,20 @@ for.end:
; This method will not be vectorized, as scalar cost is lower than any of vector costs.
;
-define void @not_vectorized(float* noalias nocapture %A, float* noalias nocapture %B, i32 %size) {
+define void @not_vectorized(float* noalias nocapture %A, float* noalias nocapture %B) {
entry:
- %cmp6 = icmp sgt i32 %size, 0
- br i1 %cmp6, label %for.body.preheader, label %for.end
-
-for.body.preheader:
br label %for.body
for.body:
- %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
- %arrayidx = getelementptr inbounds float* %B, i64 %indvars.iv
- %0 = load float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float, float* %B, i64 %indvars.iv
+ %0 = load float, float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3
%call = tail call float @llvm.sin.f32(float %0)
- %arrayidx2 = getelementptr inbounds float* %A, i64 %indvars.iv
+ %arrayidx2 = getelementptr inbounds float, float* %A, i64 %indvars.iv
store float %call, float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
- %exitcond = icmp eq i32 %lftr.wideiv, %size
+ %exitcond = icmp eq i32 %lftr.wideiv, 1000
br i1 %exitcond, label %for.end.loopexit, label %for.body, !llvm.loop !3
for.end.loopexit:
diff --git a/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll b/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
index e39e6b5..8d139ac 100644
--- a/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
+++ b/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
@@ -29,10 +29,10 @@ entry:
for.body:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %arrayidx = getelementptr inbounds float* %B, i64 %indvars.iv
- %0 = load float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !1
- %arrayidx2 = getelementptr inbounds float* %A, i64 %indvars.iv
- %1 = load float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !1
+ %arrayidx = getelementptr inbounds float, float* %B, i64 %indvars.iv
+ %0 = load float, float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !1
+ %arrayidx2 = getelementptr inbounds float, float* %A, i64 %indvars.iv
+ %1 = load float, float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !1
%add = fadd fast float %0, %1
store float %add, float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !1
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
@@ -55,10 +55,10 @@ entry:
for.body:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %arrayidx = getelementptr inbounds float* %B, i64 %indvars.iv
- %0 = load float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3
- %arrayidx2 = getelementptr inbounds float* %A, i64 %indvars.iv
- %1 = load float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3
+ %arrayidx = getelementptr inbounds float, float* %B, i64 %indvars.iv
+ %0 = load float, float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3
+ %arrayidx2 = getelementptr inbounds float, float* %A, i64 %indvars.iv
+ %1 = load float, float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3
%add = fadd fast float %0, %1
store float %add, float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
diff --git a/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll b/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll
index ece9895..5efabe1 100644
--- a/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll
+++ b/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll
@@ -15,12 +15,12 @@ define void @scalarselect(i1 %cond) {
; <label>:1
%indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
- %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
- %3 = load i32* %2, align 4
- %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv
- %5 = load i32* %4, align 4
+ %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv
+ %3 = load i32, i32* %2, align 4
+ %4 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %indvars.iv
+ %5 = load i32, i32* %4, align 4
%6 = add nsw i32 %5, %3
- %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+ %7 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
; A scalar select has a cost of 1 on core2
; CHECK: cost of 1 for VF 2 {{.*}} select i1 %cond, i32 %6, i32 0
@@ -42,12 +42,12 @@ define void @vectorselect(i1 %cond) {
; <label>:1
%indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
- %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
- %3 = load i32* %2, align 4
- %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv
- %5 = load i32* %4, align 4
+ %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv
+ %3 = load i32, i32* %2, align 4
+ %4 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %indvars.iv
+ %5 = load i32, i32* %4, align 4
%6 = add nsw i32 %5, %3
- %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+ %7 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
%8 = icmp ult i64 %indvars.iv, 8
; A vector select has a cost of 1 on core2
diff --git a/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll b/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll
index e57cfef..6cd3c9c 100644
--- a/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll
+++ b/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll
@@ -19,7 +19,7 @@ target triple = "x86_64-apple-macosx10.8.0"
; CHECK: test_consecutive_store
; CHECK: The Widest type: 64 bits
define void @test_consecutive_store(%0**, %0**, %0** nocapture) nounwind ssp uwtable align 2 {
- %4 = load %0** %2, align 8
+ %4 = load %0*, %0** %2, align 8
%5 = icmp eq %0** %0, %1
br i1 %5, label %12, label %6
@@ -29,7 +29,7 @@ define void @test_consecutive_store(%0**, %0**, %0** nocapture) nounwind ssp uwt
; <label>:7 ; preds = %7, %6
%8 = phi %0** [ %0, %6 ], [ %9, %7 ]
store %0* %4, %0** %8, align 8
- %9 = getelementptr inbounds %0** %8, i64 1
+ %9 = getelementptr inbounds %0*, %0** %8, i64 1
%10 = icmp eq %0** %9, %1
br i1 %10, label %11, label %7
@@ -61,12 +61,12 @@ define void @test_nonconsecutive_store() nounwind ssp uwtable {
; <label>:3 ; preds = %3, %1
%4 = phi i64 [ 0, %1 ], [ %11, %3 ]
- %5 = getelementptr inbounds [2048 x i16]* @q, i64 0, i64 %4
- %6 = load i16* %5, align 2
+ %5 = getelementptr inbounds [2048 x i16], [2048 x i16]* @q, i64 0, i64 %4
+ %6 = load i16, i16* %5, align 2
%7 = sext i16 %6 to i64
%8 = add i64 %7, 1
%9 = inttoptr i64 %8 to i32*
- %10 = getelementptr inbounds [2048 x [8 x i32*]]* @p, i64 0, i64 %4, i64 %2
+ %10 = getelementptr inbounds [2048 x [8 x i32*]], [2048 x [8 x i32*]]* @p, i64 0, i64 %4, i64 %2
store i32* %9, i32** %10, align 8
%11 = add i64 %4, 1
%12 = trunc i64 %11 to i32
@@ -100,8 +100,8 @@ define i8 @test_consecutive_ptr_load() nounwind readonly ssp uwtable {
; <label>:1 ; preds = %1, %0
%2 = phi i64 [ 0, %0 ], [ %10, %1 ]
%3 = phi i8 [ 0, %0 ], [ %9, %1 ]
- %4 = getelementptr inbounds [1024 x i32*]* @ia, i32 0, i64 %2
- %5 = load i32** %4, align 4
+ %4 = getelementptr inbounds [1024 x i32*], [1024 x i32*]* @ia, i32 0, i64 %2
+ %5 = load i32*, i32** %4, align 4
%6 = ptrtoint i32* %5 to i64
%7 = trunc i64 %6 to i8
%8 = add i8 %3, 1
@@ -127,9 +127,9 @@ define void @test_nonconsecutive_ptr_load() nounwind ssp uwtable {
; <label>:3 ; preds = %3, %1
%4 = phi i64 [ 0, %1 ], [ %10, %3 ]
- %5 = getelementptr inbounds [2048 x [8 x i32*]]* @p2, i64 0, i64 %4, i64 %2
- %6 = getelementptr inbounds [2048 x i16]* @q2, i64 0, i64 %4
- %7 = load i32** %5, align 2
+ %5 = getelementptr inbounds [2048 x [8 x i32*]], [2048 x [8 x i32*]]* @p2, i64 0, i64 %4, i64 %2
+ %6 = getelementptr inbounds [2048 x i16], [2048 x i16]* @q2, i64 0, i64 %4
+ %7 = load i32*, i32** %5, align 2
%8 = ptrtoint i32* %7 to i64
%9 = trunc i64 %8 to i16
store i16 %9, i16* %6, align 8
diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
index 011ce8e..60ad3c6 100644
--- a/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
+++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
@@ -52,7 +52,7 @@ entry:
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
- %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv, !dbg !16
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !16
%0 = trunc i64 %indvars.iv to i32, !dbg !16
store i32 %0, i32* %arrayidx, align 4, !dbg !16, !tbaa !18
%cmp3 = icmp sle i32 %0, %Length, !dbg !22
@@ -74,7 +74,7 @@ entry:
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
- %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv, !dbg !30
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !30
%0 = trunc i64 %indvars.iv to i32, !dbg !30
store i32 %0, i32* %arrayidx, align 4, !dbg !30, !tbaa !18
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !25
@@ -97,12 +97,12 @@ for.body.preheader: ; preds = %entry
for.body: ; preds = %for.body.preheader, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
- %arrayidx = getelementptr inbounds i32* %B, i64 %indvars.iv, !dbg !35
- %0 = load i32* %arrayidx, align 4, !dbg !35, !tbaa !18
+ %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv, !dbg !35
+ %0 = load i32, i32* %arrayidx, align 4, !dbg !35, !tbaa !18
%idxprom1 = sext i32 %0 to i64, !dbg !35
- %arrayidx2 = getelementptr inbounds i32* %A, i64 %idxprom1, !dbg !35
- %1 = load i32* %arrayidx2, align 4, !dbg !35, !tbaa !18
- %arrayidx4 = getelementptr inbounds i32* %A, i64 %indvars.iv, !dbg !35
+ %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %idxprom1, !dbg !35
+ %1 = load i32, i32* %arrayidx2, align 4, !dbg !35, !tbaa !18
+ %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !35
store i32 %1, i32* %arrayidx4, align 4, !dbg !35, !tbaa !18
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !32
%lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !32
@@ -122,40 +122,40 @@ attributes #0 = { nounwind }
!llvm.module.flags = !{!9, !10}
!llvm.ident = !{!11}
-!0 = !{!"0x11\004\00clang version 3.5.0\001\00\006\00\002", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [./source.cpp] [DW_LANG_C_plus_plus]
-!1 = !{!"source.cpp", !"."}
+!0 = !MDCompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0", isOptimized: true, runtimeVersion: 6, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !MDFile(filename: "source.cpp", directory: ".")
!2 = !{}
!3 = !{!4, !7, !8}
-!4 = !{!"0x2e\00test\00test\00\001\000\001\000\006\00256\001\001", !1, !5, !6, null, void (i32*, i32)* @_Z4testPii, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [test]
-!5 = !{!"0x29", !1} ; [ DW_TAG_file_type ] [./source.cpp]
-!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = !{!"0x2e\00test_disabled\00test_disabled\00\0010\000\001\000\006\00256\001\0010", !1, !5, !6, null, void (i32*, i32)* @_Z13test_disabledPii, null, null, !2} ; [ DW_TAG_subprogram ] [line 10] [def] [test_disabled]
-!8 = !{!"0x2e\00test_array_bounds\00test_array_bounds\00\0016\000\001\000\006\00256\001\0016", !1, !5, !6, null, void (i32*, i32*, i32)* @_Z17test_array_boundsPiS_i, null, null, !2} ; [ DW_TAG_subprogram ] [line 16] [def] [test_array_bounds]
+!4 = !MDSubprogram(name: "test", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !1, scope: !5, type: !6, function: void (i32*, i32)* @_Z4testPii, variables: !2)
+!5 = !MDFile(filename: "source.cpp", directory: ".")
+!6 = !MDSubroutineType(types: !2)
+!7 = !MDSubprogram(name: "test_disabled", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 10, file: !1, scope: !5, type: !6, function: void (i32*, i32)* @_Z13test_disabledPii, variables: !2)
+!8 = !MDSubprogram(name: "test_array_bounds", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 16, file: !1, scope: !5, type: !6, function: void (i32*, i32*, i32)* @_Z17test_array_boundsPiS_i, variables: !2)
!9 = !{i32 2, !"Dwarf Version", i32 2}
-!10 = !{i32 2, !"Debug Info Version", i32 2}
+!10 = !{i32 2, !"Debug Info Version", i32 3}
!11 = !{!"clang version 3.5.0"}
!12 = !MDLocation(line: 3, column: 8, scope: !13)
-!13 = !{!"0xb\003\003\000", !1, !4} ; [ DW_TAG_lexical_block ]
+!13 = distinct !MDLexicalBlock(line: 3, column: 3, file: !1, scope: !4)
!14 = !{!14, !15, !15}
!15 = !{!"llvm.loop.vectorize.enable", i1 true}
!16 = !MDLocation(line: 4, column: 5, scope: !17)
-!17 = !{!"0xb\003\0036\000", !1, !13} ; [ DW_TAG_lexical_block ]
+!17 = distinct !MDLexicalBlock(line: 3, column: 36, file: !1, scope: !13)
!18 = !{!19, !19, i64 0}
!19 = !{!"int", !20, i64 0}
!20 = !{!"omnipotent char", !21, i64 0}
!21 = !{!"Simple C/C++ TBAA"}
!22 = !MDLocation(line: 5, column: 9, scope: !23)
-!23 = !{!"0xb\005\009\000", !1, !17} ; [ DW_TAG_lexical_block ]
+!23 = distinct !MDLexicalBlock(line: 5, column: 9, file: !1, scope: !17)
!24 = !MDLocation(line: 8, column: 1, scope: !4)
!25 = !MDLocation(line: 12, column: 8, scope: !26)
-!26 = !{!"0xb\0012\003\000", !1, !7} ; [ DW_TAG_lexical_block ]
+!26 = distinct !MDLexicalBlock(line: 12, column: 3, file: !1, scope: !7)
!27 = !{!27, !28, !29}
!28 = !{!"llvm.loop.interleave.count", i32 1}
!29 = !{!"llvm.loop.vectorize.width", i32 1}
!30 = !MDLocation(line: 13, column: 5, scope: !26)
!31 = !MDLocation(line: 14, column: 1, scope: !7)
!32 = !MDLocation(line: 18, column: 8, scope: !33)
-!33 = !{!"0xb\0018\003\000", !1, !8} ; [ DW_TAG_lexical_block ]
+!33 = distinct !MDLexicalBlock(line: 18, column: 3, file: !1, scope: !8)
!34 = !{!34, !15}
!35 = !MDLocation(line: 19, column: 5, scope: !33)
!36 = !MDLocation(line: 20, column: 1, scope: !8)
diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
index 16fe370..a4e895a 100644
--- a/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
+++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
@@ -26,11 +26,11 @@ entry:
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%add8 = phi i32 [ 0, %entry ], [ %add, %for.body ], !dbg !19
- %arrayidx = getelementptr inbounds [16 x i8]* %cb, i64 0, i64 %indvars.iv, !dbg !19
- %0 = load i8* %arrayidx, align 1, !dbg !19, !tbaa !21
+ %arrayidx = getelementptr inbounds [16 x i8], [16 x i8]* %cb, i64 0, i64 %indvars.iv, !dbg !19
+ %0 = load i8, i8* %arrayidx, align 1, !dbg !19, !tbaa !21
%conv = sext i8 %0 to i32, !dbg !19
- %arrayidx2 = getelementptr inbounds [16 x i8]* %cc, i64 0, i64 %indvars.iv, !dbg !19
- %1 = load i8* %arrayidx2, align 1, !dbg !19, !tbaa !21
+ %arrayidx2 = getelementptr inbounds [16 x i8], [16 x i8]* %cc, i64 0, i64 %indvars.iv, !dbg !19
+ %1 = load i8, i8* %arrayidx2, align 1, !dbg !19, !tbaa !21
%conv3 = sext i8 %1 to i32, !dbg !19
%sub = sub i32 %conv, %conv3, !dbg !19
%add = add nsw i32 %sub, %add8, !dbg !19
@@ -49,14 +49,14 @@ declare void @ibar(i32*) #1
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
-!1 = !{!"vectorization-remarks.c", !"."}
+!1 = !MDFile(filename: "vectorization-remarks.c", directory: ".")
!2 = !{}
!3 = !{!4}
-!4 = !{!"0x2e\00foo\00foo\00\005\000\001\000\006\00256\001\006", !1, !5, !6, null, i32 (i32)* @foo, null, null, !2} ; [ DW_TAG_subprogram ] [line 5] [def] [scope 6] [foo]
-!5 = !{!"0x29", !1} ; [ DW_TAG_file_type ] [./vectorization-remarks.c]
-!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !MDSubprogram(name: "foo", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 6, file: !1, scope: !5, type: !6, function: i32 (i32)* @foo, variables: !2)
+!5 = !MDFile(filename: "vectorization-remarks.c", directory: ".")
+!6 = !MDSubroutineType(types: !2)
!7 = !{i32 2, !"Dwarf Version", i32 4}
-!8 = !{i32 1, !"Debug Info Version", i32 2}
+!8 = !{i32 1, !"Debug Info Version", i32 3}
!9 = !{!"clang version 3.5.0 "}
!10 = !MDLocation(line: 8, column: 3, scope: !4)
!11 = !{!12, !12, i64 0}
@@ -64,11 +64,11 @@ declare void @ibar(i32*) #1
!13 = !{!"omnipotent char", !14, i64 0}
!14 = !{!"Simple C/C++ TBAA"}
!15 = !MDLocation(line: 17, column: 8, scope: !16)
-!16 = !{!"0xb\0017\008\002", !1, !17} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c]
-!17 = !{!"0xb\0017\008\001", !1, !18} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c]
-!18 = !{!"0xb\0017\003\000", !1, !4} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c]
+!16 = distinct !MDLexicalBlock(line: 17, column: 8, file: !1, scope: !17)
+!17 = distinct !MDLexicalBlock(line: 17, column: 8, file: !1, scope: !18)
+!18 = distinct !MDLexicalBlock(line: 17, column: 3, file: !1, scope: !4)
!19 = !MDLocation(line: 18, column: 5, scope: !20)
-!20 = !{!"0xb\0017\0027\000", !1, !18} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c]
+!20 = distinct !MDLexicalBlock(line: 17, column: 27, file: !1, scope: !18)
!21 = !{!13, !13, i64 0}
!22 = !MDLocation(line: 20, column: 3, scope: !4)
!23 = !MDLocation(line: 21, column: 3, scope: !4)
diff --git a/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll b/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll
index d8e5403..0debb33 100644
--- a/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll
+++ b/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll
@@ -17,7 +17,7 @@ entry:
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%conv = sitofp i32 1 to x86_fp80
- %arrayidx = getelementptr inbounds [1024 x x86_fp80]* @x, i64 0, i64 %indvars.iv
+ %arrayidx = getelementptr inbounds [1024 x x86_fp80], [1024 x x86_fp80]* @x, i64 0, i64 %indvars.iv
store x86_fp80 %conv, x86_fp80* %arrayidx, align 16
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32