diff options
Diffstat (limited to 'test/Transforms/LoopVectorize/X86')
36 files changed, 527 insertions, 352 deletions
diff --git a/test/Transforms/LoopVectorize/X86/already-vectorized.ll b/test/Transforms/LoopVectorize/X86/already-vectorized.ll index 29d74a0..c400c76 100644 --- a/test/Transforms/LoopVectorize/X86/already-vectorized.ll +++ b/test/Transforms/LoopVectorize/X86/already-vectorized.ll @@ -21,8 +21,8 @@ for.body: ; preds = %for.body, %entry ; CHECK: LV: We can vectorize this loop! %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %red.05 = phi i32 [ 0, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds [255 x i32]* @a, i64 0, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds [255 x i32], [255 x i32]* @a, i64 0, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 %add = add nsw i32 %0, %red.05 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 255 @@ -42,5 +42,6 @@ for.end: ; preds = %for.body ; CHECK: [[vect]] = distinct !{[[vect]], [[width:![0-9]+]], [[unroll:![0-9]+]]} ; CHECK: [[width]] = !{!"llvm.loop.vectorize.width", i32 1} ; CHECK: [[unroll]] = !{!"llvm.loop.interleave.count", i32 1} -; CHECK: [[scalar]] = distinct !{[[scalar]], [[width]], [[unroll]]} +; CHECK: [[scalar]] = distinct !{[[scalar]], [[runtime_unroll:![0-9]+]], [[width]], [[unroll]]} +; CHECK: [[runtime_unroll]] = !{!"llvm.loop.unroll.runtime.disable"} diff --git a/test/Transforms/LoopVectorize/X86/assume.ll b/test/Transforms/LoopVectorize/X86/assume.ll index a94e24d..4fd378d 100644 --- a/test/Transforms/LoopVectorize/X86/assume.ll +++ b/test/Transforms/LoopVectorize/X86/assume.ll @@ -22,12 +22,12 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv - %0 = load float* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv + %0 = load float, float* %arrayidx, align 4 %cmp1 = fcmp ogt float %0, 1.000000e+02 tail call void 
@llvm.assume(i1 %cmp1) %add = fadd float %0, 1.000000e+00 - %arrayidx5 = getelementptr inbounds float* %a, i64 %indvars.iv + %arrayidx5 = getelementptr inbounds float, float* %a, i64 %indvars.iv store float %add, float* %arrayidx5, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv, 1599 @@ -48,13 +48,13 @@ attributes #1 = { nounwind } ; Function Attrs: nounwind uwtable define void @test2(%struct.data* nocapture readonly %d) #0 { entry: - %b = getelementptr inbounds %struct.data* %d, i64 0, i32 1 - %0 = load float** %b, align 8 + %b = getelementptr inbounds %struct.data, %struct.data* %d, i64 0, i32 1 + %0 = load float*, float** %b, align 8 %ptrint = ptrtoint float* %0 to i64 %maskedptr = and i64 %ptrint, 31 %maskcond = icmp eq i64 %maskedptr, 0 - %a = getelementptr inbounds %struct.data* %d, i64 0, i32 0 - %1 = load float** %a, align 8 + %a = getelementptr inbounds %struct.data, %struct.data* %d, i64 0, i32 0 + %1 = load float*, float** %a, align 8 %ptrint2 = ptrtoint float* %1 to i64 %maskedptr3 = and i64 %ptrint2, 31 %maskcond4 = icmp eq i64 %maskedptr3, 0 @@ -84,11 +84,11 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] tail call void @llvm.assume(i1 %maskcond) - %arrayidx = getelementptr inbounds float* %0, i64 %indvars.iv - %2 = load float* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, float* %0, i64 %indvars.iv + %2 = load float, float* %arrayidx, align 4 %add = fadd float %2, 1.000000e+00 tail call void @llvm.assume(i1 %maskcond4) - %arrayidx5 = getelementptr inbounds float* %1, i64 %indvars.iv + %arrayidx5 = getelementptr inbounds float, float* %1, i64 %indvars.iv store float %add, float* %arrayidx5, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv, 1599 diff --git a/test/Transforms/LoopVectorize/X86/avx1.ll b/test/Transforms/LoopVectorize/X86/avx1.ll index 01c9125..37977c4 100644 --- 
a/test/Transforms/LoopVectorize/X86/avx1.ll +++ b/test/Transforms/LoopVectorize/X86/avx1.ll @@ -12,8 +12,8 @@ define i32 @read_mod_write_single_ptr(float* nocapture %a, i32 %n) nounwind uwta .lr.ph: ; preds = %0, %.lr.ph %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] - %2 = getelementptr inbounds float* %a, i64 %indvars.iv - %3 = load float* %2, align 4 + %2 = getelementptr inbounds float, float* %a, i64 %indvars.iv + %3 = load float, float* %2, align 4 %4 = fmul float %3, 3.000000e+00 store float %4, float* %2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 @@ -35,8 +35,8 @@ define i32 @read_mod_i64(i64* nocapture %a, i32 %n) nounwind uwtable ssp { .lr.ph: ; preds = %0, %.lr.ph %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] - %2 = getelementptr inbounds i64* %a, i64 %indvars.iv - %3 = load i64* %2, align 4 + %2 = getelementptr inbounds i64, i64* %a, i64 %indvars.iv + %3 = load i64, i64* %2, align 4 %4 = add i64 %3, 3 store i64 %4, i64* %2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 diff --git a/test/Transforms/LoopVectorize/X86/avx512.ll b/test/Transforms/LoopVectorize/X86/avx512.ll index a220866..754e859 100644 --- a/test/Transforms/LoopVectorize/X86/avx512.ll +++ b/test/Transforms/LoopVectorize/X86/avx512.ll @@ -20,7 +20,7 @@ for.body.preheader: ; preds = %entry for.body: ; preds = %for.body.preheader, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv store i32 %n, i32* %arrayidx, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 diff --git a/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll b/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll index f4c07b4..d75b1d9 100644 --- a/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll +++ 
b/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll @@ -15,10 +15,10 @@ entry: for.body: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds [1024 x i32]* @B, i64 0, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 %shl = ashr i32 %0, 3 - %arrayidx2 = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv + %arrayidx2 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv store i32 %shl, i32* %arrayidx2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 diff --git a/test/Transforms/LoopVectorize/X86/conversion-cost.ll b/test/Transforms/LoopVectorize/X86/conversion-cost.ll index 0af562d..eb2a2a5 100644 --- a/test/Transforms/LoopVectorize/X86/conversion-cost.ll +++ b/test/Transforms/LoopVectorize/X86/conversion-cost.ll @@ -13,7 +13,7 @@ define i32 @conversion_cost1(i32 %n, i8* nocapture %A, float* nocapture %B) noun .lr.ph: ; preds = %0, %.lr.ph %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 3, %0 ] %2 = trunc i64 %indvars.iv to i8 - %3 = getelementptr inbounds i8* %A, i64 %indvars.iv + %3 = getelementptr inbounds i8, i8* %A, i64 %indvars.iv store i8 %2, i8* %3, align 1 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 @@ -35,7 +35,7 @@ define i32 @conversion_cost2(i32 %n, i8* nocapture %A, float* nocapture %B) noun %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 9, %0 ] %add = add nsw i64 %indvars.iv, 3 %tofp = sitofp i64 %add to float - %gep = getelementptr inbounds float* %B, i64 %indvars.iv + %gep = getelementptr inbounds float, float* %B, i64 %indvars.iv store float %tofp, float* %gep, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 diff --git 
a/test/Transforms/LoopVectorize/X86/cost-model.ll b/test/Transforms/LoopVectorize/X86/cost-model.ll index 98718e1..0136571 100644 --- a/test/Transforms/LoopVectorize/X86/cost-model.ll +++ b/test/Transforms/LoopVectorize/X86/cost-model.ll @@ -21,15 +21,15 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %0 = shl nsw i64 %indvars.iv, 1 - %arrayidx = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %0 - %1 = load i32* %arrayidx, align 8 + %arrayidx = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %0 + %1 = load i32, i32* %arrayidx, align 8 %idxprom1 = sext i32 %1 to i64 - %arrayidx2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %idxprom1 - %2 = load i32* %arrayidx2, align 4 - %arrayidx4 = getelementptr inbounds [2048 x i32]* @d, i64 0, i64 %indvars.iv - %3 = load i32* %arrayidx4, align 4 + %arrayidx2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %idxprom1 + %2 = load i32, i32* %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds [2048 x i32], [2048 x i32]* @d, i64 0, i64 %indvars.iv + %3 = load i32, i32* %arrayidx4, align 4 %idxprom5 = sext i32 %3 to i64 - %arrayidx6 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %idxprom5 + %arrayidx6 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %idxprom5 store i32 %2, i32* %arrayidx6, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 diff --git a/test/Transforms/LoopVectorize/X86/fp32_to_uint32-cost-model.ll b/test/Transforms/LoopVectorize/X86/fp32_to_uint32-cost-model.ll index 529ed88..4a56d6b 100644 --- a/test/Transforms/LoopVectorize/X86/fp32_to_uint32-cost-model.ll +++ b/test/Transforms/LoopVectorize/X86/fp32_to_uint32-cost-model.ll @@ -20,10 +20,10 @@ for.body.preheader: ; preds = %entry for.body: ; preds = %for.body.preheader, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] - 
%arrayidx = getelementptr inbounds [10000 x float]* @float_array, i64 0, i64 %indvars.iv - %1 = load float* %arrayidx, align 4 + %arrayidx = getelementptr inbounds [10000 x float], [10000 x float]* @float_array, i64 0, i64 %indvars.iv + %1 = load float, float* %arrayidx, align 4 %conv = fptoui float %1 to i32 - %arrayidx2 = getelementptr inbounds [10000 x i32]* @unsigned_array, i64 0, i64 %indvars.iv + %arrayidx2 = getelementptr inbounds [10000 x i32], [10000 x i32]* @unsigned_array, i64 0, i64 %indvars.iv store i32 %conv, i32* %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 diff --git a/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll b/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll index ef3e3be..c066afc 100644 --- a/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll +++ b/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll @@ -13,7 +13,7 @@ target triple = "x86_64-apple-macosx" define void @convert() { entry: - %0 = load i32* @n, align 4 + %0 = load i32, i32* @n, align 4 %cmp4 = icmp eq i32 %0, 0 br i1 %cmp4, label %for.end, label %for.body.preheader @@ -22,10 +22,10 @@ for.body.preheader: ; preds = %entry for.body: ; preds = %for.body.preheader, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds [10000 x double]* @double_array, i64 0, i64 %indvars.iv - %1 = load double* %arrayidx, align 8 + %arrayidx = getelementptr inbounds [10000 x double], [10000 x double]* @double_array, i64 0, i64 %indvars.iv + %1 = load double, double* %arrayidx, align 8 %conv = fptoui double %1 to i32 - %arrayidx2 = getelementptr inbounds [10000 x i32]* @unsigned_array, i64 0, i64 %indvars.iv + %arrayidx2 = getelementptr inbounds [10000 x i32], [10000 x i32]* @unsigned_array, i64 0, i64 %indvars.iv store i32 %conv, i32* %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 
%2 = trunc i64 %indvars.iv.next to i32 diff --git a/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll b/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll index 23e6227..b3a0710 100644 --- a/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll +++ b/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll @@ -11,10 +11,10 @@ entry: br label %for.body for.body: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv - %tmp = load float* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv + %tmp = load float, float* %arrayidx, align 4 %conv = fptosi float %tmp to i8 - %arrayidx2 = getelementptr inbounds i8* %a, i64 %indvars.iv + %arrayidx2 = getelementptr inbounds i8, i8* %a, i64 %indvars.iv store i8 %conv, i8* %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 256 diff --git a/test/Transforms/LoopVectorize/X86/gather-cost.ll b/test/Transforms/LoopVectorize/X86/gather-cost.ll index 09363d6..f0e6c8f 100644 --- a/test/Transforms/LoopVectorize/X86/gather-cost.ll +++ b/test/Transforms/LoopVectorize/X86/gather-cost.ll @@ -31,32 +31,32 @@ for.body: %b.054 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add30, %for.body ] %add = add i64 %v.055, %offset %mul = mul i64 %add, 3 - %arrayidx = getelementptr inbounds [1536 x float]* @src_data, i64 0, i64 %mul - %0 = load float* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds [512 x float]* @kernel, i64 0, i64 %v.055 - %1 = load float* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i64 0, i64 %mul + %0 = load float, float* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds [512 x float], [512 x float]* @kernel, i64 0, i64 %v.055 + %1 = load float, float* %arrayidx2, align 4 %mul3 = fmul fast float %0, %1 - %arrayidx4 = getelementptr inbounds [512 x float]* 
@kernel2, i64 0, i64 %v.055 - %2 = load float* %arrayidx4, align 4 + %arrayidx4 = getelementptr inbounds [512 x float], [512 x float]* @kernel2, i64 0, i64 %v.055 + %2 = load float, float* %arrayidx4, align 4 %mul5 = fmul fast float %mul3, %2 - %arrayidx6 = getelementptr inbounds [512 x float]* @kernel3, i64 0, i64 %v.055 - %3 = load float* %arrayidx6, align 4 + %arrayidx6 = getelementptr inbounds [512 x float], [512 x float]* @kernel3, i64 0, i64 %v.055 + %3 = load float, float* %arrayidx6, align 4 %mul7 = fmul fast float %mul5, %3 - %arrayidx8 = getelementptr inbounds [512 x float]* @kernel4, i64 0, i64 %v.055 - %4 = load float* %arrayidx8, align 4 + %arrayidx8 = getelementptr inbounds [512 x float], [512 x float]* @kernel4, i64 0, i64 %v.055 + %4 = load float, float* %arrayidx8, align 4 %mul9 = fmul fast float %mul7, %4 %add10 = fadd fast float %r.057, %mul9 %arrayidx.sum = add i64 %mul, 1 - %arrayidx11 = getelementptr inbounds [1536 x float]* @src_data, i64 0, i64 %arrayidx.sum - %5 = load float* %arrayidx11, align 4 + %arrayidx11 = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i64 0, i64 %arrayidx.sum + %5 = load float, float* %arrayidx11, align 4 %mul13 = fmul fast float %1, %5 %mul15 = fmul fast float %2, %mul13 %mul17 = fmul fast float %3, %mul15 %mul19 = fmul fast float %4, %mul17 %add20 = fadd fast float %g.056, %mul19 %arrayidx.sum52 = add i64 %mul, 2 - %arrayidx21 = getelementptr inbounds [1536 x float]* @src_data, i64 0, i64 %arrayidx.sum52 - %6 = load float* %arrayidx21, align 4 + %arrayidx21 = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i64 0, i64 %arrayidx.sum52 + %6 = load float, float* %arrayidx21, align 4 %mul23 = fmul fast float %1, %6 %mul25 = fmul fast float %2, %mul23 %mul27 = fmul fast float %3, %mul25 diff --git a/test/Transforms/LoopVectorize/X86/gcc-examples.ll b/test/Transforms/LoopVectorize/X86/gcc-examples.ll index 05403cd..c581f4b 100644 --- a/test/Transforms/LoopVectorize/X86/gcc-examples.ll 
+++ b/test/Transforms/LoopVectorize/X86/gcc-examples.ll @@ -28,12 +28,12 @@ define void @example1() nounwind uwtable ssp { ; <label>:1 ; preds = %1, %0 %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ] - %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv - %3 = load i32* %2, align 4 - %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv - %5 = load i32* %4, align 4 + %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv + %3 = load i32, i32* %2, align 4 + %4 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %indvars.iv + %5 = load i32, i32* %4, align 4 %6 = add nsw i32 %5, %3 - %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv + %7 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv store i32 %6, i32* %7, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 @@ -61,10 +61,10 @@ define void @example10b(i16* noalias nocapture %sa, i16* noalias nocapture %sb, ; <label>:1 ; preds = %1, %0 %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ] - %2 = getelementptr inbounds i16* %sb, i64 %indvars.iv - %3 = load i16* %2, align 2 + %2 = getelementptr inbounds i16, i16* %sb, i64 %indvars.iv + %3 = load i16, i16* %2, align 2 %4 = sext i16 %3 to i32 - %5 = getelementptr inbounds i32* %ia, i64 %indvars.iv + %5 = getelementptr inbounds i32, i32* %ia, i64 %indvars.iv store i32 %4, i32* %5, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 diff --git a/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll b/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll index 46efaf0..cbba530 100644 --- a/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll +++ b/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll @@ -14,8 +14,8 @@ entry: br i1 %cmp27, label %for.body3.lr.ph.us, 
label %for.end15 for.end.us: ; preds = %for.body3.us - %arrayidx9.us = getelementptr inbounds i32* %b, i64 %indvars.iv33 - %0 = load i32* %arrayidx9.us, align 4, !llvm.mem.parallel_loop_access !3 + %arrayidx9.us = getelementptr inbounds i32, i32* %b, i64 %indvars.iv33 + %0 = load i32, i32* %arrayidx9.us, align 4, !llvm.mem.parallel_loop_access !3 %add10.us = add nsw i32 %0, 3 store i32 %add10.us, i32* %arrayidx9.us, align 4, !llvm.mem.parallel_loop_access !3 %indvars.iv.next34 = add i64 %indvars.iv33, 1 @@ -28,8 +28,8 @@ for.body3.us: ; preds = %for.body3.us, %for. %1 = trunc i64 %indvars.iv29 to i32 %add4.us = add i32 %add.us, %1 %idxprom.us = sext i32 %add4.us to i64 - %arrayidx.us = getelementptr inbounds i32* %a, i64 %idxprom.us - %2 = load i32* %arrayidx.us, align 4, !llvm.mem.parallel_loop_access !3 + %arrayidx.us = getelementptr inbounds i32, i32* %a, i64 %idxprom.us + %2 = load i32, i32* %arrayidx.us, align 4, !llvm.mem.parallel_loop_access !3 %add5.us = add nsw i32 %2, 1 store i32 %add5.us, i32* %arrayidx7.us, align 4, !llvm.mem.parallel_loop_access !3 %indvars.iv.next30 = add i64 %indvars.iv29, 1 @@ -41,7 +41,7 @@ for.body3.lr.ph.us: ; preds = %for.end.us, %entry %indvars.iv33 = phi i64 [ %indvars.iv.next34, %for.end.us ], [ 0, %entry ] %3 = trunc i64 %indvars.iv33 to i32 %add.us = add i32 %3, %k - %arrayidx7.us = getelementptr inbounds i32* %a, i64 %indvars.iv33 + %arrayidx7.us = getelementptr inbounds i32, i32* %a, i64 %indvars.iv33 br label %for.body3.us for.end15: ; preds = %for.end.us, %entry diff --git a/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/test/Transforms/LoopVectorize/X86/masked_load_store.ll index 9e2de80..8c375cc 100644 --- a/test/Transforms/LoopVectorize/X86/masked_load_store.ll +++ b/test/Transforms/LoopVectorize/X86/masked_load_store.ll @@ -46,35 +46,35 @@ entry: br label %for.cond for.cond: ; preds = %for.inc, %entry - %0 = load i32* %i, align 4 + %0 = load i32, i32* %i, align 4 %cmp = icmp slt i32 %0, 10000 br i1 %cmp, 
label %for.body, label %for.end for.body: ; preds = %for.cond - %1 = load i32* %i, align 4 + %1 = load i32, i32* %i, align 4 %idxprom = sext i32 %1 to i64 - %2 = load i32** %trigger.addr, align 8 - %arrayidx = getelementptr inbounds i32* %2, i64 %idxprom - %3 = load i32* %arrayidx, align 4 + %2 = load i32*, i32** %trigger.addr, align 8 + %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom + %3 = load i32, i32* %arrayidx, align 4 %cmp1 = icmp slt i32 %3, 100 br i1 %cmp1, label %if.then, label %if.end if.then: ; preds = %for.body - %4 = load i32* %i, align 4 + %4 = load i32, i32* %i, align 4 %idxprom2 = sext i32 %4 to i64 - %5 = load i32** %B.addr, align 8 - %arrayidx3 = getelementptr inbounds i32* %5, i64 %idxprom2 - %6 = load i32* %arrayidx3, align 4 - %7 = load i32* %i, align 4 + %5 = load i32*, i32** %B.addr, align 8 + %arrayidx3 = getelementptr inbounds i32, i32* %5, i64 %idxprom2 + %6 = load i32, i32* %arrayidx3, align 4 + %7 = load i32, i32* %i, align 4 %idxprom4 = sext i32 %7 to i64 - %8 = load i32** %trigger.addr, align 8 - %arrayidx5 = getelementptr inbounds i32* %8, i64 %idxprom4 - %9 = load i32* %arrayidx5, align 4 + %8 = load i32*, i32** %trigger.addr, align 8 + %arrayidx5 = getelementptr inbounds i32, i32* %8, i64 %idxprom4 + %9 = load i32, i32* %arrayidx5, align 4 %add = add nsw i32 %6, %9 - %10 = load i32* %i, align 4 + %10 = load i32, i32* %i, align 4 %idxprom6 = sext i32 %10 to i64 - %11 = load i32** %A.addr, align 8 - %arrayidx7 = getelementptr inbounds i32* %11, i64 %idxprom6 + %11 = load i32*, i32** %A.addr, align 8 + %arrayidx7 = getelementptr inbounds i32, i32* %11, i64 %idxprom6 store i32 %add, i32* %arrayidx7, align 4 br label %if.end @@ -82,7 +82,7 @@ if.end: ; preds = %if.then, %for.body br label %for.inc for.inc: ; preds = %if.end - %12 = load i32* %i, align 4 + %12 = load i32, i32* %i, align 4 %inc = add nsw i32 %12, 1 store i32 %inc, i32* %i, align 4 br label %for.cond @@ -130,36 +130,36 @@ entry: br label %for.cond for.cond: ; 
preds = %for.inc, %entry - %0 = load i32* %i, align 4 + %0 = load i32, i32* %i, align 4 %cmp = icmp slt i32 %0, 10000 br i1 %cmp, label %for.body, label %for.end for.body: ; preds = %for.cond - %1 = load i32* %i, align 4 + %1 = load i32, i32* %i, align 4 %idxprom = sext i32 %1 to i64 - %2 = load i32** %trigger.addr, align 8 - %arrayidx = getelementptr inbounds i32* %2, i64 %idxprom - %3 = load i32* %arrayidx, align 4 + %2 = load i32*, i32** %trigger.addr, align 8 + %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom + %3 = load i32, i32* %arrayidx, align 4 %cmp1 = icmp slt i32 %3, 100 br i1 %cmp1, label %if.then, label %if.end if.then: ; preds = %for.body - %4 = load i32* %i, align 4 + %4 = load i32, i32* %i, align 4 %idxprom2 = sext i32 %4 to i64 - %5 = load float** %B.addr, align 8 - %arrayidx3 = getelementptr inbounds float* %5, i64 %idxprom2 - %6 = load float* %arrayidx3, align 4 - %7 = load i32* %i, align 4 + %5 = load float*, float** %B.addr, align 8 + %arrayidx3 = getelementptr inbounds float, float* %5, i64 %idxprom2 + %6 = load float, float* %arrayidx3, align 4 + %7 = load i32, i32* %i, align 4 %idxprom4 = sext i32 %7 to i64 - %8 = load i32** %trigger.addr, align 8 - %arrayidx5 = getelementptr inbounds i32* %8, i64 %idxprom4 - %9 = load i32* %arrayidx5, align 4 + %8 = load i32*, i32** %trigger.addr, align 8 + %arrayidx5 = getelementptr inbounds i32, i32* %8, i64 %idxprom4 + %9 = load i32, i32* %arrayidx5, align 4 %conv = sitofp i32 %9 to float %add = fadd float %6, %conv - %10 = load i32* %i, align 4 + %10 = load i32, i32* %i, align 4 %idxprom6 = sext i32 %10 to i64 - %11 = load float** %A.addr, align 8 - %arrayidx7 = getelementptr inbounds float* %11, i64 %idxprom6 + %11 = load float*, float** %A.addr, align 8 + %arrayidx7 = getelementptr inbounds float, float* %11, i64 %idxprom6 store float %add, float* %arrayidx7, align 4 br label %if.end @@ -167,7 +167,7 @@ if.end: ; preds = %if.then, %for.body br label %for.inc for.inc: ; preds = %if.end - 
%12 = load i32* %i, align 4 + %12 = load i32, i32* %i, align 4 %inc = add nsw i32 %12, 1 store i32 %inc, i32* %i, align 4 br label %for.cond @@ -218,36 +218,36 @@ entry: br label %for.cond for.cond: ; preds = %for.inc, %entry - %0 = load i32* %i, align 4 + %0 = load i32, i32* %i, align 4 %cmp = icmp slt i32 %0, 10000 br i1 %cmp, label %for.body, label %for.end for.body: ; preds = %for.cond - %1 = load i32* %i, align 4 + %1 = load i32, i32* %i, align 4 %idxprom = sext i32 %1 to i64 - %2 = load i32** %trigger.addr, align 8 - %arrayidx = getelementptr inbounds i32* %2, i64 %idxprom - %3 = load i32* %arrayidx, align 4 + %2 = load i32*, i32** %trigger.addr, align 8 + %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom + %3 = load i32, i32* %arrayidx, align 4 %cmp1 = icmp slt i32 %3, 100 br i1 %cmp1, label %if.then, label %if.end if.then: ; preds = %for.body - %4 = load i32* %i, align 4 + %4 = load i32, i32* %i, align 4 %idxprom2 = sext i32 %4 to i64 - %5 = load double** %B.addr, align 8 - %arrayidx3 = getelementptr inbounds double* %5, i64 %idxprom2 - %6 = load double* %arrayidx3, align 8 - %7 = load i32* %i, align 4 + %5 = load double*, double** %B.addr, align 8 + %arrayidx3 = getelementptr inbounds double, double* %5, i64 %idxprom2 + %6 = load double, double* %arrayidx3, align 8 + %7 = load i32, i32* %i, align 4 %idxprom4 = sext i32 %7 to i64 - %8 = load i32** %trigger.addr, align 8 - %arrayidx5 = getelementptr inbounds i32* %8, i64 %idxprom4 - %9 = load i32* %arrayidx5, align 4 + %8 = load i32*, i32** %trigger.addr, align 8 + %arrayidx5 = getelementptr inbounds i32, i32* %8, i64 %idxprom4 + %9 = load i32, i32* %arrayidx5, align 4 %conv = sitofp i32 %9 to double %add = fadd double %6, %conv - %10 = load i32* %i, align 4 + %10 = load i32, i32* %i, align 4 %idxprom6 = sext i32 %10 to i64 - %11 = load double** %A.addr, align 8 - %arrayidx7 = getelementptr inbounds double* %11, i64 %idxprom6 + %11 = load double*, double** %A.addr, align 8 + %arrayidx7 = 
getelementptr inbounds double, double* %11, i64 %idxprom6 store double %add, double* %arrayidx7, align 8 br label %if.end @@ -255,7 +255,7 @@ if.end: ; preds = %if.then, %for.body br label %for.inc for.inc: ; preds = %if.end - %12 = load i32* %i, align 4 + %12 = load i32, i32* %i, align 4 %inc = add nsw i32 %12, 1 store i32 %inc, i32* %i, align 4 br label %for.cond @@ -297,37 +297,37 @@ entry: br label %for.cond for.cond: ; preds = %for.inc, %entry - %0 = load i32* %i, align 4 + %0 = load i32, i32* %i, align 4 %cmp = icmp slt i32 %0, 10000 br i1 %cmp, label %for.body, label %for.end for.body: ; preds = %for.cond - %1 = load i32* %i, align 4 + %1 = load i32, i32* %i, align 4 %idxprom = sext i32 %1 to i64 - %2 = load i32** %trigger.addr, align 8 - %arrayidx = getelementptr inbounds i32* %2, i64 %idxprom - %3 = load i32* %arrayidx, align 4 + %2 = load i32*, i32** %trigger.addr, align 8 + %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom + %3 = load i32, i32* %arrayidx, align 4 %cmp1 = icmp slt i32 %3, 100 br i1 %cmp1, label %if.then, label %if.end if.then: ; preds = %for.body - %4 = load i32* %i, align 4 + %4 = load i32, i32* %i, align 4 %mul = mul nsw i32 %4, 2 %idxprom2 = sext i32 %mul to i64 - %5 = load double** %B.addr, align 8 - %arrayidx3 = getelementptr inbounds double* %5, i64 %idxprom2 - %6 = load double* %arrayidx3, align 8 - %7 = load i32* %i, align 4 + %5 = load double*, double** %B.addr, align 8 + %arrayidx3 = getelementptr inbounds double, double* %5, i64 %idxprom2 + %6 = load double, double* %arrayidx3, align 8 + %7 = load i32, i32* %i, align 4 %idxprom4 = sext i32 %7 to i64 - %8 = load i32** %trigger.addr, align 8 - %arrayidx5 = getelementptr inbounds i32* %8, i64 %idxprom4 - %9 = load i32* %arrayidx5, align 4 + %8 = load i32*, i32** %trigger.addr, align 8 + %arrayidx5 = getelementptr inbounds i32, i32* %8, i64 %idxprom4 + %9 = load i32, i32* %arrayidx5, align 4 %conv = sitofp i32 %9 to double %add = fadd double %6, %conv - %10 = load i32* 
%i, align 4 + %10 = load i32, i32* %i, align 4 %idxprom6 = sext i32 %10 to i64 - %11 = load double** %A.addr, align 8 - %arrayidx7 = getelementptr inbounds double* %11, i64 %idxprom6 + %11 = load double*, double** %A.addr, align 8 + %arrayidx7 = getelementptr inbounds double, double* %11, i64 %idxprom6 store double %add, double* %arrayidx7, align 8 br label %if.end @@ -335,7 +335,7 @@ if.end: ; preds = %if.then, %for.body br label %for.inc for.inc: ; preds = %if.end - %12 = load i32* %i, align 4 + %12 = load i32, i32* %i, align 4 %inc = add nsw i32 %12, 1 store i32 %inc, i32* %i, align 4 br label %for.cond @@ -373,43 +373,43 @@ entry: br label %for.cond for.cond: ; preds = %for.inc, %entry - %0 = load i32* %i, align 4 + %0 = load i32, i32* %i, align 4 %cmp = icmp slt i32 %0, 10000 br i1 %cmp, label %for.body, label %for.end for.body: ; preds = %for.cond - %1 = load i32* %i, align 4 + %1 = load i32, i32* %i, align 4 %idxprom = sext i32 %1 to i64 - %2 = load i32** %trigger.addr, align 8 - %arrayidx = getelementptr inbounds i32* %2, i64 %idxprom - %3 = load i32* %arrayidx, align 4 + %2 = load i32*, i32** %trigger.addr, align 8 + %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom + %3 = load i32, i32* %arrayidx, align 4 %cmp1 = icmp slt i32 %3, 100 br i1 %cmp1, label %if.then, label %if.end if.then: ; preds = %for.body - %4 = load i32* %i, align 4 + %4 = load i32, i32* %i, align 4 %idxprom2 = sext i32 %4 to i64 - %5 = load i32** %B.addr, align 8 - %arrayidx3 = getelementptr inbounds i32* %5, i64 %idxprom2 - %6 = load i32* %arrayidx3, align 4 - %7 = load i32* %i, align 4 + %5 = load i32*, i32** %B.addr, align 8 + %arrayidx3 = getelementptr inbounds i32, i32* %5, i64 %idxprom2 + %6 = load i32, i32* %arrayidx3, align 4 + %7 = load i32, i32* %i, align 4 %idxprom4 = sext i32 %7 to i64 - %8 = load i32** %trigger.addr, align 8 - %arrayidx5 = getelementptr inbounds i32* %8, i64 %idxprom4 - %9 = load i32* %arrayidx5, align 4 + %8 = load i32*, i32** %trigger.addr, 
align 8 + %arrayidx5 = getelementptr inbounds i32, i32* %8, i64 %idxprom4 + %9 = load i32, i32* %arrayidx5, align 4 %add = add nsw i32 %6, %9 - %10 = load i32* %i, align 4 + %10 = load i32, i32* %i, align 4 %idxprom6 = sext i32 %10 to i64 - %11 = load i32** %A.addr, align 8 - %arrayidx7 = getelementptr inbounds i32* %11, i64 %idxprom6 - store i32 sdiv (i32 1, i32 zext (i1 icmp eq (i32** getelementptr inbounds ([1 x i32*]* @a, i64 0, i64 1), i32** @c) to i32)), i32* %arrayidx7, align 4 + %11 = load i32*, i32** %A.addr, align 8 + %arrayidx7 = getelementptr inbounds i32, i32* %11, i64 %idxprom6 + store i32 sdiv (i32 1, i32 zext (i1 icmp eq (i32** getelementptr inbounds ([1 x i32*], [1 x i32*]* @a, i64 0, i64 1), i32** @c) to i32)), i32* %arrayidx7, align 4 br label %if.end if.end: ; preds = %if.then, %for.body br label %for.inc for.inc: ; preds = %if.end - %12 = load i32* %i, align 4 + %12 = load i32, i32* %i, align 4 %inc = add nsw i32 %12, 1 store i32 %inc, i32* %i, align 4 br label %for.cond @@ -459,30 +459,30 @@ entry: br label %for.cond for.cond: ; preds = %for.inc, %entry - %0 = load i32* %i, align 4 + %0 = load i32, i32* %i, align 4 %cmp = icmp sge i32 %0, 0 br i1 %cmp, label %for.body, label %for.end for.body: ; preds = %for.cond - %1 = load i32* %i, align 4 + %1 = load i32, i32* %i, align 4 %idxprom = sext i32 %1 to i64 - %2 = load i32** %trigger.addr, align 8 - %arrayidx = getelementptr inbounds i32* %2, i64 %idxprom - %3 = load i32* %arrayidx, align 4 + %2 = load i32*, i32** %trigger.addr, align 8 + %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom + %3 = load i32, i32* %arrayidx, align 4 %cmp1 = icmp sgt i32 %3, 0 br i1 %cmp1, label %if.then, label %if.end if.then: ; preds = %for.body - %4 = load i32* %i, align 4 + %4 = load i32, i32* %i, align 4 %idxprom2 = sext i32 %4 to i64 - %5 = load double** %in.addr, align 8 - %arrayidx3 = getelementptr inbounds double* %5, i64 %idxprom2 - %6 = load double* %arrayidx3, align 8 + %5 = load double*, 
double** %in.addr, align 8 + %arrayidx3 = getelementptr inbounds double, double* %5, i64 %idxprom2 + %6 = load double, double* %arrayidx3, align 8 %add = fadd double %6, 5.000000e-01 - %7 = load i32* %i, align 4 + %7 = load i32, i32* %i, align 4 %idxprom4 = sext i32 %7 to i64 - %8 = load double** %out.addr, align 8 - %arrayidx5 = getelementptr inbounds double* %8, i64 %idxprom4 + %8 = load double*, double** %out.addr, align 8 + %arrayidx5 = getelementptr inbounds double, double* %8, i64 %idxprom4 store double %add, double* %arrayidx5, align 8 br label %if.end @@ -490,7 +490,7 @@ if.end: ; preds = %if.then, %for.body br label %for.inc for.inc: ; preds = %if.end - %9 = load i32* %i, align 4 + %9 = load i32, i32* %i, align 4 %dec = add nsw i32 %9, -1 store i32 %dec, i32* %i, align 4 br label %for.cond diff --git a/test/Transforms/LoopVectorize/X86/metadata-enable.ll b/test/Transforms/LoopVectorize/X86/metadata-enable.ll index 7feb66c..ba8e11e 100644 --- a/test/Transforms/LoopVectorize/X86/metadata-enable.ll +++ b/test/Transforms/LoopVectorize/X86/metadata-enable.ll @@ -54,17 +54,17 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 %add = add nsw i32 %0, %N - %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv + %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv store i32 %add, i32* %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 32 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0 for.end: ; preds = %for.body - %1 = load i32* %a, align 4 + %1 = load i32, i32* %a, align 4 ret i32 %1 } @@ -105,17 +105,17 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ 
%indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 %add = add nsw i32 %0, %N - %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv + %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv store i32 %add, i32* %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 32 br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body - %1 = load i32* %a, align 4 + %1 = load i32, i32* %a, align 4 ret i32 %1 } @@ -156,17 +156,17 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 %add = add nsw i32 %0, %N - %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv + %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv store i32 %add, i32* %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 32 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !2 for.end: ; preds = %for.body - %1 = load i32* %a, align 4 + %1 = load i32, i32* %a, align 4 ret i32 %1 } diff --git a/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll b/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll index fd69dc4..bb972c4 100644 --- a/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll +++ b/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll @@ -10,8 +10,8 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds float* %a, i64 %indvars.iv - %0 = load float* 
%arrayidx, align 4 + %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv + %0 = load float, float* %arrayidx, align 4 %add = fadd float %0, 1.000000e+00 store float %add, float* %arrayidx, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 diff --git a/test/Transforms/LoopVectorize/X86/no-vector.ll b/test/Transforms/LoopVectorize/X86/no-vector.ll index 692eec9..4b464b0 100644 --- a/test/Transforms/LoopVectorize/X86/no-vector.ll +++ b/test/Transforms/LoopVectorize/X86/no-vector.ll @@ -8,8 +8,8 @@ entry: for.body: ; preds = %entry, %for.body %i.06 = phi i32 [ %inc, %for.body ], [ 0, %entry ] %r.05 = phi i32 [ %xor, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i8* %s, i32 %i.06 - %0 = load i8* %arrayidx, align 1 + %arrayidx = getelementptr inbounds i8, i8* %s, i32 %i.06 + %0 = load i8, i8* %arrayidx, align 1 %conv = sext i8 %0 to i32 %xor = xor i32 %conv, %r.05 %inc = add nsw i32 %i.06, 1 diff --git a/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll b/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll index ad01044..631361c 100644 --- a/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll +++ b/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll @@ -17,28 +17,28 @@ entry: br label %for.body for.body: ; preds = %for.body.for.body_crit_edge, %entry - %indvars.iv.reload = load i64* %indvars.iv.reg2mem - %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv.reload - %0 = load i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3 - %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv.reload - %1 = load i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3 + %indvars.iv.reload = load i64, i64* %indvars.iv.reg2mem + %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.reload + %0 = load i32, i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3 + %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.reload + %1 = load i32, i32* 
%arrayidx2, align 4, !llvm.mem.parallel_loop_access !3 %idxprom3 = sext i32 %1 to i64 - %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3 + %arrayidx4 = getelementptr inbounds i32, i32* %a, i64 %idxprom3 store i32 %0, i32* %arrayidx4, align 4, !llvm.mem.parallel_loop_access !3 %indvars.iv.next = add i64 %indvars.iv.reload, 1 ; A new store without the parallel metadata here: store i64 %indvars.iv.next, i64* %indvars.iv.next.reg2mem - %indvars.iv.next.reload1 = load i64* %indvars.iv.next.reg2mem - %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next.reload1 - %2 = load i32* %arrayidx6, align 4, !llvm.mem.parallel_loop_access !3 + %indvars.iv.next.reload1 = load i64, i64* %indvars.iv.next.reg2mem + %arrayidx6 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.next.reload1 + %2 = load i32, i32* %arrayidx6, align 4, !llvm.mem.parallel_loop_access !3 store i32 %2, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3 - %indvars.iv.next.reload = load i64* %indvars.iv.next.reg2mem + %indvars.iv.next.reload = load i64, i64* %indvars.iv.next.reg2mem %lftr.wideiv = trunc i64 %indvars.iv.next.reload to i32 %exitcond = icmp eq i32 %lftr.wideiv, 512 br i1 %exitcond, label %for.end, label %for.body.for.body_crit_edge, !llvm.loop !3 for.body.for.body_crit_edge: ; preds = %for.body - %indvars.iv.next.reload2 = load i64* %indvars.iv.next.reg2mem + %indvars.iv.next.reload2 = load i64, i64* %indvars.iv.next.reg2mem store i64 %indvars.iv.next.reload2, i64* %indvars.iv.reg2mem br label %for.body diff --git a/test/Transforms/LoopVectorize/X86/parallel-loops.ll b/test/Transforms/LoopVectorize/X86/parallel-loops.ll index 22ab521..53061ed 100644 --- a/test/Transforms/LoopVectorize/X86/parallel-loops.ll +++ b/test/Transforms/LoopVectorize/X86/parallel-loops.ll @@ -20,16 +20,16 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv - %0 = 
load i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv - %1 = load i32* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %1 = load i32, i32* %arrayidx2, align 4 %idxprom3 = sext i32 %1 to i64 - %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3 + %arrayidx4 = getelementptr inbounds i32, i32* %a, i64 %idxprom3 store i32 %0, i32* %arrayidx4, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 - %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next - %2 = load i32* %arrayidx6, align 4 + %arrayidx6 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.next + %2 = load i32, i32* %arrayidx6, align 4 store i32 %2, i32* %arrayidx2, align 4 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, 512 @@ -50,18 +50,18 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3 - %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv - %1 = load i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3 + %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3 + %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %1 = load i32, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3 %idxprom3 = sext i32 %1 to i64 - %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3 + %arrayidx4 = getelementptr inbounds i32, i32* %a, i64 %idxprom3 ; This store might have originated from inlining a function with a parallel ; loop. Refers to a list with the "original loop reference" (!4) also included. 
store i32 %0, i32* %arrayidx4, align 4, !llvm.mem.parallel_loop_access !5 %indvars.iv.next = add i64 %indvars.iv, 1 - %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next - %2 = load i32* %arrayidx6, align 4, !llvm.mem.parallel_loop_access !3 + %arrayidx6 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.next + %2 = load i32, i32* %arrayidx6, align 4, !llvm.mem.parallel_loop_access !3 store i32 %2, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, 512 @@ -83,18 +83,18 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !6 - %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv - %1 = load i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !6 + %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !6 + %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %1 = load i32, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !6 %idxprom3 = sext i32 %1 to i64 - %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3 + %arrayidx4 = getelementptr inbounds i32, i32* %a, i64 %idxprom3 ; This refers to the loop marked with !7 which we are not in at the moment. ; It should prevent detecting as a parallel loop. 
store i32 %0, i32* %arrayidx4, align 4, !llvm.mem.parallel_loop_access !7 %indvars.iv.next = add i64 %indvars.iv, 1 - %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next - %2 = load i32* %arrayidx6, align 4, !llvm.mem.parallel_loop_access !6 + %arrayidx6 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.next + %2 = load i32, i32* %arrayidx6, align 4, !llvm.mem.parallel_loop_access !6 store i32 %2, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !6 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, 512 diff --git a/test/Transforms/LoopVectorize/X86/powof2div.ll b/test/Transforms/LoopVectorize/X86/powof2div.ll index 054da8e..6bc738a 100644 --- a/test/Transforms/LoopVectorize/X86/powof2div.ll +++ b/test/Transforms/LoopVectorize/X86/powof2div.ll @@ -7,7 +7,7 @@ target triple = "x86_64-unknown-linux-gnu" @Foo = common global %struct.anon zeroinitializer, align 4 ;CHECK-LABEL: @foo( -;CHECK: load <4 x i32>* +;CHECK: load <4 x i32>, <4 x i32>* ;CHECK: sdiv <4 x i32> ;CHECK: store <4 x i32> @@ -17,10 +17,10 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds %struct.anon* @Foo, i64 0, i32 2, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds %struct.anon, %struct.anon* @Foo, i64 0, i32 2, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 %div = sdiv i32 %0, 2 - %arrayidx2 = getelementptr inbounds %struct.anon* @Foo, i64 0, i32 0, i64 %indvars.iv + %arrayidx2 = getelementptr inbounds %struct.anon, %struct.anon* @Foo, i64 0, i32 0, i64 %indvars.iv store i32 %div, i32* %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 100 diff --git a/test/Transforms/LoopVectorize/X86/reduction-crash.ll b/test/Transforms/LoopVectorize/X86/reduction-crash.ll index 3957a55..3741b95 100644 --- 
a/test/Transforms/LoopVectorize/X86/reduction-crash.ll +++ b/test/Transforms/LoopVectorize/X86/reduction-crash.ll @@ -14,17 +14,17 @@ bb: br label %bb2 bb2: ; preds = %bb - %tmp = load double* null, align 8 + %tmp = load double, double* null, align 8 br i1 undef, label %bb3, label %bb12 bb3: ; preds = %bb3, %bb2 %tmp4 = phi double [ %tmp9, %bb3 ], [ %tmp, %bb2 ] %tmp5 = phi i32 [ %tmp8, %bb3 ], [ 0, %bb2 ] - %tmp6 = getelementptr inbounds [16 x double]* undef, i32 0, i32 %tmp5 - %tmp7 = load double* %tmp6, align 4 + %tmp6 = getelementptr inbounds [16 x double], [16 x double]* undef, i32 0, i32 %tmp5 + %tmp7 = load double, double* %tmp6, align 4 %tmp8 = add nsw i32 %tmp5, 1 %tmp9 = fadd fast double %tmp4, undef - %tmp10 = getelementptr inbounds float* %arg, i32 %tmp5 + %tmp10 = getelementptr inbounds float, float* %arg, i32 %tmp5 store float undef, float* %tmp10, align 4 %tmp11 = icmp eq i32 %tmp8, %arg1 br i1 %tmp11, label %bb12, label %bb3 diff --git a/test/Transforms/LoopVectorize/X86/small-size.ll b/test/Transforms/LoopVectorize/X86/small-size.ll index 8c7a881..47c262b 100644 --- a/test/Transforms/LoopVectorize/X86/small-size.ll +++ b/test/Transforms/LoopVectorize/X86/small-size.ll @@ -30,12 +30,12 @@ define void @example1() optsize { ; <label>:1 ; preds = %1, %0 %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ] - %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv - %3 = load i32* %2, align 4 - %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv - %5 = load i32* %4, align 4 + %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv + %3 = load i32, i32* %2, align 4 + %4 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %indvars.iv + %5 = load i32, i32* %4, align 4 %6 = add nsw i32 %5, %3 - %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv + %7 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv store i32 %6, i32* %7, align 4 
%indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 @@ -65,7 +65,7 @@ define void @example2(i32 %n, i32 %x) optsize { .lr.ph5: ; preds = %0, %.lr.ph5 %indvars.iv6 = phi i64 [ %indvars.iv.next7, %.lr.ph5 ], [ 0, %0 ] - %3 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv6 + %3 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv6 store i32 %x, i32* %3, align 4 %indvars.iv.next7 = add i64 %indvars.iv6, 1 %lftr.wideiv = trunc i64 %indvars.iv.next7 to i32 @@ -76,12 +76,12 @@ define void @example2(i32 %n, i32 %x) optsize { %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ %i.0.lcssa, %.preheader ] %.02 = phi i32 [ %4, %.lr.ph ], [ %n, %.preheader ] %4 = add nsw i32 %.02, -1 - %5 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv - %6 = load i32* %5, align 4 - %7 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv - %8 = load i32* %7, align 4 + %5 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv + %6 = load i32, i32* %5, align 4 + %7 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %indvars.iv + %8 = load i32, i32* %7, align 4 %9 = and i32 %8, %6 - %10 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv + %10 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv store i32 %9, i32* %10, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %11 = icmp eq i32 %4, 0 @@ -104,9 +104,9 @@ define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture %.014 = phi i32* [ %5, %.lr.ph ], [ %p, %0 ] %.023 = phi i32* [ %3, %.lr.ph ], [ %q, %0 ] %2 = add nsw i32 %.05, -1 - %3 = getelementptr inbounds i32* %.023, i64 1 - %4 = load i32* %.023, align 16 - %5 = getelementptr inbounds i32* %.014, i64 1 + %3 = getelementptr inbounds i32, i32* %.023, i64 1 + %4 = load i32, i32* %.023, align 16 + %5 = getelementptr inbounds i32, i32* %.014, i64 1 store i32 %4, i32* %.014, align 
16 %6 = icmp eq i32 %2, 0 br i1 %6, label %._crit_edge, label %.lr.ph @@ -128,9 +128,9 @@ define void @example4(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture %.014 = phi i32* [ %5, %.lr.ph ], [ %p, %0 ] %.023 = phi i32* [ %3, %.lr.ph ], [ %q, %0 ] %2 = add nsw i32 %.05, -1 - %3 = getelementptr inbounds i32* %.023, i64 1 - %4 = load i32* %.023, align 16 - %5 = getelementptr inbounds i32* %.014, i64 1 + %3 = getelementptr inbounds i32, i32* %.023, i64 1 + %4 = load i32, i32* %.023, align 16 + %5 = getelementptr inbounds i32, i32* %.014, i64 1 store i32 %4, i32* %.014, align 16 %6 = icmp eq i32 %2, 0 br i1 %6, label %._crit_edge, label %.lr.ph @@ -152,11 +152,11 @@ define void @example23(i16* nocapture %src, i32* nocapture %dst) optsize { %.04 = phi i16* [ %src, %0 ], [ %2, %1 ] %.013 = phi i32* [ %dst, %0 ], [ %6, %1 ] %i.02 = phi i32 [ 0, %0 ], [ %7, %1 ] - %2 = getelementptr inbounds i16* %.04, i64 1 - %3 = load i16* %.04, align 2 + %2 = getelementptr inbounds i16, i16* %.04, i64 1 + %3 = load i16, i16* %.04, align 2 %4 = zext i16 %3 to i32 %5 = shl nuw nsw i32 %4, 7 - %6 = getelementptr inbounds i32* %.013, i64 1 + %6 = getelementptr inbounds i32, i32* %.013, i64 1 store i32 %5, i32* %.013, align 4 %7 = add nsw i32 %i.02, 1 %exitcond = icmp eq i32 %7, 256 @@ -178,11 +178,11 @@ define void @example23b(i16* noalias nocapture %src, i32* noalias nocapture %dst %.04 = phi i16* [ %src, %0 ], [ %2, %1 ] %.013 = phi i32* [ %dst, %0 ], [ %6, %1 ] %i.02 = phi i32 [ 0, %0 ], [ %7, %1 ] - %2 = getelementptr inbounds i16* %.04, i64 1 - %3 = load i16* %.04, align 2 + %2 = getelementptr inbounds i16, i16* %.04, i64 1 + %3 = load i16, i16* %.04, align 2 %4 = zext i16 %3 to i32 %5 = shl nuw nsw i32 %4, 7 - %6 = getelementptr inbounds i32* %.013, i64 1 + %6 = getelementptr inbounds i32, i32* %.013, i64 1 store i32 %5, i32* %.013, align 4 %7 = add nsw i32 %i.02, 1 %exitcond = icmp eq i32 %7, 256 diff --git a/test/Transforms/LoopVectorize/X86/struct-store.ll 
b/test/Transforms/LoopVectorize/X86/struct-store.ll index a995e43..4ff3b0e 100644 --- a/test/Transforms/LoopVectorize/X86/struct-store.ll +++ b/test/Transforms/LoopVectorize/X86/struct-store.ll @@ -15,7 +15,7 @@ entry: loop: %indvars.iv = phi i64 [ %indvars.iv.next, %loop ], [ 0, %entry ] - %tmp = getelementptr inbounds [16 x { i64, i64 }]* @glbl, i64 0, i64 %indvars.iv + %tmp = getelementptr inbounds [16 x { i64, i64 }], [16 x { i64, i64 }]* @glbl, i64 0, i64 %indvars.iv store { i64, i64 } { i64 ptrtoint (void ()* @fn to i64), i64 0 }, { i64, i64 }* %tmp, align 16 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 diff --git a/test/Transforms/LoopVectorize/X86/tripcount.ll b/test/Transforms/LoopVectorize/X86/tripcount.ll index a4ec694..c0bbb92 100644 --- a/test/Transforms/LoopVectorize/X86/tripcount.ll +++ b/test/Transforms/LoopVectorize/X86/tripcount.ll @@ -22,8 +22,8 @@ for.body.preheader: for.body: %i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds [0 x i32]* @big, i32 0, i32 %i.07 - %0 = load i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @big, i32 0, i32 %i.07 + %0 = load i32, i32* %arrayidx, align 4 %neg = xor i32 %0, -1 store i32 %neg, i32* %arrayidx, align 4 %inc = add nsw i32 %i.07, 1 diff --git a/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll b/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll index 86c32b2..38af11c 100644 --- a/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll +++ b/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll @@ -12,10 +12,10 @@ entry: br label %for.body for.body: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds i64* %a, i64 %indvars.iv - %tmp = load i64* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i64, i64* %a, i64 %indvars.iv + %tmp = load i64, i64* %arrayidx, align 4 %conv = uitofp 
i64 %tmp to double - %arrayidx2 = getelementptr inbounds double* %b, i64 %indvars.iv + %arrayidx2 = getelementptr inbounds double, double* %b, i64 %indvars.iv store double %conv, double* %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 256 diff --git a/test/Transforms/LoopVectorize/X86/unroll-pm.ll b/test/Transforms/LoopVectorize/X86/unroll-pm.ll index 5064fec..52914b6 100644 --- a/test/Transforms/LoopVectorize/X86/unroll-pm.ll +++ b/test/Transforms/LoopVectorize/X86/unroll-pm.ll @@ -17,8 +17,8 @@ define i32 @bar(i32* nocapture %A, i32 %n) nounwind uwtable ssp { .lr.ph: ; preds = %0, %.lr.ph %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] - %2 = getelementptr inbounds i32* %A, i64 %indvars.iv - %3 = load i32* %2, align 4 + %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %3 = load i32, i32* %2, align 4 %4 = add nsw i32 %3, 6 store i32 %4, i32* %2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 diff --git a/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll b/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll index 716dc08..4411da3 100644 --- a/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll +++ b/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll @@ -16,8 +16,8 @@ target triple = "x86_64-apple-macosx10.8.0" ; CHECK-VECTOR: ret ; ; CHECK-SCALAR-LABEL: @foo( -; CHECK-SCALAR: load i32* -; CHECK-SCALAR-NOT: load i32* +; CHECK-SCALAR: load i32, i32* +; CHECK-SCALAR-NOT: load i32, i32* ; CHECK-SCALAR: store i32 ; CHECK-SCALAR-NOT: store i32 ; CHECK-SCALAR: ret @@ -26,8 +26,8 @@ define i32 @foo(i32* nocapture %A) nounwind uwtable ssp { ; <label>:1 ; preds = %1, %0 %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ] - %2 = getelementptr inbounds i32* %A, i64 %indvars.iv - %3 = load i32* %2, align 4 + %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %3 = load i32, i32* %2, align 4 %4 = add nsw i32 %3, 6 store i32 %4, i32* %2, align 4 
%indvars.iv.next = add i64 %indvars.iv, 1 @@ -57,8 +57,8 @@ define i32 @bar(i32* nocapture %A, i32 %n) nounwind uwtable ssp { .lr.ph: ; preds = %0, %.lr.ph %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] - %2 = getelementptr inbounds i32* %A, i64 %indvars.iv - %3 = load i32* %2, align 4 + %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %3 = load i32, i32* %2, align 4 %4 = add nsw i32 %3, 6 store i32 %4, i32* %2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 @@ -86,10 +86,10 @@ entry: for.body: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds float* %B, i64 %indvars.iv - %0 = load float* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, float* %B, i64 %indvars.iv + %0 = load float, float* %arrayidx, align 4 %mul = fmul float %0, %N - %arrayidx2 = getelementptr inbounds float* %A, i64 %indvars.iv + %arrayidx2 = getelementptr inbounds float, float* %A, i64 %indvars.iv store float %mul, float* %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 256 diff --git a/test/Transforms/LoopVectorize/X86/unroll_selection.ll b/test/Transforms/LoopVectorize/X86/unroll_selection.ll index c684b4e..71b8290 100644 --- a/test/Transforms/LoopVectorize/X86/unroll_selection.ll +++ b/test/Transforms/LoopVectorize/X86/unroll_selection.ll @@ -16,8 +16,8 @@ define void @reg_pressure(double* nocapture %A, i32 %n) nounwind uwtable ssp { ; <label>:2 ; preds = %2, %0 %indvars.iv = phi i64 [ %indvars.iv.next, %2 ], [ %1, %0 ] - %3 = getelementptr inbounds double* %A, i64 %indvars.iv - %4 = load double* %3, align 8 + %3 = getelementptr inbounds double, double* %A, i64 %indvars.iv + %4 = load double, double* %3, align 8 %5 = fadd double %4, 3.000000e+00 %6 = fmul double %4, 2.000000e+00 %7 = fadd double %5, %6 @@ -58,8 +58,8 @@ define void @small_loop(i16* nocapture %A, i64 %n) nounwind uwtable ssp { .lr.ph: ; preds = %0, %.lr.ph %i.01 = phi 
i64 [ %5, %.lr.ph ], [ 0, %0 ] - %2 = getelementptr inbounds i16* %A, i64 %i.01 - %3 = load i16* %2, align 2 + %2 = getelementptr inbounds i16, i16* %A, i64 %i.01 + %3 = load i16, i16* %2, align 2 %4 = xor i16 %3, 3 store i16 %4, i16* %2, align 2 %5 = add i64 %i.01, 1 diff --git a/test/Transforms/LoopVectorize/X86/veclib-calls.ll b/test/Transforms/LoopVectorize/X86/veclib-calls.ll new file mode 100644 index 0000000..62e0a44 --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/veclib-calls.ll @@ -0,0 +1,182 @@ +; RUN: opt < %s -vector-library=Accelerate -loop-vectorize -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +;CHECK-LABEL: @sqrt_f32( +;CHECK: vsqrtf{{.*}}<4 x float> +;CHECK: ret void +declare float @sqrtf(float) nounwind readnone +define void @sqrt_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable { +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv + %0 = load float, float* %arrayidx, align 4 + %call = tail call float @sqrtf(float %0) nounwind readnone + %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv + store float %call, float* %arrayidx2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +;CHECK-LABEL: @exp_f32( +;CHECK: vexpf{{.*}}<4 x float> +;CHECK: ret void +declare float @expf(float) nounwind readnone +define void @exp_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable { +entry: + %cmp6 = icmp sgt i32 
%n, 0 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv + %0 = load float, float* %arrayidx, align 4 + %call = tail call float @expf(float %0) nounwind readnone + %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv + store float %call, float* %arrayidx2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +;CHECK-LABEL: @log_f32( +;CHECK: vlogf{{.*}}<4 x float> +;CHECK: ret void +declare float @logf(float) nounwind readnone +define void @log_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable { +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv + %0 = load float, float* %arrayidx, align 4 + %call = tail call float @logf(float %0) nounwind readnone + %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv + store float %call, float* %arrayidx2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +; For abs instruction we'll generate vector intrinsic, as it's cheaper than a lib call. 
+;CHECK-LABEL: @fabs_f32( +;CHECK: fabs{{.*}}<4 x float> +;CHECK: ret void +declare float @fabsf(float) nounwind readnone +define void @fabs_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable { +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv + %0 = load float, float* %arrayidx, align 4 + %call = tail call float @fabsf(float %0) nounwind readnone + %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv + store float %call, float* %arrayidx2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +; Test that we can vectorize an intrinsic into a vector call. 
+;CHECK-LABEL: @exp_f32_intrin( +;CHECK: vexpf{{.*}}<4 x float> +;CHECK: ret void +declare float @llvm.exp.f32(float) nounwind readnone +define void @exp_f32_intrin(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable { +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv + %0 = load float, float* %arrayidx, align 4 + %call = tail call float @llvm.exp.f32(float %0) nounwind readnone + %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv + store float %call, float* %arrayidx2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +; Test that we don't vectorize arbitrary functions. 
+;CHECK-LABEL: @foo_f32( +;CHECK-NOT: foo{{.*}}<4 x float> +;CHECK: ret void +declare float @foo(float) nounwind readnone +define void @foo_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable { +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv + %0 = load float, float* %arrayidx, align 4 + %call = tail call float @foo(float %0) nounwind readnone + %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv + store float %call, float* %arrayidx2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +; Test that we don't vectorize calls with nobuiltin attribute. 
+;CHECK-LABEL: @sqrt_f32_nobuiltin(
+;CHECK-NOT: vsqrtf{{.*}}<4 x float>
+;CHECK: ret void
+; The negative directive above rejects any output text matching
+; `vsqrtf ... <4 x float>` between the function label and its `ret void`.
+; NOTE(review): @sqrtf is not declared in this part of the hunk; presumably it
+; is declared earlier in the same test file -- confirm.
+; Loop under test: x[i] = sqrtf(y[i]), with %x and %y marked noalias.
+define void @sqrt_f32_nobuiltin(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0                      ; guard: the loop is skipped when n <= 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] ; i64 index, 0 on entry
+  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
+  %0 = load float, float* %arrayidx, align 4
+  %call = tail call float @sqrtf(float %0) nounwind readnone nobuiltin ; nobuiltin is set on this call site only
+  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; the next index is compared as i32 against %n
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body ; leave once the truncated next index equals %n
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/X86/vect.omp.force.ll b/test/Transforms/LoopVectorize/X86/vect.omp.force.ll index a781fbe..c2a0fed 100644 --- a/test/Transforms/LoopVectorize/X86/vect.omp.force.ll +++ b/test/Transforms/LoopVectorize/X86/vect.omp.force.ll @@ -15,9 +15,9 @@ target triple = "x86_64-apple-macosx10.8.0" ; The source code for the test: ; ; #include <math.h> -; void foo(float* restrict A, float * restrict B, int size) +; void foo(float* restrict A, float * restrict B) ; { -; for (int i = 0; i < size; ++i) A[i] = sinf(B[i]); +; for (int i = 0; i < 1000; i+=2) A[i] = sinf(B[i]); ; } ; @@ -25,24 +25,20 @@ target triple = "x86_64-apple-macosx10.8.0" ; This loop will be vectorized, although the scalar cost is lower than any of vector costs, but vectorization is explicitly forced in metadata. 
; -define void @vectorized(float* noalias nocapture %A, float* noalias nocapture %B, i32 %size) { +define void @vectorized(float* noalias nocapture %A, float* noalias nocapture %B) { entry: - %cmp6 = icmp sgt i32 %size, 0 - br i1 %cmp6, label %for.body.preheader, label %for.end - -for.body.preheader: br label %for.body for.body: - %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds float* %B, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !1 + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float, float* %B, i64 %indvars.iv + %0 = load float, float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !1 %call = tail call float @llvm.sin.f32(float %0) - %arrayidx2 = getelementptr inbounds float* %A, i64 %indvars.iv + %arrayidx2 = getelementptr inbounds float, float* %A, i64 %indvars.iv store float %call, float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !1 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 - %exitcond = icmp eq i32 %lftr.wideiv, %size + %exitcond = icmp eq i32 %lftr.wideiv, 1000 br i1 %exitcond, label %for.end.loopexit, label %for.body, !llvm.loop !1 for.end.loopexit: @@ -59,24 +55,20 @@ for.end: ; This method will not be vectorized, as scalar cost is lower than any of vector costs. 
; -define void @not_vectorized(float* noalias nocapture %A, float* noalias nocapture %B, i32 %size) { +define void @not_vectorized(float* noalias nocapture %A, float* noalias nocapture %B) { entry: - %cmp6 = icmp sgt i32 %size, 0 - br i1 %cmp6, label %for.body.preheader, label %for.end - -for.body.preheader: br label %for.body for.body: - %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds float* %B, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3 + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float, float* %B, i64 %indvars.iv + %0 = load float, float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3 %call = tail call float @llvm.sin.f32(float %0) - %arrayidx2 = getelementptr inbounds float* %A, i64 %indvars.iv + %arrayidx2 = getelementptr inbounds float, float* %A, i64 %indvars.iv store float %call, float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 - %exitcond = icmp eq i32 %lftr.wideiv, %size + %exitcond = icmp eq i32 %lftr.wideiv, 1000 br i1 %exitcond, label %for.end.loopexit, label %for.body, !llvm.loop !3 for.end.loopexit: diff --git a/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll b/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll index e39e6b5..8d139ac 100644 --- a/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll +++ b/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll @@ -29,10 +29,10 @@ entry: for.body: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds float* %B, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !1 - %arrayidx2 = getelementptr inbounds float* %A, i64 %indvars.iv - %1 = 
load float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !1 + %arrayidx = getelementptr inbounds float, float* %B, i64 %indvars.iv + %0 = load float, float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !1 + %arrayidx2 = getelementptr inbounds float, float* %A, i64 %indvars.iv + %1 = load float, float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !1 %add = fadd fast float %0, %1 store float %add, float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !1 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 @@ -55,10 +55,10 @@ entry: for.body: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds float* %B, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3 - %arrayidx2 = getelementptr inbounds float* %A, i64 %indvars.iv - %1 = load float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3 + %arrayidx = getelementptr inbounds float, float* %B, i64 %indvars.iv + %0 = load float, float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3 + %arrayidx2 = getelementptr inbounds float, float* %A, i64 %indvars.iv + %1 = load float, float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3 %add = fadd fast float %0, %1 store float %add, float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 diff --git a/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll b/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll index ece9895..5efabe1 100644 --- a/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll +++ b/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll @@ -15,12 +15,12 @@ define void @scalarselect(i1 %cond) { ; <label>:1 %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ] - %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv - %3 = load i32* %2, align 4 - %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv - %5 = load 
i32* %4, align 4 + %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv + %3 = load i32, i32* %2, align 4 + %4 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %indvars.iv + %5 = load i32, i32* %4, align 4 %6 = add nsw i32 %5, %3 - %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv + %7 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv ; A scalar select has a cost of 1 on core2 ; CHECK: cost of 1 for VF 2 {{.*}} select i1 %cond, i32 %6, i32 0 @@ -42,12 +42,12 @@ define void @vectorselect(i1 %cond) { ; <label>:1 %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ] - %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv - %3 = load i32* %2, align 4 - %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv - %5 = load i32* %4, align 4 + %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv + %3 = load i32, i32* %2, align 4 + %4 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %indvars.iv + %5 = load i32, i32* %4, align 4 %6 = add nsw i32 %5, %3 - %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv + %7 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv %8 = icmp ult i64 %indvars.iv, 8 ; A vector select has a cost of 1 on core2 diff --git a/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll b/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll index e57cfef..6cd3c9c 100644 --- a/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll +++ b/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll @@ -19,7 +19,7 @@ target triple = "x86_64-apple-macosx10.8.0" ; CHECK: test_consecutive_store ; CHECK: The Widest type: 64 bits define void @test_consecutive_store(%0**, %0**, %0** nocapture) nounwind ssp uwtable align 2 { - %4 = load %0** %2, align 8 + %4 = load %0*, %0** %2, align 8 %5 = icmp eq %0** %0, %1 br i1 %5, label %12, label %6 
@@ -29,7 +29,7 @@ define void @test_consecutive_store(%0**, %0**, %0** nocapture) nounwind ssp uwt ; <label>:7 ; preds = %7, %6 %8 = phi %0** [ %0, %6 ], [ %9, %7 ] store %0* %4, %0** %8, align 8 - %9 = getelementptr inbounds %0** %8, i64 1 + %9 = getelementptr inbounds %0*, %0** %8, i64 1 %10 = icmp eq %0** %9, %1 br i1 %10, label %11, label %7 @@ -61,12 +61,12 @@ define void @test_nonconsecutive_store() nounwind ssp uwtable { ; <label>:3 ; preds = %3, %1 %4 = phi i64 [ 0, %1 ], [ %11, %3 ] - %5 = getelementptr inbounds [2048 x i16]* @q, i64 0, i64 %4 - %6 = load i16* %5, align 2 + %5 = getelementptr inbounds [2048 x i16], [2048 x i16]* @q, i64 0, i64 %4 + %6 = load i16, i16* %5, align 2 %7 = sext i16 %6 to i64 %8 = add i64 %7, 1 %9 = inttoptr i64 %8 to i32* - %10 = getelementptr inbounds [2048 x [8 x i32*]]* @p, i64 0, i64 %4, i64 %2 + %10 = getelementptr inbounds [2048 x [8 x i32*]], [2048 x [8 x i32*]]* @p, i64 0, i64 %4, i64 %2 store i32* %9, i32** %10, align 8 %11 = add i64 %4, 1 %12 = trunc i64 %11 to i32 @@ -100,8 +100,8 @@ define i8 @test_consecutive_ptr_load() nounwind readonly ssp uwtable { ; <label>:1 ; preds = %1, %0 %2 = phi i64 [ 0, %0 ], [ %10, %1 ] %3 = phi i8 [ 0, %0 ], [ %9, %1 ] - %4 = getelementptr inbounds [1024 x i32*]* @ia, i32 0, i64 %2 - %5 = load i32** %4, align 4 + %4 = getelementptr inbounds [1024 x i32*], [1024 x i32*]* @ia, i32 0, i64 %2 + %5 = load i32*, i32** %4, align 4 %6 = ptrtoint i32* %5 to i64 %7 = trunc i64 %6 to i8 %8 = add i8 %3, 1 @@ -127,9 +127,9 @@ define void @test_nonconsecutive_ptr_load() nounwind ssp uwtable { ; <label>:3 ; preds = %3, %1 %4 = phi i64 [ 0, %1 ], [ %10, %3 ] - %5 = getelementptr inbounds [2048 x [8 x i32*]]* @p2, i64 0, i64 %4, i64 %2 - %6 = getelementptr inbounds [2048 x i16]* @q2, i64 0, i64 %4 - %7 = load i32** %5, align 2 + %5 = getelementptr inbounds [2048 x [8 x i32*]], [2048 x [8 x i32*]]* @p2, i64 0, i64 %4, i64 %2 + %6 = getelementptr inbounds [2048 x i16], [2048 x i16]* @q2, i64 0, i64 %4 + 
%7 = load i32*, i32** %5, align 2 %8 = ptrtoint i32* %7 to i64 %9 = trunc i64 %8 to i16 store i16 %9, i16* %6, align 8 diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll index 011ce8e..60ad3c6 100644 --- a/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll +++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll @@ -52,7 +52,7 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv, !dbg !16 + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !16 %0 = trunc i64 %indvars.iv to i32, !dbg !16 store i32 %0, i32* %arrayidx, align 4, !dbg !16, !tbaa !18 %cmp3 = icmp sle i32 %0, %Length, !dbg !22 @@ -74,7 +74,7 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv, !dbg !30 + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !30 %0 = trunc i64 %indvars.iv to i32, !dbg !30 store i32 %0, i32* %arrayidx, align 4, !dbg !30, !tbaa !18 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !25 @@ -97,12 +97,12 @@ for.body.preheader: ; preds = %entry for.body: ; preds = %for.body.preheader, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds i32* %B, i64 %indvars.iv, !dbg !35 - %0 = load i32* %arrayidx, align 4, !dbg !35, !tbaa !18 + %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv, !dbg !35 + %0 = load i32, i32* %arrayidx, align 4, !dbg !35, !tbaa !18 %idxprom1 = sext i32 %0 to i64, !dbg !35 - %arrayidx2 = getelementptr inbounds i32* %A, i64 %idxprom1, !dbg !35 - %1 = load i32* %arrayidx2, align 4, !dbg !35, !tbaa !18 - %arrayidx4 = getelementptr inbounds i32* %A, i64 
%indvars.iv, !dbg !35 + %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %idxprom1, !dbg !35 + %1 = load i32, i32* %arrayidx2, align 4, !dbg !35, !tbaa !18 + %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !35 store i32 %1, i32* %arrayidx4, align 4, !dbg !35, !tbaa !18 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !32 %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !32 @@ -122,40 +122,40 @@ attributes #0 = { nounwind } !llvm.module.flags = !{!9, !10} !llvm.ident = !{!11} -!0 = !{!"0x11\004\00clang version 3.5.0\001\00\006\00\002", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [./source.cpp] [DW_LANG_C_plus_plus] -!1 = !{!"source.cpp", !"."} +!0 = !MDCompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0", isOptimized: true, runtimeVersion: 6, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2) +!1 = !MDFile(filename: "source.cpp", directory: ".") !2 = !{} !3 = !{!4, !7, !8} -!4 = !{!"0x2e\00test\00test\00\001\000\001\000\006\00256\001\001", !1, !5, !6, null, void (i32*, i32)* @_Z4testPii, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [test] -!5 = !{!"0x29", !1} ; [ DW_TAG_file_type ] [./source.cpp] -!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] -!7 = !{!"0x2e\00test_disabled\00test_disabled\00\0010\000\001\000\006\00256\001\0010", !1, !5, !6, null, void (i32*, i32)* @_Z13test_disabledPii, null, null, !2} ; [ DW_TAG_subprogram ] [line 10] [def] [test_disabled] -!8 = !{!"0x2e\00test_array_bounds\00test_array_bounds\00\0016\000\001\000\006\00256\001\0016", !1, !5, !6, null, void (i32*, i32*, i32)* @_Z17test_array_boundsPiS_i, null, null, !2} ; [ DW_TAG_subprogram ] [line 16] [def] [test_array_bounds] +!4 = !MDSubprogram(name: "test", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: 
true, scopeLine: 1, file: !1, scope: !5, type: !6, function: void (i32*, i32)* @_Z4testPii, variables: !2) +!5 = !MDFile(filename: "source.cpp", directory: ".") +!6 = !MDSubroutineType(types: !2) +!7 = !MDSubprogram(name: "test_disabled", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 10, file: !1, scope: !5, type: !6, function: void (i32*, i32)* @_Z13test_disabledPii, variables: !2) +!8 = !MDSubprogram(name: "test_array_bounds", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 16, file: !1, scope: !5, type: !6, function: void (i32*, i32*, i32)* @_Z17test_array_boundsPiS_i, variables: !2) !9 = !{i32 2, !"Dwarf Version", i32 2} -!10 = !{i32 2, !"Debug Info Version", i32 2} +!10 = !{i32 2, !"Debug Info Version", i32 3} !11 = !{!"clang version 3.5.0"} !12 = !MDLocation(line: 3, column: 8, scope: !13) -!13 = !{!"0xb\003\003\000", !1, !4} ; [ DW_TAG_lexical_block ] +!13 = distinct !MDLexicalBlock(line: 3, column: 3, file: !1, scope: !4) !14 = !{!14, !15, !15} !15 = !{!"llvm.loop.vectorize.enable", i1 true} !16 = !MDLocation(line: 4, column: 5, scope: !17) -!17 = !{!"0xb\003\0036\000", !1, !13} ; [ DW_TAG_lexical_block ] +!17 = distinct !MDLexicalBlock(line: 3, column: 36, file: !1, scope: !13) !18 = !{!19, !19, i64 0} !19 = !{!"int", !20, i64 0} !20 = !{!"omnipotent char", !21, i64 0} !21 = !{!"Simple C/C++ TBAA"} !22 = !MDLocation(line: 5, column: 9, scope: !23) -!23 = !{!"0xb\005\009\000", !1, !17} ; [ DW_TAG_lexical_block ] +!23 = distinct !MDLexicalBlock(line: 5, column: 9, file: !1, scope: !17) !24 = !MDLocation(line: 8, column: 1, scope: !4) !25 = !MDLocation(line: 12, column: 8, scope: !26) -!26 = !{!"0xb\0012\003\000", !1, !7} ; [ DW_TAG_lexical_block ] +!26 = distinct !MDLexicalBlock(line: 12, column: 3, file: !1, scope: !7) !27 = !{!27, !28, !29} !28 = !{!"llvm.loop.interleave.count", i32 1} !29 = 
!{!"llvm.loop.vectorize.width", i32 1} !30 = !MDLocation(line: 13, column: 5, scope: !26) !31 = !MDLocation(line: 14, column: 1, scope: !7) !32 = !MDLocation(line: 18, column: 8, scope: !33) -!33 = !{!"0xb\0018\003\000", !1, !8} ; [ DW_TAG_lexical_block ] +!33 = distinct !MDLexicalBlock(line: 18, column: 3, file: !1, scope: !8) !34 = !{!34, !15} !35 = !MDLocation(line: 19, column: 5, scope: !33) !36 = !MDLocation(line: 20, column: 1, scope: !8) diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll index 16fe370..a4e895a 100644 --- a/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll +++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll @@ -26,11 +26,11 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %add8 = phi i32 [ 0, %entry ], [ %add, %for.body ], !dbg !19 - %arrayidx = getelementptr inbounds [16 x i8]* %cb, i64 0, i64 %indvars.iv, !dbg !19 - %0 = load i8* %arrayidx, align 1, !dbg !19, !tbaa !21 + %arrayidx = getelementptr inbounds [16 x i8], [16 x i8]* %cb, i64 0, i64 %indvars.iv, !dbg !19 + %0 = load i8, i8* %arrayidx, align 1, !dbg !19, !tbaa !21 %conv = sext i8 %0 to i32, !dbg !19 - %arrayidx2 = getelementptr inbounds [16 x i8]* %cc, i64 0, i64 %indvars.iv, !dbg !19 - %1 = load i8* %arrayidx2, align 1, !dbg !19, !tbaa !21 + %arrayidx2 = getelementptr inbounds [16 x i8], [16 x i8]* %cc, i64 0, i64 %indvars.iv, !dbg !19 + %1 = load i8, i8* %arrayidx2, align 1, !dbg !19, !tbaa !21 %conv3 = sext i8 %1 to i32, !dbg !19 %sub = sub i32 %conv, %conv3, !dbg !19 %add = add nsw i32 %sub, %add8, !dbg !19 @@ -49,14 +49,14 @@ declare void @ibar(i32*) #1 !llvm.module.flags = !{!7, !8} !llvm.ident = !{!9} -!1 = !{!"vectorization-remarks.c", !"."} +!1 = !MDFile(filename: "vectorization-remarks.c", directory: ".") !2 = !{} !3 = !{!4} -!4 = !{!"0x2e\00foo\00foo\00\005\000\001\000\006\00256\001\006", !1, !5, !6, 
null, i32 (i32)* @foo, null, null, !2} ; [ DW_TAG_subprogram ] [line 5] [def] [scope 6] [foo] -!5 = !{!"0x29", !1} ; [ DW_TAG_file_type ] [./vectorization-remarks.c] -!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!4 = !MDSubprogram(name: "foo", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 6, file: !1, scope: !5, type: !6, function: i32 (i32)* @foo, variables: !2) +!5 = !MDFile(filename: "vectorization-remarks.c", directory: ".") +!6 = !MDSubroutineType(types: !2) !7 = !{i32 2, !"Dwarf Version", i32 4} -!8 = !{i32 1, !"Debug Info Version", i32 2} +!8 = !{i32 1, !"Debug Info Version", i32 3} !9 = !{!"clang version 3.5.0 "} !10 = !MDLocation(line: 8, column: 3, scope: !4) !11 = !{!12, !12, i64 0} @@ -64,11 +64,11 @@ declare void @ibar(i32*) #1 !13 = !{!"omnipotent char", !14, i64 0} !14 = !{!"Simple C/C++ TBAA"} !15 = !MDLocation(line: 17, column: 8, scope: !16) -!16 = !{!"0xb\0017\008\002", !1, !17} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c] -!17 = !{!"0xb\0017\008\001", !1, !18} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c] -!18 = !{!"0xb\0017\003\000", !1, !4} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c] +!16 = distinct !MDLexicalBlock(line: 17, column: 8, file: !1, scope: !17) +!17 = distinct !MDLexicalBlock(line: 17, column: 8, file: !1, scope: !18) +!18 = distinct !MDLexicalBlock(line: 17, column: 3, file: !1, scope: !4) !19 = !MDLocation(line: 18, column: 5, scope: !20) -!20 = !{!"0xb\0017\0027\000", !1, !18} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c] +!20 = distinct !MDLexicalBlock(line: 17, column: 27, file: !1, scope: !18) !21 = !{!13, !13, i64 0} !22 = !MDLocation(line: 20, column: 3, scope: !4) !23 = !MDLocation(line: 21, column: 3, scope: !4) diff --git a/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll 
b/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll index d8e5403..0debb33 100644 --- a/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll +++ b/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll @@ -17,7 +17,7 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %conv = sitofp i32 1 to x86_fp80 - %arrayidx = getelementptr inbounds [1024 x x86_fp80]* @x, i64 0, i64 %indvars.iv + %arrayidx = getelementptr inbounds [1024 x x86_fp80], [1024 x x86_fp80]* @x, i64 0, i64 %indvars.iv store x86_fp80 %conv, x86_fp80* %arrayidx, align 16 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 |