aboutsummaryrefslogtreecommitdiffstats
path: root/test/Transforms/LoopVectorize/X86
diff options
context:
space:
mode:
Diffstat (limited to 'test/Transforms/LoopVectorize/X86')
-rw-r--r--test/Transforms/LoopVectorize/X86/already-vectorized.ll8
-rw-r--r--test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll6
-rw-r--r--test/Transforms/LoopVectorize/X86/masked_load_store.ll502
-rw-r--r--test/Transforms/LoopVectorize/X86/metadata-enable.ll8
-rw-r--r--test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll2
-rw-r--r--test/Transforms/LoopVectorize/X86/parallel-loops.ll10
-rw-r--r--test/Transforms/LoopVectorize/X86/small-size.ll2
-rw-r--r--test/Transforms/LoopVectorize/X86/vect.omp.force.ll6
-rw-r--r--test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll6
-rw-r--r--test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll4
-rw-r--r--test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll74
-rw-r--r--test/Transforms/LoopVectorize/X86/vectorization-remarks.ll46
12 files changed, 588 insertions, 86 deletions
diff --git a/test/Transforms/LoopVectorize/X86/already-vectorized.ll b/test/Transforms/LoopVectorize/X86/already-vectorized.ll
index 9c69ba8..29d74a0 100644
--- a/test/Transforms/LoopVectorize/X86/already-vectorized.ll
+++ b/test/Transforms/LoopVectorize/X86/already-vectorized.ll
@@ -39,8 +39,8 @@ for.end: ; preds = %for.body
}
; Now, we check for the Hint metadata
-; CHECK: [[vect]] = metadata !{metadata [[vect]], metadata [[width:![0-9]+]], metadata [[unroll:![0-9]+]]}
-; CHECK: [[width]] = metadata !{metadata !"llvm.loop.vectorize.width", i32 1}
-; CHECK: [[unroll]] = metadata !{metadata !"llvm.loop.interleave.count", i32 1}
-; CHECK: [[scalar]] = metadata !{metadata [[scalar]], metadata [[width]], metadata [[unroll]]}
+; CHECK: [[vect]] = distinct !{[[vect]], [[width:![0-9]+]], [[unroll:![0-9]+]]}
+; CHECK: [[width]] = !{!"llvm.loop.vectorize.width", i32 1}
+; CHECK: [[unroll]] = !{!"llvm.loop.interleave.count", i32 1}
+; CHECK: [[scalar]] = distinct !{[[scalar]], [[width]], [[unroll]]}
diff --git a/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll b/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll
index 0650d94..46efaf0 100644
--- a/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll
+++ b/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll
@@ -50,7 +50,7 @@ for.end15: ; preds = %for.end.us, %entry
attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
-!3 = metadata !{metadata !4, metadata !5}
-!4 = metadata !{metadata !4}
-!5 = metadata !{metadata !5}
+!3 = !{!4, !5}
+!4 = !{!4}
+!5 = !{!5}
diff --git a/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/test/Transforms/LoopVectorize/X86/masked_load_store.ll
new file mode 100644
index 0000000..9e2de80
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/masked_load_store.ll
@@ -0,0 +1,502 @@
+; RUN: opt < %s -O3 -mcpu=corei7-avx -S | FileCheck %s -check-prefix=AVX1
+; RUN: opt < %s -O3 -mcpu=core-avx2 -S | FileCheck %s -check-prefix=AVX2
+; RUN: opt < %s -O3 -mcpu=knl -S | FileCheck %s -check-prefix=AVX512
+
+;AVX1-NOT: llvm.masked
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc_linux"
+
+; The source code:
+;
+;void foo1(int *A, int *B, int *trigger) {
+;
+; for (int i=0; i<10000; i++) {
+; if (trigger[i] < 100) {
+; A[i] = B[i] + trigger[i];
+; }
+; }
+;}
+
+;AVX2-LABEL: @foo1
+;AVX2: icmp slt <8 x i32> %wide.load, <i32 100, i32 100, i32 100
+;AVX2: call <8 x i32> @llvm.masked.load.v8i32
+;AVX2: add nsw <8 x i32>
+;AVX2: call void @llvm.masked.store.v8i32
+;AVX2: ret void
+
+;AVX512-LABEL: @foo1
+;AVX512: icmp slt <16 x i32> %wide.load, <i32 100, i32 100, i32 100
+;AVX512: call <16 x i32> @llvm.masked.load.v16i32
+;AVX512: add nsw <16 x i32>
+;AVX512: call void @llvm.masked.store.v16i32
+;AVX512: ret void
+
+; Function Attrs: nounwind uwtable
+define void @foo1(i32* %A, i32* %B, i32* %trigger) {
+entry:
+ %A.addr = alloca i32*, align 8
+ %B.addr = alloca i32*, align 8
+ %trigger.addr = alloca i32*, align 8
+ %i = alloca i32, align 4
+ store i32* %A, i32** %A.addr, align 8
+ store i32* %B, i32** %B.addr, align 8
+ store i32* %trigger, i32** %trigger.addr, align 8
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp slt i32 %0, 10000
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32* %i, align 4
+ %idxprom = sext i32 %1 to i64
+ %2 = load i32** %trigger.addr, align 8
+ %arrayidx = getelementptr inbounds i32* %2, i64 %idxprom
+ %3 = load i32* %arrayidx, align 4
+ %cmp1 = icmp slt i32 %3, 100
+ br i1 %cmp1, label %if.then, label %if.end
+
+if.then: ; preds = %for.body
+ %4 = load i32* %i, align 4
+ %idxprom2 = sext i32 %4 to i64
+ %5 = load i32** %B.addr, align 8
+ %arrayidx3 = getelementptr inbounds i32* %5, i64 %idxprom2
+ %6 = load i32* %arrayidx3, align 4
+ %7 = load i32* %i, align 4
+ %idxprom4 = sext i32 %7 to i64
+ %8 = load i32** %trigger.addr, align 8
+ %arrayidx5 = getelementptr inbounds i32* %8, i64 %idxprom4
+ %9 = load i32* %arrayidx5, align 4
+ %add = add nsw i32 %6, %9
+ %10 = load i32* %i, align 4
+ %idxprom6 = sext i32 %10 to i64
+ %11 = load i32** %A.addr, align 8
+ %arrayidx7 = getelementptr inbounds i32* %11, i64 %idxprom6
+ store i32 %add, i32* %arrayidx7, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %for.body
+ br label %for.inc
+
+for.inc: ; preds = %if.end
+ %12 = load i32* %i, align 4
+ %inc = add nsw i32 %12, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
+
+; The source code:
+;
+;void foo2(float *A, float *B, int *trigger) {
+;
+; for (int i=0; i<10000; i++) {
+; if (trigger[i] < 100) {
+; A[i] = B[i] + trigger[i];
+; }
+; }
+;}
+
+;AVX2-LABEL: @foo2
+;AVX2: icmp slt <8 x i32> %wide.load, <i32 100, i32 100, i32 100
+;AVX2: call <8 x float> @llvm.masked.load.v8f32
+;AVX2: fadd <8 x float>
+;AVX2: call void @llvm.masked.store.v8f32
+;AVX2: ret void
+
+;AVX512-LABEL: @foo2
+;AVX512: icmp slt <16 x i32> %wide.load, <i32 100, i32 100, i32 100
+;AVX512: call <16 x float> @llvm.masked.load.v16f32
+;AVX512: fadd <16 x float>
+;AVX512: call void @llvm.masked.store.v16f32
+;AVX512: ret void
+
+; Function Attrs: nounwind uwtable
+define void @foo2(float* %A, float* %B, i32* %trigger) {
+entry:
+ %A.addr = alloca float*, align 8
+ %B.addr = alloca float*, align 8
+ %trigger.addr = alloca i32*, align 8
+ %i = alloca i32, align 4
+ store float* %A, float** %A.addr, align 8
+ store float* %B, float** %B.addr, align 8
+ store i32* %trigger, i32** %trigger.addr, align 8
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp slt i32 %0, 10000
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32* %i, align 4
+ %idxprom = sext i32 %1 to i64
+ %2 = load i32** %trigger.addr, align 8
+ %arrayidx = getelementptr inbounds i32* %2, i64 %idxprom
+ %3 = load i32* %arrayidx, align 4
+ %cmp1 = icmp slt i32 %3, 100
+ br i1 %cmp1, label %if.then, label %if.end
+
+if.then: ; preds = %for.body
+ %4 = load i32* %i, align 4
+ %idxprom2 = sext i32 %4 to i64
+ %5 = load float** %B.addr, align 8
+ %arrayidx3 = getelementptr inbounds float* %5, i64 %idxprom2
+ %6 = load float* %arrayidx3, align 4
+ %7 = load i32* %i, align 4
+ %idxprom4 = sext i32 %7 to i64
+ %8 = load i32** %trigger.addr, align 8
+ %arrayidx5 = getelementptr inbounds i32* %8, i64 %idxprom4
+ %9 = load i32* %arrayidx5, align 4
+ %conv = sitofp i32 %9 to float
+ %add = fadd float %6, %conv
+ %10 = load i32* %i, align 4
+ %idxprom6 = sext i32 %10 to i64
+ %11 = load float** %A.addr, align 8
+ %arrayidx7 = getelementptr inbounds float* %11, i64 %idxprom6
+ store float %add, float* %arrayidx7, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %for.body
+ br label %for.inc
+
+for.inc: ; preds = %if.end
+ %12 = load i32* %i, align 4
+ %inc = add nsw i32 %12, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
+
+; The source code:
+;
+;void foo3(double *A, double *B, int *trigger) {
+;
+; for (int i=0; i<10000; i++) {
+; if (trigger[i] < 100) {
+; A[i] = B[i] + trigger[i];
+; }
+; }
+;}
+
+;AVX2-LABEL: @foo3
+;AVX2: icmp slt <4 x i32> %wide.load, <i32 100, i32 100,
+;AVX2: call <4 x double> @llvm.masked.load.v4f64
+;AVX2: sitofp <4 x i32> %wide.load to <4 x double>
+;AVX2: fadd <4 x double>
+;AVX2: call void @llvm.masked.store.v4f64
+;AVX2: ret void
+
+;AVX512-LABEL: @foo3
+;AVX512: icmp slt <8 x i32> %wide.load, <i32 100, i32 100,
+;AVX512: call <8 x double> @llvm.masked.load.v8f64
+;AVX512: sitofp <8 x i32> %wide.load to <8 x double>
+;AVX512: fadd <8 x double>
+;AVX512: call void @llvm.masked.store.v8f64
+;AVX512: ret void
+
+
+; Function Attrs: nounwind uwtable
+define void @foo3(double* %A, double* %B, i32* %trigger) #0 {
+entry:
+ %A.addr = alloca double*, align 8
+ %B.addr = alloca double*, align 8
+ %trigger.addr = alloca i32*, align 8
+ %i = alloca i32, align 4
+ store double* %A, double** %A.addr, align 8
+ store double* %B, double** %B.addr, align 8
+ store i32* %trigger, i32** %trigger.addr, align 8
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp slt i32 %0, 10000
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32* %i, align 4
+ %idxprom = sext i32 %1 to i64
+ %2 = load i32** %trigger.addr, align 8
+ %arrayidx = getelementptr inbounds i32* %2, i64 %idxprom
+ %3 = load i32* %arrayidx, align 4
+ %cmp1 = icmp slt i32 %3, 100
+ br i1 %cmp1, label %if.then, label %if.end
+
+if.then: ; preds = %for.body
+ %4 = load i32* %i, align 4
+ %idxprom2 = sext i32 %4 to i64
+ %5 = load double** %B.addr, align 8
+ %arrayidx3 = getelementptr inbounds double* %5, i64 %idxprom2
+ %6 = load double* %arrayidx3, align 8
+ %7 = load i32* %i, align 4
+ %idxprom4 = sext i32 %7 to i64
+ %8 = load i32** %trigger.addr, align 8
+ %arrayidx5 = getelementptr inbounds i32* %8, i64 %idxprom4
+ %9 = load i32* %arrayidx5, align 4
+ %conv = sitofp i32 %9 to double
+ %add = fadd double %6, %conv
+ %10 = load i32* %i, align 4
+ %idxprom6 = sext i32 %10 to i64
+ %11 = load double** %A.addr, align 8
+ %arrayidx7 = getelementptr inbounds double* %11, i64 %idxprom6
+ store double %add, double* %arrayidx7, align 8
+ br label %if.end
+
+if.end: ; preds = %if.then, %for.body
+ br label %for.inc
+
+for.inc: ; preds = %if.end
+ %12 = load i32* %i, align 4
+ %inc = add nsw i32 %12, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
+
+; The source code:
+;
+;void foo4(double *A, double *B, int *trigger) {
+;
+; for (int i=0; i<10000; i++) {
+; if (trigger[i] < 100) {
+; A[i] = B[i*2] + trigger[i]; << non-cosecutive access
+; }
+; }
+;}
+
+;AVX2-LABEL: @foo4
+;AVX2-NOT: llvm.masked
+;AVX2: ret void
+
+;AVX512-LABEL: @foo4
+;AVX512-NOT: llvm.masked
+;AVX512: ret void
+
+; Function Attrs: nounwind uwtable
+define void @foo4(double* %A, double* %B, i32* %trigger) {
+entry:
+ %A.addr = alloca double*, align 8
+ %B.addr = alloca double*, align 8
+ %trigger.addr = alloca i32*, align 8
+ %i = alloca i32, align 4
+ store double* %A, double** %A.addr, align 8
+ store double* %B, double** %B.addr, align 8
+ store i32* %trigger, i32** %trigger.addr, align 8
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp slt i32 %0, 10000
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32* %i, align 4
+ %idxprom = sext i32 %1 to i64
+ %2 = load i32** %trigger.addr, align 8
+ %arrayidx = getelementptr inbounds i32* %2, i64 %idxprom
+ %3 = load i32* %arrayidx, align 4
+ %cmp1 = icmp slt i32 %3, 100
+ br i1 %cmp1, label %if.then, label %if.end
+
+if.then: ; preds = %for.body
+ %4 = load i32* %i, align 4
+ %mul = mul nsw i32 %4, 2
+ %idxprom2 = sext i32 %mul to i64
+ %5 = load double** %B.addr, align 8
+ %arrayidx3 = getelementptr inbounds double* %5, i64 %idxprom2
+ %6 = load double* %arrayidx3, align 8
+ %7 = load i32* %i, align 4
+ %idxprom4 = sext i32 %7 to i64
+ %8 = load i32** %trigger.addr, align 8
+ %arrayidx5 = getelementptr inbounds i32* %8, i64 %idxprom4
+ %9 = load i32* %arrayidx5, align 4
+ %conv = sitofp i32 %9 to double
+ %add = fadd double %6, %conv
+ %10 = load i32* %i, align 4
+ %idxprom6 = sext i32 %10 to i64
+ %11 = load double** %A.addr, align 8
+ %arrayidx7 = getelementptr inbounds double* %11, i64 %idxprom6
+ store double %add, double* %arrayidx7, align 8
+ br label %if.end
+
+if.end: ; preds = %if.then, %for.body
+ br label %for.inc
+
+for.inc: ; preds = %if.end
+ %12 = load i32* %i, align 4
+ %inc = add nsw i32 %12, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
+
+@a = common global [1 x i32*] zeroinitializer, align 8
+@c = common global i32* null, align 8
+
+; The loop here should not be vectorized due to trapping
+; constant expression
+;AVX2-LABEL: @foo5
+;AVX2-NOT: llvm.masked
+;AVX2: store i32 sdiv
+;AVX2: ret void
+
+;AVX512-LABEL: @foo5
+;AVX512-NOT: llvm.masked
+;AVX512: store i32 sdiv
+;AVX512: ret void
+
+; Function Attrs: nounwind uwtable
+define void @foo5(i32* %A, i32* %B, i32* %trigger) {
+entry:
+ %A.addr = alloca i32*, align 8
+ %B.addr = alloca i32*, align 8
+ %trigger.addr = alloca i32*, align 8
+ %i = alloca i32, align 4
+ store i32* %A, i32** %A.addr, align 8
+ store i32* %B, i32** %B.addr, align 8
+ store i32* %trigger, i32** %trigger.addr, align 8
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp slt i32 %0, 10000
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32* %i, align 4
+ %idxprom = sext i32 %1 to i64
+ %2 = load i32** %trigger.addr, align 8
+ %arrayidx = getelementptr inbounds i32* %2, i64 %idxprom
+ %3 = load i32* %arrayidx, align 4
+ %cmp1 = icmp slt i32 %3, 100
+ br i1 %cmp1, label %if.then, label %if.end
+
+if.then: ; preds = %for.body
+ %4 = load i32* %i, align 4
+ %idxprom2 = sext i32 %4 to i64
+ %5 = load i32** %B.addr, align 8
+ %arrayidx3 = getelementptr inbounds i32* %5, i64 %idxprom2
+ %6 = load i32* %arrayidx3, align 4
+ %7 = load i32* %i, align 4
+ %idxprom4 = sext i32 %7 to i64
+ %8 = load i32** %trigger.addr, align 8
+ %arrayidx5 = getelementptr inbounds i32* %8, i64 %idxprom4
+ %9 = load i32* %arrayidx5, align 4
+ %add = add nsw i32 %6, %9
+ %10 = load i32* %i, align 4
+ %idxprom6 = sext i32 %10 to i64
+ %11 = load i32** %A.addr, align 8
+ %arrayidx7 = getelementptr inbounds i32* %11, i64 %idxprom6
+ store i32 sdiv (i32 1, i32 zext (i1 icmp eq (i32** getelementptr inbounds ([1 x i32*]* @a, i64 0, i64 1), i32** @c) to i32)), i32* %arrayidx7, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %for.body
+ br label %for.inc
+
+for.inc: ; preds = %if.end
+ %12 = load i32* %i, align 4
+ %inc = add nsw i32 %12, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
+
+; Reverse loop
+;void foo6(double *in, double *out, unsigned size, int *trigger) {
+;
+; for (int i=SIZE-1; i>=0; i--) {
+; if (trigger[i] > 0) {
+; out[i] = in[i] + (double) 0.5;
+; }
+; }
+;}
+;AVX2-LABEL: @foo6
+;AVX2: icmp sgt <4 x i32> %reverse, zeroinitializer
+;AVX2: shufflevector <4 x i1>{{.*}}<4 x i32> <i32 3, i32 2, i32 1, i32 0>
+;AVX2: call <4 x double> @llvm.masked.load.v4f64
+;AVX2: fadd <4 x double>
+;AVX2: call void @llvm.masked.store.v4f64
+;AVX2: ret void
+
+;AVX512-LABEL: @foo6
+;AVX512: icmp sgt <8 x i32> %reverse, zeroinitializer
+;AVX512: shufflevector <8 x i1>{{.*}}<8 x i32> <i32 7, i32 6, i32 5, i32 4
+;AVX512: call <8 x double> @llvm.masked.load.v8f64
+;AVX512: fadd <8 x double>
+;AVX512: call void @llvm.masked.store.v8f64
+;AVX512: ret void
+
+
+define void @foo6(double* %in, double* %out, i32 %size, i32* %trigger) {
+entry:
+ %in.addr = alloca double*, align 8
+ %out.addr = alloca double*, align 8
+ %size.addr = alloca i32, align 4
+ %trigger.addr = alloca i32*, align 8
+ %i = alloca i32, align 4
+ store double* %in, double** %in.addr, align 8
+ store double* %out, double** %out.addr, align 8
+ store i32 %size, i32* %size.addr, align 4
+ store i32* %trigger, i32** %trigger.addr, align 8
+ store i32 4095, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp sge i32 %0, 0
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32* %i, align 4
+ %idxprom = sext i32 %1 to i64
+ %2 = load i32** %trigger.addr, align 8
+ %arrayidx = getelementptr inbounds i32* %2, i64 %idxprom
+ %3 = load i32* %arrayidx, align 4
+ %cmp1 = icmp sgt i32 %3, 0
+ br i1 %cmp1, label %if.then, label %if.end
+
+if.then: ; preds = %for.body
+ %4 = load i32* %i, align 4
+ %idxprom2 = sext i32 %4 to i64
+ %5 = load double** %in.addr, align 8
+ %arrayidx3 = getelementptr inbounds double* %5, i64 %idxprom2
+ %6 = load double* %arrayidx3, align 8
+ %add = fadd double %6, 5.000000e-01
+ %7 = load i32* %i, align 4
+ %idxprom4 = sext i32 %7 to i64
+ %8 = load double** %out.addr, align 8
+ %arrayidx5 = getelementptr inbounds double* %8, i64 %idxprom4
+ store double %add, double* %arrayidx5, align 8
+ br label %if.end
+
+if.end: ; preds = %if.then, %for.body
+ br label %for.inc
+
+for.inc: ; preds = %if.end
+ %9 = load i32* %i, align 4
+ %dec = add nsw i32 %9, -1
+ store i32 %dec, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
+
+
diff --git a/test/Transforms/LoopVectorize/X86/metadata-enable.ll b/test/Transforms/LoopVectorize/X86/metadata-enable.ll
index 8e0ca41..7feb66c 100644
--- a/test/Transforms/LoopVectorize/X86/metadata-enable.ll
+++ b/test/Transforms/LoopVectorize/X86/metadata-enable.ll
@@ -170,7 +170,7 @@ for.end: ; preds = %for.body
ret i32 %1
}
-!0 = metadata !{metadata !0, metadata !1}
-!1 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 1}
-!2 = metadata !{metadata !2, metadata !3}
-!3 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 0}
+!0 = !{!0, !1}
+!1 = !{!"llvm.loop.vectorize.enable", i1 1}
+!2 = !{!2, !3}
+!3 = !{!"llvm.loop.vectorize.enable", i1 0}
diff --git a/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll b/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll
index 0b542a9..ad01044 100644
--- a/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll
+++ b/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll
@@ -46,4 +46,4 @@ for.end: ; preds = %for.body
ret void
}
-!3 = metadata !{metadata !3}
+!3 = !{!3}
diff --git a/test/Transforms/LoopVectorize/X86/parallel-loops.ll b/test/Transforms/LoopVectorize/X86/parallel-loops.ll
index b580d73..22ab521 100644
--- a/test/Transforms/LoopVectorize/X86/parallel-loops.ll
+++ b/test/Transforms/LoopVectorize/X86/parallel-loops.ll
@@ -104,8 +104,8 @@ for.end: ; preds = %for.body
ret void
}
-!3 = metadata !{metadata !3}
-!4 = metadata !{metadata !4}
-!5 = metadata !{metadata !3, metadata !4}
-!6 = metadata !{metadata !6}
-!7 = metadata !{metadata !7}
+!3 = !{!3}
+!4 = !{!4}
+!5 = !{!3, !4}
+!6 = !{!6}
+!7 = !{!7}
diff --git a/test/Transforms/LoopVectorize/X86/small-size.ll b/test/Transforms/LoopVectorize/X86/small-size.ll
index f9a0281..8c7a881 100644
--- a/test/Transforms/LoopVectorize/X86/small-size.ll
+++ b/test/Transforms/LoopVectorize/X86/small-size.ll
@@ -139,7 +139,7 @@ define void @example4(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture
ret void
}
-!0 = metadata !{metadata !"branch_weights", i32 64, i32 4}
+!0 = !{!"branch_weights", i32 64, i32 4}
; We can't vectorize this one because we need a runtime ptr check.
;CHECK-LABEL: @example23(
diff --git a/test/Transforms/LoopVectorize/X86/vect.omp.force.ll b/test/Transforms/LoopVectorize/X86/vect.omp.force.ll
index 074313b..a781fbe 100644
--- a/test/Transforms/LoopVectorize/X86/vect.omp.force.ll
+++ b/test/Transforms/LoopVectorize/X86/vect.omp.force.ll
@@ -52,8 +52,8 @@ for.end:
ret void
}
-!1 = metadata !{metadata !1, metadata !2}
-!2 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true}
+!1 = !{!1, !2}
+!2 = !{!"llvm.loop.vectorize.enable", i1 true}
;
; This method will not be vectorized, as scalar cost is lower than any of vector costs.
@@ -89,5 +89,5 @@ for.end:
declare float @llvm.sin.f32(float) nounwind readnone
; Dummy metadata
-!3 = metadata !{metadata !3}
+!3 = !{!3}
diff --git a/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll b/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
index 97c31a1..e39e6b5 100644
--- a/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
+++ b/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
@@ -43,8 +43,8 @@ for.end:
ret void
}
-!1 = metadata !{metadata !1, metadata !2}
-!2 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true}
+!1 = !{!1, !2}
+!2 = !{!"llvm.loop.vectorize.enable", i1 true}
;
; This loop will not be vectorized as the trip count is below the threshold.
@@ -69,5 +69,5 @@ for.end:
ret void
}
-!3 = metadata !{metadata !3}
+!3 = !{!3}
diff --git a/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll b/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll
index 3b3a787..ece9895 100644
--- a/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll
+++ b/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll
@@ -50,8 +50,8 @@ define void @vectorselect(i1 %cond) {
%7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
%8 = icmp ult i64 %indvars.iv, 8
-; A vector select has a cost of 4 on core2
-; CHECK: cost of 4 for VF 2 {{.*}} select i1 %8, i32 %6, i32 0
+; A vector select has a cost of 1 on core2
+; CHECK: cost of 1 for VF 2 {{.*}} select i1 %8, i32 %6, i32 0
%sel = select i1 %8, i32 %6, i32 zeroinitializer
store i32 %sel, i32* %7, align 4
diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
index 7bce11d..011ce8e 100644
--- a/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
+++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
@@ -122,40 +122,40 @@ attributes #0 = { nounwind }
!llvm.module.flags = !{!9, !10}
!llvm.ident = !{!11}
-!0 = metadata !{metadata !"0x11\004\00clang version 3.5.0\001\00\006\00\002", metadata !1, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2} ; [ DW_TAG_compile_unit ] [./source.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"source.cpp", metadata !"."}
-!2 = metadata !{}
-!3 = metadata !{metadata !4, metadata !7, metadata !8}
-!4 = metadata !{metadata !"0x2e\00test\00test\00\001\000\001\000\006\00256\001\001", metadata !1, metadata !5, metadata !6, null, void (i32*, i32)* @_Z4testPii, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 1] [def] [test]
-!5 = metadata !{metadata !"0x29", metadata !1} ; [ DW_TAG_file_type ] [./source.cpp]
-!6 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !"0x2e\00test_disabled\00test_disabled\00\0010\000\001\000\006\00256\001\0010", metadata !1, metadata !5, metadata !6, null, void (i32*, i32)* @_Z13test_disabledPii, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 10] [def] [test_disabled]
-!8 = metadata !{metadata !"0x2e\00test_array_bounds\00test_array_bounds\00\0016\000\001\000\006\00256\001\0016", metadata !1, metadata !5, metadata !6, null, void (i32*, i32*, i32)* @_Z17test_array_boundsPiS_i, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 16] [def] [test_array_bounds]
-!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
-!10 = metadata !{i32 2, metadata !"Debug Info Version", i32 2}
-!11 = metadata !{metadata !"clang version 3.5.0"}
-!12 = metadata !{i32 3, i32 8, metadata !13, null}
-!13 = metadata !{metadata !"0xb\003\003\000", metadata !1, metadata !4} ; [ DW_TAG_lexical_block ]
-!14 = metadata !{metadata !14, metadata !15, metadata !15}
-!15 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true}
-!16 = metadata !{i32 4, i32 5, metadata !17, null}
-!17 = metadata !{metadata !"0xb\003\0036\000", metadata !1, metadata !13} ; [ DW_TAG_lexical_block ]
-!18 = metadata !{metadata !19, metadata !19, i64 0}
-!19 = metadata !{metadata !"int", metadata !20, i64 0}
-!20 = metadata !{metadata !"omnipotent char", metadata !21, i64 0}
-!21 = metadata !{metadata !"Simple C/C++ TBAA"}
-!22 = metadata !{i32 5, i32 9, metadata !23, null}
-!23 = metadata !{metadata !"0xb\005\009\000", metadata !1, metadata !17} ; [ DW_TAG_lexical_block ]
-!24 = metadata !{i32 8, i32 1, metadata !4, null}
-!25 = metadata !{i32 12, i32 8, metadata !26, null}
-!26 = metadata !{metadata !"0xb\0012\003\000", metadata !1, metadata !7} ; [ DW_TAG_lexical_block ]
-!27 = metadata !{metadata !27, metadata !28, metadata !29}
-!28 = metadata !{metadata !"llvm.loop.interleave.count", i32 1}
-!29 = metadata !{metadata !"llvm.loop.vectorize.width", i32 1}
-!30 = metadata !{i32 13, i32 5, metadata !26, null}
-!31 = metadata !{i32 14, i32 1, metadata !7, null}
-!32 = metadata !{i32 18, i32 8, metadata !33, null}
-!33 = metadata !{metadata !"0xb\0018\003\000", metadata !1, metadata !8} ; [ DW_TAG_lexical_block ]
-!34 = metadata !{metadata !34, metadata !15}
-!35 = metadata !{i32 19, i32 5, metadata !33, null}
-!36 = metadata !{i32 20, i32 1, metadata !8, null}
+!0 = !{!"0x11\004\00clang version 3.5.0\001\00\006\00\002", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [./source.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"source.cpp", !"."}
+!2 = !{}
+!3 = !{!4, !7, !8}
+!4 = !{!"0x2e\00test\00test\00\001\000\001\000\006\00256\001\001", !1, !5, !6, null, void (i32*, i32)* @_Z4testPii, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [test]
+!5 = !{!"0x29", !1} ; [ DW_TAG_file_type ] [./source.cpp]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!"0x2e\00test_disabled\00test_disabled\00\0010\000\001\000\006\00256\001\0010", !1, !5, !6, null, void (i32*, i32)* @_Z13test_disabledPii, null, null, !2} ; [ DW_TAG_subprogram ] [line 10] [def] [test_disabled]
+!8 = !{!"0x2e\00test_array_bounds\00test_array_bounds\00\0016\000\001\000\006\00256\001\0016", !1, !5, !6, null, void (i32*, i32*, i32)* @_Z17test_array_boundsPiS_i, null, null, !2} ; [ DW_TAG_subprogram ] [line 16] [def] [test_array_bounds]
+!9 = !{i32 2, !"Dwarf Version", i32 2}
+!10 = !{i32 2, !"Debug Info Version", i32 2}
+!11 = !{!"clang version 3.5.0"}
+!12 = !MDLocation(line: 3, column: 8, scope: !13)
+!13 = !{!"0xb\003\003\000", !1, !4} ; [ DW_TAG_lexical_block ]
+!14 = !{!14, !15, !15}
+!15 = !{!"llvm.loop.vectorize.enable", i1 true}
+!16 = !MDLocation(line: 4, column: 5, scope: !17)
+!17 = !{!"0xb\003\0036\000", !1, !13} ; [ DW_TAG_lexical_block ]
+!18 = !{!19, !19, i64 0}
+!19 = !{!"int", !20, i64 0}
+!20 = !{!"omnipotent char", !21, i64 0}
+!21 = !{!"Simple C/C++ TBAA"}
+!22 = !MDLocation(line: 5, column: 9, scope: !23)
+!23 = !{!"0xb\005\009\000", !1, !17} ; [ DW_TAG_lexical_block ]
+!24 = !MDLocation(line: 8, column: 1, scope: !4)
+!25 = !MDLocation(line: 12, column: 8, scope: !26)
+!26 = !{!"0xb\0012\003\000", !1, !7} ; [ DW_TAG_lexical_block ]
+!27 = !{!27, !28, !29}
+!28 = !{!"llvm.loop.interleave.count", i32 1}
+!29 = !{!"llvm.loop.vectorize.width", i32 1}
+!30 = !MDLocation(line: 13, column: 5, scope: !26)
+!31 = !MDLocation(line: 14, column: 1, scope: !7)
+!32 = !MDLocation(line: 18, column: 8, scope: !33)
+!33 = !{!"0xb\0018\003\000", !1, !8} ; [ DW_TAG_lexical_block ]
+!34 = !{!34, !15}
+!35 = !MDLocation(line: 19, column: 5, scope: !33)
+!36 = !MDLocation(line: 20, column: 1, scope: !8)
diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
index 14e541a..16fe370 100644
--- a/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
+++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
@@ -49,26 +49,26 @@ declare void @ibar(i32*) #1
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
-!1 = metadata !{metadata !"vectorization-remarks.c", metadata !"."}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !"0x2e\00foo\00foo\00\005\000\001\000\006\00256\001\006", metadata !1, metadata !5, metadata !6, null, i32 (i32)* @foo, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 5] [def] [scope 6] [foo]
-!5 = metadata !{metadata !"0x29", metadata !1} ; [ DW_TAG_file_type ] [./vectorization-remarks.c]
-!6 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 2}
-!9 = metadata !{metadata !"clang version 3.5.0 "}
-!10 = metadata !{i32 8, i32 3, metadata !4, null}
-!11 = metadata !{metadata !12, metadata !12, i64 0}
-!12 = metadata !{metadata !"int", metadata !13, i64 0}
-!13 = metadata !{metadata !"omnipotent char", metadata !14, i64 0}
-!14 = metadata !{metadata !"Simple C/C++ TBAA"}
-!15 = metadata !{i32 17, i32 8, metadata !16, null}
-!16 = metadata !{metadata !"0xb\0017\008\002", metadata !1, metadata !17} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c]
-!17 = metadata !{metadata !"0xb\0017\008\001", metadata !1, metadata !18} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c]
-!18 = metadata !{metadata !"0xb\0017\003\000", metadata !1, metadata !4} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c]
-!19 = metadata !{i32 18, i32 5, metadata !20, null}
-!20 = metadata !{metadata !"0xb\0017\0027\000", metadata !1, metadata !18} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c]
-!21 = metadata !{metadata !13, metadata !13, i64 0}
-!22 = metadata !{i32 20, i32 3, metadata !4, null}
-!23 = metadata !{i32 21, i32 3, metadata !4, null}
+!1 = !{!"vectorization-remarks.c", !"."}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00foo\00foo\00\005\000\001\000\006\00256\001\006", !1, !5, !6, null, i32 (i32)* @foo, null, null, !2} ; [ DW_TAG_subprogram ] [line 5] [def] [scope 6] [foo]
+!5 = !{!"0x29", !1} ; [ DW_TAG_file_type ] [./vectorization-remarks.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 1, !"Debug Info Version", i32 2}
+!9 = !{!"clang version 3.5.0 "}
+!10 = !MDLocation(line: 8, column: 3, scope: !4)
+!11 = !{!12, !12, i64 0}
+!12 = !{!"int", !13, i64 0}
+!13 = !{!"omnipotent char", !14, i64 0}
+!14 = !{!"Simple C/C++ TBAA"}
+!15 = !MDLocation(line: 17, column: 8, scope: !16)
+!16 = !{!"0xb\0017\008\002", !1, !17} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c]
+!17 = !{!"0xb\0017\008\001", !1, !18} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c]
+!18 = !{!"0xb\0017\003\000", !1, !4} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c]
+!19 = !MDLocation(line: 18, column: 5, scope: !20)
+!20 = !{!"0xb\0017\0027\000", !1, !18} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c]
+!21 = !{!13, !13, i64 0}
+!22 = !MDLocation(line: 20, column: 3, scope: !4)
+!23 = !MDLocation(line: 21, column: 3, scope: !4)