26 files changed, 235 insertions, 48 deletions
diff --git a/test/CodeGen/NVPTX/annotations.ll b/test/CodeGen/NVPTX/annotations.ll
index 39d52d3..2341377 100644
--- a/test/CodeGen/NVPTX/annotations.ll
+++ b/test/CodeGen/NVPTX/annotations.ll
@@ -33,21 +33,14 @@ define void @kernel_func_minctasm(float* %a) {
 
 !nvvm.annotations = !{!1, !2, !3, !4, !5, !6, !7, !8}
 
-!1 = metadata !{void (float*)* @kernel_func_maxntid, metadata !"kernel", i32 1}
-!2 = metadata !{void (float*)* @kernel_func_maxntid,
-                metadata !"maxntidx", i32 10,
-                metadata !"maxntidy", i32 20,
-                metadata !"maxntidz", i32 30}
-
-!3 = metadata !{void (float*)* @kernel_func_reqntid, metadata !"kernel", i32 1}
-!4 = metadata !{void (float*)* @kernel_func_reqntid,
-                metadata !"reqntidx", i32 11,
-                metadata !"reqntidy", i32 22,
-                metadata !"reqntidz", i32 33}
-
-!5 = metadata !{void (float*)* @kernel_func_minctasm, metadata !"kernel", i32 1}
-!6 = metadata !{void (float*)* @kernel_func_minctasm,
-                metadata !"minctasm", i32 42}
-
-!7 = metadata !{i64 addrspace(1)* @texture, metadata !"texture", i32 1}
-!8 = metadata !{i64 addrspace(1)* @surface, metadata !"surface", i32 1}
+!1 = !{void (float*)* @kernel_func_maxntid, !"kernel", i32 1} ; "kernel" = 1 on @kernel_func_maxntid
+!2 = !{void (float*)* @kernel_func_maxntid, !"maxntidx", i32 10, !"maxntidy", i32 20, !"maxntidz", i32 30} ; maxntid x/y/z = 10/20/30 on the same function
+
+!3 = !{void (float*)* @kernel_func_reqntid, !"kernel", i32 1} ; "kernel" = 1 on @kernel_func_reqntid
+!4 = !{void (float*)* @kernel_func_reqntid, !"reqntidx", i32 11, !"reqntidy", i32 22, !"reqntidz", i32 33} ; reqntid x/y/z = 11/22/33 on the same function
+
+!5 = !{void (float*)* @kernel_func_minctasm, !"kernel", i32 1} ; "kernel" = 1 on @kernel_func_minctasm
+!6 = !{void (float*)* @kernel_func_minctasm, !"minctasm", i32 42} ; "minctasm" = 42 on the same function
+
+!7 = !{i64 addrspace(1)* @texture, !"texture", i32 1} ; "texture" = 1 on the addrspace(1) global @texture
+!8 = !{i64 addrspace(1)* @surface, !"surface", i32 1} ; "surface" = 1 on the addrspace(1) global @surface
diff --git a/test/CodeGen/NVPTX/bug21465.ll b/test/CodeGen/NVPTX/bug21465.ll
index 157b28c..cacffce 100644
--- a/test/CodeGen/NVPTX/bug21465.ll
+++ b/test/CodeGen/NVPTX/bug21465.ll
@@ -21,4 +21,4 @@ attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"=
 
 !nvvm.annotations = !{!0}
 
-!0 = metadata !{void (%struct.S*, i32*)* @_Z11TakesStruct1SPi, metadata !"kernel", i32 1}
+!0 = !{void (%struct.S*, i32*)* @_Z11TakesStruct1SPi, !"kernel", i32 1} ; "kernel" = 1 on @_Z11TakesStruct1SPi
diff --git a/test/CodeGen/NVPTX/bug22246.ll b/test/CodeGen/NVPTX/bug22246.ll
new file mode 100644
index 0000000..70e7e12
--- /dev/null
+++ b/test/CodeGen/NVPTX/bug22246.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda" ; NOTE(review): the llc invocation above passes -march=nvptx while this triple is nvptx64 - confirm the mix is intended
+
+; CHECK-LABEL: _Z3foobbbPb
+define void @_Z3foobbbPb(i1 zeroext %p1, i1 zeroext %p2, i1 zeroext %p3, i8* nocapture %output) { ; three i1 inputs, one byte-sized output pointer
+entry:
+; CHECK: selp.b32       %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %p{{[0-9]+}}
+  %.sink.v = select i1 %p1, i1 %p2, i1 %p3 ; condition, both operands and result are all i1; the pattern above expects a 32-bit selp for it
+  %frombool5 = zext i1 %.sink.v to i8 ; widen the selected bit to a byte
+  store i8 %frombool5, i8* %output, align 1 ; write that byte through %output
+  ret void
+}
diff --git a/test/CodeGen/NVPTX/bug22322.ll b/test/CodeGen/NVPTX/bug22322.ll
new file mode 100644
index 0000000..19ee694
--- /dev/null
+++ b/test/CodeGen/NVPTX/bug22322.ll
@@ -0,0 +1,62 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda" ; NOTE(review): the llc invocation above passes -march=nvptx while this triple is nvptx64 - confirm the mix is intended
+
+%class.float3 = type { float, float, float }
+
+; Function Attrs: nounwind
+; CHECK-LABEL: some_kernel
+define void @some_kernel(%class.float3* nocapture %dst) #0 { ; writes one %class.float3 element of %dst
+_ZL11compute_vecRK6float3jb.exit:
+  %ret_vec.sroa.8.i = alloca float, align 4 ; stack slot holding a single float
+  %0 = tail call i32 @llvm.ptx.read.ctaid.x()
+  %1 = tail call i32 @llvm.ptx.read.ntid.x()
+  %2 = mul nsw i32 %1, %0 ; ntid.x * ctaid.x
+  %3 = tail call i32 @llvm.ptx.read.tid.x()
+  %4 = add nsw i32 %2, %3 ; ntid.x * ctaid.x + tid.x
+  %5 = zext i32 %4 to i64 ; 64-bit element index used by the GEPs below
+  %6 = bitcast float* %ret_vec.sroa.8.i to i8*
+  call void @llvm.lifetime.start(i64 4, i8* %6) ; lifetime of the 4-byte slot begins
+  %7 = and i32 %4, 15 ; low four bits of the index
+  %8 = icmp eq i32 %7, 0
+  %9 = select i1 %8, float 0.000000e+00, float -1.000000e+00 ; 0.0 when those bits are zero, otherwise -1.0
+  store float %9, float* %ret_vec.sroa.8.i, align 4
+; CHECK: setp.lt.f32     %p{{[0-9]+}}, %f{{[0-9]+}}, 0f00000000
+  %10 = fcmp olt float %9, 0.000000e+00 ; ordered less-than against 0.0; the pattern above expects this to survive as setp.lt.f32
+  %ret_vec.sroa.8.i.val = load float* %ret_vec.sroa.8.i, align 4 ; reload of the value stored just above
+  %11 = select i1 %10, float 0.000000e+00, float %ret_vec.sroa.8.i.val ; 0.0 when %9 < 0.0, otherwise the reloaded value
+  call void @llvm.lifetime.end(i64 4, i8* %6) ; lifetime of the slot ends
+  %12 = getelementptr inbounds %class.float3* %dst, i64 %5, i32 0 ; field 0 of element %5
+  store float 0.000000e+00, float* %12, align 4
+  %13 = getelementptr inbounds %class.float3* %dst, i64 %5, i32 1 ; field 1 of element %5
+  store float %11, float* %13, align 4 ; the only field that receives a computed value
+  %14 = getelementptr inbounds %class.float3* %dst, i64 %5, i32 2 ; field 2 of element %5
+  store float 0.000000e+00, float* %14, align 4
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.ptx.read.ctaid.x() #1 ; supplies %0 in @some_kernel
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.ptx.read.ntid.x() #1 ; supplies %1 in @some_kernel
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.ptx.read.tid.x() #1 ; supplies %3 in @some_kernel
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture) #2 ; brackets the alloca in @some_kernel
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture) #2 ; brackets the alloca in @some_kernel
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" } ; attached to @some_kernel
+attributes #1 = { nounwind readnone } ; attached to the three llvm.ptx.read.* declarations
+attributes #2 = { nounwind } ; attached to the two llvm.lifetime.* declarations
+
+!nvvm.annotations = !{!0} ; single annotation node
+!llvm.ident = !{!1} ; producer string only
+
+!0 = !{void (%class.float3*)* @some_kernel, !"kernel", i32 1} ; "kernel" = 1 on @some_kernel
+!1 = !{!"clang version 3.5.1 (tags/RELEASE_351/final)"} ; compiler identification string
diff --git a/test/CodeGen/NVPTX/call-with-alloca-buffer.ll b/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
index 83d4916..8483112 100644
--- a/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
+++ b/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
@@ -63,4 +63,4 @@ declare void @callee(float*, i8*)
 
 !nvvm.annotations = !{!0}
 
-!0 = metadata !{void (float*)* @kernel_func, metadata !"kernel", i32 1}
+!0 = !{void (float*)* @kernel_func, !"kernel", i32 1} ; "kernel" = 1 on @kernel_func
diff --git a/test/CodeGen/NVPTX/calling-conv.ll b/test/CodeGen/NVPTX/calling-conv.ll
index 190a146..3b03442 100644
--- a/test/CodeGen/NVPTX/calling-conv.ll
+++ b/test/CodeGen/NVPTX/calling-conv.ll
@@ -27,4 +27,4 @@ define void @metadata_kernel(float* %a) {
 
 !nvvm.annotations = !{!1}
 
-!1 = metadata !{void (float*)* @metadata_kernel, metadata !"kernel", i32 1}
+!1 = !{void (float*)* @metadata_kernel, !"kernel", i32 1} ; "kernel" = 1 on @metadata_kernel
diff --git a/test/CodeGen/NVPTX/fma-assoc.ll b/test/CodeGen/NVPTX/fma-assoc.ll
new file mode 100644
index 0000000..fc04c61
--- /dev/null
+++ b/test/CodeGen/NVPTX/fma-assoc.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast | FileCheck %s
+
+define ptx_device float @t1_f32(float %x, float %y, float %z, float %u, float %v) { ; computes (x*y + u*v) + z
+; CHECK-LABEL: t1_f32(
+; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
+; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
+; CHECK: ret;
+  %a = fmul float %x, %y ; first product
+  %b = fmul float %u, %v ; second product
+  %c = fadd float %a, %b ; sum of the two products
+  %d = fadd float %c, %z ; plus %z; the two patterns above expect both multiplies to end up in fused multiply-adds
+  ret float %d
+}
+
+define ptx_device double @t1_f64(double %x, double %y, double %z, double %u, double %v) { ; computes (x*y + u*v) + z in double precision
+; CHECK-LABEL: t1_f64(
+; CHECK: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
+; CHECK: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
+; CHECK: ret;
+  %a = fmul double %x, %y ; first product
+  %b = fmul double %u, %v ; second product
+  %c = fadd double %a, %b ; sum of the two products
+  %d = fadd double %c, %z ; plus %z; the two patterns above expect both multiplies to end up in fused multiply-adds
+  ret double %d
+}
diff --git a/test/CodeGen/NVPTX/fma.ll b/test/CodeGen/NVPTX/fma.ll
index 14b5c45..6785a01 100644
--- a/test/CodeGen/NVPTX/fma.ll
+++ b/test/CodeGen/NVPTX/fma.ll
@@ -1,5 +1,8 @@
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast | FileCheck %s
 
+declare float @dummy_f32(float, float) #0 ; external callee taking two floats; NOTE(review): attribute group #0 is not defined in the lines shown here - confirm
+declare double @dummy_f64(double, double) #0 ; double-precision counterpart; same note about #0
+
 define ptx_device float @t1_f32(float %x, float %y, float %z) {
 ; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
 ; CHECK: ret;
@@ -8,6 +11,17 @@ define ptx_device float @t1_f32(float %x, float %y, float %z) {
   ret float %b
 }
 
+define ptx_device float @t2_f32(float %x, float %y, float %z, float %w) { ; one product feeding two separate additions
+; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
+; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
+; CHECK: ret;
+  %a = fmul float %x, %y ; single product with two users (%b and %c)
+  %b = fadd float %a, %z ; x*y + z
+  %c = fadd float %a, %w ; x*y + w
+  %d = call float @dummy_f32(float %b, float %c) ; both sums are passed to the external callee, so both are used
+  ret float %d
+}
+
 define ptx_device double @t1_f64(double %x, double %y, double %z) {
 ; CHECK: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
 ; CHECK: ret;
@@ -15,3 +29,14 @@ define ptx_device double @t1_f64(double %x, double %y, double %z) {
   %b = fadd double %a, %z
   ret double %b
 }
+
+define ptx_device double @t2_f64(double %x, double %y, double %z, double %w) { ; one product feeding two separate additions
+; CHECK: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
+; CHECK: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
+; CHECK: ret;
+  %a = fmul double %x, %y ; single product with two users (%b and %c)
+  %b = fadd double %a, %z ; x*y + z
+  %c = fadd double %a, %w ; x*y + w
+  %d = call double @dummy_f64(double %b, double %c) ; both sums are passed to the external callee, so both are used
+  ret double %d
+}
diff --git a/test/CodeGen/NVPTX/generic-to-nvvm.ll b/test/CodeGen/NVPTX/generic-to-nvvm.ll
index 2a52798..fb63d6e 100644
--- a/test/CodeGen/NVPTX/generic-to-nvvm.ll
+++ b/test/CodeGen/NVPTX/generic-to-nvvm.ll
@@ -23,4 +23,4 @@ define void @foo(i32* %a, i32* %b) {
 
 
 !nvvm.annotations = !{!0}
-!0 = metadata !{void (i32*, i32*)* @foo, metadata !"kernel", i32 1}
+!0 = !{void (i32*, i32*)* @foo, !"kernel", i32 1} ; "kernel" = 1 on @foo
diff --git a/test/CodeGen/NVPTX/i1-global.ll b/test/CodeGen/NVPTX/i1-global.ll
index 1dd8ae4..e3fe08e 100644
--- a/test/CodeGen/NVPTX/i1-global.ll
+++ b/test/CodeGen/NVPTX/i1-global.ll
@@ -16,4 +16,4 @@ define void @foo(i1 %p, i32* %out) {
 
 
 !nvvm.annotations = !{!0}
-!0 = metadata !{void (i1, i32*)* @foo, metadata !"kernel", i32 1}
+!0 = !{void (i1, i32*)* @foo, !"kernel", i32 1} ; "kernel" = 1 on @foo
diff --git a/test/CodeGen/NVPTX/i1-param.ll b/test/CodeGen/NVPTX/i1-param.ll
index f4df874..aac7196 100644
--- a/test/CodeGen/NVPTX/i1-param.ll
+++ b/test/CodeGen/NVPTX/i1-param.ll
@@ -16,4 +16,4 @@ define void @foo(i1 %p, i32* %out) {
 
 
 !nvvm.annotations = !{!0}
-!0 = metadata !{void (i1, i32*)* @foo, metadata !"kernel", i32 1}
+!0 = !{void (i1, i32*)* @foo, !"kernel", i32 1} ; "kernel" = 1 on @foo
diff --git a/test/CodeGen/NVPTX/managed.ll b/test/CodeGen/NVPTX/managed.ll
index 4d7e781..d3f1604 100644
--- a/test/CodeGen/NVPTX/managed.ll
+++ b/test/CodeGen/NVPTX/managed.ll
@@ -8,4 +8,4 @@
 
 
 !nvvm.annotations = !{!0}
-!0 = metadata !{i32 addrspace(1)* @managed_g, metadata !"managed", i32 1}
+!0 = !{i32 addrspace(1)* @managed_g, !"managed", i32 1} ; "managed" = 1 on the addrspace(1) global @managed_g
diff --git a/test/CodeGen/NVPTX/noduplicate-syncthreads.ll b/test/CodeGen/NVPTX/noduplicate-syncthreads.ll
index 64745fc..841bbc3 100644
--- a/test/CodeGen/NVPTX/noduplicate-syncthreads.ll
+++ b/test/CodeGen/NVPTX/noduplicate-syncthreads.ll
@@ -70,5 +70,5 @@ if.end17:                                         ; preds = %if.else13, %if.then
 ; Function Attrs: noduplicate nounwind
 declare void @llvm.cuda.syncthreads() #2
 
-!0 = metadata !{void (float*)* @foo, metadata !"kernel", i32 1}
-!1 = metadata !{null, metadata !"align", i32 8}
+!0 = !{void (float*)* @foo, !"kernel", i32 1} ; "kernel" = 1 on @foo
+!1 = !{null, !"align", i32 8} ; "align" = 8 with a null first operand
diff --git a/test/CodeGen/NVPTX/nounroll.ll b/test/CodeGen/NVPTX/nounroll.ll
new file mode 100644
index 0000000..db96d2a
--- /dev/null
+++ b/test/CodeGen/NVPTX/nounroll.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-unknown-unknown"
+
+; Compiled from the following CUDA code:
+;
+;   #pragma nounroll
+;   for (int i = 0; i < 2; ++i)
+;     output[i] = input[i];
+define void @nounroll(float* %input, float* %output) { ; two-iteration copy loop from %input to %output
+; CHECK-LABEL: .visible .func nounroll(
+entry:
+  br label %for.body
+
+for.body:
+; CHECK: .pragma "nounroll"
+  %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ] ; induction variable, starts at 0
+  %idxprom = sext i32 %i.06 to i64 ; 64-bit index shared by both GEPs
+  %arrayidx = getelementptr inbounds float* %input, i64 %idxprom
+  %0 = load float* %arrayidx, align 4 ; input[i]
+; CHECK: ld.f32
+  %arrayidx2 = getelementptr inbounds float* %output, i64 %idxprom
+  store float %0, float* %arrayidx2, align 4 ; output[i] = input[i]
+; CHECK: st.f32
+  %inc = add nuw nsw i32 %i.06, 1
+  %exitcond = icmp eq i32 %inc, 2 ; leave after the second iteration
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0 ; back edge carries loop metadata !0
+; CHECK-NOT: ld.f32
+; CHECK-NOT: st.f32
+
+for.end:
+  ret void
+}
+
+!0 = distinct !{!0, !1} ; self-referential loop node; its only other operand is !1
+!1 = !{!"llvm.loop.unroll.disable"} ; the unroll-disable property string
diff --git a/test/CodeGen/NVPTX/nvcl-param-align.ll b/test/CodeGen/NVPTX/nvcl-param-align.ll
new file mode 100644
index 0000000..c1a489f
--- /dev/null
+++ b/test/CodeGen/NVPTX/nvcl-param-align.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+target triple = "nvptx-unknown-nvcl"
+
+; CHECK-LABEL: .entry foo(
+define void @foo(i64 %img, i64 %sampler, <5 x float>* %v) {
+; %v points to <5 x float>: 5 x sizeof(float) = 20 bytes, and the parameter
+; alignment should be that size rounded up to the next power of 2, which is 32.
+; CHECK: .param .u32 .ptr .align 32 foo_param_2
+  ret void
+}
+
+!nvvm.annotations = !{!1, !2, !3}
+!1 = !{void (i64, i64, <5 x float>*)* @foo, !"kernel", i32 1} ; "kernel" = 1 on @foo
+!2 = !{void (i64, i64, <5 x float>*)* @foo, !"rdoimage", i32 0} ; "rdoimage" = 0; presumably parameter index 0 (%img) - confirm
+!3 = !{void (i64, i64, <5 x float>*)* @foo, !"sampler", i32 1} ; "sampler" = 1; presumably parameter index 1 (%sampler) - confirm
diff --git a/test/CodeGen/NVPTX/refl1.ll b/test/CodeGen/NVPTX/refl1.ll
index 4aeff09..e8782ea 100644
--- a/test/CodeGen/NVPTX/refl1.ll
+++ b/test/CodeGen/NVPTX/refl1.ll
@@ -36,4 +36,4 @@ attributes #2 = { alwaysinline inlinehint nounwind readnone }
 
 !nvvm.annotations = !{!0}
 
-!0 = metadata !{void (float*)* @foo, metadata !"kernel", i32 1}
+!0 = !{void (float*)* @foo, !"kernel", i32 1} ; "kernel" = 1 on @foo
diff --git a/test/CodeGen/NVPTX/simple-call.ll b/test/CodeGen/NVPTX/simple-call.ll
index ab6f423..1b41361 100644
--- a/test/CodeGen/NVPTX/simple-call.ll
+++ b/test/CodeGen/NVPTX/simple-call.ll
@@ -23,4 +23,4 @@ define void @kernel_func(float* %a) {
 
 !nvvm.annotations = !{!1}
 
-!1 = metadata !{void (float*)* @kernel_func, metadata !"kernel", i32 1}
+!1 = !{void (float*)* @kernel_func, !"kernel", i32 1} ; "kernel" = 1 on @kernel_func
diff --git a/test/CodeGen/NVPTX/surf-read-cuda.ll b/test/CodeGen/NVPTX/surf-read-cuda.ll
index 10a1ecc..ed02134 100644
--- a/test/CodeGen/NVPTX/surf-read-cuda.ll
+++ b/test/CodeGen/NVPTX/surf-read-cuda.ll
@@ -47,7 +47,7 @@ define void @bar(float* %red, i32 %idx) {
 
 
 !nvvm.annotations = !{!1, !2, !3}
-!1 = metadata !{void (i64, float*, i32)* @foo, metadata !"kernel", i32 1}
-!2 = metadata !{void (float*, i32)* @bar, metadata !"kernel", i32 1}
-!3 = metadata !{i64 addrspace(1)* @surf0, metadata !"surface", i32 1}
+!1 = !{void (i64, float*, i32)* @foo, !"kernel", i32 1} ; "kernel" = 1 on @foo
+!2 = !{void (float*, i32)* @bar, !"kernel", i32 1} ; "kernel" = 1 on @bar
+!3 = !{i64 addrspace(1)* @surf0, !"surface", i32 1} ; "surface" = 1 on the addrspace(1) global @surf0
 
diff --git a/test/CodeGen/NVPTX/surf-read.ll b/test/CodeGen/NVPTX/surf-read.ll
index a69d03e..7383722 100644
--- a/test/CodeGen/NVPTX/surf-read.ll
+++ b/test/CodeGen/NVPTX/surf-read.ll
@@ -16,5 +16,5 @@ define void @foo(i64 %img, float* %red, i32 %idx) {
 }
 
 !nvvm.annotations = !{!1, !2}
-!1 = metadata !{void (i64, float*, i32)* @foo, metadata !"kernel", i32 1}
-!2 = metadata !{void (i64, float*, i32)* @foo, metadata !"rdwrimage", i32 0}
+!1 = !{void (i64, float*, i32)* @foo, !"kernel", i32 1} ; "kernel" = 1 on @foo
+!2 = !{void (i64, float*, i32)* @foo, !"rdwrimage", i32 0} ; "rdwrimage" = 0; presumably parameter index 0 (%img) - confirm
diff --git a/test/CodeGen/NVPTX/surf-write-cuda.ll b/test/CodeGen/NVPTX/surf-write-cuda.ll
index 654c47f..da55a24 100644
--- a/test/CodeGen/NVPTX/surf-write-cuda.ll
+++ b/test/CodeGen/NVPTX/surf-write-cuda.ll
@@ -36,7 +36,7 @@ define void @bar(i32 %val, i32 %idx) {
 
 
 !nvvm.annotations = !{!1, !2, !3}
-!1 = metadata !{void (i64, i32, i32)* @foo, metadata !"kernel", i32 1}
-!2 = metadata !{void (i32, i32)* @bar, metadata !"kernel", i32 1}
-!3 = metadata !{i64 addrspace(1)* @surf0, metadata !"surface", i32 1}
+!1 = !{void (i64, i32, i32)* @foo, !"kernel", i32 1} ; "kernel" = 1 on @foo
+!2 = !{void (i32, i32)* @bar, !"kernel", i32 1} ; "kernel" = 1 on @bar
+!3 = !{i64 addrspace(1)* @surf0, !"surface", i32 1} ; "surface" = 1 on the addrspace(1) global @surf0
 
diff --git a/test/CodeGen/NVPTX/surf-write.ll b/test/CodeGen/NVPTX/surf-write.ll
index 880231f..5098d2a 100644
--- a/test/CodeGen/NVPTX/surf-write.ll
+++ b/test/CodeGen/NVPTX/surf-write.ll
@@ -12,5 +12,5 @@ define void @foo(i64 %img, i32 %val, i32 %idx) {
 }
 
 !nvvm.annotations = !{!1, !2}
-!1 = metadata !{void (i64, i32, i32)* @foo, metadata !"kernel", i32 1}
-!2 = metadata !{void (i64, i32, i32)* @foo, metadata !"wroimage", i32 0}
+!1 = !{void (i64, i32, i32)* @foo, !"kernel", i32 1} ; "kernel" = 1 on @foo
+!2 = !{void (i64, i32, i32)* @foo, !"wroimage", i32 0} ; "wroimage" = 0; presumably parameter index 0 (%img) - confirm
diff --git a/test/CodeGen/NVPTX/tex-read-cuda.ll b/test/CodeGen/NVPTX/tex-read-cuda.ll
index ee0cefa..c5b5600 100644
--- a/test/CodeGen/NVPTX/tex-read-cuda.ll
+++ b/test/CodeGen/NVPTX/tex-read-cuda.ll
@@ -41,6 +41,6 @@ define void @bar(float* %red, i32 %idx) {
 }
 
 !nvvm.annotations = !{!1, !2, !3}
-!1 = metadata !{void (i64, float*, i32)* @foo, metadata !"kernel", i32 1}
-!2 = metadata !{void (float*, i32)* @bar, metadata !"kernel", i32 1}
-!3 = metadata !{i64 addrspace(1)* @tex0, metadata !"texture", i32 1}
+!1 = !{void (i64, float*, i32)* @foo, !"kernel", i32 1} ; "kernel" = 1 on @foo
+!2 = !{void (float*, i32)* @bar, !"kernel", i32 1} ; "kernel" = 1 on @bar
+!3 = !{i64 addrspace(1)* @tex0, !"texture", i32 1} ; "texture" = 1 on the addrspace(1) global @tex0
diff --git a/test/CodeGen/NVPTX/tex-read.ll b/test/CodeGen/NVPTX/tex-read.ll
index 55e4bfc..6e0fda6 100644
--- a/test/CodeGen/NVPTX/tex-read.ll
+++ b/test/CodeGen/NVPTX/tex-read.ll
@@ -15,6 +15,6 @@ define void @foo(i64 %img, i64 %sampler, float* %red, i32 %idx) {
 }
 
 !nvvm.annotations = !{!1, !2, !3}
-!1 = metadata !{void (i64, i64, float*, i32)* @foo, metadata !"kernel", i32 1}
-!2 = metadata !{void (i64, i64, float*, i32)* @foo, metadata !"rdoimage", i32 0}
-!3 = metadata !{void (i64, i64, float*, i32)* @foo, metadata !"sampler", i32 1}
+!1 = !{void (i64, i64, float*, i32)* @foo, !"kernel", i32 1} ; "kernel" = 1 on @foo
+!2 = !{void (i64, i64, float*, i32)* @foo, !"rdoimage", i32 0} ; "rdoimage" = 0; presumably parameter index 0 (%img) - confirm
+!3 = !{void (i64, i64, float*, i32)* @foo, !"sampler", i32 1} ; "sampler" = 1; presumably parameter index 1 (%sampler) - confirm
diff --git a/test/CodeGen/NVPTX/texsurf-queries.ll b/test/CodeGen/NVPTX/texsurf-queries.ll
index c7637cc..e56eb5d 100644
--- a/test/CodeGen/NVPTX/texsurf-queries.ll
+++ b/test/CodeGen/NVPTX/texsurf-queries.ll
@@ -99,5 +99,5 @@ define i32 @s3() {
 
 
 !nvvm.annotations = !{!1, !2}
-!1 = metadata !{i64 addrspace(1)* @tex0, metadata !"texture", i32 1}
-!2 = metadata !{i64 addrspace(1)* @surf0, metadata !"surface", i32 1}
+!1 = !{i64 addrspace(1)* @tex0, !"texture", i32 1} ; "texture" = 1 on the addrspace(1) global @tex0
+!2 = !{i64 addrspace(1)* @surf0, !"surface", i32 1} ; "surface" = 1 on the addrspace(1) global @surf0
diff --git a/test/CodeGen/NVPTX/vector-global.ll b/test/CodeGen/NVPTX/vector-global.ll
new file mode 100644
index 0000000..a463bee
--- /dev/null
+++ b/test/CodeGen/NVPTX/vector-global.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+@g1 = external global <4 x i32> ; external global variable: declaration only, no initializer
+; CHECK: .extern .global .align 16 .b8 g1[16];
+@g2 = global <4 x i32> zeroinitializer ; module-level global variable: defined here, zero-initialized
+; CHECK: .visible .global .align 16 .b8 g2[16];
diff --git a/test/CodeGen/NVPTX/weak-linkage.ll b/test/CodeGen/NVPTX/weak-linkage.ll
index 7a13357..5df57b2 100644
--- a/test/CodeGen/NVPTX/weak-linkage.ll
+++ b/test/CodeGen/NVPTX/weak-linkage.ll
@@ -1,11 +1,17 @@
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
 
-
+; CHECK: // .weak foo
 ; CHECK: .weak .func foo
 define weak void @foo() {
   ret void
 }
 
+; CHECK: // .weak baz
+; CHECK: .weak .func baz
+define weak_odr void @baz() { ; weak_odr counterpart of the weak @foo above; the patterns expect the same .weak output
+  ret void
+}
+
 ; CHECK: .visible .func bar
 define void @bar() {
   ret void