Diffstat (limited to 'test/CodeGen/NVPTX')
33 files changed, 181 insertions, 143 deletions
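Every hunk below applies the same mechanical rewrite: LLVM IR's load and getelementptr instructions now spell out the pointee type as an explicit first operand instead of deriving it from the pointer operand's type. A minimal before/after sketch (the value names and types here are illustrative, not taken from any single test):

; old syntax: result and element types inferred from the pointer operand's type
;   %v = load float* %p, align 4
;   %e = getelementptr inbounds [10 x float]* %arr, i32 0, i32 5
; new syntax: the pointee type is written out as an explicit first operand
%v = load float, float* %p, align 4
%e = getelementptr inbounds [10 x float], [10 x float]* %arr, i32 0, i32 5

The change is purely textual, so the PTX output the FileCheck lines match is unaffected.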
diff --git a/test/CodeGen/NVPTX/access-non-generic.ll b/test/CodeGen/NVPTX/access-non-generic.ll
index c225abf..e709302 100644
--- a/test/CodeGen/NVPTX/access-non-generic.ll
+++ b/test/CodeGen/NVPTX/access-non-generic.ll
@@ -18,7 +18,7 @@ define float @ld_st_shared_f32(i32 %i, float %v) {
; IR-NOT: addrspacecast
; PTX-LABEL: ld_st_shared_f32(
; load cast
- %1 = load float* addrspacecast (float addrspace(3)* @scalar to float*), align 4
+ %1 = load float, float* addrspacecast (float addrspace(3)* @scalar to float*), align 4
; PTX: ld.shared.f32 %f{{[0-9]+}}, [scalar];
; store cast
store float %v, float* addrspacecast (float addrspace(3)* @scalar to float*), align 4
@@ -29,7 +29,7 @@ define float @ld_st_shared_f32(i32 %i, float %v) {
; cast; load
%2 = addrspacecast float addrspace(3)* @scalar to float*
- %3 = load float* %2, align 4
+ %3 = load float, float* %2, align 4
; PTX: ld.shared.f32 %f{{[0-9]+}}, [scalar];
; cast; store
store float %v, float* %2, align 4
@@ -38,17 +38,17 @@ define float @ld_st_shared_f32(i32 %i, float %v) {
; PTX: bar.sync 0;
; load gep cast
- %4 = load float* getelementptr inbounds ([10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5), align 4
+ %4 = load float, float* getelementptr inbounds ([10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5), align 4
; PTX: ld.shared.f32 %f{{[0-9]+}}, [array+20];
; store gep cast
- store float %v, float* getelementptr inbounds ([10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5), align 4
+ store float %v, float* getelementptr inbounds ([10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5), align 4
; PTX: st.shared.f32 [array+20], %f{{[0-9]+}};
call void @llvm.cuda.syncthreads()
; PTX: bar.sync 0;
; gep cast; load
- %5 = getelementptr inbounds [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5
- %6 = load float* %5, align 4
+ %5 = getelementptr inbounds [10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5
+ %6 = load float, float* %5, align 4
; PTX: ld.shared.f32 %f{{[0-9]+}}, [array+20];
; gep cast; store
store float %v, float* %5, align 4
@@ -58,8 +58,8 @@ define float @ld_st_shared_f32(i32 %i, float %v) {
; cast; gep; load
%7 = addrspacecast [10 x float] addrspace(3)* @array to [10 x float]*
- %8 = getelementptr inbounds [10 x float]* %7, i32 0, i32 %i
- %9 = load float* %8, align 4
+ %8 = getelementptr inbounds [10 x float], [10 x float]* %7, i32 0, i32 %i
+ %9 = load float, float* %8, align 4
; PTX: ld.shared.f32 %f{{[0-9]+}}, [%{{(r|rl|rd)[0-9]+}}];
; cast; gep; store
store float %v, float* %8, align 4
@@ -78,10 +78,10 @@ define float @ld_st_shared_f32(i32 %i, float %v) {
; addrspacecast with a bitcast.
define i32 @ld_int_from_float() {
; IR-LABEL: @ld_int_from_float
-; IR: load i32 addrspace(3)* bitcast (float addrspace(3)* @scalar to i32 addrspace(3)*)
+; IR: load i32, i32 addrspace(3)* bitcast (float addrspace(3)* @scalar to i32 addrspace(3)*)
; PTX-LABEL: ld_int_from_float(
; PTX: ld.shared.u{{(32|64)}}
- %1 = load i32* addrspacecast(float addrspace(3)* @scalar to i32*), align 4
+ %1 = load i32, i32* addrspacecast(float addrspace(3)* @scalar to i32*), align 4
ret i32 %1
}
diff --git a/test/CodeGen/NVPTX/addrspacecast.ll b/test/CodeGen/NVPTX/addrspacecast.ll
index 03b9a98..42e67ca 100644
--- a/test/CodeGen/NVPTX/addrspacecast.ll
+++ b/test/CodeGen/NVPTX/addrspacecast.ll
@@ -10,7 +10,7 @@ define i32 @conv1(i32 addrspace(1)* %ptr) {
; PTX64: cvta.global.u64
; PTX64: ld.u32
%genptr = addrspacecast i32 addrspace(1)* %ptr to i32*
- %val = load i32* %genptr
+ %val = load i32, i32* %genptr
ret i32 %val
}
@@ -22,7 +22,7 @@ define i32 @conv2(i32 addrspace(3)* %ptr) {
; PTX64: cvta.shared.u64
; PTX64: ld.u32
%genptr = addrspacecast i32 addrspace(3)* %ptr to i32*
- %val = load i32* %genptr
+ %val = load i32, i32* %genptr
ret i32 %val
}
@@ -34,7 +34,7 @@ define i32 @conv3(i32 addrspace(4)* %ptr) {
; PTX64: cvta.const.u64
; PTX64: ld.u32
%genptr = addrspacecast i32 addrspace(4)* %ptr to i32*
- %val = load i32* %genptr
+ %val = load i32, i32* %genptr
ret i32 %val
}
@@ -46,7 +46,7 @@ define i32 @conv4(i32 addrspace(5)* %ptr) {
; PTX64: cvta.local.u64
; PTX64: ld.u32
%genptr = addrspacecast i32 addrspace(5)* %ptr to i32*
- %val = load i32* %genptr
+ %val = load i32, i32* %genptr
ret i32 %val
}
@@ -58,7 +58,7 @@ define i32 @conv5(i32* %ptr) {
; PTX64: cvta.to.global.u64
; PTX64: ld.global.u32
%specptr = addrspacecast i32* %ptr to i32 addrspace(1)*
- %val = load i32 addrspace(1)* %specptr
+ %val = load i32, i32 addrspace(1)* %specptr
ret i32 %val
}
@@ -70,7 +70,7 @@ define i32 @conv6(i32* %ptr) {
; PTX64: cvta.to.shared.u64
; PTX64: ld.shared.u32
%specptr = addrspacecast i32* %ptr to i32 addrspace(3)*
- %val = load i32 addrspace(3)* %specptr
+ %val = load i32, i32 addrspace(3)* %specptr
ret i32 %val
}
@@ -82,7 +82,7 @@ define i32 @conv7(i32* %ptr) {
; PTX64: cvta.to.const.u64
; PTX64: ld.const.u32
%specptr = addrspacecast i32* %ptr to i32 addrspace(4)*
- %val = load i32 addrspace(4)* %specptr
+ %val = load i32, i32 addrspace(4)* %specptr
ret i32 %val
}
@@ -94,6 +94,6 @@ define i32 @conv8(i32* %ptr) {
; PTX64: cvta.to.local.u64
; PTX64: ld.local.u32
%specptr = addrspacecast i32* %ptr to i32 addrspace(5)*
- %val = load i32 addrspace(5)* %specptr
+ %val = load i32, i32 addrspace(5)* %specptr
ret i32 %val
}
diff --git a/test/CodeGen/NVPTX/bug21465.ll b/test/CodeGen/NVPTX/bug21465.ll
index cacffce..76af386 100644
--- a/test/CodeGen/NVPTX/bug21465.ll
+++ b/test/CodeGen/NVPTX/bug21465.ll
@@ -11,8 +11,8 @@ entry:
; CHECK-LABEL @_Z22TakesStruct1SPi
; CHECK: bitcast %struct.S* %input to i8*
; CHECK: call i8 addrspace(101)* @llvm.nvvm.ptr.gen.to.param.p101i8.p0i8
- %b = getelementptr inbounds %struct.S* %input, i64 0, i32 1
- %0 = load i32* %b, align 4
+ %b = getelementptr inbounds %struct.S, %struct.S* %input, i64 0, i32 1
+ %0 = load i32, i32* %b, align 4
store i32 %0, i32* %output, align 4
ret void
}
diff --git a/test/CodeGen/NVPTX/bug22322.ll b/test/CodeGen/NVPTX/bug22322.ll
index 19ee694..97863b9 100644
--- a/test/CodeGen/NVPTX/bug22322.ll
+++ b/test/CodeGen/NVPTX/bug22322.ll
@@ -24,14 +24,14 @@ _ZL11compute_vecRK6float3jb.exit:
store float %9, float* %ret_vec.sroa.8.i, align 4
; CHECK: setp.lt.f32 %p{{[0-9]+}}, %f{{[0-9]+}}, 0f00000000
%10 = fcmp olt float %9, 0.000000e+00
- %ret_vec.sroa.8.i.val = load float* %ret_vec.sroa.8.i, align 4
+ %ret_vec.sroa.8.i.val = load float, float* %ret_vec.sroa.8.i, align 4
%11 = select i1 %10, float 0.000000e+00, float %ret_vec.sroa.8.i.val
call void @llvm.lifetime.end(i64 4, i8* %6)
- %12 = getelementptr inbounds %class.float3* %dst, i64 %5, i32 0
+ %12 = getelementptr inbounds %class.float3, %class.float3* %dst, i64 %5, i32 0
store float 0.000000e+00, float* %12, align 4
- %13 = getelementptr inbounds %class.float3* %dst, i64 %5, i32 1
+ %13 = getelementptr inbounds %class.float3, %class.float3* %dst, i64 %5, i32 1
store float %11, float* %13, align 4
- %14 = getelementptr inbounds %class.float3* %dst, i64 %5, i32 2
+ %14 = getelementptr inbounds %class.float3, %class.float3* %dst, i64 %5, i32 2
store float 0.000000e+00, float* %14, align 4
ret void
}
diff --git a/test/CodeGen/NVPTX/call-with-alloca-buffer.ll b/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
index 8483112..58b1911 100644
--- a/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
+++ b/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
@@ -27,22 +27,22 @@ entry:
; CHECK: ld.f32 %f[[A0_REG:[0-9]+]], [%rd[[A_REG]]]
; CHECK: st.f32 [%SP+0], %f[[A0_REG]]
- %0 = load float* %a, align 4
+ %0 = load float, float* %a, align 4
%1 = bitcast [16 x i8]* %buf to float*
store float %0, float* %1, align 4
- %arrayidx2 = getelementptr inbounds float* %a, i64 1
- %2 = load float* %arrayidx2, align 4
- %arrayidx3 = getelementptr inbounds [16 x i8]* %buf, i64 0, i64 1
+ %arrayidx2 = getelementptr inbounds float, float* %a, i64 1
+ %2 = load float, float* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i64 0, i64 1
%3 = bitcast i8* %arrayidx3 to float*
store float %2, float* %3, align 4
- %arrayidx4 = getelementptr inbounds float* %a, i64 2
- %4 = load float* %arrayidx4, align 4
- %arrayidx5 = getelementptr inbounds [16 x i8]* %buf, i64 0, i64 2
+ %arrayidx4 = getelementptr inbounds float, float* %a, i64 2
+ %4 = load float, float* %arrayidx4, align 4
+ %arrayidx5 = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i64 0, i64 2
%5 = bitcast i8* %arrayidx5 to float*
store float %4, float* %5, align 4
- %arrayidx6 = getelementptr inbounds float* %a, i64 3
- %6 = load float* %arrayidx6, align 4
- %arrayidx7 = getelementptr inbounds [16 x i8]* %buf, i64 0, i64 3
+ %arrayidx6 = getelementptr inbounds float, float* %a, i64 3
+ %6 = load float, float* %arrayidx6, align 4
+ %arrayidx7 = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i64 0, i64 3
%7 = bitcast i8* %arrayidx7 to float*
store float %6, float* %7, align 4
@@ -54,7 +54,7 @@ entry:
; CHECK-NEXT: call.uni
; CHECK-NEXT: callee,
- %arraydecay = getelementptr inbounds [16 x i8]* %buf, i64 0, i64 0
+ %arraydecay = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i64 0, i64 0
call void @callee(float* %a, i8* %arraydecay) #2
ret void
}
diff --git a/test/CodeGen/NVPTX/fp16.ll b/test/CodeGen/NVPTX/fp16.ll
index 8770399..b85eed0 100644
--- a/test/CodeGen/NVPTX/fp16.ll
+++ b/test/CodeGen/NVPTX/fp16.ll
@@ -8,7 +8,7 @@ declare i16 @llvm.convert.to.fp16.f64(double) nounwind readnone
; CHECK-LABEL: @test_convert_fp16_to_fp32
; CHECK: cvt.f32.f16
define void @test_convert_fp16_to_fp32(float addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
- %val = load i16 addrspace(1)* %in, align 2
+ %val = load i16, i16 addrspace(1)* %in, align 2
%cvt = call float @llvm.convert.from.fp16.f32(i16 %val) nounwind readnone
store float %cvt, float addrspace(1)* %out, align 4
ret void
@@ -18,7 +18,7 @@ define void @test_convert_fp16_to_fp32(float addrspace(1)* noalias %out, i16 add
; CHECK-LABEL: @test_convert_fp16_to_fp64
; CHECK: cvt.f64.f16
define void @test_convert_fp16_to_fp64(double addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
- %val = load i16 addrspace(1)* %in, align 2
+ %val = load i16, i16 addrspace(1)* %in, align 2
%cvt = call double @llvm.convert.from.fp16.f64(i16 %val) nounwind readnone
store double %cvt, double addrspace(1)* %out, align 4
ret void
@@ -28,7 +28,7 @@ define void @test_convert_fp16_to_fp64(double addrspace(1)* noalias %out, i16 ad
; CHECK-LABEL: @test_convert_fp32_to_fp16
; CHECK: cvt.rn.f16.f32
define void @test_convert_fp32_to_fp16(i16 addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
- %val = load float addrspace(1)* %in, align 2
+ %val = load float, float addrspace(1)* %in, align 2
%cvt = call i16 @llvm.convert.to.fp16.f32(float %val) nounwind readnone
store i16 %cvt, i16 addrspace(1)* %out, align 4
ret void
@@ -38,7 +38,7 @@ define void @test_convert_fp32_to_fp16(i16 addrspace(1)* noalias %out, float add
; CHECK-LABEL: @test_convert_fp64_to_fp16
; CHECK: cvt.rn.f16.f64
define void @test_convert_fp64_to_fp16(i16 addrspace(1)* noalias %out, double addrspace(1)* noalias %in) nounwind {
- %val = load double addrspace(1)* %in, align 2
+ %val = load double, double addrspace(1)* %in, align 2
%cvt = call i16 @llvm.convert.to.fp16.f64(double %val) nounwind readnone
store i16 %cvt, i16 addrspace(1)* %out, align 4
ret void
diff --git a/test/CodeGen/NVPTX/function-align.ll b/test/CodeGen/NVPTX/function-align.ll
new file mode 100644
index 0000000..e7abfb1
--- /dev/null
+++ b/test/CodeGen/NVPTX/function-align.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+; CHECK-NOT: .align 2
+define ptx_device void @foo() align 2 {
+; CHECK-LABEL: .func foo
+ ret void
+}
diff --git a/test/CodeGen/NVPTX/generic-to-nvvm.ll b/test/CodeGen/NVPTX/generic-to-nvvm.ll
index fb63d6e..66917d5 100644
--- a/test/CodeGen/NVPTX/generic-to-nvvm.ll
+++ b/test/CodeGen/NVPTX/generic-to-nvvm.ll
@@ -13,9 +13,9 @@ target triple = "nvptx-nvidia-cuda"
define void @foo(i32* %a, i32* %b) {
; CHECK: cvta.global.u32
- %ld1 = load i32* @myglobal
+ %ld1 = load i32, i32* @myglobal
; CHECK: cvta.global.u32
- %ld2 = load i32* @myconst
+ %ld2 = load i32, i32* @myconst
store i32 %ld1, i32* %a
store i32 %ld2, i32* %b
ret void
diff --git a/test/CodeGen/NVPTX/half.ll b/test/CodeGen/NVPTX/half.ll
index aa08cc7..b995241 100644
--- a/test/CodeGen/NVPTX/half.ll
+++ b/test/CodeGen/NVPTX/half.ll
@@ -4,7 +4,7 @@ define void @test_load_store(half addrspace(1)* %in, half addrspace(1)* %out) {
; CHECK-LABEL: @test_load_store
; CHECK: ld.global.u16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}]
; CHECK: st.global.u16 [{{%r[0-9]+}}], [[TMP]]
- %val = load half addrspace(1)* %in
+ %val = load half, half addrspace(1)* %in
store half %val, half addrspace(1) * %out
ret void
}
@@ -13,7 +13,7 @@ define void @test_bitcast_from_half(half addrspace(1)* %in, i16 addrspace(1)* %o
; CHECK-LABEL: @test_bitcast_from_half
; CHECK: ld.global.u16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}]
; CHECK: st.global.u16 [{{%r[0-9]+}}], [[TMP]]
- %val = load half addrspace(1) * %in
+ %val = load half, half addrspace(1) * %in
%val_int = bitcast half %val to i16
store i16 %val_int, i16 addrspace(1)* %out
ret void
@@ -23,7 +23,7 @@ define void @test_bitcast_to_half(half addrspace(1)* %out, i16 addrspace(1)* %in
; CHECK-LABEL: @test_bitcast_to_half
; CHECK: ld.global.u16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}]
; CHECK: st.global.u16 [{{%r[0-9]+}}], [[TMP]]
- %val = load i16 addrspace(1)* %in
+ %val = load i16, i16 addrspace(1)* %in
%val_fp = bitcast i16 %val to half
store half %val_fp, half addrspace(1)* %out
ret void
@@ -33,7 +33,7 @@ define void @test_extend32(half addrspace(1)* %in, float addrspace(1)* %out) {
; CHECK-LABEL: @test_extend32
; CHECK: cvt.f32.f16
- %val16 = load half addrspace(1)* %in
+ %val16 = load half, half addrspace(1)* %in
%val32 = fpext half %val16 to float
store float %val32, float addrspace(1)* %out
ret void
@@ -43,7 +43,7 @@ define void @test_extend64(half addrspace(1)* %in, double addrspace(1)* %out) {
; CHECK-LABEL: @test_extend64
; CHECK: cvt.f64.f16
- %val16 = load half addrspace(1)* %in
+ %val16 = load half, half addrspace(1)* %in
%val64 = fpext half %val16 to double
store double %val64, double addrspace(1)* %out
ret void
@@ -53,7 +53,7 @@ define void @test_trunc32(float addrspace(1)* %in, half addrspace(1)* %out) {
; CHECK-LABEL: test_trunc32
; CHECK: cvt.rn.f16.f32
- %val32 = load float addrspace(1)* %in
+ %val32 = load float, float addrspace(1)* %in
%val16 = fptrunc float %val32 to half
store half %val16, half addrspace(1)* %out
ret void
@@ -63,7 +63,7 @@ define void @test_trunc64(double addrspace(1)* %in, half addrspace(1)* %out) {
; CHECK-LABEL: @test_trunc64
; CHECK: cvt.rn.f16.f64
- %val32 = load double addrspace(1)* %in
+ %val32 = load double, double addrspace(1)* %in
%val16 = fptrunc double %val32 to half
store half %val16, half addrspace(1)* %out
ret void
diff --git a/test/CodeGen/NVPTX/i1-global.ll b/test/CodeGen/NVPTX/i1-global.ll
index e3fe08e..35d77b4 100644
--- a/test/CodeGen/NVPTX/i1-global.ll
+++ b/test/CodeGen/NVPTX/i1-global.ll
@@ -8,7 +8,7 @@ target triple = "nvptx-nvidia-cuda"
define void @foo(i1 %p, i32* %out) {
- %ld = load i1 addrspace(1)* @mypred
+ %ld = load i1, i1 addrspace(1)* @mypred
%val = zext i1 %ld to i32
store i32 %val, i32* %out
ret void
diff --git a/test/CodeGen/NVPTX/i8-param.ll b/test/CodeGen/NVPTX/i8-param.ll
index 84daa9f..6a1e3a0 100644
--- a/test/CodeGen/NVPTX/i8-param.ll
+++ b/test/CodeGen/NVPTX/i8-param.ll
@@ -13,7 +13,7 @@ define i8 @callee(i8 %a) {
; CHECK: .visible .func caller
define void @caller(i8* %a) {
; CHECK: ld.u8
- %val = load i8* %a
+ %val = load i8, i8* %a
%ret = tail call i8 @callee(i8 %val)
; CHECK: ld.param.b32
store i8 %ret, i8* %a
diff --git a/test/CodeGen/NVPTX/ld-addrspace.ll b/test/CodeGen/NVPTX/ld-addrspace.ll
index f33659c..0018e61 100644
--- a/test/CodeGen/NVPTX/ld-addrspace.ll
+++ b/test/CodeGen/NVPTX/ld-addrspace.ll
@@ -8,7 +8,7 @@ define i8 @ld_global_i8(i8 addrspace(1)* %ptr) {
; PTX32: ret
; PTX64: ld.global.u8 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i8 addrspace(1)* %ptr
+ %a = load i8, i8 addrspace(1)* %ptr
ret i8 %a
}
@@ -17,7 +17,7 @@ define i8 @ld_shared_i8(i8 addrspace(3)* %ptr) {
; PTX32: ret
; PTX64: ld.shared.u8 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i8 addrspace(3)* %ptr
+ %a = load i8, i8 addrspace(3)* %ptr
ret i8 %a
}
@@ -26,7 +26,7 @@ define i8 @ld_local_i8(i8 addrspace(5)* %ptr) {
; PTX32: ret
; PTX64: ld.local.u8 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i8 addrspace(5)* %ptr
+ %a = load i8, i8 addrspace(5)* %ptr
ret i8 %a
}
@@ -36,7 +36,7 @@ define i16 @ld_global_i16(i16 addrspace(1)* %ptr) {
; PTX32: ret
; PTX64: ld.global.u16 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i16 addrspace(1)* %ptr
+ %a = load i16, i16 addrspace(1)* %ptr
ret i16 %a
}
@@ -45,7 +45,7 @@ define i16 @ld_shared_i16(i16 addrspace(3)* %ptr) {
; PTX32: ret
; PTX64: ld.shared.u16 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i16 addrspace(3)* %ptr
+ %a = load i16, i16 addrspace(3)* %ptr
ret i16 %a
}
@@ -54,7 +54,7 @@ define i16 @ld_local_i16(i16 addrspace(5)* %ptr) {
; PTX32: ret
; PTX64: ld.local.u16 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i16 addrspace(5)* %ptr
+ %a = load i16, i16 addrspace(5)* %ptr
ret i16 %a
}
@@ -64,7 +64,7 @@ define i32 @ld_global_i32(i32 addrspace(1)* %ptr) {
; PTX32: ret
; PTX64: ld.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i32 addrspace(1)* %ptr
+ %a = load i32, i32 addrspace(1)* %ptr
ret i32 %a
}
@@ -73,7 +73,7 @@ define i32 @ld_shared_i32(i32 addrspace(3)* %ptr) {
; PTX32: ret
; PTX64: ld.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i32 addrspace(3)* %ptr
+ %a = load i32, i32 addrspace(3)* %ptr
ret i32 %a
}
@@ -82,7 +82,7 @@ define i32 @ld_local_i32(i32 addrspace(5)* %ptr) {
; PTX32: ret
; PTX64: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i32 addrspace(5)* %ptr
+ %a = load i32, i32 addrspace(5)* %ptr
ret i32 %a
}
@@ -92,7 +92,7 @@ define i64 @ld_global_i64(i64 addrspace(1)* %ptr) {
; PTX32: ret
; PTX64: ld.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i64 addrspace(1)* %ptr
+ %a = load i64, i64 addrspace(1)* %ptr
ret i64 %a
}
@@ -101,7 +101,7 @@ define i64 @ld_shared_i64(i64 addrspace(3)* %ptr) {
; PTX32: ret
; PTX64: ld.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i64 addrspace(3)* %ptr
+ %a = load i64, i64 addrspace(3)* %ptr
ret i64 %a
}
@@ -110,7 +110,7 @@ define i64 @ld_local_i64(i64 addrspace(5)* %ptr) {
; PTX32: ret
; PTX64: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i64 addrspace(5)* %ptr
+ %a = load i64, i64 addrspace(5)* %ptr
ret i64 %a
}
@@ -120,7 +120,7 @@ define float @ld_global_f32(float addrspace(1)* %ptr) {
; PTX32: ret
; PTX64: ld.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load float addrspace(1)* %ptr
+ %a = load float, float addrspace(1)* %ptr
ret float %a
}
@@ -129,7 +129,7 @@ define float @ld_shared_f32(float addrspace(3)* %ptr) {
; PTX32: ret
; PTX64: ld.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load float addrspace(3)* %ptr
+ %a = load float, float addrspace(3)* %ptr
ret float %a
}
@@ -138,7 +138,7 @@ define float @ld_local_f32(float addrspace(5)* %ptr) {
; PTX32: ret
; PTX64: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load float addrspace(5)* %ptr
+ %a = load float, float addrspace(5)* %ptr
ret float %a
}
@@ -148,7 +148,7 @@ define double @ld_global_f64(double addrspace(1)* %ptr) {
; PTX32: ret
; PTX64: ld.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load double addrspace(1)* %ptr
+ %a = load double, double addrspace(1)* %ptr
ret double %a
}
@@ -157,7 +157,7 @@ define double @ld_shared_f64(double addrspace(3)* %ptr) {
; PTX32: ret
; PTX64: ld.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load double addrspace(3)* %ptr
+ %a = load double, double addrspace(3)* %ptr
ret double %a
}
@@ -166,6 +166,6 @@ define double @ld_local_f64(double addrspace(5)* %ptr) {
; PTX32: ret
; PTX64: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load double addrspace(5)* %ptr
+ %a = load double, double addrspace(5)* %ptr
ret double %a
}
diff --git a/test/CodeGen/NVPTX/ld-generic.ll b/test/CodeGen/NVPTX/ld-generic.ll
index d629e0e..44cfe65
--- a/test/CodeGen/NVPTX/ld-generic.ll
+++ b/test/CodeGen/NVPTX/ld-generic.ll
@@ -8,7 +8,7 @@ define i8 @ld_global_i8(i8 addrspace(0)* %ptr) {
; PTX32: ret
; PTX64: ld.u8 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i8 addrspace(0)* %ptr
+ %a = load i8, i8 addrspace(0)* %ptr
ret i8 %a
}
@@ -18,7 +18,7 @@ define i16 @ld_global_i16(i16 addrspace(0)* %ptr) {
; PTX32: ret
; PTX64: ld.u16 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i16 addrspace(0)* %ptr
+ %a = load i16, i16 addrspace(0)* %ptr
ret i16 %a
}
@@ -28,7 +28,7 @@ define i32 @ld_global_i32(i32 addrspace(0)* %ptr) {
; PTX32: ret
; PTX64: ld.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i32 addrspace(0)* %ptr
+ %a = load i32, i32 addrspace(0)* %ptr
ret i32 %a
}
@@ -38,7 +38,7 @@ define i64 @ld_global_i64(i64 addrspace(0)* %ptr) {
; PTX32: ret
; PTX64: ld.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i64 addrspace(0)* %ptr
+ %a = load i64, i64 addrspace(0)* %ptr
ret i64 %a
}
@@ -48,7 +48,7 @@ define float @ld_global_f32(float addrspace(0)* %ptr) {
; PTX32: ret
; PTX64: ld.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load float addrspace(0)* %ptr
+ %a = load float, float addrspace(0)* %ptr
ret float %a
}
@@ -58,6 +58,6 @@ define double @ld_global_f64(double addrspace(0)* %ptr) {
; PTX32: ret
; PTX64: ld.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load double addrspace(0)* %ptr
+ %a = load double, double addrspace(0)* %ptr
ret double %a
}
diff --git a/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll b/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll
index fd35a75..ec96a49 100644
--- a/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll
+++ b/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll
@@ -6,9 +6,9 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
define void @reg_plus_offset(i32* %a) {
; CHECK: ldu.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}+32];
; CHECK: ldu.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}+36];
- %p2 = getelementptr i32* %a, i32 8
+ %p2 = getelementptr i32, i32* %a, i32 8
%t1 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p2, i32 4)
- %p3 = getelementptr i32* %a, i32 9
+ %p3 = getelementptr i32, i32* %a, i32 9
%t2 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p3, i32 4)
%t3 = mul i32 %t1, %t2
store i32 %t3, i32* %a
diff --git a/test/CodeGen/NVPTX/load-sext-i1.ll b/test/CodeGen/NVPTX/load-sext-i1.ll
index d836740..9fc98a4 100644
--- a/test/CodeGen/NVPTX/load-sext-i1.ll
+++ b/test/CodeGen/NVPTX/load-sext-i1.ll
@@ -6,8 +6,8 @@ target triple = "nvptx-nvidia-cuda"
define void @main(i1* %a1, i32 %a2, i32* %arg3) {
; CHECK: ld.u8
; CHECK-NOT: ld.u1
- %t1 = getelementptr i1* %a1, i32 %a2
- %t2 = load i1* %t1
+ %t1 = getelementptr i1, i1* %a1, i32 %a2
+ %t2 = load i1, i1* %t1
%t3 = sext i1 %t2 to i32
store i32 %t3, i32* %arg3
ret void
diff --git a/test/CodeGen/NVPTX/machine-sink.ll b/test/CodeGen/NVPTX/machine-sink.ll
index 3614bea..65ba141 100644
--- a/test/CodeGen/NVPTX/machine-sink.ll
+++ b/test/CodeGen/NVPTX/machine-sink.ll
@@ -14,8 +14,8 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
define float @post_dominate(float %x, i1 %cond) {
; CHECK-LABEL: post_dominate(
entry:
- %0 = load float* addrspacecast (float addrspace(3)* @scalar1 to float*), align 4
- %1 = load float* addrspacecast (float addrspace(3)* @scalar2 to float*), align 4
+ %0 = load float, float* addrspacecast (float addrspace(3)* @scalar1 to float*), align 4
+ %1 = load float, float* addrspacecast (float addrspace(3)* @scalar2 to float*), align 4
; CHECK: ld.shared.f32
; CHECK: ld.shared.f32
%2 = fmul float %0, %0
diff --git a/test/CodeGen/NVPTX/misaligned-vector-ldst.ll b/test/CodeGen/NVPTX/misaligned-vector-ldst.ll
index 90c9c43..2ad72b0 100644
--- a/test/CodeGen/NVPTX/misaligned-vector-ldst.ll
+++ b/test/CodeGen/NVPTX/misaligned-vector-ldst.ll
@@ -10,7 +10,7 @@ define <4 x float> @t1(i8* %p1) {
; CHECK-NOT: ld.f32
; CHECK: ld.u8
%cast = bitcast i8* %p1 to <4 x float>*
- %r = load <4 x float>* %cast, align 1
+ %r = load <4 x float>, <4 x float>* %cast, align 1
ret <4 x float> %r
}
@@ -20,7 +20,7 @@ define <4 x float> @t2(i8* %p1) {
; CHECK-NOT: ld.v2
; CHECK: ld.f32
%cast = bitcast i8* %p1 to <4 x float>*
- %r = load <4 x float>* %cast, align 4
+ %r = load <4 x float>, <4 x float>* %cast, align 4
ret <4 x float> %r
}
@@ -29,7 +29,7 @@ define <4 x float> @t3(i8* %p1) {
; CHECK-NOT: ld.v4
; CHECK: ld.v2
%cast = bitcast i8* %p1 to <4 x float>*
- %r = load <4 x float>* %cast, align 8
+ %r = load <4 x float>, <4 x float>* %cast, align 8
ret <4 x float> %r
}
@@ -37,7 +37,7 @@ define <4 x float> @t3(i8* %p1) {
define <4 x float> @t4(i8* %p1) {
; CHECK: ld.v4
%cast = bitcast i8* %p1 to <4 x float>*
- %r = load <4 x float>* %cast, align 16
+ %r = load <4 x float>, <4 x float>* %cast, align 16
ret <4 x float> %r
}
diff --git a/test/CodeGen/NVPTX/noduplicate-syncthreads.ll b/test/CodeGen/NVPTX/noduplicate-syncthreads.ll
index 841bbc3..2fec31b 100644
--- a/test/CodeGen/NVPTX/noduplicate-syncthreads.ll
+++ b/test/CodeGen/NVPTX/noduplicate-syncthreads.ll
@@ -11,16 +11,16 @@ define void @foo(float* %output) #1 {
entry:
%output.addr = alloca float*, align 8
store float* %output, float** %output.addr, align 8
- %0 = load float** %output.addr, align 8
- %arrayidx = getelementptr inbounds float* %0, i64 0
- %1 = load float* %arrayidx, align 4
+ %0 = load float*, float** %output.addr, align 8
+ %arrayidx = getelementptr inbounds float, float* %0, i64 0
+ %1 = load float, float* %arrayidx, align 4
%conv = fpext float %1 to double
%cmp = fcmp olt double %conv, 1.000000e+01
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
- %2 = load float** %output.addr, align 8
- %3 = load float* %2, align 4
+ %2 = load float*, float** %output.addr, align 8
+ %3 = load float, float* %2, align 4
%conv1 = fpext float %3 to double
%add = fadd double %conv1, 1.000000e+00
%conv2 = fptrunc double %add to float
@@ -28,8 +28,8 @@ if.then: ; preds = %entry
br label %if.end
if.else: ; preds = %entry
- %4 = load float** %output.addr, align 8
- %5 = load float* %4, align 4
+ %4 = load float*, float** %output.addr, align 8
+ %5 = load float, float* %4, align 4
%conv3 = fpext float %5 to double
%add4 = fadd double %conv3, 2.000000e+00
%conv5 = fptrunc double %add4 to float
@@ -38,16 +38,16 @@ if.else: ; preds = %entry
if.end: ; preds = %if.else, %if.then
call void @llvm.cuda.syncthreads()
- %6 = load float** %output.addr, align 8
- %arrayidx6 = getelementptr inbounds float* %6, i64 0
- %7 = load float* %arrayidx6, align 4
+ %6 = load float*, float** %output.addr, align 8
+ %arrayidx6 = getelementptr inbounds float, float* %6, i64 0
+ %7 = load float, float* %arrayidx6, align 4
%conv7 = fpext float %7 to double
%cmp8 = fcmp olt double %conv7, 1.000000e+01
br i1 %cmp8, label %if.then9, label %if.else13
if.then9: ; preds = %if.end
- %8 = load float** %output.addr, align 8
- %9 = load float* %8, align 4
+ %8 = load float*, float** %output.addr, align 8
+ %9 = load float, float* %8, align 4
%conv10 = fpext float %9 to double
%add11 = fadd double %conv10, 3.000000e+00
%conv12 = fptrunc double %add11 to float
@@ -55,8 +55,8 @@ if.then9: ; preds = %if.end
br label %if.end17
if.else13: ; preds = %if.end
- %10 = load float** %output.addr, align 8
- %11 = load float* %10, align 4
+ %10 = load float*, float** %output.addr, align 8
+ %11 = load float, float* %10, align 4
%conv14 = fpext float %11 to double
%add15 = fadd double %conv14, 4.000000e+00
%conv16 = fptrunc double %add15 to float
diff --git a/test/CodeGen/NVPTX/nounroll.ll b/test/CodeGen/NVPTX/nounroll.ll
index db96d2a..e80a4a2 100644
--- a/test/CodeGen/NVPTX/nounroll.ll
+++ b/test/CodeGen/NVPTX/nounroll.ll
@@ -17,10 +17,10 @@ for.body:
; CHECK: .pragma "nounroll"
%i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%idxprom = sext i32 %i.06 to i64
- %arrayidx = getelementptr inbounds float* %input, i64 %idxprom
- %0 = load float* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds float, float* %input, i64 %idxprom
+ %0 = load float, float* %arrayidx, align 4
; CHECK: ld.f32
- %arrayidx2 = getelementptr inbounds float* %output, i64 %idxprom
+ %arrayidx2 = getelementptr inbounds float, float* %output, i64 %idxprom
store float %0, float* %arrayidx2, align 4
; CHECK: st.f32
%inc = add nuw nsw i32 %i.06, 1
diff --git a/test/CodeGen/NVPTX/nvvm-reflect.ll b/test/CodeGen/NVPTX/nvvm-reflect.ll
index 21e9c69..8c75dfc 100644
--- a/test/CodeGen/NVPTX/nvvm-reflect.ll
+++ b/test/CodeGen/NVPTX/nvvm-reflect.ll
@@ -11,7 +11,7 @@ define float @foo(float %a, float %b) {
; USE_MUL_0-NOT: call i32 @__nvvm_reflect
; USE_MUL_1: define float @foo
; USE_MUL_1-NOT: call i32 @__nvvm_reflect
- %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([8 x i8] addrspace(4)* @str, i32 0, i32 0))
+ %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(4)* @str, i32 0, i32 0))
%reflect = tail call i32 @__nvvm_reflect(i8* %ptr)
%cmp = icmp ugt i32 %reflect, 0
br i1 %cmp, label %use_mul, label %use_add
@@ -42,7 +42,38 @@ define i32 @intrinsic() {
; USE_MUL_0: ret i32 0
; USE_MUL_1-NOT: call i32 @llvm.nvvm.reflect
; USE_MUL_1: ret i32 1
- %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([8 x i8] addrspace(4)* @str, i32 0, i32 0))
+ %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(4)* @str, i32 0, i32 0))
%reflect = tail call i32 @llvm.nvvm.reflect.p0i8(i8* %ptr)
ret i32 %reflect
}
+
+; CUDA-7.0 passes __nvvm_reflect argument slightly differently.
+; Verify that it works, too
+
+@"$str" = private addrspace(1) constant [8 x i8] c"USE_MUL\00"
+
+define float @bar(float %a, float %b) {
+; USE_MUL_0: define float @bar
+; USE_MUL_0-NOT: call i32 @__nvvm_reflect
+; USE_MUL_1: define float @bar
+; USE_MUL_1-NOT: call i32 @__nvvm_reflect
+ %reflect = call i32 @__nvvm_reflect(i8* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(1)* @"$str", i32 0, i32 0) to i8*))
+ %cmp = icmp ne i32 %reflect, 0
+ br i1 %cmp, label %use_mul, label %use_add
+
+use_mul:
+; USE_MUL_1: fmul float %a, %b
+; USE_MUL_0-NOT: fadd float %a, %b
+ %ret1 = fmul float %a, %b
+ br label %exit
+
+use_add:
+; USE_MUL_0: fadd float %a, %b
+; USE_MUL_1-NOT: fmul float %a, %b
+ %ret2 = fadd float %a, %b
+ br label %exit
+
+exit:
+ %ret = phi float [%ret1, %use_mul], [%ret2, %use_add]
+ ret float %ret
+}
diff --git a/test/CodeGen/NVPTX/pr13291-i1-store.ll b/test/CodeGen/NVPTX/pr13291-i1-store.ll
index cc67a6f..d4f7c3b 100644
--- a/test/CodeGen/NVPTX/pr13291-i1-store.ll
+++ b/test/CodeGen/NVPTX/pr13291-i1-store.ll
@@ -19,7 +19,7 @@ define ptx_kernel void @t2(i1* %a, i8* %b) {
; PTX64: and.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, 1;
; PTX64: setp.eq.b16 %p{{[0-9]+}}, %rs{{[0-9]+}}, 1;
- %t1 = load i1* %a
+ %t1 = load i1, i1* %a
%t2 = select i1 %t1, i8 1, i8 2
store i8 %t2, i8* %b
ret void
diff --git a/test/CodeGen/NVPTX/pr16278.ll b/test/CodeGen/NVPTX/pr16278.ll
index 5432a84..a836eaf 100644
--- a/test/CodeGen/NVPTX/pr16278.ll
+++ b/test/CodeGen/NVPTX/pr16278.ll
@@ -5,6 +5,6 @@
define float @foo() {
; CHECK: ld.const.f32
- %val = load float addrspace(4)* @one_f
+ %val = load float, float addrspace(4)* @one_f
ret float %val
}
diff --git a/test/CodeGen/NVPTX/pr17529.ll b/test/CodeGen/NVPTX/pr17529.ll
index a162142..a751977 100644
--- a/test/CodeGen/NVPTX/pr17529.ll
+++ b/test/CodeGen/NVPTX/pr17529.ll
@@ -11,7 +11,7 @@ entry:
vector.body: ; preds = %vector.body, %entry
%index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
- %scevgep9 = getelementptr i8* %dst, i64 %index
+ %scevgep9 = getelementptr i8, i8* %dst, i64 %index
%scevgep910 = bitcast i8* %scevgep9 to <4 x i8>*
store <4 x i8> undef, <4 x i8>* %scevgep910, align 1
%index.next = add i64 %index, 4
@@ -22,13 +22,13 @@ middle.block: ; preds = %vector.body
br i1 undef, label %for.end, label %for.body.preheader1
for.body.preheader1: ; preds = %middle.block
- %scevgep2 = getelementptr i8* %dst, i64 0
+ %scevgep2 = getelementptr i8, i8* %dst, i64 0
br label %for.body
for.body: ; preds = %for.body, %for.body.preheader1
%lsr.iv3 = phi i8* [ %scevgep2, %for.body.preheader1 ], [ %scevgep4, %for.body ]
store i8 undef, i8* %lsr.iv3, align 1
- %scevgep4 = getelementptr i8* %lsr.iv3, i64 1
+ %scevgep4 = getelementptr i8, i8* %lsr.iv3, i64 1
br label %for.body
for.end: ; preds = %middle.block, %entry
diff --git a/test/CodeGen/NVPTX/refl1.ll b/test/CodeGen/NVPTX/refl1.ll
index e8782ea..0432b67 100644
--- a/test/CodeGen/NVPTX/refl1.ll
+++ b/test/CodeGen/NVPTX/refl1.ll
@@ -5,7 +5,7 @@ target triple = "nvptx-nvidia-cuda"
; Function Attrs: nounwind
; CHECK: .entry foo
define void @foo(float* nocapture %a) #0 {
- %val = load float* %a
+ %val = load float, float* %a
%tan = tail call fastcc float @__nv_fast_tanf(float %val)
store float %tan, float* %a
ret void
diff --git a/test/CodeGen/NVPTX/sched1.ll b/test/CodeGen/NVPTX/sched1.ll
index 03ab635..fb01eb2 100644
--- a/test/CodeGen/NVPTX/sched1.ll
+++ b/test/CodeGen/NVPTX/sched1.ll
@@ -11,14 +11,14 @@ define void @foo(i32* %a) {
; CHECK-NEXT: add.s32
; CHECK-NEXT: add.s32
; CHECK-NEXT: add.s32
- %ptr0 = getelementptr i32* %a, i32 0
- %val0 = load i32* %ptr0
- %ptr1 = getelementptr i32* %a, i32 1
- %val1 = load i32* %ptr1
- %ptr2 = getelementptr i32* %a, i32 2
- %val2 = load i32* %ptr2
- %ptr3 = getelementptr i32* %a, i32 3
- %val3 = load i32* %ptr3
+ %ptr0 = getelementptr i32, i32* %a, i32 0
+ %val0 = load i32, i32* %ptr0
+ %ptr1 = getelementptr i32, i32* %a, i32 1
+ %val1 = load i32, i32* %ptr1
+ %ptr2 = getelementptr i32, i32* %a, i32 2
+ %val2 = load i32, i32* %ptr2
+ %ptr3 = getelementptr i32, i32* %a, i32 3
+ %val3 = load i32, i32* %ptr3
%t0 = add i32 %val0, %val1
%t1 = add i32 %t0, %val2
diff --git a/test/CodeGen/NVPTX/sched2.ll b/test/CodeGen/NVPTX/sched2.ll
index 71a9a49..91ed778 100644
--- a/test/CodeGen/NVPTX/sched2.ll
+++ b/test/CodeGen/NVPTX/sched2.ll
@@ -12,14 +12,14 @@ define void @foo(<2 x i32>* %a) {
; CHECK-NEXT: add.s32
; CHECK-NEXT: add.s32
; CHECK-NEXT: add.s32
- %ptr0 = getelementptr <2 x i32>* %a, i32 0
- %val0 = load <2 x i32>* %ptr0
- %ptr1 = getelementptr <2 x i32>* %a, i32 1
- %val1 = load <2 x i32>* %ptr1
- %ptr2 = getelementptr <2 x i32>* %a, i32 2
- %val2 = load <2 x i32>* %ptr2
- %ptr3 = getelementptr <2 x i32>* %a, i32 3
- %val3 = load <2 x i32>* %ptr3
+ %ptr0 = getelementptr <2 x i32>, <2 x i32>* %a, i32 0
+ %val0 = load <2 x i32>, <2 x i32>* %ptr0
+ %ptr1 = getelementptr <2 x i32>, <2 x i32>* %a, i32 1
+ %val1 = load <2 x i32>, <2 x i32>* %ptr1
+ %ptr2 = getelementptr <2 x i32>, <2 x i32>* %a, i32 2
+ %val2 = load <2 x i32>, <2 x i32>* %ptr2
+ %ptr3 = getelementptr <2 x i32>, <2 x i32>* %a, i32 3
+ %val3 = load <2 x i32>, <2 x i32>* %ptr3
%t0 = add <2 x i32> %val0, %val1
%t1 = add <2 x i32> %t0, %val2
diff --git a/test/CodeGen/NVPTX/shift-parts.ll b/test/CodeGen/NVPTX/shift-parts.ll
index 748297c..b4d408f 100644
--- a/test/CodeGen/NVPTX/shift-parts.ll
+++ b/test/CodeGen/NVPTX/shift-parts.ll
@@ -12,8 +12,8 @@ define void @shift_parts_left_128(i128* %val, i128* %amtptr) {
; CHECK: setp.gt.s32
; CHECK: selp.b64
; CHECK: shl.b64
- %amt = load i128* %amtptr
- %a = load i128* %val
+ %amt = load i128, i128* %amtptr
+ %a = load i128, i128* %val
%val0 = shl i128 %a, %amt
store i128 %val0, i128* %val
ret void
@@ -30,8 +30,8 @@ define void @shift_parts_right_128(i128* %val, i128* %amtptr) {
; CHECK: setp.gt.s32
; CHECK: selp.b64
; CHECK: shr.s64
- %amt = load i128* %amtptr
- %a = load i128* %val
+ %amt = load i128, i128* %amtptr
+ %a = load i128, i128* %val
%val0 = ashr i128 %a, %amt
store i128 %val0, i128* %val
ret void
diff --git a/test/CodeGen/NVPTX/simple-call.ll b/test/CodeGen/NVPTX/simple-call.ll
index 1b41361..da65686 100644
--- a/test/CodeGen/NVPTX/simple-call.ll
+++ b/test/CodeGen/NVPTX/simple-call.ll
@@ -11,7 +11,7 @@ define float @device_func(float %a) noinline {
; CHECK: .entry kernel_func
define void @kernel_func(float* %a) {
- %val = load float* %a
+ %val = load float, float* %a
; CHECK: call.uni (retval0),
; CHECK: device_func,
%mul = call float @device_func(float %val)
diff --git a/test/CodeGen/NVPTX/symbol-naming.ll b/test/CodeGen/NVPTX/symbol-naming.ll
index bd1333f..f8e6bf1 100644
--- a/test/CodeGen/NVPTX/symbol-naming.ll
+++ b/test/CodeGen/NVPTX/symbol-naming.ll
@@ -24,7 +24,7 @@ target triple = "nvptx64-unknown-unknown"
; Function Attrs: nounwind
define void @foo(i32 %a, float %b, i8 signext %c, i32 %e) {
entry:
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0))
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0))
ret void
}
diff --git a/test/CodeGen/NVPTX/vector-compare.ll b/test/CodeGen/NVPTX/vector-compare.ll
index 2180499..2992b0e 100644
--- a/test/CodeGen/NVPTX/vector-compare.ll
+++ b/test/CodeGen/NVPTX/vector-compare.ll
@@ -6,8 +6,8 @@
; tried to promote <2 x i1> to <2 x i8> and instruction selection failed.
define void @foo(<2 x i32>* %a, <2 x i32>* %b, i32* %r1, i32* %r2) {
- %aval = load <2 x i32>* %a
- %bval = load <2 x i32>* %b
+ %aval = load <2 x i32>, <2 x i32>* %a
+ %bval = load <2 x i32>, <2 x i32>* %b
%res = icmp slt <2 x i32> %aval, %bval
%t1 = extractelement <2 x i1> %res, i32 0
%t2 = extractelement <2 x i1> %res, i32 1
diff --git a/test/CodeGen/NVPTX/vector-loads.ll b/test/CodeGen/NVPTX/vector-loads.ll
index 58882bf..d703489 100644
--- a/test/CodeGen/NVPTX/vector-loads.ll
+++ b/test/CodeGen/NVPTX/vector-loads.ll
@@ -10,7 +10,7 @@ define void @foo(<2 x float>* %a) {
; CHECK: .func foo
; CHECK: ld.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}
- %t1 = load <2 x float>* %a
+ %t1 = load <2 x float>, <2 x float>* %a
%t2 = fmul <2 x float> %t1, %t1
store <2 x float> %t2, <2 x float>* %a
ret void
@@ -19,7 +19,7 @@ define void @foo(<2 x float>* %a) {
define void @foo2(<4 x float>* %a) {
; CHECK: .func foo2
; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
- %t1 = load <4 x float>* %a
+ %t1 = load <4 x float>, <4 x float>* %a
%t2 = fmul <4 x float> %t1, %t1
store <4 x float> %t2, <4 x float>* %a
ret void
@@ -29,7 +29,7 @@ define void @foo3(<8 x float>* %a) {
; CHECK: .func foo3
; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
; CHECK-NEXT: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
- %t1 = load <8 x float>* %a
+ %t1 = load <8 x float>, <8 x float>* %a
%t2 = fmul <8 x float> %t1, %t1
store <8 x float> %t2, <8 x float>* %a
ret void
@@ -40,7 +40,7 @@ define void @foo4(<2 x i32>* %a) {
; CHECK: .func foo4
; CHECK: ld.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}
- %t1 = load <2 x i32>* %a
+ %t1 = load <2 x i32>, <2 x i32>* %a
%t2 = mul <2 x i32> %t1, %t1
store <2 x i32> %t2, <2 x i32>* %a
ret void
@@ -49,7 +49,7 @@ define void @foo4(<2 x i32>* %a) {
define void @foo5(<4 x i32>* %a) {
; CHECK: .func foo5
; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
- %t1 = load <4 x i32>* %a
+ %t1 = load <4 x i32>, <4 x i32>* %a
%t2 = mul <4 x i32> %t1, %t1
store <4 x i32> %t2, <4 x i32>* %a
ret void
@@ -59,7 +59,7 @@ define void @foo6(<8 x i32>* %a) {
; CHECK: .func foo6
; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
; CHECK-NEXT: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
- %t1 = load <8 x i32>* %a
+ %t1 = load <8 x i32>, <8 x i32>* %a
%t2 = mul <8 x i32> %t1, %t1
store <8 x i32> %t2, <8 x i32>* %a
ret void
diff --git a/test/CodeGen/NVPTX/vector-select.ll b/test/CodeGen/NVPTX/vector-select.ll
index 11893df..1e81031 100644
--- a/test/CodeGen/NVPTX/vector-select.ll
+++ b/test/CodeGen/NVPTX/vector-select.ll
@@ -6,9 +6,9 @@
define void @foo(<2 x i32> addrspace(1)* %def_a, <2 x i32> addrspace(1)* %def_b, <2 x i32> addrspace(1)* %def_c) {
entry:
- %tmp4 = load <2 x i32> addrspace(1)* %def_a
- %tmp6 = load <2 x i32> addrspace(1)* %def_c
- %tmp8 = load <2 x i32> addrspace(1)* %def_b
+ %tmp4 = load <2 x i32>, <2 x i32> addrspace(1)* %def_a
+ %tmp6 = load <2 x i32>, <2 x i32> addrspace(1)* %def_c
+ %tmp8 = load <2 x i32>, <2 x i32> addrspace(1)* %def_b
%0 = icmp sge <2 x i32> %tmp4, zeroinitializer
%cond = select <2 x i1> %0, <2 x i32> %tmp6, <2 x i32> %tmp8
store <2 x i32> %cond, <2 x i32> addrspace(1)* %def_c
diff --git a/test/CodeGen/NVPTX/weak-global.ll b/test/CodeGen/NVPTX/weak-global.ll
index 2bef4c5..a64f9f4 100644
--- a/test/CodeGen/NVPTX/weak-global.ll
+++ b/test/CodeGen/NVPTX/weak-global.ll
@@ -4,6 +4,6 @@
@g = common addrspace(1) global i32 zeroinitializer
define i32 @func0() {
- %val = load i32 addrspace(1)* @g
+ %val = load i32, i32 addrspace(1)* @g
ret i32 %val
}