author     Pirama Arumuga Nainar <pirama@google.com>   2015-04-08 08:55:49 -0700
committer  Pirama Arumuga Nainar <pirama@google.com>   2015-04-09 15:04:38 -0700
commit     4c5e43da7792f75567b693105cc53e3f1992ad98 (patch)
tree       1b2c9792582e12f5af0b1512e3094425f0dc0df9 /test/CodeGen/R600
parent     c75239e6119d0f9a74c57099d91cbc9bde56bf33 (diff)
Update aosp/master llvm for rebase to r233350
Change-Id: I07d935f8793ee8ec6b7da003f6483046594bca49
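
Background on the churn below: this rebase crosses the upstream IR syntax change that made pointee types explicit, so nearly every hunk is a mechanical rewrite of load and getelementptr instructions, which now take the pointee type as a separate leading operand instead of deriving it from the pointer operand (groundwork for the opaque-pointer transition; store already names the value type and is unchanged). A minimal before/after sketch of the pattern, using a hypothetical @example kernel rather than any file from this diff:

; Old syntax: the loaded/indexed type is implied by the pointer operand.
define void @example_old(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %p = getelementptr i32 addrspace(1)* %in, i32 1   ; index off %in
  %v = load i32 addrspace(1)* %p                    ; load the element
  store i32 %v, i32 addrspace(1)* %out
  ret void
}

; New syntax: the pointee type is spelled out as an explicit first operand.
define void @example_new(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %p = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %v = load i32, i32 addrspace(1)* %p
  store i32 %v, i32 addrspace(1)* %out
  ret void
}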
Diffstat (limited to 'test/CodeGen/R600')
-rw-r--r--  test/CodeGen/R600/32-bit-local-address-space.ll | 28
-rw-r--r--  test/CodeGen/R600/add-debug.ll | 2
-rw-r--r--  test/CodeGen/R600/add.ll | 22
-rw-r--r--  test/CodeGen/R600/add_i64.ll | 20
-rw-r--r--  test/CodeGen/R600/address-space.ll | 8
-rw-r--r--  test/CodeGen/R600/and.ll | 34
-rw-r--r--  test/CodeGen/R600/array-ptr-calc-i32.ll | 16
-rw-r--r--  test/CodeGen/R600/array-ptr-calc-i64.ll | 8
-rw-r--r--  test/CodeGen/R600/atomic_cmp_swap_local.ll | 10
-rw-r--r--  test/CodeGen/R600/atomic_load_add.ll | 4
-rw-r--r--  test/CodeGen/R600/atomic_load_sub.ll | 4
-rw-r--r--  test/CodeGen/R600/bfe_uint.ll | 2
-rw-r--r--  test/CodeGen/R600/big_alu.ll | 40
-rw-r--r--  test/CodeGen/R600/bitcast.ll | 16
-rw-r--r--  test/CodeGen/R600/bswap.ll | 14
-rw-r--r--  test/CodeGen/R600/call.ll | 12
-rw-r--r--  test/CodeGen/R600/call_fs.ll | 4
-rw-r--r--  test/CodeGen/R600/codegen-prepare-addrmode-sext.ll | 2
-rw-r--r--  test/CodeGen/R600/combine_vloads.ll | 4
-rw-r--r--  test/CodeGen/R600/commute-compares.ll | 697
-rw-r--r--  test/CodeGen/R600/commute_modifiers.ll | 66
-rw-r--r--  test/CodeGen/R600/copy-illegal-type.ll | 18
-rw-r--r--  test/CodeGen/R600/copy-to-reg.ll | 6
-rw-r--r--  test/CodeGen/R600/ctlz_zero_undef.ll | 6
-rw-r--r--  test/CodeGen/R600/ctpop.ll | 36
-rw-r--r--  test/CodeGen/R600/ctpop64.ll | 10
-rw-r--r--  test/CodeGen/R600/cttz_zero_undef.ll | 6
-rw-r--r--  test/CodeGen/R600/cvt_f32_ubyte.ll | 24
-rw-r--r--  test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll | 8
-rw-r--r--  test/CodeGen/R600/disconnected-predset-break-bug.ll | 2
-rw-r--r--  test/CodeGen/R600/dot4-folding.ll | 4
-rw-r--r--  test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll | 22
-rw-r--r--  test/CodeGen/R600/ds_read2.ll | 250
-rw-r--r--  test/CodeGen/R600/ds_read2_offset_order.ll | 28
-rw-r--r--  test/CodeGen/R600/ds_read2st64.ll | 130
-rw-r--r--  test/CodeGen/R600/ds_write2.ll | 206
-rw-r--r--  test/CodeGen/R600/ds_write2st64.ll | 52
-rw-r--r--  test/CodeGen/R600/elf.ll | 16
-rw-r--r--  test/CodeGen/R600/empty-function.ll | 4
-rw-r--r--  test/CodeGen/R600/endcf-loop-header.ll | 2
-rw-r--r--  test/CodeGen/R600/extload-private.ll | 8
-rw-r--r--  test/CodeGen/R600/extload.ll | 8
-rw-r--r--  test/CodeGen/R600/extract_vector_elt_i16.ll | 4
-rw-r--r--  test/CodeGen/R600/fabs.f64.ll | 4
-rw-r--r--  test/CodeGen/R600/fadd.ll | 6
-rw-r--r--  test/CodeGen/R600/fadd64.ll | 4
-rw-r--r--  test/CodeGen/R600/fceil64.ll | 6
-rw-r--r--  test/CodeGen/R600/fcmp-cnd.ll | 2
-rw-r--r--  test/CodeGen/R600/fcmp-cnde-int-args.ll | 2
-rw-r--r--  test/CodeGen/R600/fcmp.ll | 8
-rw-r--r--  test/CodeGen/R600/fcmp64.ll | 24
-rw-r--r--  test/CodeGen/R600/fconst64.ll | 2
-rw-r--r--  test/CodeGen/R600/fdiv.f64.ll | 22
-rw-r--r--  test/CodeGen/R600/fdiv.ll | 6
-rw-r--r--  test/CodeGen/R600/fetch-limits.r600.ll | 18
-rw-r--r--  test/CodeGen/R600/fetch-limits.r700+.ll | 34
-rw-r--r--  test/CodeGen/R600/ffloor.f64.ll | 63
-rw-r--r--  test/CodeGen/R600/flat-address-space.ll | 20
-rw-r--r--  test/CodeGen/R600/fma-combine.ll | 228
-rw-r--r--  test/CodeGen/R600/fma.f64.ll | 18
-rw-r--r--  test/CodeGen/R600/fma.ll | 38
-rw-r--r--  test/CodeGen/R600/fmax3.f64.ll | 24
-rw-r--r--  test/CodeGen/R600/fmax3.ll | 12
-rw-r--r--  test/CodeGen/R600/fmax_legacy.f64.ll | 32
-rw-r--r--  test/CodeGen/R600/fmax_legacy.ll | 40
-rw-r--r--  test/CodeGen/R600/fmin3.ll | 12
-rw-r--r--  test/CodeGen/R600/fmin_legacy.f64.ll | 32
-rw-r--r--  test/CodeGen/R600/fmin_legacy.ll | 40
-rw-r--r--  test/CodeGen/R600/fmul.ll | 6
-rw-r--r--  test/CodeGen/R600/fmul64.ll | 12
-rw-r--r--  test/CodeGen/R600/fmuladd.ll | 92
-rw-r--r--  test/CodeGen/R600/fneg-fabs.f64.ll | 4
-rw-r--r--  test/CodeGen/R600/fneg-fabs.ll | 2
-rw-r--r--  test/CodeGen/R600/fp16_to_fp.ll | 4
-rw-r--r--  test/CodeGen/R600/fp32_to_fp16.ll | 2
-rw-r--r--  test/CodeGen/R600/fp_to_sint.f64.ll | 4
-rw-r--r--  test/CodeGen/R600/fp_to_sint.ll | 2
-rw-r--r--  test/CodeGen/R600/fp_to_uint.f64.ll | 4
-rw-r--r--  test/CodeGen/R600/fp_to_uint.ll | 2
-rw-r--r--  test/CodeGen/R600/frem.ll | 38
-rw-r--r--  test/CodeGen/R600/fsqrt.ll | 4
-rw-r--r--  test/CodeGen/R600/fsub.ll | 12
-rw-r--r--  test/CodeGen/R600/fsub64.ll | 18
-rw-r--r--  test/CodeGen/R600/ftrunc.f64.ll | 6
-rw-r--r--  test/CodeGen/R600/gep-address-space.ll | 8
-rw-r--r--  test/CodeGen/R600/global-directive.ll | 6
-rw-r--r--  test/CodeGen/R600/global-extload-i1.ll | 64
-rw-r--r--  test/CodeGen/R600/global-extload-i16.ll | 64
-rw-r--r--  test/CodeGen/R600/global-extload-i32.ll | 28
-rw-r--r--  test/CodeGen/R600/global-extload-i8.ll | 64
-rw-r--r--  test/CodeGen/R600/global-zero-initializer.ll | 4
-rw-r--r--  test/CodeGen/R600/global_atomics.ll | 160
-rw-r--r--  test/CodeGen/R600/gv-const-addrspace-fail.ll | 16
-rw-r--r--  test/CodeGen/R600/gv-const-addrspace.ll | 20
-rw-r--r--  test/CodeGen/R600/half.ll | 12
-rw-r--r--  test/CodeGen/R600/hsa.ll | 2
-rw-r--r--  test/CodeGen/R600/i1-copy-phi.ll | 2
-rw-r--r--  test/CodeGen/R600/i8-to-double-to-float.ll | 2
-rw-r--r--  test/CodeGen/R600/icmp-select-sete-reverse-args.ll | 6
-rw-r--r--  test/CodeGen/R600/imm.ll | 4
-rw-r--r--  test/CodeGen/R600/indirect-addressing-si.ll | 8
-rw-r--r--  test/CodeGen/R600/indirect-private-64.ll | 24
-rw-r--r--  test/CodeGen/R600/insert_vector_elt.ll | 6
-rw-r--r--  test/CodeGen/R600/jump-address.ll | 6
-rw-r--r--  test/CodeGen/R600/kcache-fold.ll | 48
-rw-r--r--  test/CodeGen/R600/large-alloca.ll | 6
-rw-r--r--  test/CodeGen/R600/large-constant-initializer.ll | 2
-rw-r--r--  test/CodeGen/R600/lds-initializer.ll | 4
-rw-r--r--  test/CodeGen/R600/lds-oqap-crash.ll | 2
-rw-r--r--  test/CodeGen/R600/lds-output-queue.ll | 18
-rw-r--r--  test/CodeGen/R600/lds-size.ll | 2
-rw-r--r--  test/CodeGen/R600/lds-zero-initializer.ll | 4
-rw-r--r--  test/CodeGen/R600/legalizedag-bug-expand-setcc.ll | 2
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.abs.ll | 4
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll | 6
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll | 6
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll | 37
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll | 96
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.bfm.ll | 27
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.brev.ll | 2
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.class.ll | 72
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.cube.ll | 8
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll | 8
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll | 48
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.div_scale.ll | 88
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.flbit.i32.ll | 28
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.fract.f64.ll | 60
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.fract.ll | 51
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.imax.ll | 2
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.imin.ll | 2
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.tex.ll | 2
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll | 6
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.umad24.ll | 10
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.umax.ll | 4
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.umin.ll | 4
-rw-r--r--  test/CodeGen/R600/llvm.SI.imageload.ll | 20
-rw-r--r--  test/CodeGen/R600/llvm.SI.load.dword.ll | 4
-rw-r--r--  test/CodeGen/R600/llvm.amdgpu.dp4.ll | 4
-rw-r--r--  test/CodeGen/R600/llvm.round.f64.ll | 8
-rw-r--r--  test/CodeGen/R600/llvm.round.ll | 2
-rw-r--r--  test/CodeGen/R600/load-i1.ll | 14
-rw-r--r--  test/CodeGen/R600/load-input-fold.ll | 38
-rw-r--r--  test/CodeGen/R600/load.ll | 102
-rw-r--r--  test/CodeGen/R600/load.vec.ll | 4
-rw-r--r--  test/CodeGen/R600/load64.ll | 6
-rw-r--r--  test/CodeGen/R600/local-64.ll | 34
-rw-r--r--  test/CodeGen/R600/local-atomics.ll | 56
-rw-r--r--  test/CodeGen/R600/local-atomics64.ll | 48
-rw-r--r--  test/CodeGen/R600/local-memory-two-objects.ll | 19
-rw-r--r--  test/CodeGen/R600/local-memory.ll | 11
-rw-r--r--  test/CodeGen/R600/loop-address.ll | 2
-rw-r--r--  test/CodeGen/R600/loop-idiom.ll | 8
-rw-r--r--  test/CodeGen/R600/m0-spill.ll | 4
-rw-r--r--  test/CodeGen/R600/mad-combine.ll | 276
-rw-r--r--  test/CodeGen/R600/mad-sub.ll | 118
-rw-r--r--  test/CodeGen/R600/madak.ll | 78
-rw-r--r--  test/CodeGen/R600/madmk.ll | 78
-rw-r--r--  test/CodeGen/R600/max.ll | 58
-rw-r--r--  test/CodeGen/R600/max3.ll | 28
-rw-r--r--  test/CodeGen/R600/min.ll | 70
-rw-r--r--  test/CodeGen/R600/min3.ll | 76
-rw-r--r--  test/CodeGen/R600/missing-store.ll | 6
-rw-r--r--  test/CodeGen/R600/mubuf.ll | 44
-rw-r--r--  test/CodeGen/R600/mul.ll | 34
-rw-r--r--  test/CodeGen/R600/no-initializer-constant-addrspace.ll | 4
-rw-r--r--  test/CodeGen/R600/no-shrink-extloads.ll | 48
-rw-r--r--  test/CodeGen/R600/operand-folding.ll | 2
-rw-r--r--  test/CodeGen/R600/or.ll | 40
-rw-r--r--  test/CodeGen/R600/parallelandifcollapse.ll | 16
-rw-r--r--  test/CodeGen/R600/parallelorifcollapse.ll | 16
-rw-r--r--  test/CodeGen/R600/private-memory-atomics.ll | 12
-rw-r--r--  test/CodeGen/R600/private-memory-broken.ll | 6
-rw-r--r--  test/CodeGen/R600/private-memory.ll | 152
-rw-r--r--  test/CodeGen/R600/pv-packing.ll | 4
-rw-r--r--  test/CodeGen/R600/pv.ll | 68
-rw-r--r--  test/CodeGen/R600/r600-export-fix.ll | 50
-rw-r--r--  test/CodeGen/R600/r600cfg.ll | 2
-rw-r--r--  test/CodeGen/R600/register-count-comments.ll | 10
-rw-r--r--  test/CodeGen/R600/reorder-stores.ll | 16
-rw-r--r--  test/CodeGen/R600/rotl.i64.ll | 4
-rw-r--r--  test/CodeGen/R600/rotr.i64.ll | 8
-rw-r--r--  test/CodeGen/R600/rsq.ll | 18
-rw-r--r--  test/CodeGen/R600/s_movk_i32.ll | 26
-rw-r--r--  test/CodeGen/R600/saddo.ll | 8
-rw-r--r--  test/CodeGen/R600/salu-to-valu.ll | 30
-rw-r--r--  test/CodeGen/R600/scalar_to_vector.ll | 6
-rw-r--r--  test/CodeGen/R600/schedule-fs-loop-nested.ll | 8
-rw-r--r--  test/CodeGen/R600/schedule-fs-loop.ll | 8
-rw-r--r--  test/CodeGen/R600/schedule-global-loads.ll | 12
-rw-r--r--  test/CodeGen/R600/schedule-if-2.ll | 8
-rw-r--r--  test/CodeGen/R600/schedule-if.ll | 6
-rw-r--r--  test/CodeGen/R600/schedule-kernel-arg-loads.ll | 35
-rw-r--r--  test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll | 32
-rw-r--r--  test/CodeGen/R600/schedule-vs-if-nested-loop.ll | 32
-rw-r--r--  test/CodeGen/R600/scratch-buffer.ll | 30
-rw-r--r--  test/CodeGen/R600/sdiv.ll | 26
-rw-r--r--  test/CodeGen/R600/sdivrem24.ll | 72
-rw-r--r--  test/CodeGen/R600/select64.ll | 8
-rw-r--r--  test/CodeGen/R600/selectcc-cnd.ll | 2
-rw-r--r--  test/CodeGen/R600/selectcc-cnde-int.ll | 2
-rw-r--r--  test/CodeGen/R600/selectcc-icmp-select-float.ll | 2
-rw-r--r--  test/CodeGen/R600/selectcc-opt.ll | 4
-rw-r--r--  test/CodeGen/R600/setcc-opt.ll | 20
-rw-r--r--  test/CodeGen/R600/setcc.ll | 26
-rw-r--r--  test/CodeGen/R600/sext-in-reg.ll | 89
-rw-r--r--  test/CodeGen/R600/sgpr-control-flow.ll | 14
-rw-r--r--  test/CodeGen/R600/sgpr-copy-duplicate-operand.ll | 2
-rw-r--r--  test/CodeGen/R600/sgpr-copy.ll | 46
-rw-r--r--  test/CodeGen/R600/shl.ll | 30
-rw-r--r--  test/CodeGen/R600/shl_add_constant.ll | 12
-rw-r--r--  test/CodeGen/R600/shl_add_ptr.ll | 50
-rw-r--r--  test/CodeGen/R600/si-lod-bias.ll | 12
-rw-r--r--  test/CodeGen/R600/si-sgpr-spill.ll | 184
-rw-r--r--  test/CodeGen/R600/si-triv-disjoint-mem-access.ll | 100
-rw-r--r--  test/CodeGen/R600/si-vector-hang.ll | 74
-rw-r--r--  test/CodeGen/R600/sign_extend.ll | 2
-rw-r--r--  test/CodeGen/R600/simplify-demanded-bits-build-pair.ll | 12
-rw-r--r--  test/CodeGen/R600/sint_to_fp.f64.ll | 4
-rw-r--r--  test/CodeGen/R600/sint_to_fp.ll | 2
-rw-r--r--  test/CodeGen/R600/smrd.ll | 28
-rw-r--r--  test/CodeGen/R600/split-scalar-i64-add.ll | 4
-rw-r--r--  test/CodeGen/R600/sra.ll | 30
-rw-r--r--  test/CodeGen/R600/srem.ll | 50
-rw-r--r--  test/CodeGen/R600/srl.ll | 36
-rw-r--r--  test/CodeGen/R600/ssubo.ll | 8
-rw-r--r--  test/CodeGen/R600/store-barrier.ll | 24
-rw-r--r--  test/CodeGen/R600/store-vector-ptrs.ll | 2
-rw-r--r--  test/CodeGen/R600/store.ll | 96
-rw-r--r--  test/CodeGen/R600/store.r600.ll | 4
-rw-r--r--  test/CodeGen/R600/sub.ll | 42
-rw-r--r--  test/CodeGen/R600/swizzle-export.ll | 42
-rw-r--r--  test/CodeGen/R600/trunc-cmp-constant.ll | 40
-rw-r--r--  test/CodeGen/R600/trunc.ll | 16
-rw-r--r--  test/CodeGen/R600/tti-unroll-prefs.ll | 2
-rw-r--r--  test/CodeGen/R600/uaddo.ll | 8
-rw-r--r--  test/CodeGen/R600/udiv.ll | 18
-rw-r--r--  test/CodeGen/R600/udivrem24.ll | 72
-rw-r--r--  test/CodeGen/R600/uint_to_fp.f64.ll | 4
-rw-r--r--  test/CodeGen/R600/uint_to_fp.ll | 4
-rw-r--r--  test/CodeGen/R600/unaligned-load-store.ll | 32
-rw-r--r--  test/CodeGen/R600/unhandled-loop-condition-assertion.ll | 60
-rw-r--r--  test/CodeGen/R600/unroll.ll | 6
-rw-r--r--  test/CodeGen/R600/urem.ll | 38
-rw-r--r--  test/CodeGen/R600/usubo.ll | 8
-rw-r--r--  test/CodeGen/R600/v_cndmask.ll | 4
-rw-r--r--  test/CodeGen/R600/valu-i1.ll | 44
-rw-r--r--  test/CodeGen/R600/vector-alloca.ll | 40
-rw-r--r--  test/CodeGen/R600/vertex-fetch-encoding.ll | 4
-rw-r--r--  test/CodeGen/R600/vop-shrink.ll | 2
-rw-r--r--  test/CodeGen/R600/vselect.ll | 16
-rw-r--r--  test/CodeGen/R600/vtx-fetch-branch.ll | 2
-rw-r--r--  test/CodeGen/R600/vtx-schedule.ll | 4
-rw-r--r--  test/CodeGen/R600/wait.ll | 10
-rw-r--r--  test/CodeGen/R600/wrong-transalu-pos-fix.ll | 2
-rw-r--r--  test/CodeGen/R600/xor.ll | 38
255 files changed, 4271 insertions(+), 3268 deletions(-)
diff --git a/test/CodeGen/R600/32-bit-local-address-space.ll b/test/CodeGen/R600/32-bit-local-address-space.ll
index 6aca826..c7bcfd2 100644
--- a/test/CodeGen/R600/32-bit-local-address-space.ll
+++ b/test/CodeGen/R600/32-bit-local-address-space.ll
@@ -15,7 +15,7 @@
; SI: ds_read_b32 v{{[0-9]+}}, [[PTR]]
define void @local_address_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
- %0 = load i32 addrspace(3)* %in
+ %0 = load i32, i32 addrspace(3)* %in
store i32 %0, i32 addrspace(1)* %out
ret void
}
@@ -26,8 +26,8 @@ entry:
; SI: ds_read_b32 [[VPTR]]
define void @local_address_gep(i32 addrspace(1)* %out, i32 addrspace(3)* %in, i32 %offset) {
entry:
- %0 = getelementptr i32 addrspace(3)* %in, i32 %offset
- %1 = load i32 addrspace(3)* %0
+ %0 = getelementptr i32, i32 addrspace(3)* %in, i32 %offset
+ %1 = load i32, i32 addrspace(3)* %0
store i32 %1, i32 addrspace(1)* %out
ret void
}
@@ -37,8 +37,8 @@ entry:
; SI: ds_read_b32 v{{[0-9]+}}, [[VPTR]] offset:4
define void @local_address_gep_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
- %0 = getelementptr i32 addrspace(3)* %in, i32 1
- %1 = load i32 addrspace(3)* %0
+ %0 = getelementptr i32, i32 addrspace(3)* %in, i32 1
+ %1 = load i32, i32 addrspace(3)* %0
store i32 %1, i32 addrspace(1)* %out
ret void
}
@@ -50,8 +50,8 @@ entry:
; SI: ds_read_b32 [[VPTR]]
define void @local_address_gep_large_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
- %0 = getelementptr i32 addrspace(3)* %in, i32 16385
- %1 = load i32 addrspace(3)* %0
+ %0 = getelementptr i32, i32 addrspace(3)* %in, i32 16385
+ %1 = load i32, i32 addrspace(3)* %0
store i32 %1, i32 addrspace(1)* %out
ret void
}
@@ -72,8 +72,8 @@ define void @null_32bit_lds_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %lds)
; SI-NEXT: s_add_i32
; SI: ds_read_b32
define void @mul_32bit_ptr(float addrspace(1)* %out, [3 x float] addrspace(3)* %lds, i32 %tid) {
- %ptr = getelementptr [3 x float] addrspace(3)* %lds, i32 %tid, i32 0
- %val = load float addrspace(3)* %ptr
+ %ptr = getelementptr [3 x float], [3 x float] addrspace(3)* %lds, i32 %tid, i32 0
+ %val = load float, float addrspace(3)* %ptr
store float %val, float addrspace(1)* %out
ret void
}
@@ -84,7 +84,7 @@ define void @mul_32bit_ptr(float addrspace(1)* %out, [3 x float] addrspace(3)* %
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
; SI: ds_read_b32 v{{[0-9]+}}, [[REG]]
define void @infer_ptr_alignment_global_offset(float addrspace(1)* %out, i32 %tid) {
- %val = load float addrspace(3)* @g_lds
+ %val = load float, float addrspace(3)* @g_lds
store float %val, float addrspace(1)* %out
ret void
}
@@ -96,7 +96,7 @@ define void @infer_ptr_alignment_global_offset(float addrspace(1)* %out, i32 %ti
; FUNC-LABEL: {{^}}global_ptr:
; SI: ds_write_b32
define void @global_ptr() nounwind {
- store i32 addrspace(3)* getelementptr ([16384 x i32] addrspace(3)* @dst, i32 0, i32 16), i32 addrspace(3)* addrspace(3)* @ptr
+ store i32 addrspace(3)* getelementptr ([16384 x i32], [16384 x i32] addrspace(3)* @dst, i32 0, i32 16), i32 addrspace(3)* addrspace(3)* @ptr
ret void
}
@@ -112,7 +112,7 @@ define void @local_address_store(i32 addrspace(3)* %out, i32 %val) {
; SI: v_mov_b32_e32 [[ADDR:v[0-9]+]], [[SADDR]]
; SI: ds_write_b32 [[ADDR]], v{{[0-9]+}}
define void @local_address_gep_store(i32 addrspace(3)* %out, i32, i32 %val, i32 %offset) {
- %gep = getelementptr i32 addrspace(3)* %out, i32 %offset
+ %gep = getelementptr i32, i32 addrspace(3)* %out, i32 %offset
store i32 %val, i32 addrspace(3)* %gep, align 4
ret void
}
@@ -122,7 +122,7 @@ define void @local_address_gep_store(i32 addrspace(3)* %out, i32, i32 %val, i32
; SI: v_mov_b32_e32 [[VAL:v[0-9]+]], s{{[0-9]+}}
; SI: ds_write_b32 [[VPTR]], [[VAL]] offset:4
define void @local_address_gep_const_offset_store(i32 addrspace(3)* %out, i32 %val) {
- %gep = getelementptr i32 addrspace(3)* %out, i32 1
+ %gep = getelementptr i32, i32 addrspace(3)* %out, i32 1
store i32 %val, i32 addrspace(3)* %gep, align 4
ret void
}
@@ -133,7 +133,7 @@ define void @local_address_gep_const_offset_store(i32 addrspace(3)* %out, i32 %v
; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; SI: ds_write_b32 [[VPTR]], v{{[0-9]+$}}
define void @local_address_gep_large_const_offset_store(i32 addrspace(3)* %out, i32 %val) {
- %gep = getelementptr i32 addrspace(3)* %out, i32 16385
+ %gep = getelementptr i32, i32 addrspace(3)* %out, i32 16385
store i32 %val, i32 addrspace(3)* %gep, align 4
ret void
}
diff --git a/test/CodeGen/R600/add-debug.ll b/test/CodeGen/R600/add-debug.ll
index a83c689..529905d 100644
--- a/test/CodeGen/R600/add-debug.ll
+++ b/test/CodeGen/R600/add-debug.ll
@@ -9,7 +9,7 @@ entry:
br i1 %0, label %if, label %else
if:
- %1 = load i64 addrspace(1)* %in
+ %1 = load i64, i64 addrspace(1)* %in
br label %endif
else:
diff --git a/test/CodeGen/R600/add.ll b/test/CodeGen/R600/add.ll
index 3a8b97c..7027161 100644
--- a/test/CodeGen/R600/add.ll
+++ b/test/CodeGen/R600/add.ll
@@ -9,9 +9,9 @@
;SI-NOT: [[REG]]
;SI: buffer_store_dword [[REG]],
define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %a = load i32 addrspace(1)* %in
- %b = load i32 addrspace(1)* %b_ptr
+ %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %a = load i32, i32 addrspace(1)* %in
+ %b = load i32, i32 addrspace(1)* %b_ptr
%result = add i32 %a, %b
store i32 %result, i32 addrspace(1)* %out
ret void
@@ -25,9 +25,9 @@ define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32> addrspace(1)* %in
- %b = load <2 x i32> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
+ %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
+ %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr
%result = add <2 x i32> %a, %b
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
ret void
@@ -45,9 +45,9 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32> addrspace(1)* %in
- %b = load <4 x i32> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
+ %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
+ %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
%result = add <4 x i32> %a, %b
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
@@ -136,7 +136,7 @@ entry:
; SI-NOT: v_addc_u32_e32 s
define void @add64_sgpr_vgpr(i64 addrspace(1)* %out, i64 %a, i64 addrspace(1)* %in) {
entry:
- %0 = load i64 addrspace(1)* %in
+ %0 = load i64, i64 addrspace(1)* %in
%1 = add i64 %a, %0
store i64 %1, i64 addrspace(1)* %out
ret void
@@ -152,7 +152,7 @@ entry:
br i1 %0, label %if, label %else
if:
- %1 = load i64 addrspace(1)* %in
+ %1 = load i64, i64 addrspace(1)* %in
br label %endif
else:
diff --git a/test/CodeGen/R600/add_i64.ll b/test/CodeGen/R600/add_i64.ll
index 1769409..8346add 100644
--- a/test/CodeGen/R600/add_i64.ll
+++ b/test/CodeGen/R600/add_i64.ll
@@ -8,10 +8,10 @@ declare i32 @llvm.r600.read.tidig.x() readnone
; SI: v_addc_u32
define void @test_i64_vreg(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) {
%tid = call i32 @llvm.r600.read.tidig.x() readnone
- %a_ptr = getelementptr i64 addrspace(1)* %inA, i32 %tid
- %b_ptr = getelementptr i64 addrspace(1)* %inB, i32 %tid
- %a = load i64 addrspace(1)* %a_ptr
- %b = load i64 addrspace(1)* %b_ptr
+ %a_ptr = getelementptr i64, i64 addrspace(1)* %inA, i32 %tid
+ %b_ptr = getelementptr i64, i64 addrspace(1)* %inB, i32 %tid
+ %a = load i64, i64 addrspace(1)* %a_ptr
+ %b = load i64, i64 addrspace(1)* %b_ptr
%result = add i64 %a, %b
store i64 %result, i64 addrspace(1)* %out
ret void
@@ -22,7 +22,7 @@ define void @test_i64_vreg(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noa
; SI: v_add_i32
; SI: v_addc_u32
define void @sgpr_operand(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 addrspace(1)* noalias %in_bar, i64 %a) {
- %foo = load i64 addrspace(1)* %in, align 8
+ %foo = load i64, i64 addrspace(1)* %in, align 8
%result = add i64 %foo, %a
store i64 %result, i64 addrspace(1)* %out
ret void
@@ -35,7 +35,7 @@ define void @sgpr_operand(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noal
; SI: v_add_i32
; SI: v_addc_u32
define void @sgpr_operand_reversed(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %a) {
- %foo = load i64 addrspace(1)* %in, align 8
+ %foo = load i64, i64 addrspace(1)* %in, align 8
%result = add i64 %a, %foo
store i64 %result, i64 addrspace(1)* %out
ret void
@@ -60,10 +60,10 @@ define void @test_v2i64_sreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> %a,
; SI: v_addc_u32
define void @test_v2i64_vreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %inA, <2 x i64> addrspace(1)* noalias %inB) {
%tid = call i32 @llvm.r600.read.tidig.x() readnone
- %a_ptr = getelementptr <2 x i64> addrspace(1)* %inA, i32 %tid
- %b_ptr = getelementptr <2 x i64> addrspace(1)* %inB, i32 %tid
- %a = load <2 x i64> addrspace(1)* %a_ptr
- %b = load <2 x i64> addrspace(1)* %b_ptr
+ %a_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inA, i32 %tid
+ %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inB, i32 %tid
+ %a = load <2 x i64>, <2 x i64> addrspace(1)* %a_ptr
+ %b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr
%result = add <2 x i64> %a, %b
store <2 x i64> %result, <2 x i64> addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/address-space.ll b/test/CodeGen/R600/address-space.ll
index 74ea9f0..4be8c58 100644
--- a/test/CodeGen/R600/address-space.ll
+++ b/test/CodeGen/R600/address-space.ll
@@ -16,13 +16,13 @@
; CHECK-DAG: ds_read_b32 v{{[0-9]+}}, [[VREG2]] offset:20
define void @do_as_ptr_calcs(%struct.foo addrspace(3)* nocapture %ptr) nounwind {
entry:
- %x = getelementptr inbounds %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0
- %y = getelementptr inbounds %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 2
+ %x = getelementptr inbounds %struct.foo, %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0
+ %y = getelementptr inbounds %struct.foo, %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 2
br label %bb32
bb32:
- %a = load float addrspace(3)* %x, align 4
- %b = load float addrspace(3)* %y, align 4
+ %a = load float, float addrspace(3)* %x, align 4
+ %b = load float, float addrspace(3)* %y, align 4
%cmp = fcmp one float %a, %b
br i1 %cmp, label %bb34, label %bb33
diff --git a/test/CodeGen/R600/and.ll b/test/CodeGen/R600/and.ll
index bb7cba3..5672d47 100644
--- a/test/CodeGen/R600/and.ll
+++ b/test/CodeGen/R600/and.ll
@@ -10,9 +10,9 @@
; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32> addrspace(1) * %in
- %b = load <2 x i32> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
+ %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
+ %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
%result = and <2 x i32> %a, %b
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
ret void
@@ -30,9 +30,9 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32> addrspace(1) * %in
- %b = load <4 x i32> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
+ %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
+ %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
%result = and <4 x i32> %a, %b
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
@@ -57,8 +57,8 @@ define void @s_and_constant_i32(i32 addrspace(1)* %out, i32 %a) {
; FUNC-LABEL: {{^}}v_and_i32:
; SI: v_and_b32
define void @v_and_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) {
- %a = load i32 addrspace(1)* %aptr, align 4
- %b = load i32 addrspace(1)* %bptr, align 4
+ %a = load i32, i32 addrspace(1)* %aptr, align 4
+ %b = load i32, i32 addrspace(1)* %bptr, align 4
%and = and i32 %a, %b
store i32 %and, i32 addrspace(1)* %out, align 4
ret void
@@ -67,7 +67,7 @@ define void @v_and_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addr
; FUNC-LABEL: {{^}}v_and_constant_i32
; SI: v_and_b32_e32 v{{[0-9]+}}, 0x12d687, v{{[0-9]+}}
define void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
- %a = load i32 addrspace(1)* %aptr, align 4
+ %a = load i32, i32 addrspace(1)* %aptr, align 4
%and = and i32 %a, 1234567
store i32 %and, i32 addrspace(1)* %out, align 4
ret void
@@ -76,7 +76,7 @@ define void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr)
; FUNC-LABEL: {{^}}v_and_inline_imm_64_i32
; SI: v_and_b32_e32 v{{[0-9]+}}, 64, v{{[0-9]+}}
define void @v_and_inline_imm_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
- %a = load i32 addrspace(1)* %aptr, align 4
+ %a = load i32, i32 addrspace(1)* %aptr, align 4
%and = and i32 %a, 64
store i32 %and, i32 addrspace(1)* %out, align 4
ret void
@@ -85,7 +85,7 @@ define void @v_and_inline_imm_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %
; FUNC-LABEL: {{^}}v_and_inline_imm_neg_16_i32
; SI: v_and_b32_e32 v{{[0-9]+}}, -16, v{{[0-9]+}}
define void @v_and_inline_imm_neg_16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
- %a = load i32 addrspace(1)* %aptr, align 4
+ %a = load i32, i32 addrspace(1)* %aptr, align 4
%and = and i32 %a, -16
store i32 %and, i32 addrspace(1)* %out, align 4
ret void
@@ -120,8 +120,8 @@ define void @s_and_constant_i64(i64 addrspace(1)* %out, i64 %a) {
; SI: v_and_b32
; SI: v_and_b32
define void @v_and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
- %a = load i64 addrspace(1)* %aptr, align 8
- %b = load i64 addrspace(1)* %bptr, align 8
+ %a = load i64, i64 addrspace(1)* %aptr, align 8
+ %b = load i64, i64 addrspace(1)* %bptr, align 8
%and = and i64 %a, %b
store i64 %and, i64 addrspace(1)* %out, align 8
ret void
@@ -136,8 +136,8 @@ entry:
br i1 %tmp0, label %if, label %endif
if:
- %a = load i64 addrspace(1)* %aptr, align 8
- %b = load i64 addrspace(1)* %bptr, align 8
+ %a = load i64, i64 addrspace(1)* %aptr, align 8
+ %b = load i64, i64 addrspace(1)* %bptr, align 8
%and = and i64 %a, %b
br label %endif
@@ -151,7 +151,7 @@ endif:
; SI: v_and_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; SI: v_and_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
define void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
- %a = load i64 addrspace(1)* %aptr, align 8
+ %a = load i64, i64 addrspace(1)* %aptr, align 8
%and = and i64 %a, 1234567
store i64 %and, i64 addrspace(1)* %out, align 8
ret void
@@ -162,7 +162,7 @@ define void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr)
; SI: v_and_b32_e32 {{v[0-9]+}}, 64, {{v[0-9]+}}
; SI: v_and_b32_e32 {{v[0-9]+}}, 0, {{v[0-9]+}}
define void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
- %a = load i64 addrspace(1)* %aptr, align 8
+ %a = load i64, i64 addrspace(1)* %aptr, align 8
%and = and i64 %a, 64
store i64 %and, i64 addrspace(1)* %out, align 8
ret void
diff --git a/test/CodeGen/R600/array-ptr-calc-i32.ll b/test/CodeGen/R600/array-ptr-calc-i32.ll
index 33a8aee..8c2a079 100644
--- a/test/CodeGen/R600/array-ptr-calc-i32.ll
+++ b/test/CodeGen/R600/array-ptr-calc-i32.ll
@@ -20,24 +20,24 @@ declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
; FIXME: The AMDGPUPromoteAlloca pass should be able to convert this
; alloca to a vector. It currently fails because it does not know how
; to interpret:
-; getelementptr [4 x i32]* %alloca, i32 1, i32 %b
+; getelementptr [4 x i32], [4 x i32]* %alloca, i32 1, i32 %b
; SI-PROMOTE: v_add_i32_e32 [[PTRREG:v[0-9]+]], 16
; SI-PROMOTE: ds_write_b32 [[PTRREG]]
define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) {
%alloca = alloca [4 x i32], i32 4, align 16
%tid = call i32 @llvm.SI.tid() readnone
- %a_ptr = getelementptr i32 addrspace(1)* %inA, i32 %tid
- %b_ptr = getelementptr i32 addrspace(1)* %inB, i32 %tid
- %a = load i32 addrspace(1)* %a_ptr
- %b = load i32 addrspace(1)* %b_ptr
+ %a_ptr = getelementptr i32, i32 addrspace(1)* %inA, i32 %tid
+ %b_ptr = getelementptr i32, i32 addrspace(1)* %inB, i32 %tid
+ %a = load i32, i32 addrspace(1)* %a_ptr
+ %b = load i32, i32 addrspace(1)* %b_ptr
%result = add i32 %a, %b
- %alloca_ptr = getelementptr [4 x i32]* %alloca, i32 1, i32 %b
+ %alloca_ptr = getelementptr [4 x i32], [4 x i32]* %alloca, i32 1, i32 %b
store i32 %result, i32* %alloca_ptr, align 4
; Dummy call
call void @llvm.AMDGPU.barrier.local() nounwind noduplicate
- %reload = load i32* %alloca_ptr, align 4
- %out_ptr = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %reload = load i32, i32* %alloca_ptr, align 4
+ %out_ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
store i32 %reload, i32 addrspace(1)* %out_ptr, align 4
ret void
}
diff --git a/test/CodeGen/R600/array-ptr-calc-i64.ll b/test/CodeGen/R600/array-ptr-calc-i64.ll
index 32e657d..eae095e 100644
--- a/test/CodeGen/R600/array-ptr-calc-i64.ll
+++ b/test/CodeGen/R600/array-ptr-calc-i64.ll
@@ -7,10 +7,10 @@ declare i32 @llvm.SI.tid() readnone
; SI: v_mul_hi_i32
define void @test_array_ptr_calc(i32 addrspace(1)* noalias %out, [1025 x i32] addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) {
%tid = call i32 @llvm.SI.tid() readnone
- %a_ptr = getelementptr [1025 x i32] addrspace(1)* %inA, i32 %tid, i32 0
- %b_ptr = getelementptr i32 addrspace(1)* %inB, i32 %tid
- %a = load i32 addrspace(1)* %a_ptr
- %b = load i32 addrspace(1)* %b_ptr
+ %a_ptr = getelementptr [1025 x i32], [1025 x i32] addrspace(1)* %inA, i32 %tid, i32 0
+ %b_ptr = getelementptr i32, i32 addrspace(1)* %inB, i32 %tid
+ %a = load i32, i32 addrspace(1)* %a_ptr
+ %b = load i32, i32 addrspace(1)* %b_ptr
%result = add i32 %a, %b
store i32 %result, i32 addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/atomic_cmp_swap_local.ll b/test/CodeGen/R600/atomic_cmp_swap_local.ll
index 6c76ad7..ef2560e 100644
--- a/test/CodeGen/R600/atomic_cmp_swap_local.ll
+++ b/test/CodeGen/R600/atomic_cmp_swap_local.ll
@@ -13,7 +13,7 @@
; GCN: ds_cmpst_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[VCMP]], [[VSWAP]] offset:16
; GCN: s_endpgm
define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic
%result = extractvalue { i32, i1 } %pair, 0
store i32 %result, i32 addrspace(1)* %out, align 4
@@ -34,7 +34,7 @@ define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrs
; GCN: buffer_store_dwordx2 [[RESULT]],
; GCN: s_endpgm
define void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr, i64 %swap) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%pair = cmpxchg i64 addrspace(3)* %gep, i64 7, i64 %swap seq_cst monotonic
%result = extractvalue { i64, i1 } %pair, 0
store i64 %result, i64 addrspace(1)* %out, align 8
@@ -48,7 +48,7 @@ define void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrs
define void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap, i32 %a, i32 %b) nounwind {
%sub = sub i32 %a, %b
%add = add i32 %sub, 4
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add
%pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic
%result = extractvalue { i32, i1 } %pair, 0
store i32 %result, i32 addrspace(1)* %out, align 4
@@ -66,7 +66,7 @@ define void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i3
; GCN: ds_cmpst_b32 [[VPTR]], [[VCMP]], [[VSWAP]] offset:16
; GCN: s_endpgm
define void @lds_atomic_cmpxchg_noret_i32_offset(i32 addrspace(3)* %ptr, i32 %swap) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic
%result = extractvalue { i32, i1 } %pair, 0
ret void
@@ -85,7 +85,7 @@ define void @lds_atomic_cmpxchg_noret_i32_offset(i32 addrspace(3)* %ptr, i32 %sw
; GCN: ds_cmpst_b64 [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32
; GCN: s_endpgm
define void @lds_atomic_cmpxchg_noret_i64_offset(i64 addrspace(3)* %ptr, i64 %swap) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%pair = cmpxchg i64 addrspace(3)* %gep, i64 7, i64 %swap seq_cst monotonic
%result = extractvalue { i64, i1 } %pair, 0
ret void
diff --git a/test/CodeGen/R600/atomic_load_add.ll b/test/CodeGen/R600/atomic_load_add.ll
index 5fe05f2..20c6854 100644
--- a/test/CodeGen/R600/atomic_load_add.ll
+++ b/test/CodeGen/R600/atomic_load_add.ll
@@ -14,7 +14,7 @@ define void @atomic_add_local(i32 addrspace(3)* %local) {
; R600: LDS_ADD *
; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
define void @atomic_add_local_const_offset(i32 addrspace(3)* %local) {
- %gep = getelementptr i32 addrspace(3)* %local, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %local, i32 4
%val = atomicrmw volatile add i32 addrspace(3)* %gep, i32 5 seq_cst
ret void
}
@@ -32,7 +32,7 @@ define void @atomic_add_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %loc
; R600: LDS_ADD_RET *
; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:20
define void @atomic_add_ret_local_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
- %gep = getelementptr i32 addrspace(3)* %local, i32 5
+ %gep = getelementptr i32, i32 addrspace(3)* %local, i32 5
%val = atomicrmw volatile add i32 addrspace(3)* %gep, i32 5 seq_cst
store i32 %val, i32 addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/atomic_load_sub.ll b/test/CodeGen/R600/atomic_load_sub.ll
index 4072283..4c6f455 100644
--- a/test/CodeGen/R600/atomic_load_sub.ll
+++ b/test/CodeGen/R600/atomic_load_sub.ll
@@ -14,7 +14,7 @@ define void @atomic_sub_local(i32 addrspace(3)* %local) {
; R600: LDS_SUB *
; SI: ds_sub_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
define void @atomic_sub_local_const_offset(i32 addrspace(3)* %local) {
- %gep = getelementptr i32 addrspace(3)* %local, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %local, i32 4
%val = atomicrmw volatile sub i32 addrspace(3)* %gep, i32 5 seq_cst
ret void
}
@@ -32,7 +32,7 @@ define void @atomic_sub_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %loc
; R600: LDS_SUB_RET *
; SI: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:20
define void @atomic_sub_ret_local_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
- %gep = getelementptr i32 addrspace(3)* %local, i32 5
+ %gep = getelementptr i32, i32 addrspace(3)* %local, i32 5
%val = atomicrmw volatile sub i32 addrspace(3)* %gep, i32 5 seq_cst
store i32 %val, i32 addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/bfe_uint.ll b/test/CodeGen/R600/bfe_uint.ll
index 6fe23e9..32e3fc2 100644
--- a/test/CodeGen/R600/bfe_uint.ll
+++ b/test/CodeGen/R600/bfe_uint.ll
@@ -1,7 +1,5 @@
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-; XFAIL: *
-
; CHECK: {{^}}bfe_def:
; CHECK: BFE_UINT
define void @bfe_def(i32 addrspace(1)* %out, i32 %x) {
diff --git a/test/CodeGen/R600/big_alu.ll b/test/CodeGen/R600/big_alu.ll
index 28be216..2671c5d 100644
--- a/test/CodeGen/R600/big_alu.ll
+++ b/test/CodeGen/R600/big_alu.ll
@@ -51,29 +51,29 @@ main_body:
%43 = extractelement <4 x float> %reg7, i32 1
%44 = extractelement <4 x float> %reg7, i32 2
%45 = extractelement <4 x float> %reg7, i32 3
- %46 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
+ %46 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
%47 = extractelement <4 x float> %46, i32 0
- %48 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
+ %48 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
%49 = extractelement <4 x float> %48, i32 1
- %50 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
+ %50 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
%51 = extractelement <4 x float> %50, i32 2
- %52 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
+ %52 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
%53 = extractelement <4 x float> %52, i32 0
- %54 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+ %54 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
%55 = extractelement <4 x float> %54, i32 0
- %56 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+ %56 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
%57 = extractelement <4 x float> %56, i32 1
- %58 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+ %58 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
%59 = extractelement <4 x float> %58, i32 2
- %60 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+ %60 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
%61 = extractelement <4 x float> %60, i32 3
- %62 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
+ %62 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
%63 = extractelement <4 x float> %62, i32 0
- %64 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
+ %64 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
%65 = extractelement <4 x float> %64, i32 1
- %66 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
+ %66 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
%67 = extractelement <4 x float> %66, i32 2
- %68 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+ %68 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
%69 = extractelement <4 x float> %68, i32 0
%70 = fcmp oge float %69, 3.500000e+00
%71 = sext i1 %70 to i32
@@ -81,7 +81,7 @@ main_body:
%73 = bitcast float %72 to i32
%74 = icmp ne i32 %73, 0
%. = select i1 %74, float 0.000000e+00, float 0.000000e+00
- %75 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+ %75 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
%76 = extractelement <4 x float> %75, i32 0
%77 = fcmp oge float %76, 2.000000e+00
%78 = sext i1 %77 to i32
@@ -135,7 +135,7 @@ IF137: ; preds = %main_body
%123 = insertelement <4 x float> %122, float 0.000000e+00, i32 3
%124 = call float @llvm.AMDGPU.dp4(<4 x float> %119, <4 x float> %123)
%125 = fdiv float 1.000000e+00, %124
- %126 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+ %126 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
%127 = extractelement <4 x float> %126, i32 0
%128 = fmul float %127, %125
%129 = fmul float %103, %128
@@ -347,15 +347,15 @@ ENDIF136: ; preds = %main_body, %ENDIF15
%329 = fmul float %314, %328
%330 = fmul float %316, %328
%331 = fmul float %318, %328
- %332 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+ %332 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
%333 = extractelement <4 x float> %332, i32 0
%334 = fsub float -0.000000e+00, %333
%335 = fadd float 1.000000e+00, %334
- %336 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+ %336 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
%337 = extractelement <4 x float> %336, i32 0
%338 = fsub float -0.000000e+00, %337
%339 = fadd float 1.000000e+00, %338
- %340 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+ %340 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
%341 = extractelement <4 x float> %340, i32 0
%342 = fsub float -0.000000e+00, %341
%343 = fadd float 1.000000e+00, %342
@@ -1018,7 +1018,7 @@ ENDIF175: ; preds = %ENDIF172, %IF176
%temp92.11 = phi float [ %877, %IF176 ], [ %temp92.10, %ENDIF172 ]
%temp93.5 = phi float [ %878, %IF176 ], [ %temp93.4, %ENDIF172 ]
%temp94.5 = phi float [ %879, %IF176 ], [ %temp94.4, %ENDIF172 ]
- %880 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+ %880 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
%881 = extractelement <4 x float> %880, i32 0
%882 = fcmp olt float %881, %179
%883 = sext i1 %882 to i32
@@ -1114,12 +1114,12 @@ ENDIF178: ; preds = %ENDIF175, %IF179
%960 = fmul float %temp87.6, %956
%961 = fmul float %2, -2.000000e+00
%962 = fadd float %961, 1.000000e+00
- %963 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 23)
+ %963 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 23)
%964 = extractelement <4 x float> %963, i32 2
%965 = fsub float -0.000000e+00, %964
%966 = fadd float %962, %965
%967 = fdiv float 1.000000e+00, %966
- %968 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 24)
+ %968 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 24)
%969 = extractelement <4 x float> %968, i32 2
%970 = fmul float %969, %967
%971 = fsub float -0.000000e+00, %53
diff --git a/test/CodeGen/R600/bitcast.ll b/test/CodeGen/R600/bitcast.ll
index 1ba64af..fd56d95 100644
--- a/test/CodeGen/R600/bitcast.ll
+++ b/test/CodeGen/R600/bitcast.ll
@@ -9,7 +9,7 @@ declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float
; SI: s_endpgm
define void @v32i8_to_v8i32(<32 x i8> addrspace(2)* inreg) #0 {
entry:
- %1 = load <32 x i8> addrspace(2)* %0
+ %1 = load <32 x i8>, <32 x i8> addrspace(2)* %0
%2 = bitcast <32 x i8> %1 to <8 x i32>
%3 = extractelement <8 x i32> %2, i32 1
%4 = icmp ne i32 %3, 0
@@ -23,34 +23,34 @@ entry:
define void @i8ptr_v16i8ptr(<16 x i8> addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
%0 = bitcast i8 addrspace(1)* %in to <16 x i8> addrspace(1)*
- %1 = load <16 x i8> addrspace(1)* %0
+ %1 = load <16 x i8>, <16 x i8> addrspace(1)* %0
store <16 x i8> %1, <16 x i8> addrspace(1)* %out
ret void
}
define void @f32_to_v2i16(<2 x i16> addrspace(1)* %out, float addrspace(1)* %in) nounwind {
- %load = load float addrspace(1)* %in, align 4
+ %load = load float, float addrspace(1)* %in, align 4
%bc = bitcast float %load to <2 x i16>
store <2 x i16> %bc, <2 x i16> addrspace(1)* %out, align 4
ret void
}
define void @v2i16_to_f32(float addrspace(1)* %out, <2 x i16> addrspace(1)* %in) nounwind {
- %load = load <2 x i16> addrspace(1)* %in, align 4
+ %load = load <2 x i16>, <2 x i16> addrspace(1)* %in, align 4
%bc = bitcast <2 x i16> %load to float
store float %bc, float addrspace(1)* %out, align 4
ret void
}
define void @v4i8_to_i32(i32 addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
- %load = load <4 x i8> addrspace(1)* %in, align 4
+ %load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
%bc = bitcast <4 x i8> %load to i32
store i32 %bc, i32 addrspace(1)* %out, align 4
ret void
}
define void @i32_to_v4i8(<4 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %load = load i32 addrspace(1)* %in, align 4
+ %load = load i32, i32 addrspace(1)* %in, align 4
%bc = bitcast i32 %load to <4 x i8>
store <4 x i8> %bc, <4 x i8> addrspace(1)* %out, align 4
ret void
@@ -59,7 +59,7 @@ define void @i32_to_v4i8(<4 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nou
; FUNC-LABEL: {{^}}bitcast_v2i32_to_f64:
; SI: s_endpgm
define void @bitcast_v2i32_to_f64(double addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %val = load <2 x i32> addrspace(1)* %in, align 8
+ %val = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 8
%add = add <2 x i32> %val, <i32 4, i32 9>
%bc = bitcast <2 x i32> %add to double
store double %bc, double addrspace(1)* %out, align 8
@@ -69,7 +69,7 @@ define void @bitcast_v2i32_to_f64(double addrspace(1)* %out, <2 x i32> addrspace
; FUNC-LABEL: {{^}}bitcast_f64_to_v2i32:
; SI: s_endpgm
define void @bitcast_f64_to_v2i32(<2 x i32> addrspace(1)* %out, double addrspace(1)* %in) {
- %val = load double addrspace(1)* %in, align 8
+ %val = load double, double addrspace(1)* %in, align 8
%add = fadd double %val, 4.0
%bc = bitcast double %add to <2 x i32>
store <2 x i32> %bc, <2 x i32> addrspace(1)* %out, align 8
diff --git a/test/CodeGen/R600/bswap.ll b/test/CodeGen/R600/bswap.ll
index e93543d..4cf8e4b 100644
--- a/test/CodeGen/R600/bswap.ll
+++ b/test/CodeGen/R600/bswap.ll
@@ -18,7 +18,7 @@ declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>) nounwind readnone
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @test_bswap_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %val = load i32 addrspace(1)* %in, align 4
+ %val = load i32, i32 addrspace(1)* %in, align 4
%bswap = call i32 @llvm.bswap.i32(i32 %val) nounwind readnone
store i32 %bswap, i32 addrspace(1)* %out, align 4
ret void
@@ -33,7 +33,7 @@ define void @test_bswap_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
; SI-DAG: v_bfi_b32
; SI: s_endpgm
define void @test_bswap_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) nounwind {
- %val = load <2 x i32> addrspace(1)* %in, align 8
+ %val = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 8
%bswap = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %val) nounwind readnone
store <2 x i32> %bswap, <2 x i32> addrspace(1)* %out, align 8
ret void
@@ -54,7 +54,7 @@ define void @test_bswap_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(
; SI-DAG: v_bfi_b32
; SI: s_endpgm
define void @test_bswap_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) nounwind {
- %val = load <4 x i32> addrspace(1)* %in, align 16
+ %val = load <4 x i32>, <4 x i32> addrspace(1)* %in, align 16
%bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val) nounwind readnone
store <4 x i32> %bswap, <4 x i32> addrspace(1)* %out, align 16
ret void
@@ -87,28 +87,28 @@ define void @test_bswap_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(
; SI-DAG: v_bfi_b32
; SI: s_endpgm
define void @test_bswap_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) nounwind {
- %val = load <8 x i32> addrspace(1)* %in, align 32
+ %val = load <8 x i32>, <8 x i32> addrspace(1)* %in, align 32
%bswap = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %val) nounwind readnone
store <8 x i32> %bswap, <8 x i32> addrspace(1)* %out, align 32
ret void
}
define void @test_bswap_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
- %val = load i64 addrspace(1)* %in, align 8
+ %val = load i64, i64 addrspace(1)* %in, align 8
%bswap = call i64 @llvm.bswap.i64(i64 %val) nounwind readnone
store i64 %bswap, i64 addrspace(1)* %out, align 8
ret void
}
define void @test_bswap_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) nounwind {
- %val = load <2 x i64> addrspace(1)* %in, align 16
+ %val = load <2 x i64>, <2 x i64> addrspace(1)* %in, align 16
%bswap = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %val) nounwind readnone
store <2 x i64> %bswap, <2 x i64> addrspace(1)* %out, align 16
ret void
}
define void @test_bswap_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) nounwind {
- %val = load <4 x i64> addrspace(1)* %in, align 32
+ %val = load <4 x i64>, <4 x i64> addrspace(1)* %in, align 32
%bswap = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %val) nounwind readnone
store <4 x i64> %bswap, <4 x i64> addrspace(1)* %out, align 32
ret void
diff --git a/test/CodeGen/R600/call.ll b/test/CodeGen/R600/call.ll
index 6de51f1..eb71649 100644
--- a/test/CodeGen/R600/call.ll
+++ b/test/CodeGen/R600/call.ll
@@ -13,9 +13,9 @@ define i32 @defined_function(i32 %x) nounwind noinline {
}
define void @test_call(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %a = load i32 addrspace(1)* %in
- %b = load i32 addrspace(1)* %b_ptr
+ %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %a = load i32, i32 addrspace(1)* %in
+ %b = load i32, i32 addrspace(1)* %b_ptr
%c = call i32 @defined_function(i32 %b) nounwind
%result = add i32 %a, %c
store i32 %result, i32 addrspace(1)* %out
@@ -23,9 +23,9 @@ define void @test_call(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
}
define void @test_call_external(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %a = load i32 addrspace(1)* %in
- %b = load i32 addrspace(1)* %b_ptr
+ %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %a = load i32, i32 addrspace(1)* %in
+ %b = load i32, i32 addrspace(1)* %b_ptr
%c = call i32 @external_function(i32 %b) nounwind
%result = add i32 %a, %c
store i32 %result, i32 addrspace(1)* %out
diff --git a/test/CodeGen/R600/call_fs.ll b/test/CodeGen/R600/call_fs.ll
index db2cb6e..87bebbc 100644
--- a/test/CodeGen/R600/call_fs.ll
+++ b/test/CodeGen/R600/call_fs.ll
@@ -2,11 +2,11 @@
; RUN: llc < %s -march=r600 -mcpu=redwood -show-mc-encoding -o - | FileCheck --check-prefix=EG %s
; RUN: llc < %s -march=r600 -mcpu=rv710 -show-mc-encoding -o - | FileCheck --check-prefix=R600 %s
-; EG: {{^}}call_fs:
; EG: .long 257
+; EG: {{^}}call_fs:
; EG: CALL_FS ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0xc0,0x84]
-; R600: {{^}}call_fs:
; R600: .long 257
+; R600: {{^}}call_fs:
; R600:CALL_FS ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x89]
diff --git a/test/CodeGen/R600/codegen-prepare-addrmode-sext.ll b/test/CodeGen/R600/codegen-prepare-addrmode-sext.ll
index e16a397..0aecc18 100644
--- a/test/CodeGen/R600/codegen-prepare-addrmode-sext.ll
+++ b/test/CodeGen/R600/codegen-prepare-addrmode-sext.ll
@@ -14,7 +14,7 @@ define void @test(i8 addrspace(1)* nocapture readonly %in, i32 %a, i8 %b) {
entry:
%0 = mul nsw i32 %a, 3
%1 = sext i32 %0 to i64
- %2 = getelementptr i8 addrspace(1)* %in, i64 %1
+ %2 = getelementptr i8, i8 addrspace(1)* %in, i64 %1
store i8 %b, i8 addrspace(1)* %2
ret void
}
diff --git a/test/CodeGen/R600/combine_vloads.ll b/test/CodeGen/R600/combine_vloads.ll
index 38420b2..01572af 100644
--- a/test/CodeGen/R600/combine_vloads.ll
+++ b/test/CodeGen/R600/combine_vloads.ll
@@ -23,7 +23,7 @@ for.body: ; preds = %for.body, %entry
%i.01 = phi i32 [ 0, %entry ], [ %tmp19, %for.body ]
%arrayidx_v4 = bitcast <8 x i8> addrspace(1)* %src to <32 x i8> addrspace(1)*
%0 = bitcast <32 x i8> addrspace(1)* %arrayidx_v4 to <8 x i32> addrspace(1)*
- %vecload2 = load <8 x i32> addrspace(1)* %0, align 32
+ %vecload2 = load <8 x i32>, <8 x i32> addrspace(1)* %0, align 32
%1 = bitcast <8 x i32> %vecload2 to <32 x i8>
%tmp5 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%tmp8 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -32,7 +32,7 @@ for.body: ; preds = %for.body, %entry
%tmp13 = add nsw <8 x i8> %tmp9, %tmp12
%tmp16 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> <i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%tmp17 = add nsw <8 x i8> %tmp13, %tmp16
- %scevgep = getelementptr <8 x i8> addrspace(1)* %result, i32 %i.01
+ %scevgep = getelementptr <8 x i8>, <8 x i8> addrspace(1)* %result, i32 %i.01
%2 = bitcast <8 x i8> %tmp17 to <2 x i32>
%3 = bitcast <8 x i8> addrspace(1)* %scevgep to <2 x i32> addrspace(1)*
store <2 x i32> %2, <2 x i32> addrspace(1)* %3, align 8
diff --git a/test/CodeGen/R600/commute-compares.ll b/test/CodeGen/R600/commute-compares.ll
new file mode 100644
index 0000000..3176604
--- /dev/null
+++ b/test/CodeGen/R600/commute-compares.ll
@@ -0,0 +1,697 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+
+declare i32 @llvm.r600.read.tidig.x() #0
+
+; --------------------------------------------------------------------------------
+; i32 compares
+; --------------------------------------------------------------------------------
+
+; GCN-LABEL: {{^}}commute_eq_64_i32:
+; GCN: v_cmp_eq_i32_e32 vcc, 64, v{{[0-9]+}}
+define void @commute_eq_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i32, i32 addrspace(1)* %gep.in
+ %cmp = icmp eq i32 %val, 64
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ne_64_i32:
+; GCN: v_cmp_ne_i32_e32 vcc, 64, v{{[0-9]+}}
+define void @commute_ne_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i32, i32 addrspace(1)* %gep.in
+ %cmp = icmp ne i32 %val, 64
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; FIXME: Why isn't this being folded as a constant?
+; GCN-LABEL: {{^}}commute_ne_litk_i32:
+; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3039
+; GCN: v_cmp_ne_i32_e32 vcc, [[K]], v{{[0-9]+}}
+define void @commute_ne_litk_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i32, i32 addrspace(1)* %gep.in
+ %cmp = icmp ne i32 %val, 12345
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ugt_64_i32:
+; GCN: v_cmp_lt_u32_e32 vcc, 64, v{{[0-9]+}}
+define void @commute_ugt_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i32, i32 addrspace(1)* %gep.in
+ %cmp = icmp ugt i32 %val, 64
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_uge_64_i32:
+; GCN: v_cmp_lt_u32_e32 vcc, 63, v{{[0-9]+}}
+define void @commute_uge_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i32, i32 addrspace(1)* %gep.in
+ %cmp = icmp uge i32 %val, 64
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ult_64_i32:
+; GCN: v_cmp_gt_u32_e32 vcc, 64, v{{[0-9]+}}
+define void @commute_ult_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i32, i32 addrspace(1)* %gep.in
+ %cmp = icmp ult i32 %val, 64
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ule_63_i32:
+; GCN: v_cmp_gt_u32_e32 vcc, 64, v{{[0-9]+}}
+define void @commute_ule_63_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i32, i32 addrspace(1)* %gep.in
+ %cmp = icmp ule i32 %val, 63
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; FIXME: Undo canonicalization to gt (x + 1) since it doesn't use the inline imm
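+; ule x, 64 is canonicalized to ult x, 65 first, and 65 (0x41) is one past
+; the inline-immediate range, so the constant has to be materialized with
+; v_mov_b32 before the commuted compare.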
+
+; GCN-LABEL: {{^}}commute_ule_64_i32:
+; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x41{{$}}
+; GCN: v_cmp_gt_u32_e32 vcc, [[K]], v{{[0-9]+}}
+define void @commute_ule_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i32, i32 addrspace(1)* %gep.in
+ %cmp = icmp ule i32 %val, 64
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_sgt_neg1_i32:
+; GCN: v_cmp_lt_i32_e32 vcc, -1, v{{[0-9]+}}
+define void @commute_sgt_neg1_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i32, i32 addrspace(1)* %gep.in
+ %cmp = icmp sgt i32 %val, -1
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_sge_neg2_i32:
+; GCN: v_cmp_lt_i32_e32 vcc, -3, v{{[0-9]+}}
+define void @commute_sge_neg2_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i32, i32 addrspace(1)* %gep.in
+ %cmp = icmp sge i32 %val, -2
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_slt_neg16_i32:
+; GCN: v_cmp_gt_i32_e32 vcc, -16, v{{[0-9]+}}
+define void @commute_slt_neg16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i32, i32 addrspace(1)* %gep.in
+ %cmp = icmp slt i32 %val, -16
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_sle_5_i32:
+; GCN: v_cmp_gt_i32_e32 vcc, 6, v{{[0-9]+}}
+define void @commute_sle_5_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i32, i32 addrspace(1)* %gep.in
+ %cmp = icmp sle i32 %val, 5
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; --------------------------------------------------------------------------------
+; i64 compares
+; --------------------------------------------------------------------------------
+
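+; The same commuting applies to the 64-bit compares: small constants such as
+; 64 and -16 still encode as inline immediates even though the variable
+; operand widens to a VGPR pair.
+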
+; GCN-LABEL: {{^}}commute_eq_64_i64:
+; GCN: v_cmp_eq_i64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_eq_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i64, i64 addrspace(1)* %gep.in
+ %cmp = icmp eq i64 %val, 64
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ne_64_i64:
+; GCN: v_cmp_ne_i64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_ne_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i64, i64 addrspace(1)* %gep.in
+ %cmp = icmp ne i64 %val, 64
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ugt_64_i64:
+; GCN: v_cmp_lt_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_ugt_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i64, i64 addrspace(1)* %gep.in
+ %cmp = icmp ugt i64 %val, 64
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_uge_64_i64:
+; GCN: v_cmp_lt_u64_e32 vcc, 63, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_uge_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i64, i64 addrspace(1)* %gep.in
+ %cmp = icmp uge i64 %val, 64
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ult_64_i64:
+; GCN: v_cmp_gt_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_ult_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i64, i64 addrspace(1)* %gep.in
+ %cmp = icmp ult i64 %val, 64
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ule_63_i64:
+; GCN: v_cmp_gt_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_ule_63_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i64, i64 addrspace(1)* %gep.in
+ %cmp = icmp ule i64 %val, 63
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; FIXME: Undo canonicalization to gt (x + 1) since it doesn't use the inline imm
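+; Same canonicalization as the i32 case above; here the 0x41 literal lands in
+; the low half of an SGPR pair via s_movk_i32.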
+
+; GCN-LABEL: {{^}}commute_ule_64_i64:
+; GCN-DAG: s_movk_i32 s[[KLO:[0-9]+]], 0x41{{$}}
+; GCN: v_cmp_gt_u64_e32 vcc, s{{\[}}[[KLO]]:{{[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_ule_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i64, i64 addrspace(1)* %gep.in
+ %cmp = icmp ule i64 %val, 64
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_sgt_neg1_i64:
+; GCN: v_cmp_lt_i64_e32 vcc, -1, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_sgt_neg1_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i64, i64 addrspace(1)* %gep.in
+ %cmp = icmp sgt i64 %val, -1
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_sge_neg2_i64:
+; GCN: v_cmp_lt_i64_e32 vcc, -3, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_sge_neg2_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i64, i64 addrspace(1)* %gep.in
+ %cmp = icmp sge i64 %val, -2
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_slt_neg16_i64:
+; GCN: v_cmp_gt_i64_e32 vcc, -16, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_slt_neg16_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i64, i64 addrspace(1)* %gep.in
+ %cmp = icmp slt i64 %val, -16
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_sle_5_i64:
+; GCN: v_cmp_gt_i64_e32 vcc, 6, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_sle_5_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i64, i64 addrspace(1)* %gep.in
+ %cmp = icmp sle i64 %val, 5
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; --------------------------------------------------------------------------------
+; f32 compares
+; --------------------------------------------------------------------------------
+
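+; Ordered predicates commute to the mirrored ordered compare (ogt -> lt,
+; ole -> ge), while unordered ones commute to the negated forms (ugt -> nge,
+; ule -> nlt), as the checks below spell out. 2.0 is an inline immediate, so
+; no literal is needed for any of these.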
+
+; GCN-LABEL: {{^}}commute_oeq_2.0_f32:
+; GCN: v_cmp_eq_f32_e32 vcc, 2.0, v{{[0-9]+}}
+define void @commute_oeq_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load float, float addrspace(1)* %gep.in
+ %cmp = fcmp oeq float %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+
+; GCN-LABEL: {{^}}commute_ogt_2.0_f32:
+; GCN: v_cmp_lt_f32_e32 vcc, 2.0, v{{[0-9]+}}
+define void @commute_ogt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load float, float addrspace(1)* %gep.in
+ %cmp = fcmp ogt float %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_oge_2.0_f32:
+; GCN: v_cmp_le_f32_e32 vcc, 2.0, v{{[0-9]+}}
+define void @commute_oge_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load float, float addrspace(1)* %gep.in
+ %cmp = fcmp oge float %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_olt_2.0_f32:
+; GCN: v_cmp_gt_f32_e32 vcc, 2.0, v{{[0-9]+}}
+define void @commute_olt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load float, float addrspace(1)* %gep.in
+ %cmp = fcmp olt float %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ole_2.0_f32:
+; GCN: v_cmp_ge_f32_e32 vcc, 2.0, v{{[0-9]+}}
+define void @commute_ole_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load float, float addrspace(1)* %gep.in
+ %cmp = fcmp ole float %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_one_2.0_f32:
+; GCN: v_cmp_lg_f32_e32 vcc, 2.0, v{{[0-9]+}}
+define void @commute_one_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load float, float addrspace(1)* %gep.in
+ %cmp = fcmp one float %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
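+; ord/uno against a non-NaN constant depend only on whether %val itself is a
+; NaN, so both are expected to fold to a self-comparison of one register
+; (v_cmp_o / v_cmp_u) rather than a commuted compare against 2.0.
+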
+; GCN-LABEL: {{^}}commute_ord_2.0_f32:
+; GCN: v_cmp_o_f32_e32 vcc, [[REG:v[0-9]+]], [[REG]]
+define void @commute_ord_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load float, float addrspace(1)* %gep.in
+ %cmp = fcmp ord float %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ueq_2.0_f32:
+; GCN: v_cmp_nlg_f32_e32 vcc, 2.0, v{{[0-9]+}}
+define void @commute_ueq_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load float, float addrspace(1)* %gep.in
+ %cmp = fcmp ueq float %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ugt_2.0_f32:
+; GCN: v_cmp_nge_f32_e32 vcc, 2.0, v{{[0-9]+}}
+define void @commute_ugt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load float, float addrspace(1)* %gep.in
+ %cmp = fcmp ugt float %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_uge_2.0_f32:
+; GCN: v_cmp_ngt_f32_e32 vcc, 2.0, v{{[0-9]+}}
+define void @commute_uge_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load float, float addrspace(1)* %gep.in
+ %cmp = fcmp uge float %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ult_2.0_f32:
+; GCN: v_cmp_nle_f32_e32 vcc, 2.0, v{{[0-9]+}}
+define void @commute_ult_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load float, float addrspace(1)* %gep.in
+ %cmp = fcmp ult float %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ule_2.0_f32:
+; GCN: v_cmp_nlt_f32_e32 vcc, 2.0, v{{[0-9]+}}
+define void @commute_ule_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load float, float addrspace(1)* %gep.in
+ %cmp = fcmp ule float %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_une_2.0_f32:
+; GCN: v_cmp_neq_f32_e32 vcc, 2.0, v{{[0-9]+}}
+define void @commute_une_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load float, float addrspace(1)* %gep.in
+ %cmp = fcmp une float %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_uno_2.0_f32:
+; GCN: v_cmp_u_f32_e32 vcc, [[REG:v[0-9]+]], [[REG]]
+define void @commute_uno_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load float, float addrspace(1)* %gep.in
+ %cmp = fcmp uno float %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; --------------------------------------------------------------------------------
+; f64 compares
+; --------------------------------------------------------------------------------
+
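+; The f64 cases mirror the f32 mapping one for one; 2.0 is an inline
+; immediate for doubles too, so only the variable side widens to a 64-bit
+; VGPR pair.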
+
+; GCN-LABEL: {{^}}commute_oeq_2.0_f64:
+; GCN: v_cmp_eq_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_oeq_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load double, double addrspace(1)* %gep.in
+ %cmp = fcmp oeq double %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+
+; GCN-LABEL: {{^}}commute_ogt_2.0_f64:
+; GCN: v_cmp_lt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_ogt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load double, double addrspace(1)* %gep.in
+ %cmp = fcmp ogt double %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_oge_2.0_f64:
+; GCN: v_cmp_le_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_oge_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load double, double addrspace(1)* %gep.in
+ %cmp = fcmp oge double %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_olt_2.0_f64:
+; GCN: v_cmp_gt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_olt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load double, double addrspace(1)* %gep.in
+ %cmp = fcmp olt double %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ole_2.0_f64:
+; GCN: v_cmp_ge_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_ole_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load double, double addrspace(1)* %gep.in
+ %cmp = fcmp ole double %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_one_2.0_f64:
+; GCN: v_cmp_lg_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_one_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load double, double addrspace(1)* %gep.in
+ %cmp = fcmp one double %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ord_2.0_f64:
+; GCN: v_cmp_o_f64_e32 vcc, [[REG:v\[[0-9]+:[0-9]+\]]], [[REG]]
+define void @commute_ord_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load double, double addrspace(1)* %gep.in
+ %cmp = fcmp ord double %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ueq_2.0_f64:
+; GCN: v_cmp_nlg_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_ueq_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load double, double addrspace(1)* %gep.in
+ %cmp = fcmp ueq double %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ugt_2.0_f64:
+; GCN: v_cmp_nge_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_ugt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load double, double addrspace(1)* %gep.in
+ %cmp = fcmp ugt double %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_uge_2.0_f64:
+; GCN: v_cmp_ngt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_uge_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load double, double addrspace(1)* %gep.in
+ %cmp = fcmp uge double %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ult_2.0_f64:
+; GCN: v_cmp_nle_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_ult_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load double, double addrspace(1)* %gep.in
+ %cmp = fcmp ult double %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ule_2.0_f64:
+; GCN: v_cmp_nlt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_ule_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load double, double addrspace(1)* %gep.in
+ %cmp = fcmp ule double %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_une_2.0_f64:
+; GCN: v_cmp_neq_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_une_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load double, double addrspace(1)* %gep.in
+ %cmp = fcmp une double %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_uno_2.0_f64:
+; GCN: v_cmp_u_f64_e32 vcc, [[REG:v\[[0-9]+:[0-9]+\]]], [[REG]]
+define void @commute_uno_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load double, double addrspace(1)* %gep.in
+ %cmp = fcmp uno double %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
diff --git a/test/CodeGen/R600/commute_modifiers.ll b/test/CodeGen/R600/commute_modifiers.ll
index 6fddb6d..7fc36ea 100644
--- a/test/CodeGen/R600/commute_modifiers.ll
+++ b/test/CodeGen/R600/commute_modifiers.ll
@@ -10,8 +10,8 @@ declare float @llvm.fma.f32(float, float, float) nounwind readnone
; SI-NEXT: buffer_store_dword [[REG]]
define void @commute_add_imm_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %x = load float addrspace(1)* %gep.0
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %x = load float, float addrspace(1)* %gep.0
%x.fabs = call float @llvm.fabs.f32(float %x) #1
%z = fadd float 2.0, %x.fabs
store float %z, float addrspace(1)* %out
@@ -24,8 +24,8 @@ define void @commute_add_imm_fabs_f32(float addrspace(1)* %out, float addrspace(
; SI-NEXT: buffer_store_dword [[REG]]
define void @commute_mul_imm_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %x = load float addrspace(1)* %gep.0
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %x = load float, float addrspace(1)* %gep.0
%x.fabs = call float @llvm.fabs.f32(float %x) #1
%x.fneg.fabs = fsub float -0.000000e+00, %x.fabs
%z = fmul float 4.0, %x.fneg.fabs
@@ -39,8 +39,8 @@ define void @commute_mul_imm_fneg_fabs_f32(float addrspace(1)* %out, float addrs
; SI-NEXT: buffer_store_dword [[REG]]
define void @commute_mul_imm_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %x = load float addrspace(1)* %gep.0
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %x = load float, float addrspace(1)* %gep.0
%x.fneg = fsub float -0.000000e+00, %x
%z = fmul float 4.0, %x.fneg
store float %z, float addrspace(1)* %out
@@ -55,8 +55,8 @@ define void @commute_mul_imm_fneg_f32(float addrspace(1)* %out, float addrspace(
; SI-NEXT: buffer_store_dword [[REG]]
define void @commute_add_lit_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %x = load float addrspace(1)* %gep.0
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %x = load float, float addrspace(1)* %gep.0
%x.fabs = call float @llvm.fabs.f32(float %x) #1
%z = fadd float 1024.0, %x.fabs
store float %z, float addrspace(1)* %out
@@ -70,10 +70,10 @@ define void @commute_add_lit_fabs_f32(float addrspace(1)* %out, float addrspace(
; SI-NEXT: buffer_store_dword [[REG]]
define void @commute_add_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %x = load float addrspace(1)* %gep.0
- %y = load float addrspace(1)* %gep.1
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %x = load float, float addrspace(1)* %gep.0
+ %y = load float, float addrspace(1)* %gep.1
%y.fabs = call float @llvm.fabs.f32(float %y) #1
%z = fadd float %x, %y.fabs
store float %z, float addrspace(1)* %out
@@ -87,10 +87,10 @@ define void @commute_add_fabs_f32(float addrspace(1)* %out, float addrspace(1)*
; SI-NEXT: buffer_store_dword [[REG]]
define void @commute_mul_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %x = load float addrspace(1)* %gep.0
- %y = load float addrspace(1)* %gep.1
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %x = load float, float addrspace(1)* %gep.0
+ %y = load float, float addrspace(1)* %gep.1
%y.fneg = fsub float -0.000000e+00, %y
%z = fmul float %x, %y.fneg
store float %z, float addrspace(1)* %out
@@ -104,10 +104,10 @@ define void @commute_mul_fneg_f32(float addrspace(1)* %out, float addrspace(1)*
; SI-NEXT: buffer_store_dword [[REG]]
define void @commute_mul_fabs_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %x = load float addrspace(1)* %gep.0
- %y = load float addrspace(1)* %gep.1
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %x = load float, float addrspace(1)* %gep.0
+ %y = load float, float addrspace(1)* %gep.1
%y.fabs = call float @llvm.fabs.f32(float %y) #1
%y.fabs.fneg = fsub float -0.000000e+00, %y.fabs
%z = fmul float %x, %y.fabs.fneg
@@ -123,10 +123,10 @@ define void @commute_mul_fabs_fneg_f32(float addrspace(1)* %out, float addrspace
; SI-NEXT: buffer_store_dword [[REG]]
define void @commute_mul_fabs_x_fabs_y_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %x = load float addrspace(1)* %gep.0
- %y = load float addrspace(1)* %gep.1
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %x = load float, float addrspace(1)* %gep.0
+ %y = load float, float addrspace(1)* %gep.1
%x.fabs = call float @llvm.fabs.f32(float %x) #1
%y.fabs = call float @llvm.fabs.f32(float %y) #1
%z = fmul float %x.fabs, %y.fabs
@@ -141,10 +141,10 @@ define void @commute_mul_fabs_x_fabs_y_f32(float addrspace(1)* %out, float addrs
; SI-NEXT: buffer_store_dword [[REG]]
define void @commute_mul_fabs_x_fneg_fabs_y_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %x = load float addrspace(1)* %gep.0
- %y = load float addrspace(1)* %gep.1
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %x = load float, float addrspace(1)* %gep.0
+ %y = load float, float addrspace(1)* %gep.1
%x.fabs = call float @llvm.fabs.f32(float %x) #1
%y.fabs = call float @llvm.fabs.f32(float %y) #1
%y.fabs.fneg = fsub float -0.000000e+00, %y.fabs
@@ -163,12 +163,12 @@ define void @commute_mul_fabs_x_fneg_fabs_y_f32(float addrspace(1)* %out, float
; SI: buffer_store_dword [[RESULT]]
define void @fma_a_2.0_neg_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %r1 = load float addrspace(1)* %gep.0
- %r2 = load float addrspace(1)* %gep.1
+ %r1 = load float, float addrspace(1)* %gep.0
+ %r2 = load float, float addrspace(1)* %gep.1
%r2.fabs = call float @llvm.fabs.f32(float %r2)
diff --git a/test/CodeGen/R600/copy-illegal-type.ll b/test/CodeGen/R600/copy-illegal-type.ll
index 56c43d2..8b39756 100644
--- a/test/CodeGen/R600/copy-illegal-type.ll
+++ b/test/CodeGen/R600/copy-illegal-type.ll
@@ -6,7 +6,7 @@
; SI: buffer_store_dword [[REG]]
; SI: s_endpgm
define void @test_copy_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
- %val = load <4 x i8> addrspace(1)* %in, align 4
+ %val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out, align 4
ret void
}
@@ -17,7 +17,7 @@ define void @test_copy_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)*
; SI: buffer_store_dword [[REG]]
; SI: s_endpgm
define void @test_copy_v4i8_x2(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %in) nounwind {
- %val = load <4 x i8> addrspace(1)* %in, align 4
+ %val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out1, align 4
ret void
@@ -30,7 +30,7 @@ define void @test_copy_v4i8_x2(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(
; SI: buffer_store_dword [[REG]]
; SI: s_endpgm
define void @test_copy_v4i8_x3(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %in) nounwind {
- %val = load <4 x i8> addrspace(1)* %in, align 4
+ %val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out1, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out2, align 4
@@ -45,7 +45,7 @@ define void @test_copy_v4i8_x3(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(
; SI: buffer_store_dword [[REG]]
; SI: s_endpgm
define void @test_copy_v4i8_x4(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %out3, <4 x i8> addrspace(1)* %in) nounwind {
- %val = load <4 x i8> addrspace(1)* %in, align 4
+ %val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out1, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out2, align 4
@@ -82,7 +82,7 @@ define void @test_copy_v4i8_x4(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(
; SI: s_endpgm
define void @test_copy_v4i8_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %in) nounwind {
- %val = load <4 x i8> addrspace(1)* %in, align 4
+ %val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
%add = add <4 x i8> %val, <i8 9, i8 9, i8 9, i8 9>
store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
store <4 x i8> %add, <4 x i8> addrspace(1)* %out1, align 4
@@ -120,7 +120,7 @@ define void @test_copy_v4i8_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8> add
; SI: s_endpgm
define void @test_copy_v4i8_x2_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %in) nounwind {
- %val = load <4 x i8> addrspace(1)* %in, align 4
+ %val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
%add = add <4 x i8> %val, <i8 9, i8 9, i8 9, i8 9>
store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
store <4 x i8> %add, <4 x i8> addrspace(1)* %out1, align 4
@@ -133,7 +133,7 @@ define void @test_copy_v4i8_x2_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8>
; SI-NOT: bfi
; SI: s_endpgm
define void @test_copy_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) nounwind {
- %val = load <3 x i8> addrspace(1)* %in, align 4
+ %val = load <3 x i8>, <3 x i8> addrspace(1)* %in, align 4
store <3 x i8> %val, <3 x i8> addrspace(1)* %out, align 4
ret void
}
@@ -145,7 +145,7 @@ define void @test_copy_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)*
; SI: buffer_load_ubyte
; SI: s_endpgm
define void @test_copy_v4i8_volatile_load(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
- %val = load volatile <4 x i8> addrspace(1)* %in, align 4
+ %val = load volatile <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out, align 4
ret void
}
@@ -161,7 +161,7 @@ define void @test_copy_v4i8_volatile_load(<4 x i8> addrspace(1)* %out, <4 x i8>
; SI: buffer_store_byte
; SI: s_endpgm
define void @test_copy_v4i8_volatile_store(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
- %val = load <4 x i8> addrspace(1)* %in, align 4
+ %val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
store volatile <4 x i8> %val, <4 x i8> addrspace(1)* %out, align 4
ret void
}
diff --git a/test/CodeGen/R600/copy-to-reg.ll b/test/CodeGen/R600/copy-to-reg.ll
index 9c1de73..fc875f6 100644
--- a/test/CodeGen/R600/copy-to-reg.ll
+++ b/test/CodeGen/R600/copy-to-reg.ll
@@ -13,15 +13,15 @@ entry:
loop:
%inc = phi i32 [0, %entry], [%inc.i, %loop]
- %ptr = getelementptr [16 x i32]* %alloca, i32 0, i32 %inc
+ %ptr = getelementptr [16 x i32], [16 x i32]* %alloca, i32 0, i32 %inc
store i32 %inc, i32* %ptr
%inc.i = add i32 %inc, 1
%cnd = icmp uge i32 %inc.i, 16
br i1 %cnd, label %done, label %loop
done:
- %tmp0 = getelementptr [16 x i32]* %alloca, i32 0, i32 0
- %tmp1 = load i32* %tmp0
+ %tmp0 = getelementptr [16 x i32], [16 x i32]* %alloca, i32 0, i32 0
+ %tmp1 = load i32, i32* %tmp0
store i32 %tmp1, i32 addrspace(1)* %out
ret void
}
diff --git a/test/CodeGen/R600/ctlz_zero_undef.ll b/test/CodeGen/R600/ctlz_zero_undef.ll
index 1a4317b..bd26c30 100644
--- a/test/CodeGen/R600/ctlz_zero_undef.ll
+++ b/test/CodeGen/R600/ctlz_zero_undef.ll
@@ -28,7 +28,7 @@ define void @s_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nou
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
define void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
- %val = load i32 addrspace(1)* %valptr, align 4
+ %val = load i32, i32 addrspace(1)* %valptr, align 4
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
store i32 %ctlz, i32 addrspace(1)* %out, align 4
ret void
@@ -44,7 +44,7 @@ define void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
define void @v_ctlz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind {
- %val = load <2 x i32> addrspace(1)* %valptr, align 8
+ %val = load <2 x i32>, <2 x i32> addrspace(1)* %valptr, align 8
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %val, i1 true) nounwind readnone
store <2 x i32> %ctlz, <2 x i32> addrspace(1)* %out, align 8
ret void
@@ -64,7 +64,7 @@ define void @v_ctlz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
define void @v_ctlz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind {
- %val = load <4 x i32> addrspace(1)* %valptr, align 16
+ %val = load <4 x i32>, <4 x i32> addrspace(1)* %valptr, align 16
%ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val, i1 true) nounwind readnone
store <4 x i32> %ctlz, <4 x i32> addrspace(1)* %out, align 16
ret void
diff --git a/test/CodeGen/R600/ctpop.ll b/test/CodeGen/R600/ctpop.ll
index 6f7d92b..0a031c5 100644
--- a/test/CodeGen/R600/ctpop.ll
+++ b/test/CodeGen/R600/ctpop.ll
@@ -31,7 +31,7 @@ define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
; EG: BCNT_INT
define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
- %val = load i32 addrspace(1)* %in, align 4
+ %val = load i32, i32 addrspace(1)* %in, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
store i32 %ctpop, i32 addrspace(1)* %out, align 4
ret void
@@ -49,8 +49,8 @@ define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noali
; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1) nounwind {
- %val0 = load i32 addrspace(1)* %in0, align 4
- %val1 = load i32 addrspace(1)* %in1, align 4
+ %val0 = load i32, i32 addrspace(1)* %in0, align 4
+ %val1 = load i32, i32 addrspace(1)* %in1, align 4
%ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
%ctpop1 = call i32 @llvm.ctpop.i32(i32 %val1) nounwind readnone
%add = add i32 %ctpop0, %ctpop1
@@ -65,7 +65,7 @@ define void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace
; GCN-NEXT: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1, i32 %sval) nounwind {
- %val0 = load i32 addrspace(1)* %in0, align 4
+ %val0 = load i32, i32 addrspace(1)* %in0, align 4
%ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
%add = add i32 %ctpop0, %sval
store i32 %add, i32 addrspace(1)* %out, align 4
@@ -80,7 +80,7 @@ define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(
; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) nounwind {
- %val = load <2 x i32> addrspace(1)* %in, align 8
+ %val = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 8
%ctpop = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %val) nounwind readnone
store <2 x i32> %ctpop, <2 x i32> addrspace(1)* %out, align 8
ret void
@@ -98,7 +98,7 @@ define void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrs
; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %in) nounwind {
- %val = load <4 x i32> addrspace(1)* %in, align 16
+ %val = load <4 x i32>, <4 x i32> addrspace(1)* %in, align 16
%ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val) nounwind readnone
store <4 x i32> %ctpop, <4 x i32> addrspace(1)* %out, align 16
ret void
@@ -124,7 +124,7 @@ define void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrs
; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrspace(1)* noalias %in) nounwind {
- %val = load <8 x i32> addrspace(1)* %in, align 32
+ %val = load <8 x i32>, <8 x i32> addrspace(1)* %in, align 32
%ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %val) nounwind readnone
store <8 x i32> %ctpop, <8 x i32> addrspace(1)* %out, align 32
ret void
@@ -166,7 +166,7 @@ define void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrs
; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> addrspace(1)* noalias %in) nounwind {
- %val = load <16 x i32> addrspace(1)* %in, align 32
+ %val = load <16 x i32>, <16 x i32> addrspace(1)* %in, align 32
%ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %val) nounwind readnone
store <16 x i32> %ctpop, <16 x i32> addrspace(1)* %out, align 32
ret void
@@ -180,7 +180,7 @@ define void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> ad
; EG: BCNT_INT
define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
- %val = load i32 addrspace(1)* %in, align 4
+ %val = load i32, i32 addrspace(1)* %in, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
%add = add i32 %ctpop, 4
store i32 %add, i32 addrspace(1)* %out, align 4
@@ -195,7 +195,7 @@ define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32
; EG: BCNT_INT
define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
- %val = load i32 addrspace(1)* %in, align 4
+ %val = load i32, i32 addrspace(1)* %in, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
%add = add i32 4, %ctpop
store i32 %add, i32 addrspace(1)* %out, align 4
@@ -210,7 +210,7 @@ define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out,
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
- %val = load i32 addrspace(1)* %in, align 4
+ %val = load i32, i32 addrspace(1)* %in, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
%add = add i32 %ctpop, 99999
store i32 %add, i32 addrspace(1)* %out, align 4
@@ -226,7 +226,7 @@ define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspa
; EG: BCNT_INT
define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
- %val = load i32 addrspace(1)* %in, align 4
+ %val = load i32, i32 addrspace(1)* %in, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
%add = add i32 %ctpop, %const
store i32 %add, i32 addrspace(1)* %out, align 4
@@ -242,7 +242,7 @@ define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1
; EG: BCNT_INT
define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
- %val = load i32 addrspace(1)* %in, align 4
+ %val = load i32, i32 addrspace(1)* %in, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
%add = add i32 %const, %ctpop
store i32 %add, i32 addrspace(1)* %out, align 4
@@ -259,10 +259,10 @@ define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspa
; EG: BCNT_INT
define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 addrspace(1)* noalias %constptr) nounwind {
- %val = load i32 addrspace(1)* %in, align 4
+ %val = load i32, i32 addrspace(1)* %in, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
- %gep = getelementptr i32 addrspace(1)* %constptr, i32 4
- %const = load i32 addrspace(1)* %gep, align 4
+ %gep = getelementptr i32, i32 addrspace(1)* %constptr, i32 4
+ %const = load i32, i32 addrspace(1)* %gep, align 4
%add = add i32 %const, %ctpop
store i32 %add, i32 addrspace(1)* %out, align 4
ret void
@@ -289,8 +289,8 @@ if:
br label %endif
else:
- %tmp3 = getelementptr i32 addrspace(1)* %in, i32 1
- %tmp4 = load i32 addrspace(1)* %tmp3
+ %tmp3 = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %tmp4 = load i32, i32 addrspace(1)* %tmp3
br label %endif
endif:
diff --git a/test/CodeGen/R600/ctpop64.ll b/test/CodeGen/R600/ctpop64.ll
index 8bcd818..e1a0ee3 100644
--- a/test/CodeGen/R600/ctpop64.ll
+++ b/test/CodeGen/R600/ctpop64.ll
@@ -29,7 +29,7 @@ define void @s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
- %val = load i64 addrspace(1)* %in, align 8
+ %val = load i64, i64 addrspace(1)* %in, align 8
%ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone
%truncctpop = trunc i64 %ctpop to i32
store i32 %truncctpop, i32 addrspace(1)* %out, align 4
@@ -67,7 +67,7 @@ define void @s_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> %val)
; GCN: v_bcnt_u32_b32
; GCN: s_endpgm
define void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in) nounwind {
- %val = load <2 x i64> addrspace(1)* %in, align 16
+ %val = load <2 x i64>, <2 x i64> addrspace(1)* %in, align 16
%ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone
%truncctpop = trunc <2 x i64> %ctpop to <2 x i32>
store <2 x i32> %truncctpop, <2 x i32> addrspace(1)* %out, align 8
@@ -85,7 +85,7 @@ define void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrs
; GCN: v_bcnt_u32_b32
; GCN: s_endpgm
define void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrspace(1)* noalias %in) nounwind {
- %val = load <4 x i64> addrspace(1)* %in, align 32
+ %val = load <4 x i64>, <4 x i64> addrspace(1)* %in, align 32
%ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone
%truncctpop = trunc <4 x i64> %ctpop to <4 x i32>
store <4 x i32> %truncctpop, <4 x i32> addrspace(1)* %out, align 16
@@ -113,8 +113,8 @@ if:
br label %endif
else:
- %tmp3 = getelementptr i64 addrspace(1)* %in, i32 1
- %tmp4 = load i64 addrspace(1)* %tmp3
+ %tmp3 = getelementptr i64, i64 addrspace(1)* %in, i32 1
+ %tmp4 = load i64, i64 addrspace(1)* %tmp3
br label %endif
endif:
diff --git a/test/CodeGen/R600/cttz_zero_undef.ll b/test/CodeGen/R600/cttz_zero_undef.ll
index d9d284c..56fcb51 100644
--- a/test/CodeGen/R600/cttz_zero_undef.ll
+++ b/test/CodeGen/R600/cttz_zero_undef.ll
@@ -28,7 +28,7 @@ define void @s_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nou
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
; EG: FFBL_INT {{\*? *}}[[RESULT]]
define void @v_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
- %val = load i32 addrspace(1)* %valptr, align 4
+ %val = load i32, i32 addrspace(1)* %valptr, align 4
%cttz = call i32 @llvm.cttz.i32(i32 %val, i1 true) nounwind readnone
store i32 %cttz, i32 addrspace(1)* %out, align 4
ret void
@@ -44,7 +44,7 @@ define void @v_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace
; EG: FFBL_INT {{\*? *}}[[RESULT]]
; EG: FFBL_INT {{\*? *}}[[RESULT]]
define void @v_cttz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind {
- %val = load <2 x i32> addrspace(1)* %valptr, align 8
+ %val = load <2 x i32>, <2 x i32> addrspace(1)* %valptr, align 8
%cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %val, i1 true) nounwind readnone
store <2 x i32> %cttz, <2 x i32> addrspace(1)* %out, align 8
ret void
@@ -64,7 +64,7 @@ define void @v_cttz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x
; EG: FFBL_INT {{\*? *}}[[RESULT]]
; EG: FFBL_INT {{\*? *}}[[RESULT]]
define void @v_cttz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind {
- %val = load <4 x i32> addrspace(1)* %valptr, align 16
+ %val = load <4 x i32>, <4 x i32> addrspace(1)* %valptr, align 16
%cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %val, i1 true) nounwind readnone
store <4 x i32> %cttz, <4 x i32> addrspace(1)* %out, align 16
ret void
diff --git a/test/CodeGen/R600/cvt_f32_ubyte.ll b/test/CodeGen/R600/cvt_f32_ubyte.ll
index 4d4bf93..3399d9d 100644
--- a/test/CodeGen/R600/cvt_f32_ubyte.ll
+++ b/test/CodeGen/R600/cvt_f32_ubyte.ll
@@ -8,7 +8,7 @@
; SI: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[LOADREG]]
; SI: buffer_store_dword [[CONV]],
define void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind {
- %load = load i8 addrspace(1)* %in, align 1
+ %load = load i8, i8 addrspace(1)* %in, align 1
%cvt = uitofp i8 %load to float
store float %cvt, float addrspace(1)* %out, align 4
ret void
@@ -23,7 +23,7 @@ define void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* n
; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
; SI: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8> addrspace(1)* noalias %in) nounwind {
- %load = load <2 x i8> addrspace(1)* %in, align 2
+ %load = load <2 x i8>, <2 x i8> addrspace(1)* %in, align 2
%cvt = uitofp <2 x i8> %load to <2 x float>
store <2 x float> %cvt, <2 x float> addrspace(1)* %out, align 16
ret void
@@ -37,7 +37,7 @@ define void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8>
; SI-DAG: v_cvt_f32_ubyte0_e32
; SI: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8> addrspace(1)* noalias %in) nounwind {
- %load = load <3 x i8> addrspace(1)* %in, align 4
+ %load = load <3 x i8>, <3 x i8> addrspace(1)* %in, align 4
%cvt = uitofp <3 x i8> %load to <3 x float>
store <3 x float> %cvt, <3 x float> addrspace(1)* %out, align 16
ret void
@@ -53,7 +53,7 @@ define void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8>
; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
; SI: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
- %load = load <4 x i8> addrspace(1)* %in, align 4
+ %load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
%cvt = uitofp <4 x i8> %load to <4 x float>
store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
ret void
@@ -77,7 +77,7 @@ define void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8>
; SI: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
- %load = load <4 x i8> addrspace(1)* %in, align 1
+ %load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 1
%cvt = uitofp <4 x i8> %load to <4 x float>
store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
ret void
@@ -105,7 +105,7 @@ define void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1)* noalias %out
; XSI: v_cvt_f32_u32_e32
; SI: s_endpgm
define void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %out2, <4 x i8> addrspace(1)* noalias %in) nounwind {
- %load = load <4 x i8> addrspace(1)* %in, align 4
+ %load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
%cvt = uitofp <4 x i8> %load to <4 x float>
store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
%add = add <4 x i8> %load, <i8 9, i8 9, i8 9, i8 9> ; Second use of %load
@@ -117,7 +117,7 @@ define void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* noalias %out, <
; SI-LABEL: {{^}}load_v7i8_to_v7f32:
; SI: s_endpgm
define void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8> addrspace(1)* noalias %in) nounwind {
- %load = load <7 x i8> addrspace(1)* %in, align 1
+ %load = load <7 x i8>, <7 x i8> addrspace(1)* %in, align 1
%cvt = uitofp <7 x i8> %load to <7 x float>
store <7 x float> %cvt, <7 x float> addrspace(1)* %out, align 16
ret void
@@ -146,7 +146,7 @@ define void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8>
; SI: buffer_store_dword
; SI: buffer_store_dword
define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> addrspace(1)* noalias %in) nounwind {
- %load = load <8 x i8> addrspace(1)* %in, align 8
+ %load = load <8 x i8>, <8 x i8> addrspace(1)* %in, align 8
%cvt = uitofp <8 x i8> %load to <8 x float>
store <8 x float> %cvt, <8 x float> addrspace(1)* %out, align 16
ret void
@@ -158,7 +158,7 @@ define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8>
; SI-NEXT: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[ADD]]
; SI: buffer_store_dword [[CONV]],
define void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
- %load = load i32 addrspace(1)* %in, align 4
+ %load = load i32, i32 addrspace(1)* %in, align 4
%add = add i32 %load, 2
%inreg = and i32 %add, 255
%cvt = uitofp i32 %inreg to float
@@ -168,7 +168,7 @@ define void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addr
; SI-LABEL: {{^}}i8_zext_inreg_hi1_to_f32:
define void @i8_zext_inreg_hi1_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
- %load = load i32 addrspace(1)* %in, align 4
+ %load = load i32, i32 addrspace(1)* %in, align 4
%inreg = and i32 %load, 65280
%shr = lshr i32 %inreg, 8
%cvt = uitofp i32 %shr to float
@@ -180,7 +180,7 @@ define void @i8_zext_inreg_hi1_to_f32(float addrspace(1)* noalias %out, i32 addr
; We don't get these ones because of the zext, but instcombine removes
; them so it shouldn't really matter.
define void @i8_zext_i32_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind {
- %load = load i8 addrspace(1)* %in, align 1
+ %load = load i8, i8 addrspace(1)* %in, align 1
%ext = zext i8 %load to i32
%cvt = uitofp i32 %ext to float
store float %cvt, float addrspace(1)* %out, align 4
@@ -188,7 +188,7 @@ define void @i8_zext_i32_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1
}
define void @v4i8_zext_v4i32_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
- %load = load <4 x i8> addrspace(1)* %in, align 1
+ %load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 1
%ext = zext <4 x i8> %load to <4 x i32>
%cvt = uitofp <4 x i32> %ext to <4 x float>
store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
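; The hunks above all apply the same mechanical change: `load` and
; `getelementptr` now spell out the loaded/indexed type explicitly instead of
; deriving it from the pointer operand's type. A minimal illustrative sketch
; of the new forms (standalone, not one of the tests above):
define float @explicit_type_sketch(float addrspace(1)* %p) {
  %q = getelementptr inbounds float, float addrspace(1)* %p, i32 1
  %v = load float, float addrspace(1)* %q, align 4
  ret float %v
}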
diff --git a/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll b/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll
index 1e47bfa..fb43ff4 100644
--- a/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll
+++ b/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll
@@ -12,8 +12,8 @@
define void @sint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %sint = load i32 addrspace(1) * %in
+ %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %sint = load i32, i32 addrspace(1) * %in
%conv = sitofp i32 %sint to float
%0 = insertelement <4 x float> undef, float %conv, i32 0
%splat = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer
@@ -26,8 +26,8 @@ entry:
define void @uint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %uint = load i32 addrspace(1) * %in
+ %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %uint = load i32, i32 addrspace(1) * %in
%conv = uitofp i32 %uint to float
%0 = insertelement <4 x float> undef, float %conv, i32 0
%splat = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer
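; Both tests above build a splat through the canonical insertelement +
; shufflevector-with-zero-mask idiom. Illustrative standalone form of that
; idiom (assumed function name, not part of the test file):
define <4 x float> @splat_sketch(float %s) {
  %ins = insertelement <4 x float> undef, float %s, i32 0
  %splat = shufflevector <4 x float> %ins, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %splat
}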
diff --git a/test/CodeGen/R600/disconnected-predset-break-bug.ll b/test/CodeGen/R600/disconnected-predset-break-bug.ll
index 858e4b9..cdd2c0c 100644
--- a/test/CodeGen/R600/disconnected-predset-break-bug.ll
+++ b/test/CodeGen/R600/disconnected-predset-break-bug.ll
@@ -18,7 +18,7 @@ for.body: ; preds = %for.body, %entry
%i.07.in = phi i32 [ %i.07, %for.body ], [ %iterations, %entry ]
%ai.06 = phi i32 [ %add, %for.body ], [ 0, %entry ]
%i.07 = add nsw i32 %i.07.in, -1
- %arrayidx = getelementptr inbounds i32 addrspace(1)* %out, i32 %ai.06
+ %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %ai.06
store i32 %i.07, i32 addrspace(1)* %arrayidx, align 4
%add = add nsw i32 %ai.06, 1
%exitcond = icmp eq i32 %add, %iterations
diff --git a/test/CodeGen/R600/dot4-folding.ll b/test/CodeGen/R600/dot4-folding.ll
index dca6a59..4df7b63 100644
--- a/test/CodeGen/R600/dot4-folding.ll
+++ b/test/CodeGen/R600/dot4-folding.ll
@@ -14,8 +14,8 @@
define void @main(float addrspace(1)* %out) {
main_body:
- %0 = load <4 x float> addrspace(8)* null
- %1 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %0 = load <4 x float>, <4 x float> addrspace(8)* null
+ %1 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%2 = call float @llvm.AMDGPU.dp4(<4 x float> %0,<4 x float> %1)
%3 = insertelement <4 x float> undef, float %2, i32 0
call void @llvm.R600.store.swizzle(<4 x float> %3, i32 0, i32 0)
diff --git a/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll b/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll
index 41afd50..c381fc4 100644
--- a/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll
+++ b/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll
@@ -33,20 +33,20 @@ for.body: ; preds = %for.body, %entry
%offset.02 = phi i32 [ %mul, %entry ], [ %add14, %for.body ]
%k.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
tail call void @llvm.AMDGPU.barrier.local() #1
- %arrayidx = getelementptr inbounds float addrspace(3)* %lptr, i32 %offset.02
- %tmp = load float addrspace(3)* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds float, float addrspace(3)* %lptr, i32 %offset.02
+ %tmp = load float, float addrspace(3)* %arrayidx, align 4
%add1 = add nsw i32 %offset.02, 1
- %arrayidx2 = getelementptr inbounds float addrspace(3)* %lptr, i32 %add1
- %tmp1 = load float addrspace(3)* %arrayidx2, align 4
+ %arrayidx2 = getelementptr inbounds float, float addrspace(3)* %lptr, i32 %add1
+ %tmp1 = load float, float addrspace(3)* %arrayidx2, align 4
%add3 = add nsw i32 %offset.02, 32
- %arrayidx4 = getelementptr inbounds float addrspace(3)* %lptr, i32 %add3
- %tmp2 = load float addrspace(3)* %arrayidx4, align 4
+ %arrayidx4 = getelementptr inbounds float, float addrspace(3)* %lptr, i32 %add3
+ %tmp2 = load float, float addrspace(3)* %arrayidx4, align 4
%add5 = add nsw i32 %offset.02, 33
- %arrayidx6 = getelementptr inbounds float addrspace(3)* %lptr, i32 %add5
- %tmp3 = load float addrspace(3)* %arrayidx6, align 4
+ %arrayidx6 = getelementptr inbounds float, float addrspace(3)* %lptr, i32 %add5
+ %tmp3 = load float, float addrspace(3)* %arrayidx6, align 4
%add7 = add nsw i32 %offset.02, 64
- %arrayidx8 = getelementptr inbounds float addrspace(3)* %lptr, i32 %add7
- %tmp4 = load float addrspace(3)* %arrayidx8, align 4
+ %arrayidx8 = getelementptr inbounds float, float addrspace(3)* %lptr, i32 %add7
+ %tmp4 = load float, float addrspace(3)* %arrayidx8, align 4
%add9 = fadd float %tmp, %tmp1
%add10 = fadd float %add9, %tmp2
%add11 = fadd float %add10, %tmp3
@@ -59,7 +59,7 @@ for.body: ; preds = %for.body, %entry
for.end: ; preds = %for.body
%tmp5 = sext i32 %x.i to i64
- %arrayidx15 = getelementptr inbounds float addrspace(1)* %out, i64 %tmp5
+ %arrayidx15 = getelementptr inbounds float, float addrspace(1)* %out, i64 %tmp5
store float %add13, float addrspace(1)* %arrayidx15, align 4
ret void
}
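; The loop body above reads at small constant element deltas (+0, +1, +32,
; +33, +64) from one varying base, the shape the DS addressing-mode logic
; tries to fold into instruction offset immediates. A reduced sketch of that
; shape (illustrative, assumed function name):
define float @ds_const_delta_sketch(float addrspace(3)* %base) {
  %p1 = getelementptr inbounds float, float addrspace(3)* %base, i32 1
  %a = load float, float addrspace(3)* %base, align 4
  %b = load float, float addrspace(3)* %p1, align 4
  %sum = fadd float %a, %b
  ret float %sum
}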
diff --git a/test/CodeGen/R600/ds_read2.ll b/test/CodeGen/R600/ds_read2.ll
index c06b0b1..7110a90 100644
--- a/test/CodeGen/R600/ds_read2.ll
+++ b/test/CodeGen/R600/ds_read2.ll
@@ -14,13 +14,13 @@
; SI: s_endpgm
define void @simple_read2_f32(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
- %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %val0 = load float, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 8
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
- %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %val1 = load float, float addrspace(3)* %arrayidx1, align 4
%sum = fadd float %val0, %val1
- %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
store float %sum, float addrspace(1)* %out.gep, align 4
ret void
}
@@ -33,13 +33,13 @@ define void @simple_read2_f32(float addrspace(1)* %out) #0 {
; SI: s_endpgm
define void @simple_read2_f32_max_offset(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
- %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %val0 = load float, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 255
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
- %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %val1 = load float, float addrspace(3)* %arrayidx1, align 4
%sum = fadd float %val0, %val1
- %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
store float %sum, float addrspace(1)* %out.gep, align 4
ret void
}
@@ -51,13 +51,13 @@ define void @simple_read2_f32_max_offset(float addrspace(1)* %out) #0 {
; SI: s_endpgm
define void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
- %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %val0 = load float, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 257
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
- %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %val1 = load float, float addrspace(3)* %arrayidx1, align 4
%sum = fadd float %val0, %val1
- %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
store float %sum, float addrspace(1)* %out.gep, align 4
ret void
}
@@ -69,25 +69,25 @@ define void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 {
define void @simple_read2_f32_x2(float addrspace(1)* %out) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 0
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
- %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
+ %val0 = load float, float addrspace(3)* %arrayidx0, align 4
%idx.1 = add nsw i32 %tid.x, 8
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
- %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
+ %val1 = load float, float addrspace(3)* %arrayidx1, align 4
%sum.0 = fadd float %val0, %val1
%idx.2 = add nsw i32 %tid.x, 11
- %arrayidx2 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
- %val2 = load float addrspace(3)* %arrayidx2, align 4
+ %arrayidx2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
+ %val2 = load float, float addrspace(3)* %arrayidx2, align 4
%idx.3 = add nsw i32 %tid.x, 27
- %arrayidx3 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
- %val3 = load float addrspace(3)* %arrayidx3, align 4
+ %arrayidx3 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
+ %val3 = load float, float addrspace(3)* %arrayidx3, align 4
%sum.1 = fadd float %val2, %val3
%sum = fadd float %sum.0, %sum.1
- %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %idx.0
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %idx.0
store float %sum, float addrspace(1)* %out.gep, align 4
ret void
}
@@ -101,27 +101,27 @@ define void @simple_read2_f32_x2(float addrspace(1)* %out) #0 {
define void @simple_read2_f32_x2_barrier(float addrspace(1)* %out) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 0
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
- %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
+ %val0 = load float, float addrspace(3)* %arrayidx0, align 4
%idx.1 = add nsw i32 %tid.x, 8
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
- %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
+ %val1 = load float, float addrspace(3)* %arrayidx1, align 4
%sum.0 = fadd float %val0, %val1
call void @llvm.AMDGPU.barrier.local() #2
%idx.2 = add nsw i32 %tid.x, 11
- %arrayidx2 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
- %val2 = load float addrspace(3)* %arrayidx2, align 4
+ %arrayidx2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
+ %val2 = load float, float addrspace(3)* %arrayidx2, align 4
%idx.3 = add nsw i32 %tid.x, 27
- %arrayidx3 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
- %val3 = load float addrspace(3)* %arrayidx3, align 4
+ %arrayidx3 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
+ %val3 = load float, float addrspace(3)* %arrayidx3, align 4
%sum.1 = fadd float %val2, %val3
%sum = fadd float %sum.0, %sum.1
- %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %idx.0
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %idx.0
store float %sum, float addrspace(1)* %out.gep, align 4
ret void
}
@@ -136,25 +136,25 @@ define void @simple_read2_f32_x2_barrier(float addrspace(1)* %out) #0 {
define void @simple_read2_f32_x2_nonzero_base(float addrspace(1)* %out) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
- %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
+ %val0 = load float, float addrspace(3)* %arrayidx0, align 4
%idx.1 = add nsw i32 %tid.x, 8
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
- %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
+ %val1 = load float, float addrspace(3)* %arrayidx1, align 4
%sum.0 = fadd float %val0, %val1
%idx.2 = add nsw i32 %tid.x, 11
- %arrayidx2 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
- %val2 = load float addrspace(3)* %arrayidx2, align 4
+ %arrayidx2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
+ %val2 = load float, float addrspace(3)* %arrayidx2, align 4
%idx.3 = add nsw i32 %tid.x, 27
- %arrayidx3 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
- %val3 = load float addrspace(3)* %arrayidx3, align 4
+ %arrayidx3 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
+ %val3 = load float, float addrspace(3)* %arrayidx3, align 4
%sum.1 = fadd float %val2, %val3
%sum = fadd float %sum.0, %sum.1
- %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %idx.0
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %idx.0
store float %sum, float addrspace(1)* %out.gep, align 4
ret void
}
@@ -174,14 +174,14 @@ define void @read2_ptr_is_subreg_arg_f32(float addrspace(1)* %out, <2 x float ad
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
%index.1 = insertelement <2 x i32> %index.0, i32 8, i32 0
- %gep = getelementptr inbounds <2 x float addrspace(3)*> %lds.ptr, <2 x i32> %index.1
+ %gep = getelementptr inbounds float, <2 x float addrspace(3)*> %lds.ptr, <2 x i32> %index.1
%gep.0 = extractelement <2 x float addrspace(3)*> %gep, i32 0
%gep.1 = extractelement <2 x float addrspace(3)*> %gep, i32 1
- %val0 = load float addrspace(3)* %gep.0, align 4
- %val1 = load float addrspace(3)* %gep.1, align 4
+ %val0 = load float, float addrspace(3)* %gep.0, align 4
+ %val1 = load float, float addrspace(3)* %gep.1, align 4
%add.x = add nsw i32 %x.i, 8
%sum = fadd float %val0, %val1
- %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
store float %sum, float addrspace(1)* %out.gep, align 4
ret void
}
@@ -200,18 +200,18 @@ define void @read2_ptr_is_subreg_arg_offset_f32(float addrspace(1)* %out, <2 x f
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
%index.1 = insertelement <2 x i32> %index.0, i32 8, i32 0
- %gep = getelementptr inbounds <2 x float addrspace(3)*> %lds.ptr, <2 x i32> %index.1
+ %gep = getelementptr inbounds float, <2 x float addrspace(3)*> %lds.ptr, <2 x i32> %index.1
%gep.0 = extractelement <2 x float addrspace(3)*> %gep, i32 0
%gep.1 = extractelement <2 x float addrspace(3)*> %gep, i32 1
; Apply an additional offset after the vector that will be more obviously folded.
- %gep.1.offset = getelementptr float addrspace(3)* %gep.1, i32 8
+ %gep.1.offset = getelementptr float, float addrspace(3)* %gep.1, i32 8
- %val0 = load float addrspace(3)* %gep.0, align 4
- %val1 = load float addrspace(3)* %gep.1.offset, align 4
+ %val0 = load float, float addrspace(3)* %gep.0, align 4
+ %val1 = load float, float addrspace(3)* %gep.1.offset, align 4
%add.x = add nsw i32 %x.i, 8
%sum = fadd float %val0, %val1
- %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
store float %sum, float addrspace(1)* %out.gep, align 4
ret void
}
@@ -228,14 +228,14 @@ define void @read2_ptr_is_subreg_f32(float addrspace(1)* %out) #0 {
%x.i.v.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
%x.i.v.1 = insertelement <2 x i32> %x.i.v.0, i32 %x.i, i32 1
%idx = add <2 x i32> %x.i.v.1, <i32 0, i32 8>
- %gep = getelementptr inbounds <2 x [512 x float] addrspace(3)*> %ptr.1, <2 x i32> <i32 0, i32 0>, <2 x i32> %idx
+ %gep = getelementptr inbounds [512 x float], <2 x [512 x float] addrspace(3)*> %ptr.1, <2 x i32> <i32 0, i32 0>, <2 x i32> %idx
%gep.0 = extractelement <2 x float addrspace(3)*> %gep, i32 0
%gep.1 = extractelement <2 x float addrspace(3)*> %gep, i32 1
- %val0 = load float addrspace(3)* %gep.0, align 4
- %val1 = load float addrspace(3)* %gep.1, align 4
+ %val0 = load float, float addrspace(3)* %gep.0, align 4
+ %val1 = load float, float addrspace(3)* %gep.1, align 4
%add.x = add nsw i32 %x.i, 8
%sum = fadd float %val0, %val1
- %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
store float %sum, float addrspace(1)* %out.gep, align 4
ret void
}
@@ -247,13 +247,13 @@ define void @read2_ptr_is_subreg_f32(float addrspace(1)* %out) #0 {
; SI: s_endpgm
define void @simple_read2_f32_volatile_0(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
- %val0 = load volatile float addrspace(3)* %arrayidx0, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %val0 = load volatile float, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 8
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
- %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %val1 = load float, float addrspace(3)* %arrayidx1, align 4
%sum = fadd float %val0, %val1
- %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
store float %sum, float addrspace(1)* %out.gep, align 4
ret void
}
@@ -265,13 +265,13 @@ define void @simple_read2_f32_volatile_0(float addrspace(1)* %out) #0 {
; SI: s_endpgm
define void @simple_read2_f32_volatile_1(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
- %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %val0 = load float, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 8
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
- %val1 = load volatile float addrspace(3)* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %val1 = load volatile float, float addrspace(3)* %arrayidx1, align 4
%sum = fadd float %val0, %val1
- %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
store float %sum, float addrspace(1)* %out.gep, align 4
ret void
}
@@ -284,13 +284,13 @@ define void @simple_read2_f32_volatile_1(float addrspace(1)* %out) #0 {
; SI: s_endpgm
define void @unaligned_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %arrayidx0 = getelementptr inbounds float addrspace(3)* %lds, i32 %x.i
- %val0 = load float addrspace(3)* %arrayidx0, align 1
+ %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %x.i
+ %val0 = load float, float addrspace(3)* %arrayidx0, align 1
%add.x = add nsw i32 %x.i, 8
- %arrayidx1 = getelementptr inbounds float addrspace(3)* %lds, i32 %add.x
- %val1 = load float addrspace(3)* %arrayidx1, align 1
+ %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x
+ %val1 = load float, float addrspace(3)* %arrayidx1, align 1
%sum = fadd float %val0, %val1
- %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
store float %sum, float addrspace(1)* %out.gep, align 4
ret void
}
@@ -300,13 +300,13 @@ define void @unaligned_read2_f32(float addrspace(1)* %out, float addrspace(3)* %
; SI: s_endpgm
define void @misaligned_2_simple_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %arrayidx0 = getelementptr inbounds float addrspace(3)* %lds, i32 %x.i
- %val0 = load float addrspace(3)* %arrayidx0, align 2
+ %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %x.i
+ %val0 = load float, float addrspace(3)* %arrayidx0, align 2
%add.x = add nsw i32 %x.i, 8
- %arrayidx1 = getelementptr inbounds float addrspace(3)* %lds, i32 %add.x
- %val1 = load float addrspace(3)* %arrayidx1, align 2
+ %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x
+ %val1 = load float, float addrspace(3)* %arrayidx1, align 2
%sum = fadd float %val0, %val1
- %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
store float %sum, float addrspace(1)* %out.gep, align 4
ret void
}
@@ -319,13 +319,13 @@ define void @misaligned_2_simple_read2_f32(float addrspace(1)* %out, float addrs
; SI: s_endpgm
define void @simple_read2_f64(double addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
- %val0 = load double addrspace(3)* %arrayidx0, align 8
+ %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
+ %val0 = load double, double addrspace(3)* %arrayidx0, align 8
%add.x = add nsw i32 %x.i, 8
- %arrayidx1 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
- %val1 = load double addrspace(3)* %arrayidx1, align 8
+ %arrayidx1 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
+ %val1 = load double, double addrspace(3)* %arrayidx1, align 8
%sum = fadd double %val0, %val1
- %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
store double %sum, double addrspace(1)* %out.gep, align 8
ret void
}
@@ -335,13 +335,13 @@ define void @simple_read2_f64(double addrspace(1)* %out) #0 {
; SI: s_endpgm
define void @simple_read2_f64_max_offset(double addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
- %val0 = load double addrspace(3)* %arrayidx0, align 8
+ %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
+ %val0 = load double, double addrspace(3)* %arrayidx0, align 8
%add.x = add nsw i32 %x.i, 255
- %arrayidx1 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
- %val1 = load double addrspace(3)* %arrayidx1, align 8
+ %arrayidx1 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
+ %val1 = load double, double addrspace(3)* %arrayidx1, align 8
%sum = fadd double %val0, %val1
- %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
store double %sum, double addrspace(1)* %out.gep, align 8
ret void
}
@@ -353,13 +353,13 @@ define void @simple_read2_f64_max_offset(double addrspace(1)* %out) #0 {
; SI: s_endpgm
define void @simple_read2_f64_too_far(double addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
- %val0 = load double addrspace(3)* %arrayidx0, align 8
+ %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
+ %val0 = load double, double addrspace(3)* %arrayidx0, align 8
%add.x = add nsw i32 %x.i, 257
- %arrayidx1 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
- %val1 = load double addrspace(3)* %arrayidx1, align 8
+ %arrayidx1 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
+ %val1 = load double, double addrspace(3)* %arrayidx1, align 8
%sum = fadd double %val0, %val1
- %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
store double %sum, double addrspace(1)* %out.gep, align 8
ret void
}
@@ -371,13 +371,13 @@ define void @simple_read2_f64_too_far(double addrspace(1)* %out) #0 {
; SI: s_endpgm
define void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %x.i
- %val0 = load double addrspace(3)* %arrayidx0, align 4
+ %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
+ %val0 = load double, double addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 7
- %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x
- %val1 = load double addrspace(3)* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x
+ %val1 = load double, double addrspace(3)* %arrayidx1, align 4
%sum = fadd double %val0, %val1
- %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
store double %sum, double addrspace(1)* %out.gep, align 4
ret void
}
@@ -388,8 +388,8 @@ define void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3)
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
define void @load_constant_adjacent_offsets(i32 addrspace(1)* %out) {
- %val0 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
- %val1 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
+ %val0 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
+ %val1 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
%sum = add i32 %val0, %val1
store i32 %sum, i32 addrspace(1)* %out, align 4
ret void
@@ -399,8 +399,8 @@ define void @load_constant_adjacent_offsets(i32 addrspace(1)* %out) {
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:2
define void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) {
- %val0 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
- %val1 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
+ %val0 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
+ %val1 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
%sum = add i32 %val0, %val1
store i32 %sum, i32 addrspace(1)* %out, align 4
ret void
@@ -413,8 +413,8 @@ define void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) {
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:2 offset1:3
define void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) {
- %val0 = load i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
- %val1 = load i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
+ %val0 = load i64, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
+ %val1 = load i64, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
%sum = add i64 %val0, %val1
store i64 %sum, i64 addrspace(1)* %out, align 8
ret void
@@ -429,8 +429,8 @@ define void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) {
; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE1]] offset0:0 offset1:1
; SI: s_endpgm
define void @load_misaligned64_constant_large_offsets(i64 addrspace(1)* %out) {
- %val0 = load i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
- %val1 = load i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4
+ %val0 = load i64, i64 addrspace(3)* getelementptr inbounds ([4096 x i64], [4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
+ %val1 = load i64, i64 addrspace(3)* getelementptr inbounds ([4096 x i64], [4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4
%sum = add i64 %val0, %val1
store i64 %sum, i64 addrspace(1)* %out, align 8
ret void
@@ -442,34 +442,34 @@ define void @load_misaligned64_constant_large_offsets(i64 addrspace(1)* %out) {
define void @sgemm_inner_loop_read2_sequence(float addrspace(1)* %C, i32 %lda, i32 %ldb) #0 {
%x.i = tail call i32 @llvm.r600.read.tgid.x() #1
%y.i = tail call i32 @llvm.r600.read.tidig.y() #1
- %arrayidx44 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %x.i
- %tmp16 = load float addrspace(3)* %arrayidx44, align 4
+ %arrayidx44 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %x.i
+ %tmp16 = load float, float addrspace(3)* %arrayidx44, align 4
%add47 = add nsw i32 %x.i, 1
- %arrayidx48 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add47
- %tmp17 = load float addrspace(3)* %arrayidx48, align 4
+ %arrayidx48 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add47
+ %tmp17 = load float, float addrspace(3)* %arrayidx48, align 4
%add51 = add nsw i32 %x.i, 16
- %arrayidx52 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add51
- %tmp18 = load float addrspace(3)* %arrayidx52, align 4
+ %arrayidx52 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add51
+ %tmp18 = load float, float addrspace(3)* %arrayidx52, align 4
%add55 = add nsw i32 %x.i, 17
- %arrayidx56 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add55
- %tmp19 = load float addrspace(3)* %arrayidx56, align 4
- %arrayidx60 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %y.i
- %tmp20 = load float addrspace(3)* %arrayidx60, align 4
+ %arrayidx56 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add55
+ %tmp19 = load float, float addrspace(3)* %arrayidx56, align 4
+ %arrayidx60 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %y.i
+ %tmp20 = load float, float addrspace(3)* %arrayidx60, align 4
%add63 = add nsw i32 %y.i, 1
- %arrayidx64 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add63
- %tmp21 = load float addrspace(3)* %arrayidx64, align 4
+ %arrayidx64 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add63
+ %tmp21 = load float, float addrspace(3)* %arrayidx64, align 4
%add67 = add nsw i32 %y.i, 32
- %arrayidx68 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add67
- %tmp22 = load float addrspace(3)* %arrayidx68, align 4
+ %arrayidx68 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add67
+ %tmp22 = load float, float addrspace(3)* %arrayidx68, align 4
%add71 = add nsw i32 %y.i, 33
- %arrayidx72 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add71
- %tmp23 = load float addrspace(3)* %arrayidx72, align 4
+ %arrayidx72 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add71
+ %tmp23 = load float, float addrspace(3)* %arrayidx72, align 4
%add75 = add nsw i32 %y.i, 64
- %arrayidx76 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add75
- %tmp24 = load float addrspace(3)* %arrayidx76, align 4
+ %arrayidx76 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add75
+ %tmp24 = load float, float addrspace(3)* %arrayidx76, align 4
%add79 = add nsw i32 %y.i, 65
- %arrayidx80 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add79
- %tmp25 = load float addrspace(3)* %arrayidx80, align 4
+ %arrayidx80 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add79
+ %tmp25 = load float, float addrspace(3)* %arrayidx80, align 4
%sum.0 = fadd float %tmp16, %tmp17
%sum.1 = fadd float %sum.0, %tmp18
%sum.2 = fadd float %sum.1, %tmp19
@@ -484,13 +484,13 @@ define void @sgemm_inner_loop_read2_sequence(float addrspace(1)* %C, i32 %lda, i
}
define void @misaligned_read2_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(3)* %in) #0 {
- %load = load <2 x i32> addrspace(3)* %in, align 4
+ %load = load <2 x i32>, <2 x i32> addrspace(3)* %in, align 4
store <2 x i32> %load, <2 x i32> addrspace(1)* %out, align 8
ret void
}
define void @misaligned_read2_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %in) #0 {
- %load = load i64 addrspace(3)* %in, align 4
+ %load = load i64, i64 addrspace(3)* %in, align 4
store i64 %load, i64 addrspace(1)* %out, align 8
ret void
}
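; ds_read2.ll exercises merging two LDS loads into a single ds_read2_b32 /
; ds_read2_b64, which is possible when both offsets fit the instruction's two
; 8-bit per-element offset fields (0..255 elements, hence the max_offset and
; too_far variants above). Reduced sketch of a pairable case, assuming a
; 512-element LDS array like the tests' @lds:
@lds.sketch = internal addrspace(3) global [512 x float] undef, align 4

define void @read2_pair_sketch(float addrspace(1)* %out, i32 %tid) {
  %idx1 = add nsw i32 %tid, 255
  %p0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds.sketch, i32 0, i32 %tid
  %p1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds.sketch, i32 0, i32 %idx1
  %v0 = load float, float addrspace(3)* %p0, align 4
  %v1 = load float, float addrspace(3)* %p1, align 4
  %sum = fadd float %v0, %v1
  store float %sum, float addrspace(1)* %out, align 4
  ret void
}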
diff --git a/test/CodeGen/R600/ds_read2_offset_order.ll b/test/CodeGen/R600/ds_read2_offset_order.ll
index 44306bc..9ea9a5a 100644
--- a/test/CodeGen/R600/ds_read2_offset_order.ll
+++ b/test/CodeGen/R600/ds_read2_offset_order.ll
@@ -14,31 +14,31 @@
define void @offset_order(float addrspace(1)* %out) {
entry:
- %ptr0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 0
- %val0 = load float addrspace(3)* %ptr0
+ %ptr0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 0
+ %val0 = load float, float addrspace(3)* %ptr0
- %ptr1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 256
- %val1 = load float addrspace(3)* %ptr1
+ %ptr1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 256
+ %val1 = load float, float addrspace(3)* %ptr1
%add1 = fadd float %val0, %val1
- %ptr2 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 3
- %val2 = load float addrspace(3)* %ptr2
+ %ptr2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 3
+ %val2 = load float, float addrspace(3)* %ptr2
%add2 = fadd float %add1, %val2
- %ptr3 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 2
- %val3 = load float addrspace(3)* %ptr3
+ %ptr3 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 2
+ %val3 = load float, float addrspace(3)* %ptr3
%add3 = fadd float %add2, %val3
- %ptr4 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 12
- %val4 = load float addrspace(3)* %ptr4
+ %ptr4 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 12
+ %val4 = load float, float addrspace(3)* %ptr4
%add4 = fadd float %add3, %val4
- %ptr5 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 14
- %val5 = load float addrspace(3)* %ptr5
+ %ptr5 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 14
+ %val5 = load float, float addrspace(3)* %ptr5
%add5 = fadd float %add4, %val5
- %ptr6 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 11
- %val6 = load float addrspace(3)* %ptr6
+ %ptr6 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 11
+ %val6 = load float, float addrspace(3)* %ptr6
%add6 = fadd float %add5, %val6
store float %add6, float addrspace(1)* %out
ret void
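; This test's point is that pairing follows the offsets, not program order:
; the loads at indices 3 and 2 above are issued out of order yet can still
; form one read2 pair. Reduced out-of-order sketch (illustrative, assumed
; function name):
define float @offset_order_sketch(float addrspace(3)* %base) {
  %p3 = getelementptr inbounds float, float addrspace(3)* %base, i32 3
  %p2 = getelementptr inbounds float, float addrspace(3)* %base, i32 2
  %v3 = load float, float addrspace(3)* %p3, align 4
  %v2 = load float, float addrspace(3)* %p2, align 4
  %s = fadd float %v3, %v2
  ret float %s
}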
diff --git a/test/CodeGen/R600/ds_read2st64.ll b/test/CodeGen/R600/ds_read2st64.ll
index efd875e..482debb 100644
--- a/test/CodeGen/R600/ds_read2st64.ll
+++ b/test/CodeGen/R600/ds_read2st64.ll
@@ -12,13 +12,13 @@
; SI: s_endpgm
define void @simple_read2st64_f32_0_1(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
- %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %val0 = load float, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 64
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
- %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %val1 = load float, float addrspace(3)* %arrayidx1, align 4
%sum = fadd float %val0, %val1
- %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
store float %sum, float addrspace(1)* %out.gep, align 4
ret void
}
@@ -32,13 +32,13 @@ define void @simple_read2st64_f32_0_1(float addrspace(1)* %out) #0 {
define void @simple_read2st64_f32_1_2(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 64
- %arrayidx0 = getelementptr inbounds float addrspace(3)* %lds, i32 %add.x.0
- %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.0
+ %val0 = load float, float addrspace(3)* %arrayidx0, align 4
%add.x.1 = add nsw i32 %x.i, 128
- %arrayidx1 = getelementptr inbounds float addrspace(3)* %lds, i32 %add.x.1
- %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.1
+ %val1 = load float, float addrspace(3)* %arrayidx1, align 4
%sum = fadd float %val0, %val1
- %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
store float %sum, float addrspace(1)* %out.gep, align 4
ret void
}
@@ -52,13 +52,13 @@ define void @simple_read2st64_f32_1_2(float addrspace(1)* %out, float addrspace(
define void @simple_read2st64_f32_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 64
- %arrayidx0 = getelementptr inbounds float addrspace(3)* %lds, i32 %add.x.0
- %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.0
+ %val0 = load float, float addrspace(3)* %arrayidx0, align 4
%add.x.1 = add nsw i32 %x.i, 16320
- %arrayidx1 = getelementptr inbounds float addrspace(3)* %lds, i32 %add.x.1
- %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.1
+ %val1 = load float, float addrspace(3)* %arrayidx1, align 4
%sum = fadd float %val0, %val1
- %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
store float %sum, float addrspace(1)* %out.gep, align 4
ret void
}
@@ -72,13 +72,13 @@ define void @simple_read2st64_f32_max_offset(float addrspace(1)* %out, float add
define void @simple_read2st64_f32_over_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 64
- %arrayidx0 = getelementptr inbounds float addrspace(3)* %lds, i32 %add.x.0
- %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.0
+ %val0 = load float, float addrspace(3)* %arrayidx0, align 4
%add.x.1 = add nsw i32 %x.i, 16384
- %arrayidx1 = getelementptr inbounds float addrspace(3)* %lds, i32 %add.x.1
- %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.1
+ %val1 = load float, float addrspace(3)* %arrayidx1, align 4
%sum = fadd float %val0, %val1
- %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
store float %sum, float addrspace(1)* %out.gep, align 4
ret void
}
@@ -88,13 +88,13 @@ define void @simple_read2st64_f32_over_max_offset(float addrspace(1)* %out, floa
; SI: s_endpgm
define void @odd_invalid_read2st64_f32_0(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
- %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %val0 = load float, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 63
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
- %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %val1 = load float, float addrspace(3)* %arrayidx1, align 4
%sum = fadd float %val0, %val1
- %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
store float %sum, float addrspace(1)* %out.gep, align 4
ret void
}
@@ -105,13 +105,13 @@ define void @odd_invalid_read2st64_f32_0(float addrspace(1)* %out) #0 {
define void @odd_invalid_read2st64_f32_1(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 64
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.0
- %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.0
+ %val0 = load float, float addrspace(3)* %arrayidx0, align 4
%add.x.1 = add nsw i32 %x.i, 127
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.1
- %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.1
+ %val1 = load float, float addrspace(3)* %arrayidx1, align 4
%sum = fadd float %val0, %val1
- %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
store float %sum, float addrspace(1)* %out.gep, align 4
ret void
}
@@ -124,13 +124,13 @@ define void @odd_invalid_read2st64_f32_1(float addrspace(1)* %out) #0 {
; SI: s_endpgm
define void @simple_read2st64_f64_0_1(double addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
- %val0 = load double addrspace(3)* %arrayidx0, align 8
+ %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
+ %val0 = load double, double addrspace(3)* %arrayidx0, align 8
%add.x = add nsw i32 %x.i, 64
- %arrayidx1 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
- %val1 = load double addrspace(3)* %arrayidx1, align 8
+ %arrayidx1 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
+ %val1 = load double, double addrspace(3)* %arrayidx1, align 8
%sum = fadd double %val0, %val1
- %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
store double %sum, double addrspace(1)* %out.gep, align 8
ret void
}
@@ -144,13 +144,13 @@ define void @simple_read2st64_f64_0_1(double addrspace(1)* %out) #0 {
define void @simple_read2st64_f64_1_2(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 64
- %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.0
- %val0 = load double addrspace(3)* %arrayidx0, align 8
+ %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
+ %val0 = load double, double addrspace(3)* %arrayidx0, align 8
%add.x.1 = add nsw i32 %x.i, 128
- %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.1
- %val1 = load double addrspace(3)* %arrayidx1, align 8
+ %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.1
+ %val1 = load double, double addrspace(3)* %arrayidx1, align 8
%sum = fadd double %val0, %val1
- %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
store double %sum, double addrspace(1)* %out.gep, align 8
ret void
}
@@ -163,13 +163,13 @@ define void @simple_read2st64_f64_1_2(double addrspace(1)* %out, double addrspac
; SI: s_endpgm
define void @misaligned_read2st64_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %x.i
- %val0 = load double addrspace(3)* %arrayidx0, align 4
+ %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
+ %val0 = load double, double addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 64
- %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x
- %val1 = load double addrspace(3)* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x
+ %val1 = load double, double addrspace(3)* %arrayidx1, align 4
%sum = fadd double %val0, %val1
- %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
store double %sum, double addrspace(1)* %out.gep, align 4
ret void
}
@@ -184,13 +184,13 @@ define void @misaligned_read2st64_f64(double addrspace(1)* %out, double addrspac
define void @simple_read2st64_f64_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 256
- %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.0
- %val0 = load double addrspace(3)* %arrayidx0, align 8
+ %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
+ %val0 = load double, double addrspace(3)* %arrayidx0, align 8
%add.x.1 = add nsw i32 %x.i, 8128
- %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.1
- %val1 = load double addrspace(3)* %arrayidx1, align 8
+ %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.1
+ %val1 = load double, double addrspace(3)* %arrayidx1, align 8
%sum = fadd double %val0, %val1
- %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
store double %sum, double addrspace(1)* %out.gep, align 8
ret void
}
@@ -204,13 +204,13 @@ define void @simple_read2st64_f64_max_offset(double addrspace(1)* %out, double a
define void @simple_read2st64_f64_over_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 64
- %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.0
- %val0 = load double addrspace(3)* %arrayidx0, align 8
+ %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
+ %val0 = load double, double addrspace(3)* %arrayidx0, align 8
%add.x.1 = add nsw i32 %x.i, 8192
- %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.1
- %val1 = load double addrspace(3)* %arrayidx1, align 8
+ %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.1
+ %val1 = load double, double addrspace(3)* %arrayidx1, align 8
%sum = fadd double %val0, %val1
- %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
store double %sum, double addrspace(1)* %out.gep, align 8
ret void
}
@@ -221,13 +221,13 @@ define void @simple_read2st64_f64_over_max_offset(double addrspace(1)* %out, dou
define void @invalid_read2st64_f64_odd_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 64
- %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.0
- %val0 = load double addrspace(3)* %arrayidx0, align 8
+ %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
+ %val0 = load double, double addrspace(3)* %arrayidx0, align 8
%add.x.1 = add nsw i32 %x.i, 8129
- %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.1
- %val1 = load double addrspace(3)* %arrayidx1, align 8
+ %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.1
+ %val1 = load double, double addrspace(3)* %arrayidx1, align 8
%sum = fadd double %val0, %val1
- %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
store double %sum, double addrspace(1)* %out.gep, align 8
ret void
}
@@ -241,13 +241,13 @@ define void @invalid_read2st64_f64_odd_offset(double addrspace(1)* %out, double
; SI: s_endpgm
define void @byte_size_only_divisible_64_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %x.i
- %val0 = load double addrspace(3)* %arrayidx0, align 8
+ %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
+ %val0 = load double, double addrspace(3)* %arrayidx0, align 8
%add.x = add nsw i32 %x.i, 8
- %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x
- %val1 = load double addrspace(3)* %arrayidx1, align 8
+ %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x
+ %val1 = load double, double addrspace(3)* %arrayidx1, align 8
%sum = fadd double %val0, %val1
- %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
store double %sum, double addrspace(1)* %out.gep, align 4
ret void
}
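; The st64 variants scale both 8-bit offset fields by 64 elements, so a pair
; only forms when each offset is a multiple of 64 elements and the scaled
; field still fits in 8 bits, which is what the max_offset, over_max_offset
; and odd/invalid cases above probe. Reduced st64-pairable sketch
; (illustrative, assumed function name):
define float @read2st64_sketch(float addrspace(3)* %base, i32 %tid) {
  %idx1 = add nsw i32 %tid, 64
  %p0 = getelementptr inbounds float, float addrspace(3)* %base, i32 %tid
  %p1 = getelementptr inbounds float, float addrspace(3)* %base, i32 %idx1
  %v0 = load float, float addrspace(3)* %p0, align 4
  %v1 = load float, float addrspace(3)* %p1, align 4
  %s = fadd float %v0, %v1
  ret float %s
}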
diff --git a/test/CodeGen/R600/ds_write2.ll b/test/CodeGen/R600/ds_write2.ll
index e2db81a..1d94c57 100644
--- a/test/CodeGen/R600/ds_write2.ll
+++ b/test/CodeGen/R600/ds_write2.ll
@@ -11,12 +11,12 @@
; SI: s_endpgm
define void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %in.gep = getelementptr float addrspace(1)* %in, i32 %x.i
- %val = load float addrspace(1)* %in.gep, align 4
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %in.gep = getelementptr float, float addrspace(1)* %in, i32 %x.i
+ %val = load float, float addrspace(1)* %in.gep, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
store float %val, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 8
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
store float %val, float addrspace(3)* %arrayidx1, align 4
ret void
}
@@ -29,14 +29,14 @@ define void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1
; SI: s_endpgm
define void @simple_write2_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %in.gep.0 = getelementptr float addrspace(1)* %in, i32 %x.i
- %in.gep.1 = getelementptr float addrspace(1)* %in.gep.0, i32 1
- %val0 = load float addrspace(1)* %in.gep.0, align 4
- %val1 = load float addrspace(1)* %in.gep.1, align 4
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i
+ %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
+ %val0 = load float, float addrspace(1)* %in.gep.0, align 4
+ %val1 = load float, float addrspace(1)* %in.gep.1, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
store float %val0, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 8
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
store float %val1, float addrspace(3)* %arrayidx1, align 4
ret void
}
@@ -48,14 +48,14 @@ define void @simple_write2_two_val_f32(float addrspace(1)* %C, float addrspace(1
; SI: s_endpgm
define void @simple_write2_two_val_f32_volatile_0(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %in0.gep = getelementptr float addrspace(1)* %in0, i32 %x.i
- %in1.gep = getelementptr float addrspace(1)* %in1, i32 %x.i
- %val0 = load float addrspace(1)* %in0.gep, align 4
- %val1 = load float addrspace(1)* %in1.gep, align 4
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i
+ %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i
+ %val0 = load float, float addrspace(1)* %in0.gep, align 4
+ %val1 = load float, float addrspace(1)* %in1.gep, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
store volatile float %val0, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 8
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
store float %val1, float addrspace(3)* %arrayidx1, align 4
ret void
}
@@ -67,14 +67,14 @@ define void @simple_write2_two_val_f32_volatile_0(float addrspace(1)* %C, float
; SI: s_endpgm
define void @simple_write2_two_val_f32_volatile_1(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %in0.gep = getelementptr float addrspace(1)* %in0, i32 %x.i
- %in1.gep = getelementptr float addrspace(1)* %in1, i32 %x.i
- %val0 = load float addrspace(1)* %in0.gep, align 4
- %val1 = load float addrspace(1)* %in1.gep, align 4
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i
+ %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i
+ %val0 = load float, float addrspace(1)* %in0.gep, align 4
+ %val1 = load float, float addrspace(1)* %in1.gep, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
store float %val0, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 8
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
store volatile float %val1, float addrspace(3)* %arrayidx1, align 4
ret void
}
@@ -88,16 +88,16 @@ define void @simple_write2_two_val_f32_volatile_1(float addrspace(1)* %C, float
; SI: s_endpgm
define void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %in.gep.0 = getelementptr <2 x float> addrspace(1)* %in, i32 %x.i
- %in.gep.1 = getelementptr <2 x float> addrspace(1)* %in.gep.0, i32 1
- %val0 = load <2 x float> addrspace(1)* %in.gep.0, align 8
- %val1 = load <2 x float> addrspace(1)* %in.gep.1, align 8
+ %in.gep.0 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %x.i
+ %in.gep.1 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in.gep.0, i32 1
+ %val0 = load <2 x float>, <2 x float> addrspace(1)* %in.gep.0, align 8
+ %val1 = load <2 x float>, <2 x float> addrspace(1)* %in.gep.1, align 8
%val0.0 = extractelement <2 x float> %val0, i32 0
%val1.1 = extractelement <2 x float> %val1, i32 1
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
store float %val0.0, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 8
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
store float %val1.1, float addrspace(3)* %arrayidx1, align 4
ret void
}
@@ -109,14 +109,14 @@ define void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2
; SI: s_endpgm
define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %in.gep = getelementptr <2 x float> addrspace(1)* %in, i32 %x.i
- %val = load <2 x float> addrspace(1)* %in.gep, align 8
+ %in.gep = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %x.i
+ %val = load <2 x float>, <2 x float> addrspace(1)* %in.gep, align 8
%val0 = extractelement <2 x float> %val, i32 0
%val1 = extractelement <2 x float> %val, i32 1
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
store float %val0, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 8
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
store float %val1, float addrspace(3)* %arrayidx1, align 4
ret void
}
@@ -128,14 +128,14 @@ define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x floa
; SI: s_endpgm
define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x float> addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %in.gep = getelementptr <4 x float> addrspace(1)* %in, i32 %x.i
- %val = load <4 x float> addrspace(1)* %in.gep, align 16
+ %in.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 %x.i
+ %val = load <4 x float>, <4 x float> addrspace(1)* %in.gep, align 16
%val0 = extractelement <4 x float> %val, i32 0
%val1 = extractelement <4 x float> %val, i32 3
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
store float %val0, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 8
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
store float %val1, float addrspace(3)* %arrayidx1, align 4
ret void
}
@@ -148,14 +148,14 @@ define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x floa
; SI: s_endpgm
define void @simple_write2_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %in.gep.0 = getelementptr float addrspace(1)* %in, i32 %x.i
- %in.gep.1 = getelementptr float addrspace(1)* %in.gep.0, i32 1
- %val0 = load float addrspace(1)* %in.gep.0, align 4
- %val1 = load float addrspace(1)* %in.gep.1, align 4
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i
+ %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
+ %val0 = load float, float addrspace(1)* %in.gep.0, align 4
+ %val1 = load float, float addrspace(1)* %in.gep.1, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
store float %val0, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 255
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
store float %val1, float addrspace(3)* %arrayidx1, align 4
ret void
}
@@ -166,14 +166,14 @@ define void @simple_write2_two_val_max_offset_f32(float addrspace(1)* %C, float
; SI: s_endpgm
define void @simple_write2_two_val_too_far_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %in0.gep = getelementptr float addrspace(1)* %in0, i32 %x.i
- %in1.gep = getelementptr float addrspace(1)* %in1, i32 %x.i
- %val0 = load float addrspace(1)* %in0.gep, align 4
- %val1 = load float addrspace(1)* %in1.gep, align 4
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i
+ %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i
+ %val0 = load float, float addrspace(1)* %in0.gep, align 4
+ %val1 = load float, float addrspace(1)* %in1.gep, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
store float %val0, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 257
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
store float %val1, float addrspace(3)* %arrayidx1, align 4
ret void
}
@@ -184,25 +184,25 @@ define void @simple_write2_two_val_too_far_f32(float addrspace(1)* %C, float add
; SI: s_endpgm
define void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
- %in0.gep = getelementptr float addrspace(1)* %in0, i32 %tid.x
- %in1.gep = getelementptr float addrspace(1)* %in1, i32 %tid.x
- %val0 = load float addrspace(1)* %in0.gep, align 4
- %val1 = load float addrspace(1)* %in1.gep, align 4
+ %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %tid.x
+ %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %tid.x
+ %val0 = load float, float addrspace(1)* %in0.gep, align 4
+ %val1 = load float, float addrspace(1)* %in1.gep, align 4
%idx.0 = add nsw i32 %tid.x, 0
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
store float %val0, float addrspace(3)* %arrayidx0, align 4
%idx.1 = add nsw i32 %tid.x, 8
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
store float %val1, float addrspace(3)* %arrayidx1, align 4
%idx.2 = add nsw i32 %tid.x, 11
- %arrayidx2 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
+ %arrayidx2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
store float %val0, float addrspace(3)* %arrayidx2, align 4
%idx.3 = add nsw i32 %tid.x, 27
- %arrayidx3 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
+ %arrayidx3 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
store float %val1, float addrspace(3)* %arrayidx3, align 4
ret void
@@ -214,25 +214,25 @@ define void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, float addrspac
; SI: s_endpgm
define void @simple_write2_two_val_f32_x2_nonzero_base(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
- %in0.gep = getelementptr float addrspace(1)* %in0, i32 %tid.x
- %in1.gep = getelementptr float addrspace(1)* %in1, i32 %tid.x
- %val0 = load float addrspace(1)* %in0.gep, align 4
- %val1 = load float addrspace(1)* %in1.gep, align 4
+ %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %tid.x
+ %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %tid.x
+ %val0 = load float, float addrspace(1)* %in0.gep, align 4
+ %val1 = load float, float addrspace(1)* %in1.gep, align 4
%idx.0 = add nsw i32 %tid.x, 3
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
store float %val0, float addrspace(3)* %arrayidx0, align 4
%idx.1 = add nsw i32 %tid.x, 8
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
store float %val1, float addrspace(3)* %arrayidx1, align 4
%idx.2 = add nsw i32 %tid.x, 11
- %arrayidx2 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
+ %arrayidx2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
store float %val0, float addrspace(3)* %arrayidx2, align 4
%idx.3 = add nsw i32 %tid.x, 27
- %arrayidx3 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
+ %arrayidx3 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
store float %val1, float addrspace(3)* %arrayidx3, align 4
ret void
@@ -245,19 +245,19 @@ define void @simple_write2_two_val_f32_x2_nonzero_base(float addrspace(1)* %C, f
; SI: s_endpgm
define void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1, <2 x float addrspace(3)*> %lds.ptr) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %in0.gep = getelementptr float addrspace(1)* %in0, i32 %x.i
- %in1.gep = getelementptr float addrspace(1)* %in1, i32 %x.i
- %val0 = load float addrspace(1)* %in0.gep, align 4
- %val1 = load float addrspace(1)* %in1.gep, align 4
+ %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i
+ %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i
+ %val0 = load float, float addrspace(1)* %in0.gep, align 4
+ %val1 = load float, float addrspace(1)* %in1.gep, align 4
%index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
%index.1 = insertelement <2 x i32> %index.0, i32 8, i32 1
- %gep = getelementptr inbounds <2 x float addrspace(3)*> %lds.ptr, <2 x i32> %index.1
+ %gep = getelementptr inbounds float, <2 x float addrspace(3)*> %lds.ptr, <2 x i32> %index.1
%gep.0 = extractelement <2 x float addrspace(3)*> %gep, i32 0
%gep.1 = extractelement <2 x float addrspace(3)*> %gep, i32 1
; Apply an additional offset after the vector GEP; this one should be more obviously folded.
- %gep.1.offset = getelementptr float addrspace(3)* %gep.1, i32 8
+ %gep.1.offset = getelementptr float, float addrspace(3)* %gep.1, i32 8
store float %val0, float addrspace(3)* %gep.0, align 4
%add.x = add nsw i32 %x.i, 8
@@ -272,12 +272,12 @@ define void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* %C, float add
; SI: s_endpgm
define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %in.gep = getelementptr double addrspace(1)* %in, i32 %x.i
- %val = load double addrspace(1)* %in.gep, align 8
- %arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
+ %in.gep = getelementptr double, double addrspace(1)* %in, i32 %x.i
+ %val = load double, double addrspace(1)* %in.gep, align 8
+ %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
store double %val, double addrspace(3)* %arrayidx0, align 8
%add.x = add nsw i32 %x.i, 8
- %arrayidx1 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
+ %arrayidx1 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
store double %val, double addrspace(3)* %arrayidx1, align 8
ret void
}
@@ -290,12 +290,12 @@ define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace
; SI: s_endpgm
define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %in.gep = getelementptr double addrspace(1)* %in, i32 %x.i
- %val = load double addrspace(1)* %in.gep, align 8
- %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %x.i
+ %in.gep = getelementptr double, double addrspace(1)* %in, i32 %x.i
+ %val = load double, double addrspace(1)* %in.gep, align 8
+ %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
store double %val, double addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 7
- %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x
+ %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x
store double %val, double addrspace(3)* %arrayidx1, align 4
ret void
}
@@ -308,14 +308,14 @@ define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, doubl
; SI: s_endpgm
define void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %in.gep.0 = getelementptr double addrspace(1)* %in, i32 %x.i
- %in.gep.1 = getelementptr double addrspace(1)* %in.gep.0, i32 1
- %val0 = load double addrspace(1)* %in.gep.0, align 8
- %val1 = load double addrspace(1)* %in.gep.1, align 8
- %arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
+ %in.gep.0 = getelementptr double, double addrspace(1)* %in, i32 %x.i
+ %in.gep.1 = getelementptr double, double addrspace(1)* %in.gep.0, i32 1
+ %val0 = load double, double addrspace(1)* %in.gep.0, align 8
+ %val1 = load double, double addrspace(1)* %in.gep.1, align 8
+ %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
store double %val0, double addrspace(3)* %arrayidx0, align 8
%add.x = add nsw i32 %x.i, 8
- %arrayidx1 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
+ %arrayidx1 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
store double %val1, double addrspace(3)* %arrayidx1, align 8
ret void
}
@@ -326,8 +326,8 @@ define void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
define void @store_constant_adjacent_offsets() {
- store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
- store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
+ store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
+ store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
ret void
}
@@ -336,8 +336,8 @@ define void @store_constant_adjacent_offsets() {
; SI-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; SI: ds_write2_b32 [[ZERO]], [[VAL]], [[VAL]] offset0:0 offset1:2
define void @store_constant_disjoint_offsets() {
- store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
- store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
+ store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
+ store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
ret void
}
@@ -348,8 +348,8 @@ define void @store_constant_disjoint_offsets() {
; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
define void @store_misaligned64_constant_offsets() {
- store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
- store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
+ store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
+ store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
ret void
}
@@ -362,8 +362,8 @@ define void @store_misaligned64_constant_offsets() {
; SI-DAG: ds_write2_b32 [[BASE1]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
; SI: s_endpgm
define void @store_misaligned64_constant_large_offsets() {
- store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
- store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4
+ store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64], [4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
+ store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64], [4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4
ret void
}
@@ -373,34 +373,34 @@ define void @store_misaligned64_constant_large_offsets() {
define void @write2_sgemm_sequence(float addrspace(1)* %C, i32 %lda, i32 %ldb, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tgid.x() #1
%y.i = tail call i32 @llvm.r600.read.tidig.y() #1
- %val = load float addrspace(1)* %in
- %arrayidx44 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %x.i
+ %val = load float, float addrspace(1)* %in
+ %arrayidx44 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %x.i
store float %val, float addrspace(3)* %arrayidx44, align 4
%add47 = add nsw i32 %x.i, 1
- %arrayidx48 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add47
+ %arrayidx48 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add47
store float %val, float addrspace(3)* %arrayidx48, align 4
%add51 = add nsw i32 %x.i, 16
- %arrayidx52 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add51
+ %arrayidx52 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add51
store float %val, float addrspace(3)* %arrayidx52, align 4
%add55 = add nsw i32 %x.i, 17
- %arrayidx56 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add55
+ %arrayidx56 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add55
store float %val, float addrspace(3)* %arrayidx56, align 4
- %arrayidx60 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %y.i
+ %arrayidx60 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %y.i
store float %val, float addrspace(3)* %arrayidx60, align 4
%add63 = add nsw i32 %y.i, 1
- %arrayidx64 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add63
+ %arrayidx64 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add63
store float %val, float addrspace(3)* %arrayidx64, align 4
%add67 = add nsw i32 %y.i, 32
- %arrayidx68 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add67
+ %arrayidx68 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add67
store float %val, float addrspace(3)* %arrayidx68, align 4
%add71 = add nsw i32 %y.i, 33
- %arrayidx72 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add71
+ %arrayidx72 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add71
store float %val, float addrspace(3)* %arrayidx72, align 4
%add75 = add nsw i32 %y.i, 64
- %arrayidx76 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add75
+ %arrayidx76 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add75
store float %val, float addrspace(3)* %arrayidx76, align 4
%add79 = add nsw i32 %y.i, 65
- %arrayidx80 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add79
+ %arrayidx80 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add79
store float %val, float addrspace(3)* %arrayidx80, align 4
ret void
}
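The ds_write2.ll tests above all exercise the SI load/store optimizer pairing two LDS stores into one ds_write2 instruction. The offset0/offset1 fields count in element-size units (4 bytes for the b32 form) and each holds 8 bits, which is why the max-offset test uses a distance of 255 elements and the too-far test uses 257. A reduced sketch of the shape being matched, under those encoding assumptions (the @lds.sketch global and function name are invented for illustration):

; Should pair into: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:8
@lds.sketch = addrspace(3) global [512 x float] undef, align 4

define void @write2_pairing_sketch(float %v0, float %v1, i32 %tid) {
  %gep0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds.sketch, i32 0, i32 %tid
  store float %v0, float addrspace(3)* %gep0, align 4
  %tid.8 = add nsw i32 %tid, 8
  %gep1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds.sketch, i32 0, i32 %tid.8
  store float %v1, float addrspace(3)* %gep1, align 4
  ret void
}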
diff --git a/test/CodeGen/R600/ds_write2st64.ll b/test/CodeGen/R600/ds_write2st64.ll
index 0f1c662..2044df2 100644
--- a/test/CodeGen/R600/ds_write2st64.ll
+++ b/test/CodeGen/R600/ds_write2st64.ll
@@ -11,12 +11,12 @@
; SI: s_endpgm
define void @simple_write2st64_one_val_f32_0_1(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %in.gep = getelementptr float addrspace(1)* %in, i32 %x.i
- %val = load float addrspace(1)* %in.gep, align 4
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %in.gep = getelementptr float, float addrspace(1)* %in, i32 %x.i
+ %val = load float, float addrspace(1)* %in.gep, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
store float %val, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 64
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
store float %val, float addrspace(3)* %arrayidx1, align 4
ret void
}
@@ -29,15 +29,15 @@ define void @simple_write2st64_one_val_f32_0_1(float addrspace(1)* %C, float add
; SI: s_endpgm
define void @simple_write2st64_two_val_f32_2_5(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %in.gep.0 = getelementptr float addrspace(1)* %in, i32 %x.i
- %in.gep.1 = getelementptr float addrspace(1)* %in.gep.0, i32 1
- %val0 = load float addrspace(1)* %in.gep.0, align 4
- %val1 = load float addrspace(1)* %in.gep.1, align 4
+ %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i
+ %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
+ %val0 = load float, float addrspace(1)* %in.gep.0, align 4
+ %val1 = load float, float addrspace(1)* %in.gep.1, align 4
%add.x.0 = add nsw i32 %x.i, 128
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.0
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.0
store float %val0, float addrspace(3)* %arrayidx0, align 4
%add.x.1 = add nsw i32 %x.i, 320
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.1
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.1
store float %val1, float addrspace(3)* %arrayidx1, align 4
ret void
}
@@ -50,14 +50,14 @@ define void @simple_write2st64_two_val_f32_2_5(float addrspace(1)* %C, float add
; SI: s_endpgm
define void @simple_write2st64_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in, float addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %in.gep.0 = getelementptr float addrspace(1)* %in, i32 %x.i
- %in.gep.1 = getelementptr float addrspace(1)* %in.gep.0, i32 1
- %val0 = load float addrspace(1)* %in.gep.0, align 4
- %val1 = load float addrspace(1)* %in.gep.1, align 4
- %arrayidx0 = getelementptr inbounds float addrspace(3)* %lds, i32 %x.i
+ %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i
+ %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
+ %val0 = load float, float addrspace(1)* %in.gep.0, align 4
+ %val1 = load float, float addrspace(1)* %in.gep.1, align 4
+ %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %x.i
store float %val0, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 16320
- %arrayidx1 = getelementptr inbounds float addrspace(3)* %lds, i32 %add.x
+ %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x
store float %val1, float addrspace(3)* %arrayidx1, align 4
ret void
}
@@ -70,15 +70,15 @@ define void @simple_write2st64_two_val_max_offset_f32(float addrspace(1)* %C, fl
; SI: s_endpgm
define void @simple_write2st64_two_val_max_offset_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %in.gep.0 = getelementptr double addrspace(1)* %in, i32 %x.i
- %in.gep.1 = getelementptr double addrspace(1)* %in.gep.0, i32 1
- %val0 = load double addrspace(1)* %in.gep.0, align 8
- %val1 = load double addrspace(1)* %in.gep.1, align 8
+ %in.gep.0 = getelementptr double, double addrspace(1)* %in, i32 %x.i
+ %in.gep.1 = getelementptr double, double addrspace(1)* %in.gep.0, i32 1
+ %val0 = load double, double addrspace(1)* %in.gep.0, align 8
+ %val1 = load double, double addrspace(1)* %in.gep.1, align 8
%add.x.0 = add nsw i32 %x.i, 256
- %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.0
+ %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
store double %val0, double addrspace(3)* %arrayidx0, align 8
%add.x.1 = add nsw i32 %x.i, 8128
- %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.1
+ %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.1
store double %val1, double addrspace(3)* %arrayidx1, align 8
ret void
}
@@ -89,12 +89,12 @@ define void @simple_write2st64_two_val_max_offset_f64(double addrspace(1)* %C, d
; SI: s_endpgm
define void @byte_size_only_divisible_64_write2st64_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %in.gep = getelementptr double addrspace(1)* %in, i32 %x.i
- %val = load double addrspace(1)* %in.gep, align 8
- %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %x.i
+ %in.gep = getelementptr double, double addrspace(1)* %in, i32 %x.i
+ %val = load double, double addrspace(1)* %in.gep, align 8
+ %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
store double %val, double addrspace(3)* %arrayidx0, align 8
%add.x = add nsw i32 %x.i, 8
- %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x
+ %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x
store double %val, double addrspace(3)* %arrayidx1, align 8
ret void
}
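The st64 variants scale both 8-bit offset fields by 64 elements, which the constants in these tests spell out. Checking the arithmetic (the field widths are an assumption carried over from the non-st64 tests above):

; simple_write2st64_two_val_f32_2_5: 128 = 2 * 64 and 320 = 5 * 64, so the
;   two stores land at st64 offsets 2 and 5.
; simple_write2st64_two_val_max_offset_f32: 16320 = 255 * 64 elements,
;   i.e. 65280 bytes, the farthest a b32 st64 pair can reach.
; simple_write2st64_two_val_max_offset_f64: 8128 = 127 * 64 doubles,
;   presumably capped so the byte offset (127 * 512 = 65024) stays inside
;   the 64 KB LDS aperture.
; byte_size_only_divisible_64_write2st64_f64: a step of 8 doubles is 64
;   bytes, divisible by 64 bytes but not by 64 elements, so the st64 form
;   does not apply and a plain ds_write2_b64 is expected instead.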
diff --git a/test/CodeGen/R600/elf.ll b/test/CodeGen/R600/elf.ll
index aca3109..d0fd06a 100644
--- a/test/CodeGen/R600/elf.ll
+++ b/test/CodeGen/R600/elf.ll
@@ -1,7 +1,12 @@
; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols - | FileCheck --check-prefix=ELF %s
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TYPICAL %s
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols - | FileCheck --check-prefix=ELF %s
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG %s
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TONGA %s
+; RUN: llc < %s -march=amdgcn -mcpu=carrizo -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols - | FileCheck --check-prefix=ELF %s
+; RUN: llc < %s -march=amdgcn -mcpu=carrizo -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TYPICAL %s
+
+; Test that we don't try to produce a COFF file on Windows.
+; RUN: llc < %s -mtriple=amdgcn-pc-mingw -mcpu=SI -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols - | FileCheck --check-prefix=ELF %s
; ELF: Format: ELF32
; ELF: Name: .AMDGPU.config
@@ -11,11 +16,12 @@
; ELF: Name: test
; ELF: Binding: Global
-; CONFIG: .align 256
-; CONFIG: test:
; CONFIG: .section .AMDGPU.config
; CONFIG-NEXT: .long 45096
-; CONFIG-NEXT: .long 0
+; TYPICAL-NEXT: .long 0
+; TONGA-NEXT: .long 576
+; CONFIG: .align 256
+; CONFIG: test:
define void @test(i32 %p) #0 {
%i = add i32 %p, 2
%r = bitcast i32 %i to float
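The new RUN lines fan one test body out across targets by stacking FileCheck prefixes: passing --check-prefix twice activates both prefixes, and directives from every active prefix are matched in file order. The shared CONFIG lines pin the section layout while the second prefix selects the per-CPU value of the second config word, 0 for SI-class parts and 576 for tonga. A schematic of the mechanism (values as in the diff, everything else illustrative):

; RUN: llc < %s -march=amdgcn -mcpu=SI | FileCheck --check-prefix=CONFIG --check-prefix=TYPICAL %s
; RUN: llc < %s -march=amdgcn -mcpu=tonga | FileCheck --check-prefix=CONFIG --check-prefix=TONGA %s
; CONFIG: .section .AMDGPU.config
; CONFIG-NEXT: .long 45096
; TYPICAL-NEXT: .long 0
; TONGA-NEXT: .long 576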
diff --git a/test/CodeGen/R600/empty-function.ll b/test/CodeGen/R600/empty-function.ll
index b5593eb..a060900 100644
--- a/test/CodeGen/R600/empty-function.ll
+++ b/test/CodeGen/R600/empty-function.ll
@@ -3,16 +3,16 @@
; Make sure we don't assert on empty functions
-; SI-LABEL: {{^}}empty_function_ret:
; SI: .text
+; SI-LABEL: {{^}}empty_function_ret:
; SI: s_endpgm
; SI: codeLenInByte = 4
define void @empty_function_ret() #0 {
ret void
}
-; SI-LABEL: {{^}}empty_function_unreachable:
; SI: .text
+; SI-LABEL: {{^}}empty_function_unreachable:
; SI: codeLenInByte = 0
define void @empty_function_unreachable() #0 {
unreachable
diff --git a/test/CodeGen/R600/endcf-loop-header.ll b/test/CodeGen/R600/endcf-loop-header.ll
index e3c5b3c..267a323 100644
--- a/test/CodeGen/R600/endcf-loop-header.ll
+++ b/test/CodeGen/R600/endcf-loop-header.ll
@@ -28,7 +28,7 @@ loop:
br i1 %tmp2, label %done, label %loop
done:
- %tmp3 = getelementptr i32 addrspace(1)* %out, i64 1
+ %tmp3 = getelementptr i32, i32 addrspace(1)* %out, i64 1
store i32 %inc, i32 addrspace(1)* %tmp3
ret void
}
diff --git a/test/CodeGen/R600/extload-private.ll b/test/CodeGen/R600/extload-private.ll
index fec8682..294c3a9 100644
--- a/test/CodeGen/R600/extload-private.ll
+++ b/test/CodeGen/R600/extload-private.ll
@@ -6,7 +6,7 @@
define void @load_i8_sext_private(i32 addrspace(1)* %out) {
entry:
%tmp0 = alloca i8
- %tmp1 = load i8* %tmp0
+ %tmp1 = load i8, i8* %tmp0
%tmp2 = sext i8 %tmp1 to i32
store i32 %tmp2, i32 addrspace(1)* %out
ret void
@@ -17,7 +17,7 @@ entry:
define void @load_i8_zext_private(i32 addrspace(1)* %out) {
entry:
%tmp0 = alloca i8
- %tmp1 = load i8* %tmp0
+ %tmp1 = load i8, i8* %tmp0
%tmp2 = zext i8 %tmp1 to i32
store i32 %tmp2, i32 addrspace(1)* %out
ret void
@@ -28,7 +28,7 @@ entry:
define void @load_i16_sext_private(i32 addrspace(1)* %out) {
entry:
%tmp0 = alloca i16
- %tmp1 = load i16* %tmp0
+ %tmp1 = load i16, i16* %tmp0
%tmp2 = sext i16 %tmp1 to i32
store i32 %tmp2, i32 addrspace(1)* %out
ret void
@@ -39,7 +39,7 @@ entry:
define void @load_i16_zext_private(i32 addrspace(1)* %out) {
entry:
%tmp0 = alloca i16
- %tmp1 = load i16* %tmp0
+ %tmp1 = load i16, i16* %tmp0
%tmp2 = zext i16 %tmp1 to i32
store i32 %tmp2, i32 addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/extload.ll b/test/CodeGen/R600/extload.ll
index 77e5dc3..662eb7a 100644
--- a/test/CodeGen/R600/extload.ll
+++ b/test/CodeGen/R600/extload.ll
@@ -8,7 +8,7 @@
define void @anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspace(1)* nocapture noalias %src) nounwind {
%cast = bitcast i8 addrspace(1)* %src to i32 addrspace(1)*
- %load = load i32 addrspace(1)* %cast, align 1
+ %load = load i32, i32 addrspace(1)* %cast, align 1
%x = bitcast i32 %load to <4 x i8>
%castOut = bitcast i8 addrspace(1)* %out to <4 x i8> addrspace(1)*
store <4 x i8> %x, <4 x i8> addrspace(1)* %castOut, align 1
@@ -21,7 +21,7 @@ define void @anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspac
define void @anyext_load_i16(i16 addrspace(1)* nocapture noalias %out, i16 addrspace(1)* nocapture noalias %src) nounwind {
%cast = bitcast i16 addrspace(1)* %src to i32 addrspace(1)*
- %load = load i32 addrspace(1)* %cast, align 1
+ %load = load i32, i32 addrspace(1)* %cast, align 1
%x = bitcast i32 %load to <2 x i16>
%castOut = bitcast i16 addrspace(1)* %out to <2 x i16> addrspace(1)*
store <2 x i16> %x, <2 x i16> addrspace(1)* %castOut, align 1
@@ -33,7 +33,7 @@ define void @anyext_load_i16(i16 addrspace(1)* nocapture noalias %out, i16 addrs
; EG: LDS_WRITE * [[VAL]]
define void @anyext_load_lds_i8(i8 addrspace(3)* nocapture noalias %out, i8 addrspace(3)* nocapture noalias %src) nounwind {
%cast = bitcast i8 addrspace(3)* %src to i32 addrspace(3)*
- %load = load i32 addrspace(3)* %cast, align 1
+ %load = load i32, i32 addrspace(3)* %cast, align 1
%x = bitcast i32 %load to <4 x i8>
%castOut = bitcast i8 addrspace(3)* %out to <4 x i8> addrspace(3)*
store <4 x i8> %x, <4 x i8> addrspace(3)* %castOut, align 1
@@ -45,7 +45,7 @@ define void @anyext_load_lds_i8(i8 addrspace(3)* nocapture noalias %out, i8 addr
; EG: LDS_WRITE * [[VAL]]
define void @anyext_load_lds_i16(i16 addrspace(3)* nocapture noalias %out, i16 addrspace(3)* nocapture noalias %src) nounwind {
%cast = bitcast i16 addrspace(3)* %src to i32 addrspace(3)*
- %load = load i32 addrspace(3)* %cast, align 1
+ %load = load i32, i32 addrspace(3)* %cast, align 1
%x = bitcast i32 %load to <2 x i16>
%castOut = bitcast i16 addrspace(3)* %out to <2 x i16> addrspace(3)*
store <2 x i16> %x, <2 x i16> addrspace(3)* %castOut, align 1
diff --git a/test/CodeGen/R600/extract_vector_elt_i16.ll b/test/CodeGen/R600/extract_vector_elt_i16.ll
index 0774a9a..c7572ef 100644
--- a/test/CodeGen/R600/extract_vector_elt_i16.ll
+++ b/test/CodeGen/R600/extract_vector_elt_i16.ll
@@ -9,7 +9,7 @@
define void @extract_vector_elt_v2i16(i16 addrspace(1)* %out, <2 x i16> %foo) nounwind {
%p0 = extractelement <2 x i16> %foo, i32 0
%p1 = extractelement <2 x i16> %foo, i32 1
- %out1 = getelementptr i16 addrspace(1)* %out, i32 1
+ %out1 = getelementptr i16, i16 addrspace(1)* %out, i32 1
store i16 %p1, i16 addrspace(1)* %out, align 2
store i16 %p0, i16 addrspace(1)* %out1, align 2
ret void
@@ -23,7 +23,7 @@ define void @extract_vector_elt_v2i16(i16 addrspace(1)* %out, <2 x i16> %foo) no
define void @extract_vector_elt_v4i16(i16 addrspace(1)* %out, <4 x i16> %foo) nounwind {
%p0 = extractelement <4 x i16> %foo, i32 0
%p1 = extractelement <4 x i16> %foo, i32 2
- %out1 = getelementptr i16 addrspace(1)* %out, i32 1
+ %out1 = getelementptr i16, i16 addrspace(1)* %out, i32 1
store i16 %p1, i16 addrspace(1)* %out, align 2
store i16 %p0, i16 addrspace(1)* %out1, align 2
ret void
diff --git a/test/CodeGen/R600/fabs.f64.ll b/test/CodeGen/R600/fabs.f64.ll
index d87c082..3c6136c 100644
--- a/test/CodeGen/R600/fabs.f64.ll
+++ b/test/CodeGen/R600/fabs.f64.ll
@@ -13,8 +13,8 @@ declare <4 x double> @llvm.fabs.v4f64(<4 x double>) readnone
define void @v_fabs_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%tidext = sext i32 %tid to i64
- %gep = getelementptr double addrspace(1)* %in, i64 %tidext
- %val = load double addrspace(1)* %gep, align 8
+ %gep = getelementptr double, double addrspace(1)* %in, i64 %tidext
+ %val = load double, double addrspace(1)* %gep, align 8
%fabs = call double @llvm.fabs.f64(double %val)
store double %fabs, double addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/fadd.ll b/test/CodeGen/R600/fadd.ll
index 365af9b..5fac328 100644
--- a/test/CodeGen/R600/fadd.ll
+++ b/test/CodeGen/R600/fadd.ll
@@ -32,9 +32,9 @@ define void @fadd_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x flo
; SI: v_add_f32
; SI: v_add_f32
define void @fadd_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
- %a = load <4 x float> addrspace(1)* %in, align 16
- %b = load <4 x float> addrspace(1)* %b_ptr, align 16
+ %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
+ %a = load <4 x float>, <4 x float> addrspace(1)* %in, align 16
+ %b = load <4 x float>, <4 x float> addrspace(1)* %b_ptr, align 16
%result = fadd <4 x float> %a, %b
store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16
ret void
diff --git a/test/CodeGen/R600/fadd64.ll b/test/CodeGen/R600/fadd64.ll
index f1f6fef..485c558 100644
--- a/test/CodeGen/R600/fadd64.ll
+++ b/test/CodeGen/R600/fadd64.ll
@@ -6,8 +6,8 @@
define void @fadd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
- %r0 = load double addrspace(1)* %in1
- %r1 = load double addrspace(1)* %in2
+ %r0 = load double, double addrspace(1)* %in1
+ %r1 = load double, double addrspace(1)* %in2
%r2 = fadd double %r0, %r1
store double %r2, double addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/fceil64.ll b/test/CodeGen/R600/fceil64.ll
index e3244fa..e8c34f0 100644
--- a/test/CodeGen/R600/fceil64.ll
+++ b/test/CodeGen/R600/fceil64.ll
@@ -17,13 +17,13 @@ declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone
; SI: s_lshr_b64
; SI: s_not_b64
; SI: s_and_b64
-; SI: cmp_lt_i32
+; SI: cmp_gt_i32
; SI: cndmask_b32
; SI: cndmask_b32
-; SI: cmp_gt_i32
+; SI: cmp_lt_i32
; SI: cndmask_b32
; SI: cndmask_b32
-; SI-DAG: v_cmp_gt_f64
+; SI-DAG: v_cmp_lt_f64
; SI-DAG: v_cmp_lg_f64
; SI: s_and_b64
; SI: v_cndmask_b32
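The flipped predicates in these fceil64 checks follow from a codegen change that commutes comparison operands: swapping the operands of a compare while mirroring the predicate is an equivalence, so the checks track a different but equally correct instruction choice. A small sketch of the identity involved (an illustrative function, not from the test):

define i1 @commute_sketch(i32 %x, i32 %y) {
  %sgt = icmp sgt i32 %x, %y      ; x > y
  %slt = icmp slt i32 %y, %x      ; y < x, the commuted spelling
  %same = icmp eq i1 %sgt, %slt   ; always true; fcmp ogt/olt mirror the same way
  ret i1 %same
}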
diff --git a/test/CodeGen/R600/fcmp-cnd.ll b/test/CodeGen/R600/fcmp-cnd.ll
index 1d4e323..530274f 100644
--- a/test/CodeGen/R600/fcmp-cnd.ll
+++ b/test/CodeGen/R600/fcmp-cnd.ll
@@ -6,7 +6,7 @@
define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) {
entry:
- %0 = load float addrspace(1)* %in
+ %0 = load float, float addrspace(1)* %in
%cmp = fcmp oeq float %0, 0.000000e+00
%value = select i1 %cmp, i32 2, i32 3
store i32 %value, i32 addrspace(1)* %out
diff --git a/test/CodeGen/R600/fcmp-cnde-int-args.ll b/test/CodeGen/R600/fcmp-cnde-int-args.ll
index 55aba0d..c402805 100644
--- a/test/CodeGen/R600/fcmp-cnde-int-args.ll
+++ b/test/CodeGen/R600/fcmp-cnde-int-args.ll
@@ -8,7 +8,7 @@
define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) {
entry:
- %0 = load float addrspace(1)* %in
+ %0 = load float, float addrspace(1)* %in
%cmp = fcmp oeq float %0, 0.000000e+00
%value = select i1 %cmp, i32 -1, i32 0
store i32 %value, i32 addrspace(1)* %out
diff --git a/test/CodeGen/R600/fcmp.ll b/test/CodeGen/R600/fcmp.ll
index 3399218..5207ab5 100644
--- a/test/CodeGen/R600/fcmp.ll
+++ b/test/CodeGen/R600/fcmp.ll
@@ -5,9 +5,9 @@
define void @fcmp_sext(i32 addrspace(1)* %out, float addrspace(1)* %in) {
entry:
- %0 = load float addrspace(1)* %in
- %arrayidx1 = getelementptr inbounds float addrspace(1)* %in, i32 1
- %1 = load float addrspace(1)* %arrayidx1
+ %0 = load float, float addrspace(1)* %in
+ %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %in, i32 1
+ %1 = load float, float addrspace(1)* %arrayidx1
%cmp = fcmp oeq float %0, %1
%sext = sext i1 %cmp to i32
store i32 %sext, i32 addrspace(1)* %out
@@ -28,7 +28,7 @@ entry:
br i1 %0, label %IF, label %ENDIF
IF:
- %1 = getelementptr i32 addrspace(1)* %out, i32 1
+ %1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
store i32 0, i32 addrspace(1)* %1
br label %ENDIF
diff --git a/test/CodeGen/R600/fcmp64.ll b/test/CodeGen/R600/fcmp64.ll
index 9dc8b50..053ab0e 100644
--- a/test/CodeGen/R600/fcmp64.ll
+++ b/test/CodeGen/R600/fcmp64.ll
@@ -5,8 +5,8 @@
; CHECK: v_cmp_nge_f64_e32 vcc, {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define void @flt_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
- %r0 = load double addrspace(1)* %in1
- %r1 = load double addrspace(1)* %in2
+ %r0 = load double, double addrspace(1)* %in1
+ %r1 = load double, double addrspace(1)* %in2
%r2 = fcmp ult double %r0, %r1
%r3 = zext i1 %r2 to i32
store i32 %r3, i32 addrspace(1)* %out
@@ -17,8 +17,8 @@ define void @flt_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
; CHECK: v_cmp_ngt_f64_e32 vcc, {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define void @fle_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
- %r0 = load double addrspace(1)* %in1
- %r1 = load double addrspace(1)* %in2
+ %r0 = load double, double addrspace(1)* %in1
+ %r1 = load double, double addrspace(1)* %in2
%r2 = fcmp ule double %r0, %r1
%r3 = zext i1 %r2 to i32
store i32 %r3, i32 addrspace(1)* %out
@@ -29,8 +29,8 @@ define void @fle_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
; CHECK: v_cmp_nle_f64_e32 vcc, {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define void @fgt_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
- %r0 = load double addrspace(1)* %in1
- %r1 = load double addrspace(1)* %in2
+ %r0 = load double, double addrspace(1)* %in1
+ %r1 = load double, double addrspace(1)* %in2
%r2 = fcmp ugt double %r0, %r1
%r3 = zext i1 %r2 to i32
store i32 %r3, i32 addrspace(1)* %out
@@ -41,8 +41,8 @@ define void @fgt_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
; CHECK: v_cmp_nlt_f64_e32 vcc, {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define void @fge_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
- %r0 = load double addrspace(1)* %in1
- %r1 = load double addrspace(1)* %in2
+ %r0 = load double, double addrspace(1)* %in1
+ %r1 = load double, double addrspace(1)* %in2
%r2 = fcmp uge double %r0, %r1
%r3 = zext i1 %r2 to i32
store i32 %r3, i32 addrspace(1)* %out
@@ -53,8 +53,8 @@ define void @fge_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
; CHECK: v_cmp_neq_f64_e32 vcc, {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define void @fne_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
- %r0 = load double addrspace(1)* %in1
- %r1 = load double addrspace(1)* %in2
+ %r0 = load double, double addrspace(1)* %in1
+ %r1 = load double, double addrspace(1)* %in2
%r2 = fcmp une double %r0, %r1
%r3 = select i1 %r2, double %r0, double %r1
store double %r3, double addrspace(1)* %out
@@ -65,8 +65,8 @@ define void @fne_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
; CHECK: v_cmp_nlg_f64_e32 vcc, {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define void @feq_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
- %r0 = load double addrspace(1)* %in1
- %r1 = load double addrspace(1)* %in2
+ %r0 = load double, double addrspace(1)* %in1
+ %r1 = load double, double addrspace(1)* %in2
%r2 = fcmp ueq double %r0, %r1
%r3 = select i1 %r2, double %r0, double %r1
store double %r3, double addrspace(1)* %out
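One consistent mapping runs through these fcmp64 checks: each unordered IR predicate is selected as the hardware negation of its ordered complement. Collected from the CHECK lines above:

; fcmp ult -> v_cmp_nge_f64   (not greater-or-equal)
; fcmp ule -> v_cmp_ngt_f64   (not greater-than)
; fcmp ugt -> v_cmp_nle_f64   (not less-or-equal)
; fcmp uge -> v_cmp_nlt_f64   (not less-than)
; fcmp une -> v_cmp_neq_f64   (not equal)
; fcmp ueq -> v_cmp_nlg_f64   (not less-or-greater)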
diff --git a/test/CodeGen/R600/fconst64.ll b/test/CodeGen/R600/fconst64.ll
index 28e0c90..89af375 100644
--- a/test/CodeGen/R600/fconst64.ll
+++ b/test/CodeGen/R600/fconst64.ll
@@ -6,7 +6,7 @@
; CHECK-DAG: s_mov_b32 {{s[0-9]+}}, 0
define void @fconst_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
- %r1 = load double addrspace(1)* %in
+ %r1 = load double, double addrspace(1)* %in
%r2 = fadd double %r1, 5.000000e+00
store double %r2, double addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/fdiv.f64.ll b/test/CodeGen/R600/fdiv.f64.ll
index 6367f32..7c022e3 100644
--- a/test/CodeGen/R600/fdiv.f64.ll
+++ b/test/CodeGen/R600/fdiv.f64.ll
@@ -30,9 +30,9 @@
; COMMON: buffer_store_dwordx2 [[RESULT]]
; COMMON: s_endpgm
define void @fdiv_f64(double addrspace(1)* %out, double addrspace(1)* %in) nounwind {
- %gep.1 = getelementptr double addrspace(1)* %in, i32 1
- %num = load double addrspace(1)* %in
- %den = load double addrspace(1)* %gep.1
+ %gep.1 = getelementptr double, double addrspace(1)* %in, i32 1
+ %num = load double, double addrspace(1)* %in
+ %den = load double, double addrspace(1)* %gep.1
%result = fdiv double %num, %den
store double %result, double addrspace(1)* %out
ret void
@@ -40,7 +40,7 @@ define void @fdiv_f64(double addrspace(1)* %out, double addrspace(1)* %in) nounw
; COMMON-LABEL: {{^}}fdiv_f64_s_v:
define void @fdiv_f64_s_v(double addrspace(1)* %out, double addrspace(1)* %in, double %num) nounwind {
- %den = load double addrspace(1)* %in
+ %den = load double, double addrspace(1)* %in
%result = fdiv double %num, %den
store double %result, double addrspace(1)* %out
ret void
@@ -48,7 +48,7 @@ define void @fdiv_f64_s_v(double addrspace(1)* %out, double addrspace(1)* %in, d
; COMMON-LABEL: {{^}}fdiv_f64_v_s:
define void @fdiv_f64_v_s(double addrspace(1)* %out, double addrspace(1)* %in, double %den) nounwind {
- %num = load double addrspace(1)* %in
+ %num = load double, double addrspace(1)* %in
%result = fdiv double %num, %den
store double %result, double addrspace(1)* %out
ret void
@@ -63,9 +63,9 @@ define void @fdiv_f64_s_s(double addrspace(1)* %out, double %num, double %den) n
; COMMON-LABEL: {{^}}v_fdiv_v2f64:
define void @v_fdiv_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in) nounwind {
- %gep.1 = getelementptr <2 x double> addrspace(1)* %in, i32 1
- %num = load <2 x double> addrspace(1)* %in
- %den = load <2 x double> addrspace(1)* %gep.1
+ %gep.1 = getelementptr <2 x double>, <2 x double> addrspace(1)* %in, i32 1
+ %num = load <2 x double>, <2 x double> addrspace(1)* %in
+ %den = load <2 x double>, <2 x double> addrspace(1)* %gep.1
%result = fdiv <2 x double> %num, %den
store <2 x double> %result, <2 x double> addrspace(1)* %out
ret void
@@ -80,9 +80,9 @@ define void @s_fdiv_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %num, <2
; COMMON-LABEL: {{^}}v_fdiv_v4f64:
define void @v_fdiv_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in) nounwind {
- %gep.1 = getelementptr <4 x double> addrspace(1)* %in, i32 1
- %num = load <4 x double> addrspace(1)* %in
- %den = load <4 x double> addrspace(1)* %gep.1
+ %gep.1 = getelementptr <4 x double>, <4 x double> addrspace(1)* %in, i32 1
+ %num = load <4 x double>, <4 x double> addrspace(1)* %in
+ %den = load <4 x double>, <4 x double> addrspace(1)* %gep.1
%result = fdiv <4 x double> %num, %den
store <4 x double> %result, <4 x double> addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/fdiv.ll b/test/CodeGen/R600/fdiv.ll
index 603287f..7cbf873 100644
--- a/test/CodeGen/R600/fdiv.ll
+++ b/test/CodeGen/R600/fdiv.ll
@@ -59,9 +59,9 @@ entry:
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
define void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
- %a = load <4 x float> addrspace(1) * %in
- %b = load <4 x float> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
+ %a = load <4 x float>, <4 x float> addrspace(1) * %in
+ %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr
%result = fdiv <4 x float> %a, %b
store <4 x float> %result, <4 x float> addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/fetch-limits.r600.ll b/test/CodeGen/R600/fetch-limits.r600.ll
index d35573e..e7160ef 100644
--- a/test/CodeGen/R600/fetch-limits.r600.ll
+++ b/test/CodeGen/R600/fetch-limits.r600.ll
@@ -9,15 +9,15 @@
define void @fetch_limits_r600() #0 {
entry:
- %0 = load <4 x float> addrspace(8)* null
- %1 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
- %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
- %3 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
- %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
- %5 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
- %6 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
- %7 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
- %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+ %0 = load <4 x float>, <4 x float> addrspace(8)* null
+ %1 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %2 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %3 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %5 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+ %6 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+ %7 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+ %8 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
%res0 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %0, i32 0, i32 0, i32 1)
%res1 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %1, i32 0, i32 0, i32 1)
%res2 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %2, i32 0, i32 0, i32 1)
diff --git a/test/CodeGen/R600/fetch-limits.r700+.ll b/test/CodeGen/R600/fetch-limits.r700+.ll
index 17760a0..acaea2a 100644
--- a/test/CodeGen/R600/fetch-limits.r700+.ll
+++ b/test/CodeGen/R600/fetch-limits.r700+.ll
@@ -18,23 +18,23 @@
define void @fetch_limits_r700() #0 {
entry:
- %0 = load <4 x float> addrspace(8)* null
- %1 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
- %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
- %3 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
- %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
- %5 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
- %6 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
- %7 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
- %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
- %9 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
- %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
- %11 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
- %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
- %13 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 13)
- %14 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
- %15 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 15)
- %16 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
+ %0 = load <4 x float>, <4 x float> addrspace(8)* null
+ %1 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %2 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %3 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %5 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+ %6 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+ %7 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+ %8 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+ %9 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+ %10 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+ %11 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
+ %12 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
+ %13 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 13)
+ %14 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+ %15 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 15)
+ %16 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
%res0 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %0, i32 0, i32 0, i32 1)
%res1 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %1, i32 0, i32 0, i32 1)
%res2 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %2, i32 0, i32 0, i32 1)
diff --git a/test/CodeGen/R600/ffloor.f64.ll b/test/CodeGen/R600/ffloor.f64.ll
index 745ad3b..45f8382 100644
--- a/test/CodeGen/R600/ffloor.f64.ll
+++ b/test/CodeGen/R600/ffloor.f64.ll
@@ -1,7 +1,8 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
+declare double @llvm.fabs.f64(double %Val)
declare double @llvm.floor.f64(double) nounwind readnone
declare <2 x double> @llvm.floor.v2f64(<2 x double>) nounwind readnone
declare <3 x double> @llvm.floor.v3f64(<3 x double>) nounwind readnone
@@ -11,24 +12,11 @@ declare <16 x double> @llvm.floor.v16f64(<16 x double>) nounwind readnone
; FUNC-LABEL: {{^}}ffloor_f64:
; CI: v_floor_f64_e32
-
-; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
-; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
-; SI: s_add_i32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
-; SI: s_lshr_b64
-; SI: s_not_b64
-; SI: s_and_b64
-; SI: cmp_lt_i32
-; SI: cndmask_b32
-; SI: cndmask_b32
-; SI: cmp_gt_i32
-; SI: cndmask_b32
-; SI: cndmask_b32
-; SI-DAG: v_cmp_lt_f64
-; SI-DAG: v_cmp_lg_f64
-; SI-DAG: s_and_b64
-; SI-DAG: v_cndmask_b32
-; SI-DAG: v_cndmask_b32
+; SI: v_fract_f64_e32
+; SI: v_min_f64
+; SI: v_cmp_class_f64_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
; SI: v_add_f64
; SI: s_endpgm
define void @ffloor_f64(double addrspace(1)* %out, double %x) {
@@ -37,6 +25,39 @@ define void @ffloor_f64(double addrspace(1)* %out, double %x) {
ret void
}
+; FUNC-LABEL: {{^}}ffloor_f64_neg:
+; CI: v_floor_f64_e64
+; SI: v_fract_f64_e64 {{v[[0-9]+:[0-9]+]}}, -[[INPUT:s[[0-9]+:[0-9]+]]]
+; SI: v_min_f64
+; SI: v_cmp_class_f64_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_add_f64 {{v[[0-9]+:[0-9]+]}}, -[[INPUT]]
+; SI: s_endpgm
+define void @ffloor_f64_neg(double addrspace(1)* %out, double %x) {
+ %neg = fsub double 0.0, %x
+ %y = call double @llvm.floor.f64(double %neg) nounwind readnone
+ store double %y, double addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}ffloor_f64_neg_abs:
+; CI: v_floor_f64_e64
+; SI: v_fract_f64_e64 {{v[[0-9]+:[0-9]+]}}, -|[[INPUT:s[[0-9]+:[0-9]+]]]|
+; SI: v_min_f64
+; SI: v_cmp_class_f64_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_add_f64 {{v[[0-9]+:[0-9]+]}}, -|[[INPUT]]|
+; SI: s_endpgm
+define void @ffloor_f64_neg_abs(double addrspace(1)* %out, double %x) {
+ %abs = call double @llvm.fabs.f64(double %x)
+ %neg = fsub double 0.0, %abs
+ %y = call double @llvm.floor.f64(double %neg) nounwind readnone
+ store double %y, double addrspace(1)* %out
+ ret void
+}
+
; FUNC-LABEL: {{^}}ffloor_v2f64:
; CI: v_floor_f64_e32
; CI: v_floor_f64_e32
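
The updated SI checks above reflect a floor lowering built on v_fract_f64 instead of the old exponent/bitfield expansion: floor(x) = x - fract(x), with the fract result clamped just below 1.0 by v_min_f64 (guarding against the hardware fract rounding up to exactly 1.0) and inf/nan inputs passed through by the v_cmp_class_f64/v_cndmask pair. The two new tests additionally pin down that fneg and fneg(fabs) fold into the -[[INPUT]] and -|[[INPUT]]| source modifiers rather than emitting separate instructions. A rough IR-level sketch of the identity, where %fract.x stands in for what v_fract_f64 produces:

define double @ffloor_expansion_sketch(double %x, double %fract.x) {
  ; v_fract_f64:               %fract.x = x - floor(x), in [0.0, 1.0)
  ; v_min_f64:                 clamp %fract.x below 1.0
  ; v_cmp_class_f64/v_cndmask: keep %x unchanged for inf/nan inputs
  %r = fsub double %x, %fract.x   ; v_add_f64 r, x, -fract(x) == floor(x)
  ret double %r
}
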
diff --git a/test/CodeGen/R600/flat-address-space.ll b/test/CodeGen/R600/flat-address-space.ll
index 2e98bf5..425d67d 100644
--- a/test/CodeGen/R600/flat-address-space.ll
+++ b/test/CodeGen/R600/flat-address-space.ll
@@ -26,7 +26,7 @@ global:
end:
%fptr = phi i32 addrspace(4)* [ %flat_local, %local ], [ %flat_global, %global ]
store i32 %x, i32 addrspace(4)* %fptr, align 4
-; %val = load i32 addrspace(4)* %fptr, align 4
+; %val = load i32, i32 addrspace(4)* %fptr, align 4
; store i32 %val, i32 addrspace(1)* %out, align 4
ret void
}
@@ -87,7 +87,7 @@ define void @store_flat_trunc_i8(i8 addrspace(1)* %gptr, i32 %x) #0 {
; CHECK: flat_load_dword
define void @load_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %gptr) #0 {
%fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
- %fload = load i32 addrspace(4)* %fptr, align 4
+ %fload = load i32, i32 addrspace(4)* %fptr, align 4
store i32 %fload, i32 addrspace(1)* %out, align 4
ret void
}
@@ -96,7 +96,7 @@ define void @load_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noa
; CHECK: flat_load_dwordx2
define void @load_flat_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %gptr) #0 {
%fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)*
- %fload = load i64 addrspace(4)* %fptr, align 4
+ %fload = load i64, i64 addrspace(4)* %fptr, align 4
store i64 %fload, i64 addrspace(1)* %out, align 8
ret void
}
@@ -105,7 +105,7 @@ define void @load_flat_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noa
; CHECK: flat_load_dwordx4
define void @load_flat_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %gptr) #0 {
%fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)*
- %fload = load <4 x i32> addrspace(4)* %fptr, align 4
+ %fload = load <4 x i32>, <4 x i32> addrspace(4)* %fptr, align 4
store <4 x i32> %fload, <4 x i32> addrspace(1)* %out, align 8
ret void
}
@@ -114,7 +114,7 @@ define void @load_flat_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> add
; CHECK: flat_load_sbyte
define void @sextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
%fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
- %fload = load i8 addrspace(4)* %fptr, align 4
+ %fload = load i8, i8 addrspace(4)* %fptr, align 4
%ext = sext i8 %fload to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
ret void
@@ -124,7 +124,7 @@ define void @sextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* n
; CHECK: flat_load_ubyte
define void @zextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
%fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
- %fload = load i8 addrspace(4)* %fptr, align 4
+ %fload = load i8, i8 addrspace(4)* %fptr, align 4
%ext = zext i8 %fload to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
ret void
@@ -134,7 +134,7 @@ define void @zextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* n
; CHECK: flat_load_sshort
define void @sextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
%fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
- %fload = load i16 addrspace(4)* %fptr, align 4
+ %fload = load i16, i16 addrspace(4)* %fptr, align 4
%ext = sext i16 %fload to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
ret void
@@ -144,7 +144,7 @@ define void @sextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)*
; CHECK: flat_load_ushort
define void @zextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
%fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
- %fload = load i16 addrspace(4)* %fptr, align 4
+ %fload = load i16, i16 addrspace(4)* %fptr, align 4
%ext = zext i16 %fload to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
ret void
@@ -166,12 +166,12 @@ define void @zextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)*
define void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 {
%alloca = alloca i32, i32 9, align 4
%x = call i32 @llvm.r600.read.tidig.x() #3
- %pptr = getelementptr i32* %alloca, i32 %x
+ %pptr = getelementptr i32, i32* %alloca, i32 %x
%fptr = addrspacecast i32* %pptr to i32 addrspace(4)*
store i32 %x, i32 addrspace(4)* %fptr
; Dummy call
call void @llvm.AMDGPU.barrier.local() #1
- %reload = load i32 addrspace(4)* %fptr, align 4
+ %reload = load i32, i32 addrspace(4)* %fptr, align 4
store i32 %reload, i32 addrspace(1)* %out, align 4
ret void
}
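
In these tests addrspace(4) is the flat address space: each function addrspacecasts a global, local, or private pointer to flat and checks that the ordinary load or store selects to the matching flat_load_*/flat_store_* instruction, including the sign- and zero-extending sub-dword variants. The recurring pattern, condensed into one sketch:

define void @flat_copy_sketch(i32 addrspace(1)* %gptr, i32 addrspace(1)* %out) {
  %fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
  %v = load i32, i32 addrspace(4)* %fptr, align 4  ; expected: flat_load_dword
  store i32 %v, i32 addrspace(1)* %out, align 4
  ret void
}
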
diff --git a/test/CodeGen/R600/fma-combine.ll b/test/CodeGen/R600/fma-combine.ll
index 9aac90c..bd574b8 100644
--- a/test/CodeGen/R600/fma-combine.ll
+++ b/test/CodeGen/R600/fma-combine.ll
@@ -15,14 +15,14 @@ declare float @llvm.fma.f32(float, float, float) #0
; SI: buffer_store_dwordx2 [[RESULT]]
define void @combine_to_fma_f64_0(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr double addrspace(1)* %gep.0, i32 2
- %gep.out = getelementptr double addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
+ %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
- %a = load double addrspace(1)* %gep.0
- %b = load double addrspace(1)* %gep.1
- %c = load double addrspace(1)* %gep.2
+ %a = load double, double addrspace(1)* %gep.0
+ %b = load double, double addrspace(1)* %gep.1
+ %c = load double, double addrspace(1)* %gep.2
%mul = fmul double %a, %b
%fma = fadd double %mul, %c
@@ -43,17 +43,17 @@ define void @combine_to_fma_f64_0(double addrspace(1)* noalias %out, double addr
; SI: s_endpgm
define void @combine_to_fma_f64_0_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr double addrspace(1)* %gep.0, i32 2
- %gep.3 = getelementptr double addrspace(1)* %gep.0, i32 3
- %gep.out.0 = getelementptr double addrspace(1)* %out, i32 %tid
- %gep.out.1 = getelementptr double addrspace(1)* %gep.out.0, i32 1
-
- %a = load double addrspace(1)* %gep.0
- %b = load double addrspace(1)* %gep.1
- %c = load double addrspace(1)* %gep.2
- %d = load double addrspace(1)* %gep.3
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
+ %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
+ %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
+ %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1
+
+ %a = load double, double addrspace(1)* %gep.0
+ %b = load double, double addrspace(1)* %gep.1
+ %c = load double, double addrspace(1)* %gep.2
+ %d = load double, double addrspace(1)* %gep.3
%mul = fmul double %a, %b
%fma0 = fadd double %mul, %c
@@ -72,14 +72,14 @@ define void @combine_to_fma_f64_0_2use(double addrspace(1)* noalias %out, double
; SI: buffer_store_dwordx2 [[RESULT]]
define void @combine_to_fma_f64_1(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr double addrspace(1)* %gep.0, i32 2
- %gep.out = getelementptr double addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
+ %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
- %a = load double addrspace(1)* %gep.0
- %b = load double addrspace(1)* %gep.1
- %c = load double addrspace(1)* %gep.2
+ %a = load double, double addrspace(1)* %gep.0
+ %b = load double, double addrspace(1)* %gep.1
+ %c = load double, double addrspace(1)* %gep.2
%mul = fmul double %a, %b
%fma = fadd double %c, %mul
@@ -96,14 +96,14 @@ define void @combine_to_fma_f64_1(double addrspace(1)* noalias %out, double addr
; SI: buffer_store_dwordx2 [[RESULT]]
define void @combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr double addrspace(1)* %gep.0, i32 2
- %gep.out = getelementptr double addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
+ %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
- %a = load double addrspace(1)* %gep.0
- %b = load double addrspace(1)* %gep.1
- %c = load double addrspace(1)* %gep.2
+ %a = load double, double addrspace(1)* %gep.0
+ %b = load double, double addrspace(1)* %gep.1
+ %c = load double, double addrspace(1)* %gep.2
%mul = fmul double %a, %b
%fma = fsub double %mul, %c
@@ -124,17 +124,17 @@ define void @combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %out, double
; SI: s_endpgm
define void @combine_to_fma_fsub_f64_0_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr double addrspace(1)* %gep.0, i32 2
- %gep.3 = getelementptr double addrspace(1)* %gep.0, i32 3
- %gep.out.0 = getelementptr double addrspace(1)* %out, i32 %tid
- %gep.out.1 = getelementptr double addrspace(1)* %gep.out.0, i32 1
-
- %a = load double addrspace(1)* %gep.0
- %b = load double addrspace(1)* %gep.1
- %c = load double addrspace(1)* %gep.2
- %d = load double addrspace(1)* %gep.3
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
+ %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
+ %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
+ %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1
+
+ %a = load double, double addrspace(1)* %gep.0
+ %b = load double, double addrspace(1)* %gep.1
+ %c = load double, double addrspace(1)* %gep.2
+ %d = load double, double addrspace(1)* %gep.3
%mul = fmul double %a, %b
%fma0 = fsub double %mul, %c
@@ -153,14 +153,14 @@ define void @combine_to_fma_fsub_f64_0_2use(double addrspace(1)* noalias %out, d
; SI: buffer_store_dwordx2 [[RESULT]]
define void @combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr double addrspace(1)* %gep.0, i32 2
- %gep.out = getelementptr double addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
+ %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
- %a = load double addrspace(1)* %gep.0
- %b = load double addrspace(1)* %gep.1
- %c = load double addrspace(1)* %gep.2
+ %a = load double, double addrspace(1)* %gep.0
+ %b = load double, double addrspace(1)* %gep.1
+ %c = load double, double addrspace(1)* %gep.2
%mul = fmul double %a, %b
%fma = fsub double %c, %mul
@@ -181,17 +181,17 @@ define void @combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %out, double
; SI: s_endpgm
define void @combine_to_fma_fsub_1_f64_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr double addrspace(1)* %gep.0, i32 2
- %gep.3 = getelementptr double addrspace(1)* %gep.0, i32 3
- %gep.out.0 = getelementptr double addrspace(1)* %out, i32 %tid
- %gep.out.1 = getelementptr double addrspace(1)* %gep.out.0, i32 1
-
- %a = load double addrspace(1)* %gep.0
- %b = load double addrspace(1)* %gep.1
- %c = load double addrspace(1)* %gep.2
- %d = load double addrspace(1)* %gep.3
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
+ %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
+ %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
+ %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1
+
+ %a = load double, double addrspace(1)* %gep.0
+ %b = load double, double addrspace(1)* %gep.1
+ %c = load double, double addrspace(1)* %gep.2
+ %d = load double, double addrspace(1)* %gep.3
%mul = fmul double %a, %b
%fma0 = fsub double %c, %mul
@@ -210,14 +210,14 @@ define void @combine_to_fma_fsub_1_f64_2use(double addrspace(1)* noalias %out, d
; SI: buffer_store_dwordx2 [[RESULT]]
define void @combine_to_fma_fsub_2_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr double addrspace(1)* %gep.0, i32 2
- %gep.out = getelementptr double addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
+ %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
- %a = load double addrspace(1)* %gep.0
- %b = load double addrspace(1)* %gep.1
- %c = load double addrspace(1)* %gep.2
+ %a = load double, double addrspace(1)* %gep.0
+ %b = load double, double addrspace(1)* %gep.1
+ %c = load double, double addrspace(1)* %gep.2
%mul = fmul double %a, %b
%mul.neg = fsub double -0.0, %mul
@@ -239,17 +239,17 @@ define void @combine_to_fma_fsub_2_f64(double addrspace(1)* noalias %out, double
; SI: s_endpgm
define void @combine_to_fma_fsub_2_f64_2uses_neg(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr double addrspace(1)* %gep.0, i32 2
- %gep.3 = getelementptr double addrspace(1)* %gep.0, i32 3
- %gep.out.0 = getelementptr double addrspace(1)* %out, i32 %tid
- %gep.out.1 = getelementptr double addrspace(1)* %gep.out.0, i32 1
-
- %a = load double addrspace(1)* %gep.0
- %b = load double addrspace(1)* %gep.1
- %c = load double addrspace(1)* %gep.2
- %d = load double addrspace(1)* %gep.3
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
+ %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
+ %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
+ %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1
+
+ %a = load double, double addrspace(1)* %gep.0
+ %b = load double, double addrspace(1)* %gep.1
+ %c = load double, double addrspace(1)* %gep.2
+ %d = load double, double addrspace(1)* %gep.3
%mul = fmul double %a, %b
%mul.neg = fsub double -0.0, %mul
@@ -273,17 +273,17 @@ define void @combine_to_fma_fsub_2_f64_2uses_neg(double addrspace(1)* noalias %o
; SI: s_endpgm
define void @combine_to_fma_fsub_2_f64_2uses_mul(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr double addrspace(1)* %gep.0, i32 2
- %gep.3 = getelementptr double addrspace(1)* %gep.0, i32 3
- %gep.out.0 = getelementptr double addrspace(1)* %out, i32 %tid
- %gep.out.1 = getelementptr double addrspace(1)* %gep.out.0, i32 1
-
- %a = load double addrspace(1)* %gep.0
- %b = load double addrspace(1)* %gep.1
- %c = load double addrspace(1)* %gep.2
- %d = load double addrspace(1)* %gep.3
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
+ %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
+ %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
+ %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1
+
+ %a = load double, double addrspace(1)* %gep.0
+ %b = load double, double addrspace(1)* %gep.1
+ %c = load double, double addrspace(1)* %gep.2
+ %d = load double, double addrspace(1)* %gep.3
%mul = fmul double %a, %b
%mul.neg = fsub double -0.0, %mul
@@ -308,18 +308,18 @@ define void @combine_to_fma_fsub_2_f64_2uses_mul(double addrspace(1)* noalias %o
; SI: buffer_store_dwordx2 [[RESULT]]
define void @aggressive_combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr double addrspace(1)* %gep.0, i32 2
- %gep.3 = getelementptr double addrspace(1)* %gep.0, i32 3
- %gep.4 = getelementptr double addrspace(1)* %gep.0, i32 4
- %gep.out = getelementptr double addrspace(1)* %out, i32 %tid
-
- %x = load double addrspace(1)* %gep.0
- %y = load double addrspace(1)* %gep.1
- %z = load double addrspace(1)* %gep.2
- %u = load double addrspace(1)* %gep.3
- %v = load double addrspace(1)* %gep.4
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
+ %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
+ %gep.4 = getelementptr double, double addrspace(1)* %gep.0, i32 4
+ %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
+
+ %x = load double, double addrspace(1)* %gep.0
+ %y = load double, double addrspace(1)* %gep.1
+ %z = load double, double addrspace(1)* %gep.2
+ %u = load double, double addrspace(1)* %gep.3
+ %v = load double, double addrspace(1)* %gep.4
%tmp0 = fmul double %u, %v
%tmp1 = call double @llvm.fma.f64(double %x, double %y, double %tmp0) #0
@@ -343,18 +343,18 @@ define void @aggressive_combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %
; SI: buffer_store_dwordx2 [[RESULT]]
define void @aggressive_combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr double addrspace(1)* %gep.0, i32 2
- %gep.3 = getelementptr double addrspace(1)* %gep.0, i32 3
- %gep.4 = getelementptr double addrspace(1)* %gep.0, i32 4
- %gep.out = getelementptr double addrspace(1)* %out, i32 %tid
-
- %x = load double addrspace(1)* %gep.0
- %y = load double addrspace(1)* %gep.1
- %z = load double addrspace(1)* %gep.2
- %u = load double addrspace(1)* %gep.3
- %v = load double addrspace(1)* %gep.4
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
+ %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
+ %gep.4 = getelementptr double, double addrspace(1)* %gep.0, i32 4
+ %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
+
+ %x = load double, double addrspace(1)* %gep.0
+ %y = load double, double addrspace(1)* %gep.1
+ %z = load double, double addrspace(1)* %gep.2
+ %u = load double, double addrspace(1)* %gep.3
+ %v = load double, double addrspace(1)* %gep.4
%tmp0 = fmul double %u, %v
%tmp1 = call double @llvm.fma.f64(double %y, double %z, double %tmp0) #0
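
fma-combine.ll exercises the DAG combine that folds a single-use fmul into a dependent fadd or fsub as one v_fma_f64, with the fsub variants checking that the negation is absorbed as an operand modifier and the _2use variants checking what happens when intermediate values have extra uses. The core shape being matched, as a sketch:

define double @fma_combine_shape(double %a, double %b, double %c) {
  %mul = fmul double %a, %b    ; single use
  %res = fadd double %mul, %c  ; expected to fold: v_fma_f64 res, a, b, c
  ret double %res
}
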
diff --git a/test/CodeGen/R600/fma.f64.ll b/test/CodeGen/R600/fma.f64.ll
index bca312b..0a55ef7 100644
--- a/test/CodeGen/R600/fma.f64.ll
+++ b/test/CodeGen/R600/fma.f64.ll
@@ -10,9 +10,9 @@ declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) n
; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define void @fma_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2, double addrspace(1)* %in3) {
- %r0 = load double addrspace(1)* %in1
- %r1 = load double addrspace(1)* %in2
- %r2 = load double addrspace(1)* %in3
+ %r0 = load double, double addrspace(1)* %in1
+ %r1 = load double, double addrspace(1)* %in2
+ %r2 = load double, double addrspace(1)* %in3
%r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %r2)
store double %r3, double addrspace(1)* %out
ret void
@@ -23,9 +23,9 @@ define void @fma_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
; SI: v_fma_f64
define void @fma_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
<2 x double> addrspace(1)* %in2, <2 x double> addrspace(1)* %in3) {
- %r0 = load <2 x double> addrspace(1)* %in1
- %r1 = load <2 x double> addrspace(1)* %in2
- %r2 = load <2 x double> addrspace(1)* %in3
+ %r0 = load <2 x double>, <2 x double> addrspace(1)* %in1
+ %r1 = load <2 x double>, <2 x double> addrspace(1)* %in2
+ %r2 = load <2 x double>, <2 x double> addrspace(1)* %in3
%r3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %r0, <2 x double> %r1, <2 x double> %r2)
store <2 x double> %r3, <2 x double> addrspace(1)* %out
ret void
@@ -38,9 +38,9 @@ define void @fma_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1
; SI: v_fma_f64
define void @fma_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in1,
<4 x double> addrspace(1)* %in2, <4 x double> addrspace(1)* %in3) {
- %r0 = load <4 x double> addrspace(1)* %in1
- %r1 = load <4 x double> addrspace(1)* %in2
- %r2 = load <4 x double> addrspace(1)* %in3
+ %r0 = load <4 x double>, <4 x double> addrspace(1)* %in1
+ %r1 = load <4 x double>, <4 x double> addrspace(1)* %in2
+ %r2 = load <4 x double>, <4 x double> addrspace(1)* %in3
%r3 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %r0, <4 x double> %r1, <4 x double> %r2)
store <4 x double> %r3, <4 x double> addrspace(1)* %out
ret void
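
These targets have no packed f64 fma, so the vector cases above are expected to scalarize into one v_fma_f64 per lane (two for v2f64, four for v4f64), which is what the repeated SI check lines count. A sketch of the v2f64 case:

define <2 x double> @fma_v2f64_sketch(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
  ; expected: two independent v_fma_f64 instructions, one per lane
  %r = call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
  ret <2 x double> %r
}
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
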
diff --git a/test/CodeGen/R600/fma.ll b/test/CodeGen/R600/fma.ll
index f3861ff..d6024aa 100644
--- a/test/CodeGen/R600/fma.ll
+++ b/test/CodeGen/R600/fma.ll
@@ -14,9 +14,9 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; EG: FMA {{\*? *}}[[RES]]
define void @fma_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
float addrspace(1)* %in2, float addrspace(1)* %in3) {
- %r0 = load float addrspace(1)* %in1
- %r1 = load float addrspace(1)* %in2
- %r2 = load float addrspace(1)* %in3
+ %r0 = load float, float addrspace(1)* %in1
+ %r1 = load float, float addrspace(1)* %in2
+ %r2 = load float, float addrspace(1)* %in3
%r3 = tail call float @llvm.fma.f32(float %r0, float %r1, float %r2)
store float %r3, float addrspace(1)* %out
ret void
@@ -31,9 +31,9 @@ define void @fma_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
; EG-DAG: FMA {{\*? *}}[[RES]].[[CHHI]]
define void @fma_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in1,
<2 x float> addrspace(1)* %in2, <2 x float> addrspace(1)* %in3) {
- %r0 = load <2 x float> addrspace(1)* %in1
- %r1 = load <2 x float> addrspace(1)* %in2
- %r2 = load <2 x float> addrspace(1)* %in3
+ %r0 = load <2 x float>, <2 x float> addrspace(1)* %in1
+ %r1 = load <2 x float>, <2 x float> addrspace(1)* %in2
+ %r2 = load <2 x float>, <2 x float> addrspace(1)* %in3
%r3 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %r0, <2 x float> %r1, <2 x float> %r2)
store <2 x float> %r3, <2 x float> addrspace(1)* %out
ret void
@@ -52,9 +52,9 @@ define void @fma_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)*
; EG-DAG: FMA {{\*? *}}[[RES]].W
define void @fma_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in1,
<4 x float> addrspace(1)* %in2, <4 x float> addrspace(1)* %in3) {
- %r0 = load <4 x float> addrspace(1)* %in1
- %r1 = load <4 x float> addrspace(1)* %in2
- %r2 = load <4 x float> addrspace(1)* %in3
+ %r0 = load <4 x float>, <4 x float> addrspace(1)* %in1
+ %r1 = load <4 x float>, <4 x float> addrspace(1)* %in2
+ %r2 = load <4 x float>, <4 x float> addrspace(1)* %in3
%r3 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %r0, <4 x float> %r1, <4 x float> %r2)
store <4 x float> %r3, <4 x float> addrspace(1)* %out
ret void
@@ -64,12 +64,12 @@ define void @fma_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)*
; SI: v_fma_f32 {{v[0-9]+}}, 2.0, {{v[0-9]+}}, {{v[0-9]+}}
define void @fma_commute_mul_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %in.a.gep = getelementptr float addrspace(1)* %in.a, i32 %tid
- %in.b.gep = getelementptr float addrspace(1)* %in.b, i32 %tid
- %out.gep = getelementptr float addrspace(1)* %out, i32 %tid
+ %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
+ %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
- %a = load float addrspace(1)* %in.a.gep, align 4
- %b = load float addrspace(1)* %in.b.gep, align 4
+ %a = load float, float addrspace(1)* %in.a.gep, align 4
+ %b = load float, float addrspace(1)* %in.b.gep, align 4
%fma = call float @llvm.fma.f32(float %a, float 2.0, float %b)
store float %fma, float addrspace(1)* %out.gep, align 4
@@ -79,12 +79,12 @@ define void @fma_commute_mul_inline_imm_f32(float addrspace(1)* noalias %out, fl
; FUNC-LABEL: @fma_commute_mul_s_f32
define void @fma_commute_mul_s_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b, float %b) nounwind {
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %in.a.gep = getelementptr float addrspace(1)* %in.a, i32 %tid
- %in.b.gep = getelementptr float addrspace(1)* %in.b, i32 %tid
- %out.gep = getelementptr float addrspace(1)* %out, i32 %tid
+ %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
+ %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
- %a = load float addrspace(1)* %in.a.gep, align 4
- %c = load float addrspace(1)* %in.b.gep, align 4
+ %a = load float, float addrspace(1)* %in.a.gep, align 4
+ %c = load float, float addrspace(1)* %in.b.gep, align 4
%fma = call float @llvm.fma.f32(float %a, float %b, float %c)
store float %fma, float addrspace(1)* %out.gep, align 4
diff --git a/test/CodeGen/R600/fmax3.f64.ll b/test/CodeGen/R600/fmax3.f64.ll
new file mode 100644
index 0000000..f78c71b
--- /dev/null
+++ b/test/CodeGen/R600/fmax3.f64.ll
@@ -0,0 +1,24 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare double @llvm.maxnum.f64(double, double) nounwind readnone
+
+; SI-LABEL: {{^}}test_fmax3_f64:
+; SI-DAG: buffer_load_dwordx2 [[REGA:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+:[0-9]+}}], 0{{$}}
+; SI-DAG: buffer_load_dwordx2 [[REGB:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+:[0-9]+}}], 0 offset:8
+; SI-DAG: buffer_load_dwordx2 [[REGC:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+:[0-9]+}}], 0 offset:16
+; SI: v_max_f64 [[REGA]], [[REGA]], [[REGB]]
+; SI: v_max_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[REGA]], [[REGC]]
+; SI: buffer_store_dwordx2 [[RESULT]],
+; SI: s_endpgm
+define void @test_fmax3_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) nounwind {
+ %bptr = getelementptr double, double addrspace(1)* %aptr, i32 1
+ %cptr = getelementptr double, double addrspace(1)* %aptr, i32 2
+ %a = load double, double addrspace(1)* %aptr, align 8
+ %b = load double, double addrspace(1)* %bptr, align 8
+ %c = load double, double addrspace(1)* %cptr, align 8
+ %f0 = call double @llvm.maxnum.f64(double %a, double %b) nounwind readnone
+ %f1 = call double @llvm.maxnum.f64(double %f0, double %c) nounwind readnone
+ store double %f1, double addrspace(1)* %out, align 8
+ ret void
+}
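
This new file is the f64 counterpart of fmax3.ll below. There is no three-operand max for doubles, so the chained maxnum is expected to stay as two v_max_f64 ops, the first reusing [[REGA]] as its destination; the f32 tests that follow check that the same chain can instead collapse into a single v_max3_f32. The f32 shape, as a sketch:

define float @fmax3_f32_sketch(float %a, float %b, float %c) {
  %m0 = call float @llvm.maxnum.f32(float %a, float %b)
  %m1 = call float @llvm.maxnum.f32(float %m0, float %c)  ; expected: v_max3_f32
  ret float %m1
}
declare float @llvm.maxnum.f32(float, float)
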
diff --git a/test/CodeGen/R600/fmax3.ll b/test/CodeGen/R600/fmax3.ll
index 629c032..c3028a6 100644
--- a/test/CodeGen/R600/fmax3.ll
+++ b/test/CodeGen/R600/fmax3.ll
@@ -11,9 +11,9 @@ declare float @llvm.maxnum.f32(float, float) nounwind readnone
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
define void @test_fmax3_olt_0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
- %a = load float addrspace(1)* %aptr, align 4
- %b = load float addrspace(1)* %bptr, align 4
- %c = load float addrspace(1)* %cptr, align 4
+ %a = load float, float addrspace(1)* %aptr, align 4
+ %b = load float, float addrspace(1)* %bptr, align 4
+ %c = load float, float addrspace(1)* %cptr, align 4
%f0 = call float @llvm.maxnum.f32(float %a, float %b) nounwind readnone
%f1 = call float @llvm.maxnum.f32(float %f0, float %c) nounwind readnone
store float %f1, float addrspace(1)* %out, align 4
@@ -29,9 +29,9 @@ define void @test_fmax3_olt_0(float addrspace(1)* %out, float addrspace(1)* %apt
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
define void @test_fmax3_olt_1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
- %a = load float addrspace(1)* %aptr, align 4
- %b = load float addrspace(1)* %bptr, align 4
- %c = load float addrspace(1)* %cptr, align 4
+ %a = load float, float addrspace(1)* %aptr, align 4
+ %b = load float, float addrspace(1)* %bptr, align 4
+ %c = load float, float addrspace(1)* %cptr, align 4
%f0 = call float @llvm.maxnum.f32(float %a, float %b) nounwind readnone
%f1 = call float @llvm.maxnum.f32(float %c, float %f0) nounwind readnone
store float %f1, float addrspace(1)* %out, align 4
diff --git a/test/CodeGen/R600/fmax_legacy.f64.ll b/test/CodeGen/R600/fmax_legacy.f64.ll
index 762853d..8282438 100644
--- a/test/CodeGen/R600/fmax_legacy.f64.ll
+++ b/test/CodeGen/R600/fmax_legacy.f64.ll
@@ -6,11 +6,11 @@ declare i32 @llvm.r600.read.tidig.x() #1
; FUNC-LABEL: @test_fmax_legacy_uge_f64
define void @test_fmax_legacy_uge_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
- %a = load double addrspace(1)* %gep.0, align 8
- %b = load double addrspace(1)* %gep.1, align 8
+ %a = load double, double addrspace(1)* %gep.0, align 8
+ %b = load double, double addrspace(1)* %gep.1, align 8
%cmp = fcmp uge double %a, %b
%val = select i1 %cmp, double %a, double %b
@@ -21,11 +21,11 @@ define void @test_fmax_legacy_uge_f64(double addrspace(1)* %out, double addrspac
; FUNC-LABEL: @test_fmax_legacy_oge_f64
define void @test_fmax_legacy_oge_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
- %a = load double addrspace(1)* %gep.0, align 8
- %b = load double addrspace(1)* %gep.1, align 8
+ %a = load double, double addrspace(1)* %gep.0, align 8
+ %b = load double, double addrspace(1)* %gep.1, align 8
%cmp = fcmp oge double %a, %b
%val = select i1 %cmp, double %a, double %b
@@ -36,11 +36,11 @@ define void @test_fmax_legacy_oge_f64(double addrspace(1)* %out, double addrspac
; FUNC-LABEL: @test_fmax_legacy_ugt_f64
define void @test_fmax_legacy_ugt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
- %a = load double addrspace(1)* %gep.0, align 8
- %b = load double addrspace(1)* %gep.1, align 8
+ %a = load double, double addrspace(1)* %gep.0, align 8
+ %b = load double, double addrspace(1)* %gep.1, align 8
%cmp = fcmp ugt double %a, %b
%val = select i1 %cmp, double %a, double %b
@@ -51,11 +51,11 @@ define void @test_fmax_legacy_ugt_f64(double addrspace(1)* %out, double addrspac
; FUNC-LABEL: @test_fmax_legacy_ogt_f64
define void @test_fmax_legacy_ogt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
- %a = load double addrspace(1)* %gep.0, align 8
- %b = load double addrspace(1)* %gep.1, align 8
+ %a = load double, double addrspace(1)* %gep.0, align 8
+ %b = load double, double addrspace(1)* %gep.1, align 8
%cmp = fcmp ogt double %a, %b
%val = select i1 %cmp, double %a, double %b
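
The *_legacy tests feed an fcmp/select pair through instruction selection and check which min/max it becomes. For f64 there are no legacy min/max instructions, so these cases only pin down the compare+select lowering; the f32 files below use SI-SAFE and SI-NONAN prefixes to distinguish the default v_min_legacy_f32/v_max_legacy_f32 selection from the plain v_min_f32/v_max_f32 allowed under no-NaNs (the SI-NONAN checks are visible in the fmin_legacy.ll hunks further down). The shape being matched:

define float @min_pattern_sketch(float %a, float %b) {
  %cmp = fcmp ule float %a, %b
  %val = select i1 %cmp, float %a, float %b  ; matched as a min of %a, %b
  ret float %val
}
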
diff --git a/test/CodeGen/R600/fmax_legacy.ll b/test/CodeGen/R600/fmax_legacy.ll
index 46f0e98..413957d 100644
--- a/test/CodeGen/R600/fmax_legacy.ll
+++ b/test/CodeGen/R600/fmax_legacy.ll
@@ -15,11 +15,11 @@ declare i32 @llvm.r600.read.tidig.x() #1
; EG: MAX
define void @test_fmax_legacy_uge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float addrspace(1)* %gep.0, align 4
- %b = load float addrspace(1)* %gep.1, align 4
+ %a = load float, float addrspace(1)* %gep.0, align 4
+ %b = load float, float addrspace(1)* %gep.1, align 4
%cmp = fcmp uge float %a, %b
%val = select i1 %cmp, float %a, float %b
@@ -35,11 +35,11 @@ define void @test_fmax_legacy_uge_f32(float addrspace(1)* %out, float addrspace(
; EG: MAX
define void @test_fmax_legacy_oge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float addrspace(1)* %gep.0, align 4
- %b = load float addrspace(1)* %gep.1, align 4
+ %a = load float, float addrspace(1)* %gep.0, align 4
+ %b = load float, float addrspace(1)* %gep.1, align 4
%cmp = fcmp oge float %a, %b
%val = select i1 %cmp, float %a, float %b
@@ -55,11 +55,11 @@ define void @test_fmax_legacy_oge_f32(float addrspace(1)* %out, float addrspace(
; EG: MAX
define void @test_fmax_legacy_ugt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float addrspace(1)* %gep.0, align 4
- %b = load float addrspace(1)* %gep.1, align 4
+ %a = load float, float addrspace(1)* %gep.0, align 4
+ %b = load float, float addrspace(1)* %gep.1, align 4
%cmp = fcmp ugt float %a, %b
%val = select i1 %cmp, float %a, float %b
@@ -75,11 +75,11 @@ define void @test_fmax_legacy_ugt_f32(float addrspace(1)* %out, float addrspace(
; EG: MAX
define void @test_fmax_legacy_ogt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float addrspace(1)* %gep.0, align 4
- %b = load float addrspace(1)* %gep.1, align 4
+ %a = load float, float addrspace(1)* %gep.0, align 4
+ %b = load float, float addrspace(1)* %gep.1, align 4
%cmp = fcmp ogt float %a, %b
%val = select i1 %cmp, float %a, float %b
@@ -99,11 +99,11 @@ define void @test_fmax_legacy_ogt_f32(float addrspace(1)* %out, float addrspace(
; EG: MAX
define void @test_fmax_legacy_ogt_f32_multi_use(float addrspace(1)* %out0, i1 addrspace(1)* %out1, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float addrspace(1)* %gep.0, align 4
- %b = load float addrspace(1)* %gep.1, align 4
+ %a = load float, float addrspace(1)* %gep.0, align 4
+ %b = load float, float addrspace(1)* %gep.1, align 4
%cmp = fcmp ogt float %a, %b
%val = select i1 %cmp, float %a, float %b
diff --git a/test/CodeGen/R600/fmin3.ll b/test/CodeGen/R600/fmin3.ll
index e3acb31..0a76699 100644
--- a/test/CodeGen/R600/fmin3.ll
+++ b/test/CodeGen/R600/fmin3.ll
@@ -12,9 +12,9 @@ declare float @llvm.minnum.f32(float, float) nounwind readnone
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
define void @test_fmin3_olt_0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
- %a = load float addrspace(1)* %aptr, align 4
- %b = load float addrspace(1)* %bptr, align 4
- %c = load float addrspace(1)* %cptr, align 4
+ %a = load float, float addrspace(1)* %aptr, align 4
+ %b = load float, float addrspace(1)* %bptr, align 4
+ %c = load float, float addrspace(1)* %cptr, align 4
%f0 = call float @llvm.minnum.f32(float %a, float %b) nounwind readnone
%f1 = call float @llvm.minnum.f32(float %f0, float %c) nounwind readnone
store float %f1, float addrspace(1)* %out, align 4
@@ -30,9 +30,9 @@ define void @test_fmin3_olt_0(float addrspace(1)* %out, float addrspace(1)* %apt
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
define void @test_fmin3_olt_1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
- %a = load float addrspace(1)* %aptr, align 4
- %b = load float addrspace(1)* %bptr, align 4
- %c = load float addrspace(1)* %cptr, align 4
+ %a = load float, float addrspace(1)* %aptr, align 4
+ %b = load float, float addrspace(1)* %bptr, align 4
+ %c = load float, float addrspace(1)* %cptr, align 4
%f0 = call float @llvm.minnum.f32(float %a, float %b) nounwind readnone
%f1 = call float @llvm.minnum.f32(float %c, float %f0) nounwind readnone
store float %f1, float addrspace(1)* %out, align 4
diff --git a/test/CodeGen/R600/fmin_legacy.f64.ll b/test/CodeGen/R600/fmin_legacy.f64.ll
index 83043cd..e19a48f 100644
--- a/test/CodeGen/R600/fmin_legacy.f64.ll
+++ b/test/CodeGen/R600/fmin_legacy.f64.ll
@@ -16,11 +16,11 @@ define void @test_fmin_legacy_f64(<4 x double> addrspace(1)* %out, <4 x double>
; FUNC-LABEL: @test_fmin_legacy_ule_f64
define void @test_fmin_legacy_ule_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
- %a = load double addrspace(1)* %gep.0, align 8
- %b = load double addrspace(1)* %gep.1, align 8
+ %a = load double, double addrspace(1)* %gep.0, align 8
+ %b = load double, double addrspace(1)* %gep.1, align 8
%cmp = fcmp ule double %a, %b
%val = select i1 %cmp, double %a, double %b
@@ -31,11 +31,11 @@ define void @test_fmin_legacy_ule_f64(double addrspace(1)* %out, double addrspac
; FUNC-LABEL: @test_fmin_legacy_ole_f64
define void @test_fmin_legacy_ole_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
- %a = load double addrspace(1)* %gep.0, align 8
- %b = load double addrspace(1)* %gep.1, align 8
+ %a = load double, double addrspace(1)* %gep.0, align 8
+ %b = load double, double addrspace(1)* %gep.1, align 8
%cmp = fcmp ole double %a, %b
%val = select i1 %cmp, double %a, double %b
@@ -46,11 +46,11 @@ define void @test_fmin_legacy_ole_f64(double addrspace(1)* %out, double addrspac
; FUNC-LABEL: @test_fmin_legacy_olt_f64
define void @test_fmin_legacy_olt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
- %a = load double addrspace(1)* %gep.0, align 8
- %b = load double addrspace(1)* %gep.1, align 8
+ %a = load double, double addrspace(1)* %gep.0, align 8
+ %b = load double, double addrspace(1)* %gep.1, align 8
%cmp = fcmp olt double %a, %b
%val = select i1 %cmp, double %a, double %b
@@ -61,11 +61,11 @@ define void @test_fmin_legacy_olt_f64(double addrspace(1)* %out, double addrspac
; FUNC-LABEL: @test_fmin_legacy_ult_f64
define void @test_fmin_legacy_ult_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
- %a = load double addrspace(1)* %gep.0, align 8
- %b = load double addrspace(1)* %gep.1, align 8
+ %a = load double, double addrspace(1)* %gep.0, align 8
+ %b = load double, double addrspace(1)* %gep.1, align 8
%cmp = fcmp ult double %a, %b
%val = select i1 %cmp, double %a, double %b
diff --git a/test/CodeGen/R600/fmin_legacy.ll b/test/CodeGen/R600/fmin_legacy.ll
index 5014f6c..6a625c2 100644
--- a/test/CodeGen/R600/fmin_legacy.ll
+++ b/test/CodeGen/R600/fmin_legacy.ll
@@ -27,11 +27,11 @@ define void @test_fmin_legacy_f32(<4 x float> addrspace(1)* %out, <4 x float> in
; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
define void @test_fmin_legacy_ule_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float addrspace(1)* %gep.0, align 4
- %b = load float addrspace(1)* %gep.1, align 4
+ %a = load float, float addrspace(1)* %gep.0, align 4
+ %b = load float, float addrspace(1)* %gep.1, align 4
%cmp = fcmp ule float %a, %b
%val = select i1 %cmp, float %a, float %b
@@ -46,11 +46,11 @@ define void @test_fmin_legacy_ule_f32(float addrspace(1)* %out, float addrspace(
; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
define void @test_fmin_legacy_ole_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float addrspace(1)* %gep.0, align 4
- %b = load float addrspace(1)* %gep.1, align 4
+ %a = load float, float addrspace(1)* %gep.0, align 4
+ %b = load float, float addrspace(1)* %gep.1, align 4
%cmp = fcmp ole float %a, %b
%val = select i1 %cmp, float %a, float %b
@@ -65,11 +65,11 @@ define void @test_fmin_legacy_ole_f32(float addrspace(1)* %out, float addrspace(
; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
define void @test_fmin_legacy_olt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float addrspace(1)* %gep.0, align 4
- %b = load float addrspace(1)* %gep.1, align 4
+ %a = load float, float addrspace(1)* %gep.0, align 4
+ %b = load float, float addrspace(1)* %gep.1, align 4
%cmp = fcmp olt float %a, %b
%val = select i1 %cmp, float %a, float %b
@@ -84,11 +84,11 @@ define void @test_fmin_legacy_olt_f32(float addrspace(1)* %out, float addrspace(
; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
define void @test_fmin_legacy_ult_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float addrspace(1)* %gep.0, align 4
- %b = load float addrspace(1)* %gep.1, align 4
+ %a = load float, float addrspace(1)* %gep.0, align 4
+ %b = load float, float addrspace(1)* %gep.1, align 4
%cmp = fcmp ult float %a, %b
%val = select i1 %cmp, float %a, float %b
@@ -106,11 +106,11 @@ define void @test_fmin_legacy_ult_f32(float addrspace(1)* %out, float addrspace(
; SI: s_endpgm
define void @test_fmin_legacy_ole_f32_multi_use(float addrspace(1)* %out0, i1 addrspace(1)* %out1, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float addrspace(1)* %gep.0, align 4
- %b = load float addrspace(1)* %gep.1, align 4
+ %a = load float, float addrspace(1)* %gep.0, align 4
+ %b = load float, float addrspace(1)* %gep.1, align 4
%cmp = fcmp ole float %a, %b
%val0 = select i1 %cmp, float %a, float %b
diff --git a/test/CodeGen/R600/fmul.ll b/test/CodeGen/R600/fmul.ll
index 6c09aa2..68ebc4d 100644
--- a/test/CodeGen/R600/fmul.ll
+++ b/test/CodeGen/R600/fmul.ll
@@ -42,9 +42,9 @@ entry:
; SI: v_mul_f32
; SI: v_mul_f32
define void @fmul_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
- %a = load <4 x float> addrspace(1) * %in
- %b = load <4 x float> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
+ %a = load <4 x float>, <4 x float> addrspace(1) * %in
+ %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr
%result = fmul <4 x float> %a, %b
store <4 x float> %result, <4 x float> addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/fmul64.ll b/test/CodeGen/R600/fmul64.ll
index 9d7787c..3c222ea 100644
--- a/test/CodeGen/R600/fmul64.ll
+++ b/test/CodeGen/R600/fmul64.ll
@@ -5,8 +5,8 @@
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define void @fmul_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
- %r0 = load double addrspace(1)* %in1
- %r1 = load double addrspace(1)* %in2
+ %r0 = load double, double addrspace(1)* %in1
+ %r1 = load double, double addrspace(1)* %in2
%r2 = fmul double %r0, %r1
store double %r2, double addrspace(1)* %out
ret void
@@ -17,8 +17,8 @@ define void @fmul_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define void @fmul_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
<2 x double> addrspace(1)* %in2) {
- %r0 = load <2 x double> addrspace(1)* %in1
- %r1 = load <2 x double> addrspace(1)* %in2
+ %r0 = load <2 x double>, <2 x double> addrspace(1)* %in1
+ %r1 = load <2 x double>, <2 x double> addrspace(1)* %in2
%r2 = fmul <2 x double> %r0, %r1
store <2 x double> %r2, <2 x double> addrspace(1)* %out
ret void
@@ -31,8 +31,8 @@ define void @fmul_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define void @fmul_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in1,
<4 x double> addrspace(1)* %in2) {
- %r0 = load <4 x double> addrspace(1)* %in1
- %r1 = load <4 x double> addrspace(1)* %in2
+ %r0 = load <4 x double>, <4 x double> addrspace(1)* %in1
+ %r1 = load <4 x double>, <4 x double> addrspace(1)* %in2
%r2 = fmul <4 x double> %r0, %r1
store <4 x double> %r2, <4 x double> addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/fmuladd.ll b/test/CodeGen/R600/fmuladd.ll
index 2b70863..ae84d84 100644
--- a/test/CodeGen/R600/fmuladd.ll
+++ b/test/CodeGen/R600/fmuladd.ll
@@ -10,9 +10,9 @@ declare float @llvm.fabs.f32(float) nounwind readnone
define void @fmuladd_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
float addrspace(1)* %in2, float addrspace(1)* %in3) {
- %r0 = load float addrspace(1)* %in1
- %r1 = load float addrspace(1)* %in2
- %r2 = load float addrspace(1)* %in3
+ %r0 = load float, float addrspace(1)* %in1
+ %r1 = load float, float addrspace(1)* %in2
+ %r2 = load float, float addrspace(1)* %in3
%r3 = tail call float @llvm.fmuladd.f32(float %r0, float %r1, float %r2)
store float %r3, float addrspace(1)* %out
ret void
@@ -23,9 +23,9 @@ define void @fmuladd_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
define void @fmuladd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2, double addrspace(1)* %in3) {
- %r0 = load double addrspace(1)* %in1
- %r1 = load double addrspace(1)* %in2
- %r2 = load double addrspace(1)* %in3
+ %r0 = load double, double addrspace(1)* %in1
+ %r1 = load double, double addrspace(1)* %in2
+ %r2 = load double, double addrspace(1)* %in3
%r3 = tail call double @llvm.fmuladd.f64(double %r0, double %r1, double %r2)
store double %r3, double addrspace(1)* %out
ret void
@@ -38,12 +38,12 @@ define void @fmuladd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
; CHECK: buffer_store_dword [[RESULT]]
define void @fmuladd_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %r1 = load float addrspace(1)* %gep.0
- %r2 = load float addrspace(1)* %gep.1
+ %r1 = load float, float addrspace(1)* %gep.0
+ %r2 = load float, float addrspace(1)* %gep.1
%r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1, float %r2)
store float %r3, float addrspace(1)* %gep.out
@@ -57,12 +57,12 @@ define void @fmuladd_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %
; CHECK: buffer_store_dword [[RESULT]]
define void @fmuladd_a_2.0_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

- %r1 = load float addrspace(1)* %gep.0
- %r2 = load float addrspace(1)* %gep.1
+ %r1 = load float, float addrspace(1)* %gep.0
+ %r2 = load float, float addrspace(1)* %gep.1
%r3 = tail call float @llvm.fmuladd.f32(float %r1, float 2.0, float %r2)
store float %r3, float addrspace(1)* %gep.out
@@ -78,12 +78,12 @@ define void @fadd_a_a_b_f32(float addrspace(1)* %out,
float addrspace(1)* %in1,
float addrspace(1)* %in2) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

- %r0 = load float addrspace(1)* %gep.0
- %r1 = load float addrspace(1)* %gep.1
+ %r0 = load float, float addrspace(1)* %gep.0
+ %r1 = load float, float addrspace(1)* %gep.1
%add.0 = fadd float %r0, %r0
%add.1 = fadd float %add.0, %r1
@@ -100,12 +100,12 @@ define void @fadd_b_a_a_f32(float addrspace(1)* %out,
float addrspace(1)* %in1,
float addrspace(1)* %in2) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

- %r0 = load float addrspace(1)* %gep.0
- %r1 = load float addrspace(1)* %gep.1
+ %r0 = load float, float addrspace(1)* %gep.0
+ %r1 = load float, float addrspace(1)* %gep.1
%add.0 = fadd float %r0, %r0
%add.1 = fadd float %r1, %add.0
@@ -120,12 +120,12 @@ define void @fadd_b_a_a_f32(float addrspace(1)* %out,
; CHECK: buffer_store_dword [[RESULT]]
define void @fmuladd_neg_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

- %r1 = load float addrspace(1)* %gep.0
- %r2 = load float addrspace(1)* %gep.1
+ %r1 = load float, float addrspace(1)* %gep.0
+ %r2 = load float, float addrspace(1)* %gep.1
%r3 = tail call float @llvm.fmuladd.f32(float -2.0, float %r1, float %r2)
store float %r3, float addrspace(1)* %gep.out
@@ -140,12 +140,12 @@ define void @fmuladd_neg_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1
; CHECK: buffer_store_dword [[RESULT]]
define void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

- %r1 = load float addrspace(1)* %gep.0
- %r2 = load float addrspace(1)* %gep.1
+ %r1 = load float, float addrspace(1)* %gep.0
+ %r2 = load float, float addrspace(1)* %gep.1
%r1.fneg = fsub float -0.000000e+00, %r1
@@ -162,12 +162,12 @@ define void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspa
; CHECK: buffer_store_dword [[RESULT]]
define void @fmuladd_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %r1 = load float addrspace(1)* %gep.0
- %r2 = load float addrspace(1)* %gep.1
+ %r1 = load float, float addrspace(1)* %gep.0
+ %r2 = load float, float addrspace(1)* %gep.1
%r1.fneg = fsub float -0.000000e+00, %r1
@@ -184,12 +184,12 @@ define void @fmuladd_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1
; CHECK: buffer_store_dword [[RESULT]]
define void @fmuladd_2.0_a_neg_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

- %r1 = load float addrspace(1)* %gep.0
- %r2 = load float addrspace(1)* %gep.1
+ %r1 = load float, float addrspace(1)* %gep.0
+ %r2 = load float, float addrspace(1)* %gep.1
%r2.fneg = fsub float -0.000000e+00, %r2
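
For context on the fmuladd.ll changes above: each test loads two adjacent per-thread elements and feeds them to the three-operand fused multiply-add intrinsic, with one operand replaced by an immediate or a negation to exercise operand folding. A condensed sketch in the updated syntax (the kernel name and the choice of 2.0 are illustrative only):

declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone
declare i32 @llvm.r600.read.tidig.x() nounwind readnone

define void @fmuladd_sketch(float addrspace(1)* %out, float addrspace(1)* %in) {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %a = load float, float addrspace(1)* %gep.0, align 4
  %b = load float, float addrspace(1)* %gep.1, align 4
  ; should fold into a single mad-style instruction, with 2.0 as an inline constant
  %r = call float @llvm.fmuladd.f32(float 2.0, float %a, float %b)
  store float %r, float addrspace(1)* %out, align 4
  ret void
}
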
diff --git a/test/CodeGen/R600/fneg-fabs.f64.ll b/test/CodeGen/R600/fneg-fabs.f64.ll
index 7e6ede6..8830e82 100644
--- a/test/CodeGen/R600/fneg-fabs.f64.ll
+++ b/test/CodeGen/R600/fneg-fabs.f64.ll
@@ -15,8 +15,8 @@ define void @fneg_fabs_fadd_f64(double addrspace(1)* %out, double %x, double %y)
}
define void @v_fneg_fabs_fadd_f64(double addrspace(1)* %out, double addrspace(1)* %xptr, double addrspace(1)* %yptr) {
- %x = load double addrspace(1)* %xptr, align 8
- %y = load double addrspace(1)* %xptr, align 8
+ %x = load double, double addrspace(1)* %xptr, align 8
+ %y = load double, double addrspace(1)* %xptr, align 8
%fabs = call double @llvm.fabs.f64(double %x)
%fsub = fsub double -0.000000e+00, %fabs
%fadd = fadd double %y, %fsub
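
One quirk worth noting above: both %x and %y are loaded through %xptr, which is how this test has always been written. The pattern being exercised is fneg(fabs(x)); this IR dialect has no fneg instruction, so negation is spelled as a subtraction from -0.0. A self-contained sketch (function name invented):

declare double @llvm.fabs.f64(double) nounwind readnone

define void @fneg_fabs_sketch(double addrspace(1)* %out, double addrspace(1)* %in) {
  %x = load double, double addrspace(1)* %in, align 8
  %abs = call double @llvm.fabs.f64(double %x)
  ; fsub from -0.0 is the canonical fneg; fneg(fabs(x)) should reduce to
  ; forcing the sign bit, i.e. a single OR with 0x80000000 on the high dword
  %neg.abs = fsub double -0.000000e+00, %abs
  store double %neg.abs, double addrspace(1)* %out, align 8
  ret void
}
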
diff --git a/test/CodeGen/R600/fneg-fabs.ll b/test/CodeGen/R600/fneg-fabs.ll
index 4fde048..3b4930d 100644
--- a/test/CodeGen/R600/fneg-fabs.ll
+++ b/test/CodeGen/R600/fneg-fabs.ll
@@ -72,7 +72,7 @@ define void @fneg_fabs_f32(float addrspace(1)* %out, float %in) {
; FUNC-LABEL: {{^}}v_fneg_fabs_f32:
; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
define void @v_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
- %val = load float addrspace(1)* %in, align 4
+ %val = load float, float addrspace(1)* %in, align 4
%fabs = call float @llvm.fabs.f32(float %val)
%fsub = fsub float -0.000000e+00, %fabs
store float %fsub, float addrspace(1)* %out, align 4
diff --git a/test/CodeGen/R600/fp16_to_fp.ll b/test/CodeGen/R600/fp16_to_fp.ll
index da78f61..5a79ca8 100644
--- a/test/CodeGen/R600/fp16_to_fp.ll
+++ b/test/CodeGen/R600/fp16_to_fp.ll
@@ -9,7 +9,7 @@ declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone
; SI: v_cvt_f32_f16_e32 [[RESULT:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[RESULT]]
define void @test_convert_fp16_to_fp32(float addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
- %val = load i16 addrspace(1)* %in, align 2
+ %val = load i16, i16 addrspace(1)* %in, align 2
%cvt = call float @llvm.convert.from.fp16.f32(i16 %val) nounwind readnone
store float %cvt, float addrspace(1)* %out, align 4
ret void
@@ -22,7 +22,7 @@ define void @test_convert_fp16_to_fp32(float addrspace(1)* noalias %out, i16 add
; SI: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[RESULT32]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @test_convert_fp16_to_fp64(double addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
- %val = load i16 addrspace(1)* %in, align 2
+ %val = load i16, i16 addrspace(1)* %in, align 2
%cvt = call double @llvm.convert.from.fp16.f64(i16 %val) nounwind readnone
store double %cvt, double addrspace(1)* %out, align 4
ret void
diff --git a/test/CodeGen/R600/fp32_to_fp16.ll b/test/CodeGen/R600/fp32_to_fp16.ll
index c3c65ae..67925eb 100644
--- a/test/CodeGen/R600/fp32_to_fp16.ll
+++ b/test/CodeGen/R600/fp32_to_fp16.ll
@@ -8,7 +8,7 @@ declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone
; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; SI: buffer_store_short [[RESULT]]
define void @test_convert_fp32_to_fp16(i16 addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
- %val = load float addrspace(1)* %in, align 4
+ %val = load float, float addrspace(1)* %in, align 4
%cvt = call i16 @llvm.convert.to.fp16.f32(float %val) nounwind readnone
store i16 %cvt, i16 addrspace(1)* %out, align 2
ret void
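
The two conversion files above exercise the pre-half-type intrinsics that move between an i16 bit pattern and float: convert.from.fp16 widens, convert.to.fp16 narrows, and each should map to a single v_cvt instruction per the checks. A round-trip sketch (the kernel and its out-pointer names are made up):

declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone

define void @half_roundtrip_sketch(float addrspace(1)* %fout, i16 addrspace(1)* %hout, i16 addrspace(1)* %in) {
  %bits = load i16, i16 addrspace(1)* %in, align 2
  %wide = call float @llvm.convert.from.fp16.f32(i16 %bits)  ; v_cvt_f32_f16
  %narrow = call i16 @llvm.convert.to.fp16.f32(float %wide)  ; v_cvt_f16_f32
  store float %wide, float addrspace(1)* %fout, align 4
  store i16 %narrow, i16 addrspace(1)* %hout, align 2
  ret void
}
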
diff --git a/test/CodeGen/R600/fp_to_sint.f64.ll b/test/CodeGen/R600/fp_to_sint.f64.ll
index e641847..12df660 100644
--- a/test/CodeGen/R600/fp_to_sint.f64.ll
+++ b/test/CodeGen/R600/fp_to_sint.f64.ll
@@ -48,8 +48,8 @@ define void @fp_to_sint_v4f64_v4i32(<4 x i32> addrspace(1)* %out, <4 x double> %
; CI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @fp_to_sint_i64_f64(i64 addrspace(1)* %out, double addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep = getelementptr double addrspace(1)* %in, i32 %tid
- %val = load double addrspace(1)* %gep, align 8
+ %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %val = load double, double addrspace(1)* %gep, align 8
%cast = fptosi double %val to i64
store i64 %cast, i64 addrspace(1)* %out, align 8
ret void
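
The i64 case above is the expensive one: no single v_cvt instruction produces a 64-bit integer from f64 on this target, so the result is assembled from separate high and low dword computations, which is why the checks track a [[LO]]:[[HI]] register pair. A trimmed sketch of the per-thread indexing it uses (identifiers invented):

declare i32 @llvm.r600.read.tidig.x() nounwind readnone

define void @fptosi_i64_sketch(i64 addrspace(1)* %out, double addrspace(1)* %in) {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
  %val = load double, double addrspace(1)* %gep, align 8
  %cast = fptosi double %val to i64   ; expanded, not a single conversion
  store i64 %cast, i64 addrspace(1)* %out, align 8
  ret void
}
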
diff --git a/test/CodeGen/R600/fp_to_sint.ll b/test/CodeGen/R600/fp_to_sint.ll
index 16549c3..301a94b 100644
--- a/test/CodeGen/R600/fp_to_sint.ll
+++ b/test/CodeGen/R600/fp_to_sint.ll
@@ -44,7 +44,7 @@ define void @fp_to_sint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) {
; SI: v_cvt_i32_f32_e32
; SI: v_cvt_i32_f32_e32
define void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
- %value = load <4 x float> addrspace(1) * %in
+ %value = load <4 x float>, <4 x float> addrspace(1) * %in
%result = fptosi <4 x float> %value to <4 x i32>
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/fp_to_uint.f64.ll b/test/CodeGen/R600/fp_to_uint.f64.ll
index 1ffe2fa..41bc2a7 100644
--- a/test/CodeGen/R600/fp_to_uint.f64.ll
+++ b/test/CodeGen/R600/fp_to_uint.f64.ll
@@ -48,8 +48,8 @@ define void @fp_to_uint_v4i32_v4f64(<4 x i32> addrspace(1)* %out, <4 x double> %
; CI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @fp_to_uint_i64_f64(i64 addrspace(1)* %out, double addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep = getelementptr double addrspace(1)* %in, i32 %tid
- %val = load double addrspace(1)* %gep, align 8
+ %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %val = load double, double addrspace(1)* %gep, align 8
%cast = fptoui double %val to i64
store i64 %cast, i64 addrspace(1)* %out, align 4
ret void
diff --git a/test/CodeGen/R600/fp_to_uint.ll b/test/CodeGen/R600/fp_to_uint.ll
index 804d90f..b7b6ccc 100644
--- a/test/CodeGen/R600/fp_to_uint.ll
+++ b/test/CodeGen/R600/fp_to_uint.ll
@@ -36,7 +36,7 @@ define void @fp_to_uint_v2f32_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x float>
; SI: v_cvt_u32_f32_e32
define void @fp_to_uint_v4f32_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
- %value = load <4 x float> addrspace(1) * %in
+ %value = load <4 x float>, <4 x float> addrspace(1) * %in
%result = fptoui <4 x float> %value to <4 x i32>
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
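
The vector cases in fp_to_uint.ll scalarize: a <4 x float> source becomes four v_cvt_u32_f32_e32 instructions, one per lane, as the repeated SI check lines suggest. Sketch (kernel name invented):

define void @fptoui_v4_sketch(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
  %v = load <4 x float>, <4 x float> addrspace(1)* %in, align 16
  %u = fptoui <4 x float> %v to <4 x i32>   ; one v_cvt_u32_f32 per element
  store <4 x i32> %u, <4 x i32> addrspace(1)* %out, align 16
  ret void
}
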
diff --git a/test/CodeGen/R600/frem.ll b/test/CodeGen/R600/frem.ll
index 02a0070..f245ef0 100644
--- a/test/CodeGen/R600/frem.ll
+++ b/test/CodeGen/R600/frem.ll
@@ -15,9 +15,9 @@
; GCN: s_endpgm
define void @frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
float addrspace(1)* %in2) #0 {
- %gep2 = getelementptr float addrspace(1)* %in2, i32 4
- %r0 = load float addrspace(1)* %in1, align 4
- %r1 = load float addrspace(1)* %gep2, align 4
+ %gep2 = getelementptr float, float addrspace(1)* %in2, i32 4
+ %r0 = load float, float addrspace(1)* %in1, align 4
+ %r1 = load float, float addrspace(1)* %gep2, align 4
%r2 = frem float %r0, %r1
store float %r2, float addrspace(1)* %out, align 4
ret void
@@ -34,9 +34,9 @@ define void @frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
; GCN: s_endpgm
define void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
float addrspace(1)* %in2) #1 {
- %gep2 = getelementptr float addrspace(1)* %in2, i32 4
- %r0 = load float addrspace(1)* %in1, align 4
- %r1 = load float addrspace(1)* %gep2, align 4
+ %gep2 = getelementptr float, float addrspace(1)* %in2, i32 4
+ %r0 = load float, float addrspace(1)* %in1, align 4
+ %r1 = load float, float addrspace(1)* %gep2, align 4
%r2 = frem float %r0, %r1
store float %r2, float addrspace(1)* %out, align 4
ret void
@@ -55,8 +55,8 @@ define void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
; GCN: s_endpgm
define void @frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) #0 {
- %r0 = load double addrspace(1)* %in1, align 8
- %r1 = load double addrspace(1)* %in2, align 8
+ %r0 = load double, double addrspace(1)* %in1, align 8
+ %r1 = load double, double addrspace(1)* %in2, align 8
%r2 = frem double %r0, %r1
store double %r2, double addrspace(1)* %out, align 8
ret void
@@ -71,8 +71,8 @@ define void @frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
; GCN: s_endpgm
define void @unsafe_frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) #1 {
- %r0 = load double addrspace(1)* %in1, align 8
- %r1 = load double addrspace(1)* %in2, align 8
+ %r0 = load double, double addrspace(1)* %in1, align 8
+ %r1 = load double, double addrspace(1)* %in2, align 8
%r2 = frem double %r0, %r1
store double %r2, double addrspace(1)* %out, align 8
ret void
@@ -80,9 +80,9 @@ define void @unsafe_frem_f64(double addrspace(1)* %out, double addrspace(1)* %in
define void @frem_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in1,
<2 x float> addrspace(1)* %in2) #0 {
- %gep2 = getelementptr <2 x float> addrspace(1)* %in2, i32 4
- %r0 = load <2 x float> addrspace(1)* %in1, align 8
- %r1 = load <2 x float> addrspace(1)* %gep2, align 8
+ %gep2 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in2, i32 4
+ %r0 = load <2 x float>, <2 x float> addrspace(1)* %in1, align 8
+ %r1 = load <2 x float>, <2 x float> addrspace(1)* %gep2, align 8
%r2 = frem <2 x float> %r0, %r1
store <2 x float> %r2, <2 x float> addrspace(1)* %out, align 8
ret void
@@ -90,9 +90,9 @@ define void @frem_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)
define void @frem_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in1,
<4 x float> addrspace(1)* %in2) #0 {
- %gep2 = getelementptr <4 x float> addrspace(1)* %in2, i32 4
- %r0 = load <4 x float> addrspace(1)* %in1, align 16
- %r1 = load <4 x float> addrspace(1)* %gep2, align 16
+ %gep2 = getelementptr <4 x float>, <4 x float> addrspace(1)* %in2, i32 4
+ %r0 = load <4 x float>, <4 x float> addrspace(1)* %in1, align 16
+ %r1 = load <4 x float>, <4 x float> addrspace(1)* %gep2, align 16
%r2 = frem <4 x float> %r0, %r1
store <4 x float> %r2, <4 x float> addrspace(1)* %out, align 16
ret void
@@ -100,9 +100,9 @@ define void @frem_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)
define void @frem_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
<2 x double> addrspace(1)* %in2) #0 {
- %gep2 = getelementptr <2 x double> addrspace(1)* %in2, i32 4
- %r0 = load <2 x double> addrspace(1)* %in1, align 16
- %r1 = load <2 x double> addrspace(1)* %gep2, align 16
+ %gep2 = getelementptr <2 x double>, <2 x double> addrspace(1)* %in2, i32 4
+ %r0 = load <2 x double>, <2 x double> addrspace(1)* %in1, align 16
+ %r1 = load <2 x double>, <2 x double> addrspace(1)* %gep2, align 16
%r2 = frem <2 x double> %r0, %r1
store <2 x double> %r2, <2 x double> addrspace(1)* %out, align 16
ret void
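
frem has no hardware counterpart, so frem.ll checks a multi-instruction expansion, roughly a - trunc(a / b) * b, and the unsafe_ variants rerun it under the #1 attribute set, which permits the cheaper reciprocal-based division. A bare sketch of the operation itself (names invented):

define void @frem_sketch(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) {
  %a = load float, float addrspace(1)* %a.ptr, align 4
  %b = load float, float addrspace(1)* %b.ptr, align 4
  %rem = frem float %a, %b   ; lowered to a divide/trunc/multiply/subtract sequence
  store float %rem, float addrspace(1)* %out, align 4
  ret void
}
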
diff --git a/test/CodeGen/R600/fsqrt.ll b/test/CodeGen/R600/fsqrt.ll
index 1fdf3e4..0410134 100644
--- a/test/CodeGen/R600/fsqrt.ll
+++ b/test/CodeGen/R600/fsqrt.ll
@@ -9,7 +9,7 @@
; CHECK: v_sqrt_f32_e32 {{v[0-9]+, v[0-9]+}}
define void @fsqrt_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
- %r0 = load float addrspace(1)* %in
+ %r0 = load float, float addrspace(1)* %in
%r1 = call float @llvm.sqrt.f32(float %r0)
store float %r1, float addrspace(1)* %out
ret void
@@ -19,7 +19,7 @@ define void @fsqrt_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
; CHECK: v_sqrt_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define void @fsqrt_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
- %r0 = load double addrspace(1)* %in
+ %r0 = load double, double addrspace(1)* %in
%r1 = call double @llvm.sqrt.f64(double %r0)
store double %r1, double addrspace(1)* %out
ret void
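
fsqrt.ll is the simple end of the spectrum: llvm.sqrt maps one-to-one onto v_sqrt_f32_e32 / v_sqrt_f64_e32, so the tests only need a load, the call, and a store. An equivalent sketch (name invented):

declare float @llvm.sqrt.f32(float) nounwind readnone

define void @sqrt_sketch(float addrspace(1)* %out, float addrspace(1)* %in) {
  %x = load float, float addrspace(1)* %in, align 4
  %r = call float @llvm.sqrt.f32(float %x)   ; selects directly to v_sqrt_f32_e32
  store float %r, float addrspace(1)* %out, align 4
  ret void
}
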
diff --git a/test/CodeGen/R600/fsub.ll b/test/CodeGen/R600/fsub.ll
index ef90fea..dfe41cb 100644
--- a/test/CodeGen/R600/fsub.ll
+++ b/test/CodeGen/R600/fsub.ll
@@ -6,9 +6,9 @@
; FUNC-LABEL: {{^}}v_fsub_f32:
; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define void @v_fsub_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
- %b_ptr = getelementptr float addrspace(1)* %in, i32 1
- %a = load float addrspace(1)* %in, align 4
- %b = load float addrspace(1)* %b_ptr, align 4
+ %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
+ %a = load float, float addrspace(1)* %in, align 4
+ %b = load float, float addrspace(1)* %b_ptr, align 4
%result = fsub float %a, %b
store float %result, float addrspace(1)* %out, align 4
ret void
@@ -52,9 +52,9 @@ define void @fsub_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x flo
; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define void @v_fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
- %a = load <4 x float> addrspace(1)* %in, align 16
- %b = load <4 x float> addrspace(1)* %b_ptr, align 16
+ %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
+ %a = load <4 x float>, <4 x float> addrspace(1)* %in, align 16
+ %b = load <4 x float>, <4 x float> addrspace(1)* %b_ptr, align 16
%result = fsub <4 x float> %a, %b
store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16
ret void
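
The %b_ptr = getelementptr ... i32 1 idiom above is deliberate: loading both operands puts them in vector registers, so the VALU form the checks expect (v_subrev_f32_e32) must be selected rather than folding one operand from a scalar register. A reduced sketch of the idiom (identifiers invented):

define void @fsub_pair_sketch(float addrspace(1)* %out, float addrspace(1)* %in) {
  %b.ptr = getelementptr float, float addrspace(1)* %in, i32 1
  %a = load float, float addrspace(1)* %in, align 4      ; element 0
  %b = load float, float addrspace(1)* %b.ptr, align 4   ; element 1
  %diff = fsub float %a, %b
  store float %diff, float addrspace(1)* %out, align 4
  ret void
}
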
diff --git a/test/CodeGen/R600/fsub64.ll b/test/CodeGen/R600/fsub64.ll
index 2d85cc5..f34a48e 100644
--- a/test/CodeGen/R600/fsub64.ll
+++ b/test/CodeGen/R600/fsub64.ll
@@ -7,8 +7,8 @@ declare double @llvm.fabs.f64(double) #0
; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
define void @fsub_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
- %r0 = load double addrspace(1)* %in1
- %r1 = load double addrspace(1)* %in2
+ %r0 = load double, double addrspace(1)* %in1
+ %r1 = load double, double addrspace(1)* %in2
%r2 = fsub double %r0, %r1
store double %r2, double addrspace(1)* %out
ret void
@@ -18,8 +18,8 @@ define void @fsub_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -\|v\[[0-9]+:[0-9]+\]\|}}
define void @fsub_fabs_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
- %r0 = load double addrspace(1)* %in1
- %r1 = load double addrspace(1)* %in2
+ %r0 = load double, double addrspace(1)* %in1
+ %r1 = load double, double addrspace(1)* %in2
%r1.fabs = call double @llvm.fabs.f64(double %r1) #0
%r2 = fsub double %r0, %r1.fabs
store double %r2, double addrspace(1)* %out
@@ -30,8 +30,8 @@ define void @fsub_fabs_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], \|v\[[0-9]+:[0-9]+\]\|, -v\[[0-9]+:[0-9]+\]}}
define void @fsub_fabs_inv_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
- %r0 = load double addrspace(1)* %in1
- %r1 = load double addrspace(1)* %in2
+ %r0 = load double, double addrspace(1)* %in1
+ %r1 = load double, double addrspace(1)* %in2
%r0.fabs = call double @llvm.fabs.f64(double %r0) #0
%r2 = fsub double %r0.fabs, %r1
store double %r2, double addrspace(1)* %out
@@ -85,9 +85,9 @@ define void @fsub_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x d
; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
define void @fsub_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x double> addrspace(1)* %in, i32 1
- %a = load <4 x double> addrspace(1)* %in
- %b = load <4 x double> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <4 x double>, <4 x double> addrspace(1)* %in, i32 1
+ %a = load <4 x double>, <4 x double> addrspace(1)* %in
+ %b = load <4 x double>, <4 x double> addrspace(1)* %b_ptr
%result = fsub <4 x double> %a, %b
store <4 x double> %result, <4 x double> addrspace(1)* %out
ret void
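
fsub64.ll leans on source modifiers: f64 subtraction is selected as v_add_f64 with a negated second operand, and an fabs on either side is expected to fold into the same instruction as an |...| modifier rather than costing a separate instruction. A sketch mirroring the fabs case (names invented):

declare double @llvm.fabs.f64(double) nounwind readnone

define void @fsub_fabs_sketch(double addrspace(1)* %out, double addrspace(1)* %a.ptr, double addrspace(1)* %b.ptr) {
  %a = load double, double addrspace(1)* %a.ptr, align 8
  %b = load double, double addrspace(1)* %b.ptr, align 8
  %b.abs = call double @llvm.fabs.f64(double %b)
  ; expected: one v_add_f64 with a '-|...|' modifier on the second source
  %r = fsub double %a, %b.abs
  store double %r, double addrspace(1)* %out, align 8
  ret void
}
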
diff --git a/test/CodeGen/R600/ftrunc.f64.ll b/test/CodeGen/R600/ftrunc.f64.ll
index 21399a8..4ea84a7 100644
--- a/test/CodeGen/R600/ftrunc.f64.ll
+++ b/test/CodeGen/R600/ftrunc.f64.ll
@@ -14,7 +14,7 @@ declare <16 x double> @llvm.trunc.v16f64(<16 x double>) nounwind readnone
; SI: v_bfe_u32 {{v[0-9]+}}, {{v[0-9]+}}, 20, 11
; SI: s_endpgm
define void @v_ftrunc_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
- %x = load double addrspace(1)* %in, align 8
+ %x = load double, double addrspace(1)* %in, align 8
%y = call double @llvm.trunc.f64(double %x) nounwind readnone
store double %y, double addrspace(1)* %out, align 8
ret void
@@ -27,12 +27,12 @@ define void @v_ftrunc_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
; SI: s_add_i32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
; SI: s_lshr_b64
-; SI: cmp_lt_i32
+; SI: cmp_gt_i32
; SI: s_not_b64
; SI: s_and_b64
; SI: cndmask_b32
; SI: cndmask_b32
-; SI: cmp_gt_i32
+; SI: cmp_lt_i32
; SI: cndmask_b32
; SI: cndmask_b32
; SI: s_endpgm
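
The check churn just above (cmp_lt_i32 and cmp_gt_i32 trading places) likely comes from the compare-commuting change elsewhere in this rebase, not from the load/gep rewrite. The instruction soup being checked is SI's open-coded f64 trunc: there is no v_trunc_f64, so the exponent field is pulled out with v_bfe_u32 (offset 20, width 11, i.e. bits 20..30 of the high dword) and the mantissa is masked accordingly. At the IR level the test is just the intrinsic (sketch, name invented):

declare double @llvm.trunc.f64(double) nounwind readnone

define void @trunc_sketch(double addrspace(1)* %out, double addrspace(1)* %in) {
  %x = load double, double addrspace(1)* %in, align 8
  %t = call double @llvm.trunc.f64(double %x)   ; expanded to bit arithmetic on SI
  store double %t, double addrspace(1)* %out, align 8
  ret void
}
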
diff --git a/test/CodeGen/R600/gep-address-space.ll b/test/CodeGen/R600/gep-address-space.ll
index 5c6920d..471b0f6 100644
--- a/test/CodeGen/R600/gep-address-space.ll
+++ b/test/CodeGen/R600/gep-address-space.ll
@@ -6,7 +6,7 @@ define void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind {
; CHECK-LABEL: {{^}}use_gep_address_space:
; CHECK: v_mov_b32_e32 [[PTR:v[0-9]+]], s{{[0-9]+}}
; CHECK: ds_write_b32 [[PTR]], v{{[0-9]+}} offset:64
- %p = getelementptr [1024 x i32] addrspace(3)* %array, i16 0, i16 16
+ %p = getelementptr [1024 x i32], [1024 x i32] addrspace(3)* %array, i16 0, i16 16
store i32 99, i32 addrspace(3)* %p
ret void
}
@@ -18,7 +18,7 @@ define void @use_gep_address_space_large_offset([1024 x i32] addrspace(3)* %arra
; SI: s_or_b32
; CI: s_add_i32
; CHECK: ds_write_b32
- %p = getelementptr [1024 x i32] addrspace(3)* %array, i16 0, i16 16384
+ %p = getelementptr [1024 x i32], [1024 x i32] addrspace(3)* %array, i16 0, i16 16384
store i32 99, i32 addrspace(3)* %p
ret void
}
@@ -29,7 +29,7 @@ define void @gep_as_vector_v4(<4 x [1024 x i32] addrspace(3)*> %array) nounwind
; CHECK: s_add_i32
; CHECK: s_add_i32
; CHECK: s_add_i32
- %p = getelementptr <4 x [1024 x i32] addrspace(3)*> %array, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16>
+ %p = getelementptr [1024 x i32], <4 x [1024 x i32] addrspace(3)*> %array, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16>
%p0 = extractelement <4 x i32 addrspace(3)*> %p, i32 0
%p1 = extractelement <4 x i32 addrspace(3)*> %p, i32 1
%p2 = extractelement <4 x i32 addrspace(3)*> %p, i32 2
@@ -45,7 +45,7 @@ define void @gep_as_vector_v2(<2 x [1024 x i32] addrspace(3)*> %array) nounwind
; CHECK-LABEL: {{^}}gep_as_vector_v2:
; CHECK: s_add_i32
; CHECK: s_add_i32
- %p = getelementptr <2 x [1024 x i32] addrspace(3)*> %array, <2 x i16> zeroinitializer, <2 x i16> <i16 16, i16 16>
+ %p = getelementptr [1024 x i32], <2 x [1024 x i32] addrspace(3)*> %array, <2 x i16> zeroinitializer, <2 x i16> <i16 16, i16 16>
%p0 = extractelement <2 x i32 addrspace(3)*> %p, i32 0
%p1 = extractelement <2 x i32 addrspace(3)*> %p, i32 1
store i32 99, i32 addrspace(3)* %p0
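
gep-address-space.ll picks up the companion rewrite: getelementptr now also names its source element type explicitly as a first operand. Note in the vector-of-pointers hunks above that the new type operand stays scalar ([1024 x i32]) even when the pointer operand is a <4 x ...> or <2 x ...> vector. A compact sketch of both spellings (kernel name invented):

define void @gep_type_sketch([1024 x i32] addrspace(3)* %array) {
  ; old: %p = getelementptr [1024 x i32] addrspace(3)* %array, i16 0, i16 16
  ; new: the element type comes first, the pointer operand second
  %p = getelementptr [1024 x i32], [1024 x i32] addrspace(3)* %array, i16 0, i16 16
  store i32 99, i32 addrspace(3)* %p
  ret void
}
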
diff --git a/test/CodeGen/R600/global-directive.ll b/test/CodeGen/R600/global-directive.ll
index 3ba12c2..be775cf 100644
--- a/test/CodeGen/R600/global-directive.ll
+++ b/test/CodeGen/R600/global-directive.ll
@@ -6,9 +6,9 @@
; SI: .globl foo
; SI: {{^}}foo:
define void @foo(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %a = load i32 addrspace(1)* %in
- %b = load i32 addrspace(1)* %b_ptr
+ %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %a = load i32, i32 addrspace(1)* %in
+ %b = load i32, i32 addrspace(1)* %b_ptr
%result = add i32 %a, %b
store i32 %result, i32 addrspace(1)* %out
ret void
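
global-directive.ll is about the assembly wrapper rather than the arithmetic: it verifies that each kernel gets a .globl directive and a label at the start of a line ({{^}} anchors the regex so 'foo:' embedded in a longer symbol cannot satisfy the match). A sketch of the check-plus-kernel shape these tests use (the symbol name is invented; the RUN lines defining the SI prefix sit at the top of the file, outside the hunk):

; SI: .globl bar
; SI: {{^}}bar:
define void @bar(i32 addrspace(1)* %out) {
  store i32 0, i32 addrspace(1)* %out
  ret void
}
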
diff --git a/test/CodeGen/R600/global-extload-i1.ll b/test/CodeGen/R600/global-extload-i1.ll
index 67d36ce..bd9557d 100644
--- a/test/CodeGen/R600/global-extload-i1.ll
+++ b/test/CodeGen/R600/global-extload-i1.ll
@@ -8,7 +8,7 @@
; SI: buffer_store_dword
; SI: s_endpgm
define void @zextload_global_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %a = load i1 addrspace(1)* %in
+ %a = load i1, i1 addrspace(1)* %in
%ext = zext i1 %a to i32
store i32 %ext, i32 addrspace(1)* %out
ret void
@@ -20,7 +20,7 @@ define void @zextload_global_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)*
; SI: buffer_store_dword
; SI: s_endpgm
define void @sextload_global_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %a = load i1 addrspace(1)* %in
+ %a = load i1, i1 addrspace(1)* %in
%ext = sext i1 %a to i32
store i32 %ext, i32 addrspace(1)* %out
ret void
@@ -29,7 +29,7 @@ define void @sextload_global_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)*
; FUNC-LABEL: {{^}}zextload_global_v1i1_to_v1i32:
; SI: s_endpgm
define void @zextload_global_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i1> addrspace(1)* %in
+ %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
%ext = zext <1 x i1> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
ret void
@@ -38,7 +38,7 @@ define void @zextload_global_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1
; FUNC-LABEL: {{^}}sextload_global_v1i1_to_v1i32:
; SI: s_endpgm
define void @sextload_global_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i1> addrspace(1)* %in
+ %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
%ext = sext <1 x i1> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
ret void
@@ -47,7 +47,7 @@ define void @sextload_global_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1
; FUNC-LABEL: {{^}}zextload_global_v2i1_to_v2i32:
; SI: s_endpgm
define void @zextload_global_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i1> addrspace(1)* %in
+ %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
%ext = zext <2 x i1> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
ret void
@@ -56,7 +56,7 @@ define void @zextload_global_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1
; FUNC-LABEL: {{^}}sextload_global_v2i1_to_v2i32:
; SI: s_endpgm
define void @sextload_global_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i1> addrspace(1)* %in
+ %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
%ext = sext <2 x i1> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
ret void
@@ -65,7 +65,7 @@ define void @sextload_global_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1
; FUNC-LABEL: {{^}}zextload_global_v4i1_to_v4i32:
; SI: s_endpgm
define void @zextload_global_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i1> addrspace(1)* %in
+ %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
%ext = zext <4 x i1> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
ret void
@@ -74,7 +74,7 @@ define void @zextload_global_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1
; FUNC-LABEL: {{^}}sextload_global_v4i1_to_v4i32:
; SI: s_endpgm
define void @sextload_global_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i1> addrspace(1)* %in
+ %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
%ext = sext <4 x i1> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
ret void
@@ -83,7 +83,7 @@ define void @sextload_global_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1
; FUNC-LABEL: {{^}}zextload_global_v8i1_to_v8i32:
; SI: s_endpgm
define void @zextload_global_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i1> addrspace(1)* %in
+ %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
%ext = zext <8 x i1> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
ret void
@@ -92,7 +92,7 @@ define void @zextload_global_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1
; FUNC-LABEL: {{^}}sextload_global_v8i1_to_v8i32:
; SI: s_endpgm
define void @sextload_global_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i1> addrspace(1)* %in
+ %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
%ext = sext <8 x i1> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
ret void
@@ -101,7 +101,7 @@ define void @sextload_global_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1
; FUNC-LABEL: {{^}}zextload_global_v16i1_to_v16i32:
; SI: s_endpgm
define void @zextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i1> addrspace(1)* %in
+ %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
%ext = zext <16 x i1> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
ret void
@@ -110,7 +110,7 @@ define void @zextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16
; FUNC-LABEL: {{^}}sextload_global_v16i1_to_v16i32:
; SI: s_endpgm
define void @sextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i1> addrspace(1)* %in
+ %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
%ext = sext <16 x i1> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
ret void
@@ -119,7 +119,7 @@ define void @sextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16
; XFUNC-LABEL: {{^}}zextload_global_v32i1_to_v32i32:
; XSI: s_endpgm
; define void @zextload_global_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
-; %load = load <32 x i1> addrspace(1)* %in
+; %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
; %ext = zext <32 x i1> %load to <32 x i32>
; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
; ret void
@@ -128,7 +128,7 @@ define void @sextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16
; XFUNC-LABEL: {{^}}sextload_global_v32i1_to_v32i32:
; XSI: s_endpgm
; define void @sextload_global_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
-; %load = load <32 x i1> addrspace(1)* %in
+; %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
; %ext = sext <32 x i1> %load to <32 x i32>
; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
; ret void
@@ -137,7 +137,7 @@ define void @sextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16
; XFUNC-LABEL: {{^}}zextload_global_v64i1_to_v64i32:
; XSI: s_endpgm
; define void @zextload_global_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
-; %load = load <64 x i1> addrspace(1)* %in
+; %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
; %ext = zext <64 x i1> %load to <64 x i32>
; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
; ret void
@@ -146,7 +146,7 @@ define void @sextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16
; XFUNC-LABEL: {{^}}sextload_global_v64i1_to_v64i32:
; XSI: s_endpgm
; define void @sextload_global_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
-; %load = load <64 x i1> addrspace(1)* %in
+; %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
; %ext = sext <64 x i1> %load to <64 x i32>
; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
; ret void
@@ -157,7 +157,7 @@ define void @sextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16
; SI: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
; SI: buffer_store_dwordx2
define void @zextload_global_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %a = load i1 addrspace(1)* %in
+ %a = load i1, i1 addrspace(1)* %in
%ext = zext i1 %a to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
@@ -169,7 +169,7 @@ define void @zextload_global_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)*
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[BFE]]
; SI: buffer_store_dwordx2
define void @sextload_global_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %a = load i1 addrspace(1)* %in
+ %a = load i1, i1 addrspace(1)* %in
%ext = sext i1 %a to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
@@ -178,7 +178,7 @@ define void @sextload_global_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)*
; FUNC-LABEL: {{^}}zextload_global_v1i1_to_v1i64:
; SI: s_endpgm
define void @zextload_global_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i1> addrspace(1)* %in
+ %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
%ext = zext <1 x i1> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
@@ -187,7 +187,7 @@ define void @zextload_global_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1
; FUNC-LABEL: {{^}}sextload_global_v1i1_to_v1i64:
; SI: s_endpgm
define void @sextload_global_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i1> addrspace(1)* %in
+ %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
%ext = sext <1 x i1> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
@@ -196,7 +196,7 @@ define void @sextload_global_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1
; FUNC-LABEL: {{^}}zextload_global_v2i1_to_v2i64:
; SI: s_endpgm
define void @zextload_global_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i1> addrspace(1)* %in
+ %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
%ext = zext <2 x i1> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
@@ -205,7 +205,7 @@ define void @zextload_global_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1
; FUNC-LABEL: {{^}}sextload_global_v2i1_to_v2i64:
; SI: s_endpgm
define void @sextload_global_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i1> addrspace(1)* %in
+ %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
%ext = sext <2 x i1> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
@@ -214,7 +214,7 @@ define void @sextload_global_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1
; FUNC-LABEL: {{^}}zextload_global_v4i1_to_v4i64:
; SI: s_endpgm
define void @zextload_global_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i1> addrspace(1)* %in
+ %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
%ext = zext <4 x i1> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
@@ -223,7 +223,7 @@ define void @zextload_global_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1
; FUNC-LABEL: {{^}}sextload_global_v4i1_to_v4i64:
; SI: s_endpgm
define void @sextload_global_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i1> addrspace(1)* %in
+ %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
%ext = sext <4 x i1> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
@@ -232,7 +232,7 @@ define void @sextload_global_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1
; FUNC-LABEL: {{^}}zextload_global_v8i1_to_v8i64:
; SI: s_endpgm
define void @zextload_global_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i1> addrspace(1)* %in
+ %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
%ext = zext <8 x i1> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
@@ -241,7 +241,7 @@ define void @zextload_global_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1
; FUNC-LABEL: {{^}}sextload_global_v8i1_to_v8i64:
; SI: s_endpgm
define void @sextload_global_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i1> addrspace(1)* %in
+ %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
%ext = sext <8 x i1> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
@@ -250,7 +250,7 @@ define void @sextload_global_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1
; FUNC-LABEL: {{^}}zextload_global_v16i1_to_v16i64:
; SI: s_endpgm
define void @zextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i1> addrspace(1)* %in
+ %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
%ext = zext <16 x i1> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
@@ -259,7 +259,7 @@ define void @zextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16
; FUNC-LABEL: {{^}}sextload_global_v16i1_to_v16i64:
; SI: s_endpgm
define void @sextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i1> addrspace(1)* %in
+ %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
%ext = sext <16 x i1> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
@@ -268,7 +268,7 @@ define void @sextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16
; XFUNC-LABEL: {{^}}zextload_global_v32i1_to_v32i64:
; XSI: s_endpgm
; define void @zextload_global_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
-; %load = load <32 x i1> addrspace(1)* %in
+; %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
; %ext = zext <32 x i1> %load to <32 x i64>
; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
; ret void
@@ -277,7 +277,7 @@ define void @sextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16
; XFUNC-LABEL: {{^}}sextload_global_v32i1_to_v32i64:
; XSI: s_endpgm
; define void @sextload_global_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
-; %load = load <32 x i1> addrspace(1)* %in
+; %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
; %ext = sext <32 x i1> %load to <32 x i64>
; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
; ret void
@@ -286,7 +286,7 @@ define void @sextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16
; XFUNC-LABEL: {{^}}zextload_global_v64i1_to_v64i64:
; XSI: s_endpgm
; define void @zextload_global_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
-; %load = load <64 x i1> addrspace(1)* %in
+; %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
; %ext = zext <64 x i1> %load to <64 x i64>
; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
; ret void
@@ -295,7 +295,7 @@ define void @sextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16
; XFUNC-LABEL: {{^}}sextload_global_v64i1_to_v64i64:
; XSI: s_endpgm
; define void @sextload_global_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
-; %load = load <64 x i1> addrspace(1)* %in
+; %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
; %ext = sext <64 x i1> %load to <64 x i64>
; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
; ret void
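
global-extload-i1.ll walks the whole zext/sext matrix from i1, including the commented-out v32/v64 variants above, which keep XFUNC/XSI prefixes so FileCheck ignores them until they become viable. The scalar core of the matrix, condensed (kernel and pointer names invented):

define void @i1_ext_sketch(i32 addrspace(1)* %zout, i32 addrspace(1)* %sout, i1 addrspace(1)* %in) {
  %bit = load i1, i1 addrspace(1)* %in
  %z = zext i1 %bit to i32   ; 0 or 1: the ubyte load already suffices
  %s = sext i1 %bit to i32   ; 0 or -1: typically needs a bfe to replicate the bit
  store i32 %z, i32 addrspace(1)* %zout
  store i32 %s, i32 addrspace(1)* %sout
  ret void
}
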
diff --git a/test/CodeGen/R600/global-extload-i16.ll b/test/CodeGen/R600/global-extload-i16.ll
index f3e3312..103a40d 100644
--- a/test/CodeGen/R600/global-extload-i16.ll
+++ b/test/CodeGen/R600/global-extload-i16.ll
@@ -8,7 +8,7 @@
; SI: buffer_store_dword
; SI: s_endpgm
define void @zextload_global_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
- %a = load i16 addrspace(1)* %in
+ %a = load i16, i16 addrspace(1)* %in
%ext = zext i16 %a to i32
store i32 %ext, i32 addrspace(1)* %out
ret void
@@ -19,7 +19,7 @@ define void @zextload_global_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)
; SI: buffer_store_dword
; SI: s_endpgm
define void @sextload_global_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
- %a = load i16 addrspace(1)* %in
+ %a = load i16, i16 addrspace(1)* %in
%ext = sext i16 %a to i32
store i32 %ext, i32 addrspace(1)* %out
ret void
@@ -29,7 +29,7 @@ define void @sextload_global_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)
; SI: buffer_load_ushort
; SI: s_endpgm
define void @zextload_global_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i16> addrspace(1)* %in
+ %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
%ext = zext <1 x i16> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
ret void
@@ -39,7 +39,7 @@ define void @zextload_global_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i
; SI: buffer_load_sshort
; SI: s_endpgm
define void @sextload_global_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i16> addrspace(1)* %in
+ %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
%ext = sext <1 x i16> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
ret void
@@ -48,7 +48,7 @@ define void @sextload_global_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i
; FUNC-LABEL: {{^}}zextload_global_v2i16_to_v2i32:
; SI: s_endpgm
define void @zextload_global_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i16> addrspace(1)* %in
+ %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
%ext = zext <2 x i16> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
ret void
@@ -57,7 +57,7 @@ define void @zextload_global_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i
; FUNC-LABEL: {{^}}sextload_global_v2i16_to_v2i32:
; SI: s_endpgm
define void @sextload_global_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i16> addrspace(1)* %in
+ %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
%ext = sext <2 x i16> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
ret void
@@ -66,7 +66,7 @@ define void @sextload_global_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i
; FUNC-LABEL: {{^}}zextload_global_v4i16_to_v4i32:
; SI: s_endpgm
define void @zextload_global_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i16> addrspace(1)* %in
+ %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
%ext = zext <4 x i16> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
ret void
@@ -75,7 +75,7 @@ define void @zextload_global_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i
; FUNC-LABEL: {{^}}sextload_global_v4i16_to_v4i32:
; SI: s_endpgm
define void @sextload_global_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i16> addrspace(1)* %in
+ %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
%ext = sext <4 x i16> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
ret void
@@ -84,7 +84,7 @@ define void @sextload_global_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i
; FUNC-LABEL: {{^}}zextload_global_v8i16_to_v8i32:
; SI: s_endpgm
define void @zextload_global_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i16> addrspace(1)* %in
+ %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
%ext = zext <8 x i16> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
ret void
@@ -93,7 +93,7 @@ define void @zextload_global_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i
; FUNC-LABEL: {{^}}sextload_global_v8i16_to_v8i32:
; SI: s_endpgm
define void @sextload_global_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i16> addrspace(1)* %in
+ %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
%ext = sext <8 x i16> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
ret void
@@ -102,7 +102,7 @@ define void @sextload_global_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i
; FUNC-LABEL: {{^}}zextload_global_v16i16_to_v16i32:
; SI: s_endpgm
define void @zextload_global_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i16> addrspace(1)* %in
+ %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
%ext = zext <16 x i16> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
ret void
@@ -111,7 +111,7 @@ define void @zextload_global_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16
; FUNC-LABEL: {{^}}sextload_global_v16i16_to_v16i32:
; SI: s_endpgm
define void @sextload_global_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i16> addrspace(1)* %in
+ %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
%ext = sext <16 x i16> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
ret void
@@ -120,7 +120,7 @@ define void @sextload_global_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16
; FUNC-LABEL: {{^}}zextload_global_v32i16_to_v32i32:
; SI: s_endpgm
define void @zextload_global_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <32 x i16> addrspace(1)* %in
+ %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
%ext = zext <32 x i16> %load to <32 x i32>
store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
ret void
@@ -129,7 +129,7 @@ define void @zextload_global_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32
; FUNC-LABEL: {{^}}sextload_global_v32i16_to_v32i32:
; SI: s_endpgm
define void @sextload_global_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <32 x i16> addrspace(1)* %in
+ %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
%ext = sext <32 x i16> %load to <32 x i32>
store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
ret void
@@ -138,7 +138,7 @@ define void @sextload_global_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32
; FUNC-LABEL: {{^}}zextload_global_v64i16_to_v64i32:
; SI: s_endpgm
define void @zextload_global_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <64 x i16> addrspace(1)* %in
+ %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
%ext = zext <64 x i16> %load to <64 x i32>
store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
ret void
@@ -147,7 +147,7 @@ define void @zextload_global_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64
; FUNC-LABEL: {{^}}sextload_global_v64i16_to_v64i32:
; SI: s_endpgm
define void @sextload_global_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <64 x i16> addrspace(1)* %in
+ %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
%ext = sext <64 x i16> %load to <64 x i32>
store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
ret void
@@ -158,7 +158,7 @@ define void @sextload_global_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64
; SI: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
- %a = load i16 addrspace(1)* %in
+ %a = load i16, i16 addrspace(1)* %in
%ext = zext i16 %a to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
@@ -169,7 +169,7 @@ define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
; SI: buffer_store_dwordx2
define void @sextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
- %a = load i16 addrspace(1)* %in
+ %a = load i16, i16 addrspace(1)* %in
%ext = sext i16 %a to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
@@ -178,7 +178,7 @@ define void @sextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)
; FUNC-LABEL: {{^}}zextload_global_v1i16_to_v1i64:
; SI: s_endpgm
define void @zextload_global_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i16> addrspace(1)* %in
+ %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
%ext = zext <1 x i16> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
@@ -187,7 +187,7 @@ define void @zextload_global_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i
; FUNC-LABEL: {{^}}sextload_global_v1i16_to_v1i64:
; SI: s_endpgm
define void @sextload_global_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i16> addrspace(1)* %in
+ %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
%ext = sext <1 x i16> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
@@ -196,7 +196,7 @@ define void @sextload_global_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i
; FUNC-LABEL: {{^}}zextload_global_v2i16_to_v2i64:
; SI: s_endpgm
define void @zextload_global_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i16> addrspace(1)* %in
+ %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
%ext = zext <2 x i16> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
@@ -205,7 +205,7 @@ define void @zextload_global_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i
; FUNC-LABEL: {{^}}sextload_global_v2i16_to_v2i64:
; SI: s_endpgm
define void @sextload_global_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i16> addrspace(1)* %in
+ %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
%ext = sext <2 x i16> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
@@ -214,7 +214,7 @@ define void @sextload_global_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i
; FUNC-LABEL: {{^}}zextload_global_v4i16_to_v4i64:
; SI: s_endpgm
define void @zextload_global_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i16> addrspace(1)* %in
+ %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
%ext = zext <4 x i16> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
@@ -223,7 +223,7 @@ define void @zextload_global_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i
; FUNC-LABEL: {{^}}sextload_global_v4i16_to_v4i64:
; SI: s_endpgm
define void @sextload_global_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i16> addrspace(1)* %in
+ %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
%ext = sext <4 x i16> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
@@ -232,7 +232,7 @@ define void @sextload_global_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i
; FUNC-LABEL: {{^}}zextload_global_v8i16_to_v8i64:
; SI: s_endpgm
define void @zextload_global_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i16> addrspace(1)* %in
+ %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
%ext = zext <8 x i16> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
@@ -241,7 +241,7 @@ define void @zextload_global_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i
; FUNC-LABEL: {{^}}sextload_global_v8i16_to_v8i64:
; SI: s_endpgm
define void @sextload_global_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i16> addrspace(1)* %in
+ %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
%ext = sext <8 x i16> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
@@ -250,7 +250,7 @@ define void @sextload_global_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i
; FUNC-LABEL: {{^}}zextload_global_v16i16_to_v16i64:
; SI: s_endpgm
define void @zextload_global_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i16> addrspace(1)* %in
+ %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
%ext = zext <16 x i16> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
@@ -259,7 +259,7 @@ define void @zextload_global_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16
; FUNC-LABEL: {{^}}sextload_global_v16i16_to_v16i64:
; SI: s_endpgm
define void @sextload_global_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i16> addrspace(1)* %in
+ %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
%ext = sext <16 x i16> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
@@ -268,7 +268,7 @@ define void @sextload_global_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16
; FUNC-LABEL: {{^}}zextload_global_v32i16_to_v32i64:
; SI: s_endpgm
define void @zextload_global_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <32 x i16> addrspace(1)* %in
+ %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
%ext = zext <32 x i16> %load to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
ret void
@@ -277,7 +277,7 @@ define void @zextload_global_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32
; FUNC-LABEL: {{^}}sextload_global_v32i16_to_v32i64:
; SI: s_endpgm
define void @sextload_global_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <32 x i16> addrspace(1)* %in
+ %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
%ext = sext <32 x i16> %load to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
ret void
@@ -286,7 +286,7 @@ define void @sextload_global_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32
; FUNC-LABEL: {{^}}zextload_global_v64i16_to_v64i64:
; SI: s_endpgm
define void @zextload_global_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <64 x i16> addrspace(1)* %in
+ %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
%ext = zext <64 x i16> %load to <64 x i64>
store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
ret void
@@ -295,7 +295,7 @@ define void @zextload_global_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64
; FUNC-LABEL: {{^}}sextload_global_v64i16_to_v64i64:
; SI: s_endpgm
define void @sextload_global_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <64 x i16> addrspace(1)* %in
+ %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
%ext = sext <64 x i16> %load to <64 x i64>
store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
ret void
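
The i16 file repeats the same matrix one type up; the instructive contrast is in the scalar-to-i64 cases above, where zext pairs a buffer_load_ushort with v_mov_b32 0 for the high dword while sext must materialize the high half with v_ashrrev_i32 by 31. A side-by-side sketch (names invented):

define void @i16_to_i64_sketch(i64 addrspace(1)* %zout, i64 addrspace(1)* %sout, i16 addrspace(1)* %in) {
  %a = load i16, i16 addrspace(1)* %in, align 2
  %z = zext i16 %a to i64   ; high 32 bits are a constant zero
  %s = sext i16 %a to i64   ; high 32 bits replicate bit 15
  store i64 %z, i64 addrspace(1)* %zout, align 8
  store i64 %s, i64 addrspace(1)* %sout, align 8
  ret void
}
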
diff --git a/test/CodeGen/R600/global-extload-i32.ll b/test/CodeGen/R600/global-extload-i32.ll
index b3d5438..79b8345 100644
--- a/test/CodeGen/R600/global-extload-i32.ll
+++ b/test/CodeGen/R600/global-extload-i32.ll
@@ -7,7 +7,7 @@
; SI: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %a = load i32 addrspace(1)* %in
+ %a = load i32, i32 addrspace(1)* %in
%ext = zext i32 %a to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
@@ -18,7 +18,7 @@ define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
; SI: buffer_store_dwordx2
define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %a = load i32 addrspace(1)* %in
+ %a = load i32, i32 addrspace(1)* %in
%ext = sext i32 %a to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
@@ -29,7 +29,7 @@ define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @zextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i32> addrspace(1)* %in
+ %load = load <1 x i32>, <1 x i32> addrspace(1)* %in
%ext = zext <1 x i32> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
@@ -41,7 +41,7 @@ define void @zextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @sextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i32> addrspace(1)* %in
+ %load = load <1 x i32>, <1 x i32> addrspace(1)* %in
%ext = sext <1 x i32> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
@@ -53,7 +53,7 @@ define void @sextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @zextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i32> addrspace(1)* %in
+ %load = load <2 x i32>, <2 x i32> addrspace(1)* %in
%ext = zext <2 x i32> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
@@ -67,7 +67,7 @@ define void @zextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i
; SI-DAG: buffer_store_dwordx2
; SI: s_endpgm
define void @sextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i32> addrspace(1)* %in
+ %load = load <2 x i32>, <2 x i32> addrspace(1)* %in
%ext = sext <2 x i32> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
@@ -81,7 +81,7 @@ define void @sextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @zextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i32> addrspace(1)* %in
+ %load = load <4 x i32>, <4 x i32> addrspace(1)* %in
%ext = zext <4 x i32> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
@@ -99,7 +99,7 @@ define void @zextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i
; SI-DAG: buffer_store_dwordx2
; SI: s_endpgm
define void @sextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i32> addrspace(1)* %in
+ %load = load <4 x i32>, <4 x i32> addrspace(1)* %in
%ext = sext <4 x i32> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
@@ -124,7 +124,7 @@ define void @sextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i
; SI-DAG: buffer_store_dwordx2
; SI: s_endpgm
define void @zextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i32> addrspace(1)* %in
+ %load = load <8 x i32>, <8 x i32> addrspace(1)* %in
%ext = zext <8 x i32> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
@@ -159,7 +159,7 @@ define void @zextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i
; SI: s_endpgm
define void @sextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i32> addrspace(1)* %in
+ %load = load <8 x i32>, <8 x i32> addrspace(1)* %in
%ext = sext <8 x i32> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
@@ -212,7 +212,7 @@ define void @sextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i
; SI-DAG: buffer_store_dwordx2
; SI: s_endpgm
define void @sextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i32> addrspace(1)* %in
+ %load = load <16 x i32>, <16 x i32> addrspace(1)* %in
%ext = sext <16 x i32> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
@@ -255,7 +255,7 @@ define void @sextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16
; SI: s_endpgm
define void @zextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i32> addrspace(1)* %in
+ %load = load <16 x i32>, <16 x i32> addrspace(1)* %in
%ext = zext <16 x i32> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
@@ -369,7 +369,7 @@ define void @zextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16
; SI: s_endpgm
define void @sextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind {
- %load = load <32 x i32> addrspace(1)* %in
+ %load = load <32 x i32>, <32 x i32> addrspace(1)* %in
%ext = sext <32 x i32> %load to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
ret void
@@ -450,7 +450,7 @@ define void @sextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32
; SI: s_endpgm
define void @zextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind {
- %load = load <32 x i32> addrspace(1)* %in
+ %load = load <32 x i32>, <32 x i32> addrspace(1)* %in
%ext = zext <32 x i32> %load to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
ret void
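
Beyond the syntax update, the i32-to-i64 CHECK lines above pin down how the two widenings differ on SI: zextload materializes the high dword as zero (v_mov_b32_e32 v[[HI]], 0), while sextload rebuilds it from the sign bit (v_ashrrev_i32_e32 ..., 31, [[LOAD]]). In IR terms, a sketch with a hypothetical %a:

  %z = zext i32 %a to i64   ; high 32 bits become 0
  %s = sext i32 %a to i64   ; high 32 bits become %a arithmetically shifted right by 31 (all zeros or all ones)
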
diff --git a/test/CodeGen/R600/global-extload-i8.ll b/test/CodeGen/R600/global-extload-i8.ll
index 4c37f3f..b31d536 100644
--- a/test/CodeGen/R600/global-extload-i8.ll
+++ b/test/CodeGen/R600/global-extload-i8.ll
@@ -7,7 +7,7 @@
; SI: buffer_store_dword
; SI: s_endpgm
define void @zextload_global_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
- %a = load i8 addrspace(1)* %in
+ %a = load i8, i8 addrspace(1)* %in
%ext = zext i8 %a to i32
store i32 %ext, i32 addrspace(1)* %out
ret void
@@ -18,7 +18,7 @@ define void @zextload_global_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)*
; SI: buffer_store_dword
; SI: s_endpgm
define void @sextload_global_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
- %a = load i8 addrspace(1)* %in
+ %a = load i8, i8 addrspace(1)* %in
%ext = sext i8 %a to i32
store i32 %ext, i32 addrspace(1)* %out
ret void
@@ -27,7 +27,7 @@ define void @sextload_global_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)*
; FUNC-LABEL: {{^}}zextload_global_v1i8_to_v1i32:
; SI: s_endpgm
define void @zextload_global_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i8> addrspace(1)* %in
+ %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
%ext = zext <1 x i8> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
ret void
@@ -36,7 +36,7 @@ define void @zextload_global_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8
; FUNC-LABEL: {{^}}sextload_global_v1i8_to_v1i32:
; SI: s_endpgm
define void @sextload_global_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i8> addrspace(1)* %in
+ %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
%ext = sext <1 x i8> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
ret void
@@ -45,7 +45,7 @@ define void @sextload_global_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8
; FUNC-LABEL: {{^}}zextload_global_v2i8_to_v2i32:
; SI: s_endpgm
define void @zextload_global_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i8> addrspace(1)* %in
+ %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
%ext = zext <2 x i8> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
ret void
@@ -54,7 +54,7 @@ define void @zextload_global_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8
; FUNC-LABEL: {{^}}sextload_global_v2i8_to_v2i32:
; SI: s_endpgm
define void @sextload_global_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i8> addrspace(1)* %in
+ %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
%ext = sext <2 x i8> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
ret void
@@ -63,7 +63,7 @@ define void @sextload_global_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8
; FUNC-LABEL: {{^}}zextload_global_v4i8_to_v4i32:
; SI: s_endpgm
define void @zextload_global_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i8> addrspace(1)* %in
+ %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
%ext = zext <4 x i8> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
ret void
@@ -72,7 +72,7 @@ define void @zextload_global_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8
; FUNC-LABEL: {{^}}sextload_global_v4i8_to_v4i32:
; SI: s_endpgm
define void @sextload_global_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i8> addrspace(1)* %in
+ %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
%ext = sext <4 x i8> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
ret void
@@ -81,7 +81,7 @@ define void @sextload_global_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8
; FUNC-LABEL: {{^}}zextload_global_v8i8_to_v8i32:
; SI: s_endpgm
define void @zextload_global_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i8> addrspace(1)* %in
+ %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
%ext = zext <8 x i8> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
ret void
@@ -90,7 +90,7 @@ define void @zextload_global_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8
; FUNC-LABEL: {{^}}sextload_global_v8i8_to_v8i32:
; SI: s_endpgm
define void @sextload_global_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i8> addrspace(1)* %in
+ %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
%ext = sext <8 x i8> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
ret void
@@ -99,7 +99,7 @@ define void @sextload_global_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8
; FUNC-LABEL: {{^}}zextload_global_v16i8_to_v16i32:
; SI: s_endpgm
define void @zextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i8> addrspace(1)* %in
+ %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
%ext = zext <16 x i8> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
ret void
@@ -108,7 +108,7 @@ define void @zextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16
; FUNC-LABEL: {{^}}sextload_global_v16i8_to_v16i32:
; SI: s_endpgm
define void @sextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i8> addrspace(1)* %in
+ %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
%ext = sext <16 x i8> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
ret void
@@ -117,7 +117,7 @@ define void @sextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16
; XFUNC-LABEL: {{^}}zextload_global_v32i8_to_v32i32:
; XSI: s_endpgm
; define void @zextload_global_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
-; %load = load <32 x i8> addrspace(1)* %in
+; %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
; %ext = zext <32 x i8> %load to <32 x i32>
; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
; ret void
@@ -126,7 +126,7 @@ define void @sextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16
; XFUNC-LABEL: {{^}}sextload_global_v32i8_to_v32i32:
; XSI: s_endpgm
; define void @sextload_global_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
-; %load = load <32 x i8> addrspace(1)* %in
+; %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
; %ext = sext <32 x i8> %load to <32 x i32>
; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
; ret void
@@ -135,7 +135,7 @@ define void @sextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16
; XFUNC-LABEL: {{^}}zextload_global_v64i8_to_v64i32:
; XSI: s_endpgm
; define void @zextload_global_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
-; %load = load <64 x i8> addrspace(1)* %in
+; %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
; %ext = zext <64 x i8> %load to <64 x i32>
; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
; ret void
@@ -144,7 +144,7 @@ define void @sextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16
; XFUNC-LABEL: {{^}}sextload_global_v64i8_to_v64i32:
; XSI: s_endpgm
; define void @sextload_global_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
-; %load = load <64 x i8> addrspace(1)* %in
+; %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
; %ext = sext <64 x i8> %load to <64 x i32>
; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
; ret void
@@ -155,7 +155,7 @@ define void @sextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16
; SI: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
- %a = load i8 addrspace(1)* %in
+ %a = load i8, i8 addrspace(1)* %in
%ext = zext i8 %a to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
@@ -166,7 +166,7 @@ define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)*
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
; SI: buffer_store_dwordx2
define void @sextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
- %a = load i8 addrspace(1)* %in
+ %a = load i8, i8 addrspace(1)* %in
%ext = sext i8 %a to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
@@ -175,7 +175,7 @@ define void @sextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)*
; FUNC-LABEL: {{^}}zextload_global_v1i8_to_v1i64:
; SI: s_endpgm
define void @zextload_global_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i8> addrspace(1)* %in
+ %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
%ext = zext <1 x i8> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
@@ -184,7 +184,7 @@ define void @zextload_global_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8
; FUNC-LABEL: {{^}}sextload_global_v1i8_to_v1i64:
; SI: s_endpgm
define void @sextload_global_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i8> addrspace(1)* %in
+ %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
%ext = sext <1 x i8> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
@@ -193,7 +193,7 @@ define void @sextload_global_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8
; FUNC-LABEL: {{^}}zextload_global_v2i8_to_v2i64:
; SI: s_endpgm
define void @zextload_global_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i8> addrspace(1)* %in
+ %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
%ext = zext <2 x i8> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
@@ -202,7 +202,7 @@ define void @zextload_global_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8
; FUNC-LABEL: {{^}}sextload_global_v2i8_to_v2i64:
; SI: s_endpgm
define void @sextload_global_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i8> addrspace(1)* %in
+ %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
%ext = sext <2 x i8> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
@@ -211,7 +211,7 @@ define void @sextload_global_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8
; FUNC-LABEL: {{^}}zextload_global_v4i8_to_v4i64:
; SI: s_endpgm
define void @zextload_global_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i8> addrspace(1)* %in
+ %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
%ext = zext <4 x i8> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
@@ -220,7 +220,7 @@ define void @zextload_global_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8
; FUNC-LABEL: {{^}}sextload_global_v4i8_to_v4i64:
; SI: s_endpgm
define void @sextload_global_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i8> addrspace(1)* %in
+ %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
%ext = sext <4 x i8> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
@@ -229,7 +229,7 @@ define void @sextload_global_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8
; FUNC-LABEL: {{^}}zextload_global_v8i8_to_v8i64:
; SI: s_endpgm
define void @zextload_global_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i8> addrspace(1)* %in
+ %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
%ext = zext <8 x i8> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
@@ -238,7 +238,7 @@ define void @zextload_global_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8
; FUNC-LABEL: {{^}}sextload_global_v8i8_to_v8i64:
; SI: s_endpgm
define void @sextload_global_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i8> addrspace(1)* %in
+ %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
%ext = sext <8 x i8> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
@@ -247,7 +247,7 @@ define void @sextload_global_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8
; FUNC-LABEL: {{^}}zextload_global_v16i8_to_v16i64:
; SI: s_endpgm
define void @zextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i8> addrspace(1)* %in
+ %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
%ext = zext <16 x i8> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
@@ -256,7 +256,7 @@ define void @zextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16
; FUNC-LABEL: {{^}}sextload_global_v16i8_to_v16i64:
; SI: s_endpgm
define void @sextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i8> addrspace(1)* %in
+ %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
%ext = sext <16 x i8> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
@@ -265,7 +265,7 @@ define void @sextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16
; XFUNC-LABEL: {{^}}zextload_global_v32i8_to_v32i64:
; XSI: s_endpgm
; define void @zextload_global_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
-; %load = load <32 x i8> addrspace(1)* %in
+; %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
; %ext = zext <32 x i8> %load to <32 x i64>
; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
; ret void
@@ -274,7 +274,7 @@ define void @sextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16
; XFUNC-LABEL: {{^}}sextload_global_v32i8_to_v32i64:
; XSI: s_endpgm
; define void @sextload_global_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
-; %load = load <32 x i8> addrspace(1)* %in
+; %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
; %ext = sext <32 x i8> %load to <32 x i64>
; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
; ret void
@@ -283,7 +283,7 @@ define void @sextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16
; XFUNC-LABEL: {{^}}zextload_global_v64i8_to_v64i64:
; XSI: s_endpgm
; define void @zextload_global_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
-; %load = load <64 x i8> addrspace(1)* %in
+; %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
; %ext = zext <64 x i8> %load to <64 x i64>
; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
; ret void
@@ -292,7 +292,7 @@ define void @sextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16
; XFUNC-LABEL: {{^}}sextload_global_v64i8_to_v64i64:
; XSI: s_endpgm
; define void @sextload_global_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
-; %load = load <64 x i8> addrspace(1)* %in
+; %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
; %ext = sext <64 x i8> %load to <64 x i64>
; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
; ret void
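
The commented-out v32i8/v64i8 cases keep their bodies but rename the check prefixes to XFUNC/XSI; a prefix that is never passed to FileCheck is simply never looked for, so the directives stay inert until the cases are re-enabled. A sketch of the mechanism, assuming the usual RUN-line shape for these tests (the exact flags here are an assumption):

  ; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
  ; SI: s_endpgm    <- active: SI is a requested prefix
  ; XSI: s_endpgm   <- inert: XSI was never requested
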
diff --git a/test/CodeGen/R600/global-zero-initializer.ll b/test/CodeGen/R600/global-zero-initializer.ll
index 6909c58..45aa8bf 100644
--- a/test/CodeGen/R600/global-zero-initializer.ll
+++ b/test/CodeGen/R600/global-zero-initializer.ll
@@ -6,8 +6,8 @@
@lds = addrspace(1) global [256 x i32] zeroinitializer
define void @load_init_global_global(i32 addrspace(1)* %out, i1 %p) {
- %gep = getelementptr [256 x i32] addrspace(1)* @lds, i32 0, i32 10
- %ld = load i32 addrspace(1)* %gep
+ %gep = getelementptr [256 x i32], [256 x i32] addrspace(1)* @lds, i32 0, i32 10
+ %ld = load i32, i32 addrspace(1)* %gep
store i32 %ld, i32 addrspace(1)* %out
ret void
}
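
The rewritten getelementptr also illustrates the two-index form on an array global now that the source type is explicit: the leading i32 0 steps through the pointer to the [256 x i32] object itself, and i32 10 selects element 10, a byte offset of 40. A sketch with a hypothetical @arr:

  %gep = getelementptr [256 x i32], [256 x i32] addrspace(1)* @arr, i32 0, i32 10
  ; address = @arr + 0 * 1024 + 10 * 4 = @arr + 40 bytes
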
diff --git a/test/CodeGen/R600/global_atomics.ll b/test/CodeGen/R600/global_atomics.ll
index 5a07a02..847950f 100644
--- a/test/CodeGen/R600/global_atomics.ll
+++ b/test/CodeGen/R600/global_atomics.ll
@@ -4,7 +4,7 @@
; SI: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_add_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -14,7 +14,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_add_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -24,8 +24,8 @@ entry:
; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
define void @atomic_add_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -35,8 +35,8 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_add_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -64,7 +64,7 @@ entry:
; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_add_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst
ret void
}
@@ -74,7 +74,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_add_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -84,7 +84,7 @@ entry:
; SI: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_and_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -94,7 +94,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_and_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -104,8 +104,8 @@ entry:
; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
define void @atomic_and_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -115,8 +115,8 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_and_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -144,7 +144,7 @@ entry:
; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_and_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %in seq_cst
ret void
}
@@ -154,7 +154,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_and_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -164,7 +164,7 @@ entry:
; SI: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_sub_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -174,7 +174,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_sub_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -184,8 +184,8 @@ entry:
; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
define void @atomic_sub_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -195,8 +195,8 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_sub_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -224,7 +224,7 @@ entry:
; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_sub_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %in seq_cst
ret void
}
@@ -234,7 +234,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_sub_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -244,7 +244,7 @@ entry:
; SI: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_max_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -254,7 +254,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_max_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -264,8 +264,8 @@ entry:
; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
define void @atomic_max_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -275,8 +275,8 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_max_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -304,7 +304,7 @@ entry:
; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_max_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %in seq_cst
ret void
}
@@ -314,7 +314,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_max_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -324,7 +324,7 @@ entry:
; SI: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_umax_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -334,7 +334,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_umax_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -344,8 +344,8 @@ entry:
; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
define void @atomic_umax_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -355,8 +355,8 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_umax_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -384,7 +384,7 @@ entry:
; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_umax_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %in seq_cst
ret void
}
@@ -394,7 +394,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_umax_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -404,7 +404,7 @@ entry:
; SI: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_min_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -414,7 +414,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_min_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -424,8 +424,8 @@ entry:
; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
define void @atomic_min_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -435,8 +435,8 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_min_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -464,7 +464,7 @@ entry:
; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_min_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %in seq_cst
ret void
}
@@ -474,7 +474,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_min_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -484,7 +484,7 @@ entry:
; SI: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_umin_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -494,7 +494,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_umin_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -504,8 +504,8 @@ entry:
; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
define void @atomic_umin_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -515,8 +515,8 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_umin_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -544,7 +544,7 @@ entry:
; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_umin_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %in seq_cst
ret void
}
@@ -554,7 +554,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_umin_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -564,7 +564,7 @@ entry:
; SI: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_or_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -574,7 +574,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_or_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -584,8 +584,8 @@ entry:
; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
define void @atomic_or_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -595,8 +595,8 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_or_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -624,7 +624,7 @@ entry:
; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_or_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %in seq_cst
ret void
}
@@ -634,7 +634,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_or_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -644,7 +644,7 @@ entry:
; SI: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_xchg_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -654,7 +654,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_xchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -664,8 +664,8 @@ entry:
; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
define void @atomic_xchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -675,8 +675,8 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_xchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -704,7 +704,7 @@ entry:
; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_xchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %in seq_cst
ret void
}
@@ -714,7 +714,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_xchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -724,7 +724,7 @@ entry:
; SI: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_xor_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -734,7 +734,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_xor_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -744,8 +744,8 @@ entry:
; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
define void @atomic_xor_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -755,8 +755,8 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_xor_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -784,7 +784,7 @@ entry:
; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_xor_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %in seq_cst
ret void
}
@@ -794,7 +794,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_xor_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
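
Every _offset variant in this file leans on the same fold: a getelementptr of 4 i32 elements is 16 bytes, which is exactly the offset:16 immediate the buffer_atomic_* CHECK lines demand, while the _addr64 variants keep the variable %index part in the 64-bit VGPR address. A reduced sketch (hypothetical kernel name):

  define void @atomic_add_sketch(i32 addrspace(1)* %out, i32 %in) {
  entry:
    %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4  ; 4 elements * 4 bytes = offset:16
    %old = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
    ret void
  }
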
diff --git a/test/CodeGen/R600/gv-const-addrspace-fail.ll b/test/CodeGen/R600/gv-const-addrspace-fail.ll
index af0df41..014b0a5 100644
--- a/test/CodeGen/R600/gv-const-addrspace-fail.ll
+++ b/test/CodeGen/R600/gv-const-addrspace-fail.ll
@@ -9,8 +9,8 @@
; SI: buffer_store_byte
; SI: s_endpgm
define void @test_i8( i32 %s, i8 addrspace(1)* %out) #3 {
- %arrayidx = getelementptr inbounds [1 x i8] addrspace(2)* @a, i32 0, i32 %s
- %1 = load i8 addrspace(2)* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds [1 x i8], [1 x i8] addrspace(2)* @a, i32 0, i32 %s
+ %1 = load i8, i8 addrspace(2)* %arrayidx, align 1
store i8 %1, i8 addrspace(1)* %out
ret void
}
@@ -22,8 +22,8 @@ define void @test_i8( i32 %s, i8 addrspace(1)* %out) #3 {
; SI: buffer_store_short
; SI: s_endpgm
define void @test_i16( i32 %s, i16 addrspace(1)* %out) #3 {
- %arrayidx = getelementptr inbounds [1 x i16] addrspace(2)* @b, i32 0, i32 %s
- %1 = load i16 addrspace(2)* %arrayidx, align 2
+ %arrayidx = getelementptr inbounds [1 x i16], [1 x i16] addrspace(2)* @b, i32 0, i32 %s
+ %1 = load i16, i16 addrspace(2)* %arrayidx, align 2
store i16 %1, i16 addrspace(1)* %out
ret void
}
@@ -35,8 +35,8 @@ define void @test_i16( i32 %s, i16 addrspace(1)* %out) #3 {
; FUNC-LABEL: {{^}}struct_bar_gv_load:
define void @struct_bar_gv_load(i8 addrspace(1)* %out, i32 %index) {
- %gep = getelementptr inbounds [1 x %struct.bar] addrspace(2)* @struct_bar_gv, i32 0, i32 0, i32 1, i32 %index
- %load = load i8 addrspace(2)* %gep, align 1
+ %gep = getelementptr inbounds [1 x %struct.bar], [1 x %struct.bar] addrspace(2)* @struct_bar_gv, i32 0, i32 0, i32 1, i32 %index
+ %load = load i8, i8 addrspace(2)* %gep, align 1
store i8 %load, i8 addrspace(1)* %out, align 1
ret void
}
@@ -50,8 +50,8 @@ define void @struct_bar_gv_load(i8 addrspace(1)* %out, i32 %index) {
; FUNC-LABEL: {{^}}array_vector_gv_load:
define void @array_vector_gv_load(<4 x i32> addrspace(1)* %out, i32 %index) {
- %gep = getelementptr inbounds [4 x <4 x i32>] addrspace(2)* @array_vector_gv, i32 0, i32 %index
- %load = load <4 x i32> addrspace(2)* %gep, align 16
+ %gep = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>] addrspace(2)* @array_vector_gv, i32 0, i32 %index
+ %load = load <4 x i32>, <4 x i32> addrspace(2)* %gep, align 16
store <4 x i32> %load, <4 x i32> addrspace(1)* %out, align 16
ret void
}
diff --git a/test/CodeGen/R600/gv-const-addrspace.ll b/test/CodeGen/R600/gv-const-addrspace.ll
index 45af71d..3c1fc6c 100644
--- a/test/CodeGen/R600/gv-const-addrspace.ll
+++ b/test/CodeGen/R600/gv-const-addrspace.ll
@@ -21,8 +21,8 @@
define void @float(float addrspace(1)* %out, i32 %index) {
entry:
- %0 = getelementptr inbounds [5 x float] addrspace(2)* @float_gv, i32 0, i32 %index
- %1 = load float addrspace(2)* %0
+ %0 = getelementptr inbounds [5 x float], [5 x float] addrspace(2)* @float_gv, i32 0, i32 %index
+ %1 = load float, float addrspace(2)* %0
store float %1, float addrspace(1)* %out
ret void
}
@@ -44,8 +44,8 @@ entry:
define void @i32(i32 addrspace(1)* %out, i32 %index) {
entry:
- %0 = getelementptr inbounds [5 x i32] addrspace(2)* @i32_gv, i32 0, i32 %index
- %1 = load i32 addrspace(2)* %0
+ %0 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(2)* @i32_gv, i32 0, i32 %index
+ %1 = load i32, i32 addrspace(2)* %0
store i32 %1, i32 addrspace(1)* %out
ret void
}
@@ -59,8 +59,8 @@ entry:
; GCN: s_load_dword
define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) {
- %gep = getelementptr inbounds [1 x %struct.foo] addrspace(2)* @struct_foo_gv, i32 0, i32 0, i32 1, i32 %index
- %load = load i32 addrspace(2)* %gep, align 4
+ %gep = getelementptr inbounds [1 x %struct.foo], [1 x %struct.foo] addrspace(2)* @struct_foo_gv, i32 0, i32 0, i32 1, i32 %index
+ %load = load i32, i32 addrspace(2)* %gep, align 4
store i32 %load, i32 addrspace(1)* %out, align 4
ret void
}
@@ -75,8 +75,8 @@ define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) {
; SI: buffer_load_dword
; VI: s_load_dword
define void @array_v1_gv_load(<1 x i32> addrspace(1)* %out, i32 %index) {
- %gep = getelementptr inbounds [4 x <1 x i32>] addrspace(2)* @array_v1_gv, i32 0, i32 %index
- %load = load <1 x i32> addrspace(2)* %gep, align 4
+ %gep = getelementptr inbounds [4 x <1 x i32>], [4 x <1 x i32>] addrspace(2)* @array_v1_gv, i32 0, i32 %index
+ %load = load <1 x i32>, <1 x i32> addrspace(2)* %gep, align 4
store <1 x i32> %load, <1 x i32> addrspace(1)* %out, align 4
ret void
}
@@ -87,8 +87,8 @@ entry:
br i1 %0, label %if, label %else
if:
- %1 = getelementptr inbounds [5 x float] addrspace(2)* @float_gv, i32 0, i32 %index
- %2 = load float addrspace(2)* %1
+ %1 = getelementptr inbounds [5 x float], [5 x float] addrspace(2)* @float_gv, i32 0, i32 %index
+ %2 = load float, float addrspace(2)* %1
store float %2, float addrspace(1)* %out
br label %endif
diff --git a/test/CodeGen/R600/half.ll b/test/CodeGen/R600/half.ll
index 35a41c5..42ee788 100644
--- a/test/CodeGen/R600/half.ll
+++ b/test/CodeGen/R600/half.ll
@@ -5,7 +5,7 @@ define void @test_load_store(half addrspace(1)* %in, half addrspace(1)* %out) {
; CHECK-LABEL: {{^}}test_load_store:
; CHECK: buffer_load_ushort [[TMP:v[0-9]+]]
; CHECK: buffer_store_short [[TMP]]
- %val = load half addrspace(1)* %in
+ %val = load half, half addrspace(1)* %in
store half %val, half addrspace(1) * %out
ret void
}
@@ -14,7 +14,7 @@ define void @test_bitcast_from_half(half addrspace(1)* %in, i16 addrspace(1)* %o
; CHECK-LABEL: {{^}}test_bitcast_from_half:
; CHECK: buffer_load_ushort [[TMP:v[0-9]+]]
; CHECK: buffer_store_short [[TMP]]
- %val = load half addrspace(1) * %in
+ %val = load half, half addrspace(1) * %in
%val_int = bitcast half %val to i16
store i16 %val_int, i16 addrspace(1)* %out
ret void
@@ -24,7 +24,7 @@ define void @test_bitcast_to_half(half addrspace(1)* %out, i16 addrspace(1)* %in
; CHECK-LABEL: {{^}}test_bitcast_to_half:
; CHECK: buffer_load_ushort [[TMP:v[0-9]+]]
; CHECK: buffer_store_short [[TMP]]
- %val = load i16 addrspace(1)* %in
+ %val = load i16, i16 addrspace(1)* %in
%val_fp = bitcast i16 %val to half
store half %val_fp, half addrspace(1)* %out
ret void
@@ -34,7 +34,7 @@ define void @test_extend32(half addrspace(1)* %in, float addrspace(1)* %out) {
; CHECK-LABEL: {{^}}test_extend32:
; CHECK: v_cvt_f32_f16_e32
- %val16 = load half addrspace(1)* %in
+ %val16 = load half, half addrspace(1)* %in
%val32 = fpext half %val16 to float
store float %val32, float addrspace(1)* %out
ret void
@@ -45,7 +45,7 @@ define void @test_extend64(half addrspace(1)* %in, double addrspace(1)* %out) {
; CHECK: v_cvt_f32_f16_e32
; CHECK: v_cvt_f64_f32_e32
- %val16 = load half addrspace(1)* %in
+ %val16 = load half, half addrspace(1)* %in
%val64 = fpext half %val16 to double
store double %val64, double addrspace(1)* %out
ret void
@@ -55,7 +55,7 @@ define void @test_trunc32(float addrspace(1)* %in, half addrspace(1)* %out) {
; CHECK-LABEL: {{^}}test_trunc32:
; CHECK: v_cvt_f16_f32_e32
- %val32 = load float addrspace(1)* %in
+ %val32 = load float, float addrspace(1)* %in
%val16 = fptrunc float %val32 to half
store half %val16, half addrspace(1)* %out
ret void
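
The half.ll checks encode the SI conversion paths for f16: f16 and f32 convert to each other in a single instruction, so the f64 cases are expected to round-trip through f32, which is why test_extend64 checks v_cvt_f32_f16_e32 followed by v_cvt_f64_f32_e32. A sketch of the extend-to-double path:

  %val16 = load half, half addrspace(1)* %in
  %val64 = fpext half %val16 to double
  ; expected lowering: v_cvt_f32_f16_e32 then v_cvt_f64_f32_e32
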
diff --git a/test/CodeGen/R600/hsa.ll b/test/CodeGen/R600/hsa.ll
index ff75b90..f911339 100644
--- a/test/CodeGen/R600/hsa.ll
+++ b/test/CodeGen/R600/hsa.ll
@@ -1,8 +1,8 @@
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA %s
-; HSA: {{^}}simple:
; HSA: .section .hsa.version
; HSA-NEXT: .ascii "HSA Code Unit:0.0:AMD:0.1:GFX8.1:0"
+; HSA: {{^}}simple:
; Make sure we are setting the ATC bit:
; HSA: s_mov_b32 s[[HI:[0-9]]], 0x100f000
; HSA: buffer_store_dword v{{[0-9]+}}, s[0:[[HI]]], 0
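
The hsa.ll hunk only reorders checks: plain ; HSA: directives must match the assembly in sequence, and the .hsa.version section is emitted in the module header before any function label, so the check for simple: has to come after the section checks. The ordering rules in miniature:

  ; HSA: .section .hsa.version   <- matched first, in output order
  ; HSA-NEXT: .ascii "..."       <- must match the immediately following line
  ; HSA: {{^}}simple:            <- may match anywhere later
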
diff --git a/test/CodeGen/R600/i1-copy-phi.ll b/test/CodeGen/R600/i1-copy-phi.ll
index 430466e..105cd06 100644
--- a/test/CodeGen/R600/i1-copy-phi.ll
+++ b/test/CodeGen/R600/i1-copy-phi.ll
@@ -6,7 +6,7 @@
; SI: s_and_saveexec_b64
; SI: s_xor_b64
; SI: v_mov_b32_e32 [[REG]], -1{{$}}
-; SI: v_cmp_ne_i32_e64 {{s\[[0-9]+:[0-9]+\]}}, [[REG]], 0
+; SI: v_cmp_ne_i32_e32 vcc, 0, [[REG]]
; SI: s_and_saveexec_b64
; SI: s_xor_b64
; SI: s_endpgm
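The CHECK update here reflects the compare-commuting change this rebase pulls in (see commute-compares.ll in the diffstat): with the immediate commuted into the first source operand, the compare can use the 32-bit VOPC encoding, which writes vcc implicitly, instead of the 64-bit encoding with an explicit SGPR-pair destination. A hedged sketch of the kind of IR that exercises this pattern (hypothetical @cmp_sketch, not part of i1-copy-phi.ll; exact instruction selection varies):

; Before: v_cmp_ne_i32_e64 s[0:1], v0, 0   (e64: explicit SGPR-pair destination)
; After:  v_cmp_ne_i32_e32 vcc, 0, v0      (e32: constant in src0, result in vcc)
define void @cmp_sketch(i32 addrspace(1)* %out, i32 %v) {
  %c = icmp ne i32 %v, 0
  %r = select i1 %c, i32 1, i32 2
  store i32 %r, i32 addrspace(1)* %out
  ret void
}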
diff --git a/test/CodeGen/R600/i8-to-double-to-float.ll b/test/CodeGen/R600/i8-to-double-to-float.ll
index 6047466..c218e19 100644
--- a/test/CodeGen/R600/i8-to-double-to-float.ll
+++ b/test/CodeGen/R600/i8-to-double-to-float.ll
@@ -3,7 +3,7 @@
;CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define void @test(float addrspace(1)* %out, i8 addrspace(1)* %in) {
- %1 = load i8 addrspace(1)* %in
+ %1 = load i8, i8 addrspace(1)* %in
%2 = uitofp i8 %1 to double
%3 = fptrunc double %2 to float
store float %3, float addrspace(1)* %out
diff --git a/test/CodeGen/R600/icmp-select-sete-reverse-args.ll b/test/CodeGen/R600/icmp-select-sete-reverse-args.ll
index 71705a6..60e59a5 100644
--- a/test/CodeGen/R600/icmp-select-sete-reverse-args.ll
+++ b/test/CodeGen/R600/icmp-select-sete-reverse-args.ll
@@ -8,9 +8,9 @@
define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
- %0 = load i32 addrspace(1)* %in
- %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %in, i32 1
- %1 = load i32 addrspace(1)* %arrayidx1
+ %0 = load i32, i32 addrspace(1)* %in
+ %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
+ %1 = load i32, i32 addrspace(1)* %arrayidx1
%cmp = icmp eq i32 %0, %1
%value = select i1 %cmp, i32 0, i32 -1
store i32 %value, i32 addrspace(1)* %out
diff --git a/test/CodeGen/R600/imm.ll b/test/CodeGen/R600/imm.ll
index 9b95fd6..8917cd6 100644
--- a/test/CodeGen/R600/imm.ll
+++ b/test/CodeGen/R600/imm.ll
@@ -225,7 +225,7 @@ define void @add_inline_imm_neg_4.0_f32(float addrspace(1)* %out, float %x) {
; CHECK: v_add_f32_e32 [[REG:v[0-9]+]], 0.5, [[VAL]]
; CHECK: buffer_store_dword [[REG]]
define void @commute_add_inline_imm_0.5_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
- %x = load float addrspace(1)* %in
+ %x = load float, float addrspace(1)* %in
%y = fadd float %x, 0.5
store float %y, float addrspace(1)* %out
ret void
@@ -236,7 +236,7 @@ define void @commute_add_inline_imm_0.5_f32(float addrspace(1)* %out, float addr
; CHECK: v_add_f32_e32 [[REG:v[0-9]+]], 0x44800000, [[VAL]]
; CHECK: buffer_store_dword [[REG]]
define void @commute_add_literal_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
- %x = load float addrspace(1)* %in
+ %x = load float, float addrspace(1)* %in
%y = fadd float %x, 1024.0
store float %y, float addrspace(1)* %out
ret void
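These two tests differ only in the constant, and the CHECK lines show why that matters: 0.5 is one of SI's inline constants (0.0, +/-0.5, +/-1.0, +/-2.0, +/-4.0, ...) and is encoded directly into the v_add_f32, while 1024.0 is not and must be emitted as a separate 32-bit literal. A worked encoding of that literal (standard IEEE-754 single precision, not taken from the diff):

; 1024.0 = 1.0 * 2^10
; sign = 0, biased exponent = 10 + 127 = 137 = 0b10001001, mantissa = 0
; bits = 0 10001001 00000000000000000000000 = 0x44800000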
diff --git a/test/CodeGen/R600/indirect-addressing-si.ll b/test/CodeGen/R600/indirect-addressing-si.ll
index 9cd2d84..319910f 100644
--- a/test/CodeGen/R600/indirect-addressing-si.ll
+++ b/test/CodeGen/R600/indirect-addressing-si.ll
@@ -4,7 +4,7 @@
; Tests for indirect addressing on SI, which is implemented using dynamic
; indexing of vectors.
-; CHECK: extract_w_offset
+; CHECK-LABEL: {{^}}extract_w_offset:
; CHECK: s_mov_b32 m0
; CHECK-NEXT: v_movrels_b32_e32
define void @extract_w_offset(float addrspace(1)* %out, i32 %in) {
@@ -15,7 +15,7 @@ entry:
ret void
}
-; CHECK: extract_wo_offset
+; CHECK-LABEL: {{^}}extract_wo_offset:
; CHECK: s_mov_b32 m0
; CHECK-NEXT: v_movrels_b32_e32
define void @extract_wo_offset(float addrspace(1)* %out, i32 %in) {
@@ -25,7 +25,7 @@ entry:
ret void
}
-; CHECK: insert_w_offset
+; CHECK-LABEL: {{^}}insert_w_offset:
; CHECK: s_mov_b32 m0
; CHECK-NEXT: v_movreld_b32_e32
define void @insert_w_offset(float addrspace(1)* %out, i32 %in) {
@@ -37,7 +37,7 @@ entry:
ret void
}
-; CHECK: insert_wo_offset
+; CHECK-LABEL: {{^}}insert_wo_offset:
; CHECK: s_mov_b32 m0
; CHECK-NEXT: v_movreld_b32_e32
define void @insert_wo_offset(float addrspace(1)* %out, i32 %in) {
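The switch from plain CHECK to CHECK-LABEL in this file is more than cosmetic: FileCheck treats each CHECK-LABEL match as a partition boundary, so the per-function CHECK lines above can no longer accidentally match assembly emitted for a different function, and the {{^}}...: pattern anchors the match to the actual label at the start of a line rather than any substring hit. A minimal sketch of the directive shape now used (taken from the hunks above):

; CHECK-LABEL: {{^}}extract_w_offset:
; CHECK: s_mov_b32 m0
; CHECK-NEXT: v_movrels_b32_e32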
diff --git a/test/CodeGen/R600/indirect-private-64.ll b/test/CodeGen/R600/indirect-private-64.ll
index cb06d60..d63e1b6 100644
--- a/test/CodeGen/R600/indirect-private-64.ll
+++ b/test/CodeGen/R600/indirect-private-64.ll
@@ -14,12 +14,12 @@ declare void @llvm.AMDGPU.barrier.local() noduplicate nounwind
; SI-PROMOTE: ds_write_b64
; SI-PROMOTE: ds_read_b64
define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in, i32 %b) nounwind {
- %val = load double addrspace(1)* %in, align 8
+ %val = load double, double addrspace(1)* %in, align 8
%array = alloca double, i32 16, align 8
- %ptr = getelementptr double* %array, i32 %b
+ %ptr = getelementptr double, double* %array, i32 %b
store double %val, double* %ptr, align 8
call void @llvm.AMDGPU.barrier.local() noduplicate nounwind
- %result = load double* %ptr, align 8
+ %result = load double, double* %ptr, align 8
store double %result, double addrspace(1)* %out, align 8
ret void
}
@@ -38,12 +38,12 @@ define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double
; SI-PROMOTE: ds_read_b32
; SI-PROMOTE: ds_read_b32
define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out, <2 x double> addrspace(1)* noalias %in, i32 %b) nounwind {
- %val = load <2 x double> addrspace(1)* %in, align 16
+ %val = load <2 x double>, <2 x double> addrspace(1)* %in, align 16
%array = alloca <2 x double>, i32 16, align 16
- %ptr = getelementptr <2 x double>* %array, i32 %b
+ %ptr = getelementptr <2 x double>, <2 x double>* %array, i32 %b
store <2 x double> %val, <2 x double>* %ptr, align 16
call void @llvm.AMDGPU.barrier.local() noduplicate nounwind
- %result = load <2 x double>* %ptr, align 16
+ %result = load <2 x double>, <2 x double>* %ptr, align 16
store <2 x double> %result, <2 x double> addrspace(1)* %out, align 16
ret void
}
@@ -56,12 +56,12 @@ define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out
; SI-PROMOTE: ds_write_b64
; SI-PROMOTE: ds_read_b64
define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i32 %b) nounwind {
- %val = load i64 addrspace(1)* %in, align 8
+ %val = load i64, i64 addrspace(1)* %in, align 8
%array = alloca i64, i32 16, align 8
- %ptr = getelementptr i64* %array, i32 %b
+ %ptr = getelementptr i64, i64* %array, i32 %b
store i64 %val, i64* %ptr, align 8
call void @llvm.AMDGPU.barrier.local() noduplicate nounwind
- %result = load i64* %ptr, align 8
+ %result = load i64, i64* %ptr, align 8
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
}
@@ -80,12 +80,12 @@ define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrs
; SI-PROMOTE: ds_read_b32
; SI-PROMOTE: ds_read_b32
define void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in, i32 %b) nounwind {
- %val = load <2 x i64> addrspace(1)* %in, align 16
+ %val = load <2 x i64>, <2 x i64> addrspace(1)* %in, align 16
%array = alloca <2 x i64>, i32 16, align 16
- %ptr = getelementptr <2 x i64>* %array, i32 %b
+ %ptr = getelementptr <2 x i64>, <2 x i64>* %array, i32 %b
store <2 x i64> %val, <2 x i64>* %ptr, align 16
call void @llvm.AMDGPU.barrier.local() noduplicate nounwind
- %result = load <2 x i64>* %ptr, align 16
+ %result = load <2 x i64>, <2 x i64>* %ptr, align 16
store <2 x i64> %result, <2 x i64> addrspace(1)* %out, align 16
ret void
}
diff --git a/test/CodeGen/R600/insert_vector_elt.ll b/test/CodeGen/R600/insert_vector_elt.ll
index 64afddc..6de3d40 100644
--- a/test/CodeGen/R600/insert_vector_elt.ll
+++ b/test/CodeGen/R600/insert_vector_elt.ll
@@ -185,13 +185,13 @@ entry:
br i1 %1, label %if, label %else
if:
- %2 = load i32 addrspace(1)* %in
+ %2 = load i32, i32 addrspace(1)* %in
%3 = insertelement <2 x i32> %0, i32 %2, i32 1
br label %endif
else:
- %4 = getelementptr i32 addrspace(1)* %in, i32 1
- %5 = load i32 addrspace(1)* %4
+ %4 = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %5 = load i32, i32 addrspace(1)* %4
%6 = insertelement <2 x i32> %0, i32 %5, i32 1
br label %endif
diff --git a/test/CodeGen/R600/jump-address.ll b/test/CodeGen/R600/jump-address.ll
index a1cd388..f55912e 100644
--- a/test/CodeGen/R600/jump-address.ll
+++ b/test/CodeGen/R600/jump-address.ll
@@ -6,7 +6,7 @@
define void @main() #0 {
main_body:
- %0 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %0 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%1 = extractelement <4 x float> %0, i32 0
%2 = bitcast float %1 to i32
%3 = icmp eq i32 %2, 0
@@ -17,7 +17,7 @@ main_body:
br i1 %7, label %ENDIF, label %ELSE
ELSE: ; preds = %main_body
- %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %8 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%9 = extractelement <4 x float> %8, i32 0
%10 = bitcast float %9 to i32
%11 = icmp eq i32 %10, 1
@@ -40,7 +40,7 @@ ENDIF: ; preds = %IF13, %ELSE, %main_
ret void
IF13: ; preds = %ELSE
- %20 = load <4 x float> addrspace(8)* null
+ %20 = load <4 x float>, <4 x float> addrspace(8)* null
%21 = extractelement <4 x float> %20, i32 0
%22 = fsub float -0.000000e+00, %21
%23 = fadd float 0xFFF8000000000000, %22
diff --git a/test/CodeGen/R600/kcache-fold.ll b/test/CodeGen/R600/kcache-fold.ll
index 27840b2..7e2291c 100644
--- a/test/CodeGen/R600/kcache-fold.ll
+++ b/test/CodeGen/R600/kcache-fold.ll
@@ -4,35 +4,35 @@
; CHECK: MOV * T{{[0-9]+\.[XYZW], KC0}}
define void @main1() {
main_body:
- %0 = load <4 x float> addrspace(8)* null
+ %0 = load <4 x float>, <4 x float> addrspace(8)* null
%1 = extractelement <4 x float> %0, i32 0
- %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %2 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%3 = extractelement <4 x float> %2, i32 0
- %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%5 = extractelement <4 x float> %4, i32 0
%6 = fcmp ogt float %1, 0.000000e+00
%7 = select i1 %6, float %3, float %5
- %8 = load <4 x float> addrspace(8)* null
+ %8 = load <4 x float>, <4 x float> addrspace(8)* null
%9 = extractelement <4 x float> %8, i32 1
- %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %10 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%11 = extractelement <4 x float> %10, i32 1
- %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %12 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%13 = extractelement <4 x float> %12, i32 1
%14 = fcmp ogt float %9, 0.000000e+00
%15 = select i1 %14, float %11, float %13
- %16 = load <4 x float> addrspace(8)* null
+ %16 = load <4 x float>, <4 x float> addrspace(8)* null
%17 = extractelement <4 x float> %16, i32 2
- %18 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %18 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%19 = extractelement <4 x float> %18, i32 2
- %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %20 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%21 = extractelement <4 x float> %20, i32 2
%22 = fcmp ogt float %17, 0.000000e+00
%23 = select i1 %22, float %19, float %21
- %24 = load <4 x float> addrspace(8)* null
+ %24 = load <4 x float>, <4 x float> addrspace(8)* null
%25 = extractelement <4 x float> %24, i32 3
- %26 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %26 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%27 = extractelement <4 x float> %26, i32 3
- %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %28 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%29 = extractelement <4 x float> %28, i32 3
%30 = fcmp ogt float %25, 0.000000e+00
%31 = select i1 %30, float %27, float %29
@@ -52,35 +52,35 @@ main_body:
; CHECK-NOT: MOV
define void @main2() {
main_body:
- %0 = load <4 x float> addrspace(8)* null
+ %0 = load <4 x float>, <4 x float> addrspace(8)* null
%1 = extractelement <4 x float> %0, i32 0
- %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %2 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%3 = extractelement <4 x float> %2, i32 0
- %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%5 = extractelement <4 x float> %4, i32 1
%6 = fcmp ogt float %1, 0.000000e+00
%7 = select i1 %6, float %3, float %5
- %8 = load <4 x float> addrspace(8)* null
+ %8 = load <4 x float>, <4 x float> addrspace(8)* null
%9 = extractelement <4 x float> %8, i32 1
- %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %10 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%11 = extractelement <4 x float> %10, i32 0
- %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %12 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%13 = extractelement <4 x float> %12, i32 1
%14 = fcmp ogt float %9, 0.000000e+00
%15 = select i1 %14, float %11, float %13
- %16 = load <4 x float> addrspace(8)* null
+ %16 = load <4 x float>, <4 x float> addrspace(8)* null
%17 = extractelement <4 x float> %16, i32 2
- %18 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %18 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%19 = extractelement <4 x float> %18, i32 3
- %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %20 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%21 = extractelement <4 x float> %20, i32 2
%22 = fcmp ogt float %17, 0.000000e+00
%23 = select i1 %22, float %19, float %21
- %24 = load <4 x float> addrspace(8)* null
+ %24 = load <4 x float>, <4 x float> addrspace(8)* null
%25 = extractelement <4 x float> %24, i32 3
- %26 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %26 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%27 = extractelement <4 x float> %26, i32 3
- %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %28 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%29 = extractelement <4 x float> %28, i32 2
%30 = fcmp ogt float %25, 0.000000e+00
%31 = select i1 %30, float %27, float %29
diff --git a/test/CodeGen/R600/large-alloca.ll b/test/CodeGen/R600/large-alloca.ll
index 788816c..671833d 100644
--- a/test/CodeGen/R600/large-alloca.ll
+++ b/test/CodeGen/R600/large-alloca.ll
@@ -5,10 +5,10 @@
define void @large_alloca(i32 addrspace(1)* %out, i32 %x, i32 %y) nounwind {
%large = alloca [8192 x i32], align 4
- %gep = getelementptr [8192 x i32]* %large, i32 0, i32 8191
+ %gep = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 8191
store i32 %x, i32* %gep
- %gep1 = getelementptr [8192 x i32]* %large, i32 0, i32 %y
- %0 = load i32* %gep1
+ %gep1 = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 %y
+ %0 = load i32, i32* %gep1
store i32 %0, i32 addrspace(1)* %out
ret void
}
diff --git a/test/CodeGen/R600/large-constant-initializer.ll b/test/CodeGen/R600/large-constant-initializer.ll
index c8671ef..9975b1b 100644
--- a/test/CodeGen/R600/large-constant-initializer.ll
+++ b/test/CodeGen/R600/large-constant-initializer.ll
@@ -5,7 +5,7 @@
@gv = external unnamed_addr addrspace(2) constant [239 x i32], align 4
define void @opencv_cvtfloat_crash(i32 addrspace(1)* %out, i32 %x) nounwind {
- %val = load i32 addrspace(2)* getelementptr ([239 x i32] addrspace(2)* @gv, i64 0, i64 239), align 4
+ %val = load i32, i32 addrspace(2)* getelementptr ([239 x i32], [239 x i32] addrspace(2)* @gv, i64 0, i64 239), align 4
%mul12 = mul nsw i32 %val, 7
br i1 undef, label %exit, label %bb
diff --git a/test/CodeGen/R600/lds-initializer.ll b/test/CodeGen/R600/lds-initializer.ll
index 7344eff..bf8df63 100644
--- a/test/CodeGen/R600/lds-initializer.ll
+++ b/test/CodeGen/R600/lds-initializer.ll
@@ -6,8 +6,8 @@
@lds = addrspace(3) global [8 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8]
define void @load_init_lds_global(i32 addrspace(1)* %out, i1 %p) {
- %gep = getelementptr [8 x i32] addrspace(3)* @lds, i32 0, i32 10
- %ld = load i32 addrspace(3)* %gep
+ %gep = getelementptr [8 x i32], [8 x i32] addrspace(3)* @lds, i32 0, i32 10
+ %ld = load i32, i32 addrspace(3)* %gep
store i32 %ld, i32 addrspace(1)* %out
ret void
}
diff --git a/test/CodeGen/R600/lds-oqap-crash.ll b/test/CodeGen/R600/lds-oqap-crash.ll
index fbcd778..6ff6fc3 100644
--- a/test/CodeGen/R600/lds-oqap-crash.ll
+++ b/test/CodeGen/R600/lds-oqap-crash.ll
@@ -12,7 +12,7 @@
; CHECK: {{^}}lds_crash:
define void @lds_crash(i32 addrspace(1)* %out, i32 addrspace(3)* %in, i32 %a, i32 %b, i32 %c) {
entry:
- %0 = load i32 addrspace(3)* %in
+ %0 = load i32, i32 addrspace(3)* %in
; This block needs to be > 115 ISA instructions to hit the bug,
; so we'll use udiv instructions.
%div0 = udiv i32 %0, %b
diff --git a/test/CodeGen/R600/lds-output-queue.ll b/test/CodeGen/R600/lds-output-queue.ll
index cda75b0..44ffc36 100644
--- a/test/CodeGen/R600/lds-output-queue.ll
+++ b/test/CodeGen/R600/lds-output-queue.ll
@@ -12,12 +12,12 @@
define void @lds_input_queue(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %index) {
entry:
- %0 = getelementptr inbounds [2 x i32] addrspace(3)* @local_mem, i32 0, i32 %index
- %1 = load i32 addrspace(3)* %0
+ %0 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(3)* @local_mem, i32 0, i32 %index
+ %1 = load i32, i32 addrspace(3)* %0
call void @llvm.AMDGPU.barrier.local()
; This will start a new clause for the vertex fetch
- %2 = load i32 addrspace(1)* %in
+ %2 = load i32, i32 addrspace(1)* %in
%3 = add i32 %1, %2
store i32 %3, i32 addrspace(1)* %out
ret void
@@ -40,9 +40,9 @@ declare void @llvm.AMDGPU.barrier.local()
; load from global memory which immediately follows a load from a global value that
; has been declared in the local memory space:
;
-; %0 = getelementptr inbounds [2 x i32] addrspace(3)* @local_mem, i32 0, i32 %index
-; %1 = load i32 addrspace(3)* %0
-; %2 = load i32 addrspace(1)* %in
+; %0 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(3)* @local_mem, i32 0, i32 %index
+; %1 = load i32, i32 addrspace(3)* %0
+; %2 = load i32, i32 addrspace(1)* %in
;
; The instruction selection phase will generate ISA that looks like this:
; %OQAP = LDS_READ_RET
@@ -90,9 +90,9 @@ declare void @llvm.AMDGPU.barrier.local()
; CHECK: MOV * T{{[0-9]\.[XYZW]}}, OQAP
define void @local_global_alias(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
- %0 = getelementptr inbounds [2 x i32] addrspace(3)* @local_mem, i32 0, i32 0
- %1 = load i32 addrspace(3)* %0
- %2 = load i32 addrspace(1)* %in
+ %0 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(3)* @local_mem, i32 0, i32 0
+ %1 = load i32, i32 addrspace(3)* %0
+ %2 = load i32, i32 addrspace(1)* %in
%3 = add i32 %2, %1
store i32 %3, i32 addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/lds-size.ll b/test/CodeGen/R600/lds-size.ll
index 5287723..3e83286 100644
--- a/test/CodeGen/R600/lds-size.ll
+++ b/test/CodeGen/R600/lds-size.ll
@@ -3,9 +3,9 @@
; This test makes sure we do not double count global values when they are
; used in different basic blocks.
-; CHECK-LABEL: {{^}}test:
; CHECK: .long 166120
; CHECK-NEXT: .long 1
+; CHECK-LABEL: {{^}}test:
@lds = internal unnamed_addr addrspace(3) global i32 undef, align 4
define void @test(i32 addrspace(1)* %out, i32 %cond) {
diff --git a/test/CodeGen/R600/lds-zero-initializer.ll b/test/CodeGen/R600/lds-zero-initializer.ll
index 1fb6f52..fb51bc0 100644
--- a/test/CodeGen/R600/lds-zero-initializer.ll
+++ b/test/CodeGen/R600/lds-zero-initializer.ll
@@ -6,8 +6,8 @@
@lds = addrspace(3) global [256 x i32] zeroinitializer
define void @load_zeroinit_lds_global(i32 addrspace(1)* %out, i1 %p) {
- %gep = getelementptr [256 x i32] addrspace(3)* @lds, i32 0, i32 10
- %ld = load i32 addrspace(3)* %gep
+ %gep = getelementptr [256 x i32], [256 x i32] addrspace(3)* @lds, i32 0, i32 10
+ %ld = load i32, i32 addrspace(3)* %gep
store i32 %ld, i32 addrspace(1)* %out
ret void
}
diff --git a/test/CodeGen/R600/legalizedag-bug-expand-setcc.ll b/test/CodeGen/R600/legalizedag-bug-expand-setcc.ll
index b9fa8e9..4244c48 100644
--- a/test/CodeGen/R600/legalizedag-bug-expand-setcc.ll
+++ b/test/CodeGen/R600/legalizedag-bug-expand-setcc.ll
@@ -16,7 +16,7 @@ entry:
%0 = icmp eq i32 %in, 5
br i1 %0, label %IF, label %ENDIF
IF:
- %1 = getelementptr i32 addrspace(1)* %out, i32 1
+ %1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
store i32 0, i32 addrspace(1)* %1
br label %ENDIF
diff --git a/test/CodeGen/R600/llvm.AMDGPU.abs.ll b/test/CodeGen/R600/llvm.AMDGPU.abs.ll
index 8bc2583..8bf094b 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.abs.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.abs.ll
@@ -28,7 +28,7 @@ define void @s_abs_i32(i32 addrspace(1)* %out, i32 %src) nounwind {
; EG: SUB_INT
; EG: MAX_INT
define void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
- %val = load i32 addrspace(1)* %src, align 4
+ %val = load i32, i32 addrspace(1)* %src, align 4
%abs = call i32 @llvm.AMDGPU.abs(i32 %val) nounwind readnone
store i32 %abs, i32 addrspace(1)* %out, align 4
ret void
@@ -42,7 +42,7 @@ define void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind
; EG: SUB_INT
; EG: MAX_INT
define void @abs_i32_legacy_amdil(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
- %val = load i32 addrspace(1)* %src, align 4
+ %val = load i32, i32 addrspace(1)* %src, align 4
%abs = call i32 @llvm.AMDIL.abs.i32(i32 %val) nounwind readnone
store i32 %abs, i32 addrspace(1)* %out, align 4
ret void
diff --git a/test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll b/test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll
index a11d9ae..db88397 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll
@@ -10,14 +10,14 @@
define void @test_barrier_global(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.x()
- %1 = getelementptr i32 addrspace(1)* %out, i32 %0
+ %1 = getelementptr i32, i32 addrspace(1)* %out, i32 %0
store i32 %0, i32 addrspace(1)* %1
call void @llvm.AMDGPU.barrier.global()
%2 = call i32 @llvm.r600.read.local.size.x()
%3 = sub i32 %2, 1
%4 = sub i32 %3, %0
- %5 = getelementptr i32 addrspace(1)* %out, i32 %4
- %6 = load i32 addrspace(1)* %5
+ %5 = getelementptr i32, i32 addrspace(1)* %out, i32 %4
+ %6 = load i32, i32 addrspace(1)* %5
store i32 %6, i32 addrspace(1)* %1
ret void
}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll b/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll
index 76c2453..48fb2e0 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll
@@ -11,14 +11,14 @@
define void @test_barrier_local(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.x()
- %1 = getelementptr i32 addrspace(1)* %out, i32 %0
+ %1 = getelementptr i32, i32 addrspace(1)* %out, i32 %0
store i32 %0, i32 addrspace(1)* %1
call void @llvm.AMDGPU.barrier.local()
%2 = call i32 @llvm.r600.read.local.size.x()
%3 = sub i32 %2, 1
%4 = sub i32 %3, %0
- %5 = getelementptr i32 addrspace(1)* %out, i32 %4
- %6 = load i32 addrspace(1)* %5
+ %5 = getelementptr i32, i32 addrspace(1)* %out, i32 %4
+ %6 = load i32, i32 addrspace(1)* %5
store i32 %6, i32 addrspace(1)* %1
ret void
}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll b/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll
index 2ec2546..1168713 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll
@@ -44,7 +44,7 @@ define void @bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) n
; FUNC-LABEL: {{^}}v_bfe_print_arg:
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 2, 8
define void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %src0) nounwind {
- %load = load i32 addrspace(1)* %src0, align 4
+ %load = load i32, i32 addrspace(1)* %src0, align 4
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 2, i32 8) nounwind readnone
store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
ret void
@@ -75,7 +75,7 @@ define void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI: s_endpgm
define void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 1, i32 31)
store i32 %bfe, i32 addrspace(1)* %out, align 4
@@ -89,20 +89,19 @@ define void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
define void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 0, i32 31)
store i32 %bfe, i32 addrspace(1)* %out, align 4
ret void
}
-; FIXME: The shifts should be 1 BFE
; FUNC-LABEL: {{^}}bfe_i32_test_8:
; SI: buffer_load_dword
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
; SI: s_endpgm
define void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1)
store i32 %bfe, i32 addrspace(1)* %out, align 4
@@ -115,7 +114,7 @@ define void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 31, i32 1)
store i32 %bfe, i32 addrspace(1)* %out, align 4
ret void
@@ -127,7 +126,7 @@ define void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 1, i32 31)
store i32 %bfe, i32 addrspace(1)* %out, align 4
ret void
@@ -139,7 +138,7 @@ define void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 8, i32 24)
store i32 %bfe, i32 addrspace(1)* %out, align 4
ret void
@@ -151,7 +150,7 @@ define void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 24, i32 8)
store i32 %bfe, i32 addrspace(1)* %out, align 4
ret void
@@ -162,7 +161,7 @@ define void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%shl = ashr i32 %x, 31
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
@@ -173,7 +172,7 @@ define void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%shl = lshr i32 %x, 31
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
@@ -407,18 +406,14 @@ define void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
ret void
}
-; XXX - This should really be a single BFE, but the sext_inreg of the
-; extended type i24 is never custom lowered.
; FUNC-LABEL: {{^}}bfe_sext_in_reg_i24:
; SI: buffer_load_dword [[LOAD:v[0-9]+]],
-; SI: v_lshlrev_b32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
-; SI: v_ashrrev_i32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
-; XSI: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 0, 8
-; XSI-NOT: SHL
-; XSI-NOT: SHR
-; XSI: buffer_store_dword [[BFE]],
+; SI-NOT: v_lshl
+; SI-NOT: v_ashr
+; SI: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 0, 24
+; SI: buffer_store_dword [[BFE]],
define void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 0, i32 24)
%shl = shl i32 %bfe, 8
%ashr = ashr i32 %shl, 8
@@ -434,7 +429,7 @@ define void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in)
; SI: v_ashrrev_i32_e32 [[TMP2:v[0-9]+]], 1, [[TMP1]]
; SI: buffer_store_dword [[TMP2]]
define void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %src = load i32 addrspace(1)* %in, align 4
+ %src = load i32, i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %src, i32 1, i32 16) nounwind readnone
%div = sdiv i32 %bfe, 2
store i32 %div, i32 addrspace(1)* %out, align 4
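The resolved XXX in bfe_sext_in_reg_i24 above is worth spelling out: shl by 8 followed by ashr by 8 on an i32 keeps bits [23:0] and replicates bit 23 upward, i.e. it is sign-extension from an i24 value, which is exactly a signed bitfield extract at offset 0 with width 24. That is why the updated CHECK lines forbid the shifts and expect the single instruction:

; %shl = shl i32 %bfe, 8     ; drop the top 8 bits
; %ashr = ashr i32 %shl, 8   ; smear bit 23 back across them
; == v_bfe_i32 dst, src, 0, 24 (extract offset 0, width 24, sign-extended)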
diff --git a/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll b/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
index 6cd0108..5411192 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
@@ -65,7 +65,7 @@ define void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
- %load = load i8 addrspace(1)* %in
+ %load = load i8, i8 addrspace(1)* %in
%ext = zext i8 %load to i32
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 8)
store i32 %bfe, i32 addrspace(1)* %out, align 4
@@ -79,7 +79,7 @@ define void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) n
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %load = load i32 addrspace(1)* %in, align 4
+ %load = load i32, i32 addrspace(1)* %in, align 4
%add = add i32 %load, 1
%ext = and i32 %add, 255
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 8)
@@ -94,7 +94,7 @@ define void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %i
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %load = load i32 addrspace(1)* %in, align 4
+ %load = load i32, i32 addrspace(1)* %in, align 4
%add = add i32 %load, 1
%ext = and i32 %add, 65535
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 16)
@@ -108,7 +108,7 @@ define void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %
; SI: bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %load = load i32 addrspace(1)* %in, align 4
+ %load = load i32, i32 addrspace(1)* %in, align 4
%add = add i32 %load, 1
%ext = and i32 %add, 255
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 1, i32 8)
@@ -123,7 +123,7 @@ define void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspa
; SI-NEXT: bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %load = load i32 addrspace(1)* %in, align 4
+ %load = load i32, i32 addrspace(1)* %in, align 4
%add = add i32 %load, 1
%ext = and i32 %add, 255
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 3, i32 8)
@@ -138,7 +138,7 @@ define void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspa
; SI-NEXT: bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %load = load i32 addrspace(1)* %in, align 4
+ %load = load i32, i32 addrspace(1)* %in, align 4
%add = add i32 %load, 1
%ext = and i32 %add, 255
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 7, i32 8)
@@ -152,7 +152,7 @@ define void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspa
; SI-NEXT: bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %load = load i32 addrspace(1)* %in, align 4
+ %load = load i32, i32 addrspace(1)* %in, align 4
%add = add i32 %load, 1
%ext = and i32 %add, 65535
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 8, i32 8)
@@ -166,14 +166,14 @@ define void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrsp
; SI: s_endpgm
; EG: AND_INT T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, 1,
define void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 0, i32 1)
store i32 %bfe, i32 addrspace(1)* %out, align 4
ret void
}
define void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 8)
store i32 %bfe, i32 addrspace(1)* %out, align 4
@@ -181,7 +181,7 @@ define void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
}
define void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 1)
store i32 %bfe, i32 addrspace(1)* %out, align 4
@@ -196,7 +196,7 @@ define void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
define void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
%shr = lshr i32 %shl, 31
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shr, i32 31, i32 1)
@@ -211,7 +211,7 @@ define void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
; SI: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1
; SI: s_endpgm
define void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
%shr = ashr i32 %shl, 31
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shr, i32 0, i32 1)
@@ -224,7 +224,7 @@ define void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI: s_endpgm
define void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 1, i32 31)
store i32 %bfe, i32 addrspace(1)* %out, align 4
@@ -236,7 +236,7 @@ define void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 31)
store i32 %bfe, i32 addrspace(1)* %out, align 4
@@ -249,7 +249,7 @@ define void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
store i32 %bfe, i32 addrspace(1)* %out, align 4
@@ -262,7 +262,7 @@ define void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 31, i32 1)
store i32 %bfe, i32 addrspace(1)* %out, align 4
ret void
@@ -274,7 +274,7 @@ define void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 1, i32 31)
store i32 %bfe, i32 addrspace(1)* %out, align 4
ret void
@@ -286,7 +286,7 @@ define void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 8, i32 24)
store i32 %bfe, i32 addrspace(1)* %out, align 4
ret void
@@ -298,7 +298,7 @@ define void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 24, i32 8)
store i32 %bfe, i32 addrspace(1)* %out, align 4
ret void
@@ -309,7 +309,7 @@ define void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%shl = ashr i32 %x, 31
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
@@ -320,7 +320,7 @@ define void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%shl = lshr i32 %x, 31
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
@@ -439,7 +439,7 @@ define void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
-; EG-NOT: BFEfppppppppppppp
+; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 65536, i32 16, i32 8) nounwind readnone
store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
@@ -568,10 +568,60 @@ define void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
define void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0,
i32 addrspace(1)* %out1,
i32 addrspace(1)* %in) nounwind {
- %src = load i32 addrspace(1)* %in, align 4
+ %src = load i32, i32 addrspace(1)* %in, align 4
%and = and i32 %src, 63
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %and, i32 2, i32 2) nounwind readnone
store i32 %bfe_u32, i32 addrspace(1)* %out0, align 4
store i32 %and, i32 addrspace(1)* %out1, align 4
ret void
}
+
+; FUNC-LABEL: {{^}}lshr_and:
+; SI: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
+; SI: buffer_store_dword
+define void @lshr_and(i32 addrspace(1)* %out, i32 %a) nounwind {
+ %b = lshr i32 %a, 6
+ %c = and i32 %b, 7
+ store i32 %c, i32 addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_lshr_and:
+; SI: v_bfe_u32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}, 3
+; SI: buffer_store_dword
+define void @v_lshr_and(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+ %c = lshr i32 %a, %b
+ %d = and i32 %c, 7
+ store i32 %d, i32 addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}and_lshr:
+; SI: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
+; SI: buffer_store_dword
+define void @and_lshr(i32 addrspace(1)* %out, i32 %a) nounwind {
+ %b = and i32 %a, 448
+ %c = lshr i32 %b, 6
+ store i32 %c, i32 addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}and_lshr2:
+; SI: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
+; SI: buffer_store_dword
+define void @and_lshr2(i32 addrspace(1)* %out, i32 %a) nounwind {
+ %b = and i32 %a, 511
+ %c = lshr i32 %b, 6
+ store i32 %c, i32 addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}shl_lshr:
+; SI: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x150002
+; SI: buffer_store_dword
+define void @shl_lshr(i32 addrspace(1)* %out, i32 %a) nounwind {
+ %b = shl i32 %a, 9
+ %c = lshr i32 %b, 11
+ store i32 %c, i32 addrspace(1)* %out, align 8
+ ret void
+}
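The new tests above pin down how the shift/mask idioms map onto the scalar bitfield extract. Judging from the expected immediates, the s_bfe_u32 field descriptor packs as (width << 16) | offset, which the two distinct hex values confirm:

; lshr_and / and_lshr: (x >> 6) & 7   -> offset 6, width 3
;   (3 << 16) | 6  = 0x30006
; shl_lshr: (x << 9) >> 11            -> offset 11 - 9 = 2, width 32 - 11 = 21
;   (21 << 16) | 2 = 0x150002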
diff --git a/test/CodeGen/R600/llvm.AMDGPU.bfm.ll b/test/CodeGen/R600/llvm.AMDGPU.bfm.ll
index 2346f40..5049228 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.bfm.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.bfm.ll
@@ -5,7 +5,7 @@
declare i32 @llvm.AMDGPU.bfm(i32, i32) nounwind readnone
; FUNC-LABEL: {{^}}bfm_arg_arg:
-; SI: v_bfm
+; SI: s_bfm_b32 {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
; EG: BFM_INT
define void @bfm_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
%bfm = call i32 @llvm.AMDGPU.bfm(i32 %src0, i32 %src1) nounwind readnone
@@ -14,7 +14,7 @@ define void @bfm_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind
}
; FUNC-LABEL: {{^}}bfm_arg_imm:
-; SI: v_bfm
+; SI: s_bfm_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0x7b
; EG: BFM_INT
define void @bfm_arg_imm(i32 addrspace(1)* %out, i32 %src0) nounwind {
%bfm = call i32 @llvm.AMDGPU.bfm(i32 %src0, i32 123) nounwind readnone
@@ -23,7 +23,7 @@ define void @bfm_arg_imm(i32 addrspace(1)* %out, i32 %src0) nounwind {
}
; FUNC-LABEL: {{^}}bfm_imm_arg:
-; SI: v_bfm
+; SI: s_bfm_b32 {{s[0-9]+}}, 0x7b, {{s[0-9]+}}
; EG: BFM_INT
define void @bfm_imm_arg(i32 addrspace(1)* %out, i32 %src1) nounwind {
%bfm = call i32 @llvm.AMDGPU.bfm(i32 123, i32 %src1) nounwind readnone
@@ -32,10 +32,29 @@ define void @bfm_imm_arg(i32 addrspace(1)* %out, i32 %src1) nounwind {
}
; FUNC-LABEL: {{^}}bfm_imm_imm:
-; SI: v_bfm
+; SI: s_bfm_b32 {{s[0-9]+}}, 0x7b, 0x1c8
; EG: BFM_INT
define void @bfm_imm_imm(i32 addrspace(1)* %out) nounwind {
%bfm = call i32 @llvm.AMDGPU.bfm(i32 123, i32 456) nounwind readnone
store i32 %bfm, i32 addrspace(1)* %out, align 4
ret void
}
+
+; FUNC-LABEL: {{^}}bfm_pattern:
+; SI: s_bfm_b32 {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+define void @bfm_pattern(i32 addrspace(1)* %out, i32 %x, i32 %y) {
+ %a = shl i32 1, %x
+ %b = sub i32 %a, 1
+ %c = shl i32 %b, %y
+ store i32 %c, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}bfm_pattern_simple:
+; SI: s_bfm_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0
+define void @bfm_pattern_simple(i32 addrspace(1)* %out, i32 %x) {
+ %a = shl i32 1, %x
+ %b = sub i32 %a, 1
+ store i32 %b, i32 addrspace(1)* %out
+ ret void
+}
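The two added tests match the semantics of the instruction: s_bfm_b32 dst, s0, s1 materializes the bitfield mask ((1 << s0) - 1) << s1. bfm_pattern is that computation written out in IR, and bfm_pattern_simple is the s1 = 0 special case, hence the literal 0 in its CHECK line:

; %a = shl i32 1, %x    ; 1 << x
; %b = sub i32 %a, 1    ; (1 << x) - 1
; %c = shl i32 %b, %y   ; ((1 << x) - 1) << y  ==  s_bfm_b32 dst, x, y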
diff --git a/test/CodeGen/R600/llvm.AMDGPU.brev.ll b/test/CodeGen/R600/llvm.AMDGPU.brev.ll
index 3973f53..301de4b 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.brev.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.brev.ll
@@ -21,7 +21,7 @@ define void @s_brev_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
define void @v_brev_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
- %val = load i32 addrspace(1)* %valptr, align 4
+ %val = load i32, i32 addrspace(1)* %valptr, align 4
%ctlz = call i32 @llvm.AMDGPU.brev(i32 %val) nounwind readnone
store i32 %ctlz, i32 addrspace(1)* %out, align 4
ret void
diff --git a/test/CodeGen/R600/llvm.AMDGPU.class.ll b/test/CodeGen/R600/llvm.AMDGPU.class.ll
index f111eb9..805a88b 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.class.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.class.ll
@@ -134,9 +134,9 @@ define void @test_class_9bit_mask_f32(i32 addrspace(1)* %out, float %a) #0 {
; SI: s_endpgm
define void @v_test_class_full_mask_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.in = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load float addrspace(1)* %gep.in
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load float, float addrspace(1)* %gep.in
%result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 511) #1
%sext = sext i1 %result to i32
@@ -152,9 +152,9 @@ define void @v_test_class_full_mask_f32(i32 addrspace(1)* %out, float addrspace(
; SI: s_endpgm
define void @test_class_inline_imm_constant_dynamic_mask_f32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.in = getelementptr i32 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
- %b = load i32 addrspace(1)* %gep.in
+ %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %b = load i32, i32 addrspace(1)* %gep.in
%result = call i1 @llvm.AMDGPU.class.f32(float 1.0, i32 %b) #1
%sext = sext i1 %result to i32
@@ -172,9 +172,9 @@ define void @test_class_inline_imm_constant_dynamic_mask_f32(i32 addrspace(1)* %
; SI: s_endpgm
define void @test_class_lit_constant_dynamic_mask_f32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.in = getelementptr i32 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
- %b = load i32 addrspace(1)* %gep.in
+ %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %b = load i32, i32 addrspace(1)* %gep.in
%result = call i1 @llvm.AMDGPU.class.f32(float 1024.0, i32 %b) #1
%sext = sext i1 %result to i32
@@ -290,9 +290,9 @@ define void @test_class_full_mask_f64(i32 addrspace(1)* %out, double %a) #0 {
; SI: s_endpgm
define void @v_test_class_full_mask_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.in = getelementptr double addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load double addrspace(1)* %in
+ %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load double, double addrspace(1)* %in
%result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 511) #1
%sext = sext i1 %result to i32
@@ -306,9 +306,9 @@ define void @v_test_class_full_mask_f64(i32 addrspace(1)* %out, double addrspace
; SI: s_endpgm
define void @test_class_inline_imm_constant_dynamic_mask_f64(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.in = getelementptr i32 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
- %b = load i32 addrspace(1)* %gep.in
+ %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %b = load i32, i32 addrspace(1)* %gep.in
%result = call i1 @llvm.AMDGPU.class.f64(double 1.0, i32 %b) #1
%sext = sext i1 %result to i32
@@ -321,9 +321,9 @@ define void @test_class_inline_imm_constant_dynamic_mask_f64(i32 addrspace(1)* %
; SI: s_endpgm
define void @test_class_lit_constant_dynamic_mask_f64(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.in = getelementptr i32 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
- %b = load i32 addrspace(1)* %gep.in
+ %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %b = load i32, i32 addrspace(1)* %gep.in
%result = call i1 @llvm.AMDGPU.class.f64(double 1024.0, i32 %b) #1
%sext = sext i1 %result to i32
@@ -338,9 +338,9 @@ define void @test_class_lit_constant_dynamic_mask_f64(i32 addrspace(1)* %out, i3
; SI: s_endpgm
define void @test_fold_or_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.in = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load float addrspace(1)* %gep.in
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load float, float addrspace(1)* %gep.in
%class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1
%class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 3) #1
@@ -358,9 +358,9 @@ define void @test_fold_or_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)
; SI: s_endpgm
define void @test_fold_or3_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.in = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load float addrspace(1)* %gep.in
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load float, float addrspace(1)* %gep.in
%class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1
%class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 2) #1
@@ -381,9 +381,9 @@ define void @test_fold_or3_class_f32_0(i32 addrspace(1)* %out, float addrspace(1
; SI: s_endpgm
define void @test_fold_or_all_tests_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.in = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load float addrspace(1)* %gep.in
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load float, float addrspace(1)* %gep.in
%class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1
%class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 2) #1
@@ -416,9 +416,9 @@ define void @test_fold_or_all_tests_class_f32_0(i32 addrspace(1)* %out, float ad
; SI: s_endpgm
define void @test_fold_or_class_f32_1(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.in = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load float addrspace(1)* %gep.in
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load float, float addrspace(1)* %gep.in
%class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 4) #1
%class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 8) #1
@@ -436,9 +436,9 @@ define void @test_fold_or_class_f32_1(i32 addrspace(1)* %out, float addrspace(1)
; SI: s_endpgm
define void @test_fold_or_class_f32_2(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.in = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load float addrspace(1)* %gep.in
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load float, float addrspace(1)* %gep.in
%class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 7) #1
%class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 7) #1
@@ -456,9 +456,9 @@ define void @test_fold_or_class_f32_2(i32 addrspace(1)* %out, float addrspace(1)
; SI: s_endpgm
define void @test_no_fold_or_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in, float %b) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.in = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load float addrspace(1)* %gep.in
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load float, float addrspace(1)* %gep.in
%class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 4) #1
%class1 = call i1 @llvm.AMDGPU.class.f32(float %b, i32 8) #1
diff --git a/test/CodeGen/R600/llvm.AMDGPU.cube.ll b/test/CodeGen/R600/llvm.AMDGPU.cube.ll
index aa07afd..e95a510 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.cube.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.cube.ll
@@ -8,15 +8,15 @@
; CHECK: CUBE * T{{[0-9]}}.W
define void @cube() #0 {
main_body:
- %0 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+ %0 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
%1 = extractelement <4 x float> %0, i32 3
- %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+ %2 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
%3 = extractelement <4 x float> %2, i32 0
%4 = fdiv float %3, %1
- %5 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+ %5 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
%6 = extractelement <4 x float> %5, i32 1
%7 = fdiv float %6, %1
- %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+ %8 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
%9 = extractelement <4 x float> %8, i32 2
%10 = fdiv float %9, %1
%11 = insertelement <4 x float> undef, float %4, i32 0
diff --git a/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll b/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll
index 799817e..8b32f69 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll
@@ -9,7 +9,7 @@ declare float @llvm.AMDGPU.cvt.f32.ubyte3(i32) nounwind readnone
; SI-LABEL: {{^}}test_unpack_byte0_to_float:
; SI: v_cvt_f32_ubyte0
define void @test_unpack_byte0_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %val = load i32 addrspace(1)* %in, align 4
+ %val = load i32, i32 addrspace(1)* %in, align 4
%cvt = call float @llvm.AMDGPU.cvt.f32.ubyte0(i32 %val) nounwind readnone
store float %cvt, float addrspace(1)* %out, align 4
ret void
@@ -18,7 +18,7 @@ define void @test_unpack_byte0_to_float(float addrspace(1)* %out, i32 addrspace(
; SI-LABEL: {{^}}test_unpack_byte1_to_float:
; SI: v_cvt_f32_ubyte1
define void @test_unpack_byte1_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %val = load i32 addrspace(1)* %in, align 4
+ %val = load i32, i32 addrspace(1)* %in, align 4
%cvt = call float @llvm.AMDGPU.cvt.f32.ubyte1(i32 %val) nounwind readnone
store float %cvt, float addrspace(1)* %out, align 4
ret void
@@ -27,7 +27,7 @@ define void @test_unpack_byte1_to_float(float addrspace(1)* %out, i32 addrspace(
; SI-LABEL: {{^}}test_unpack_byte2_to_float:
; SI: v_cvt_f32_ubyte2
define void @test_unpack_byte2_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %val = load i32 addrspace(1)* %in, align 4
+ %val = load i32, i32 addrspace(1)* %in, align 4
%cvt = call float @llvm.AMDGPU.cvt.f32.ubyte2(i32 %val) nounwind readnone
store float %cvt, float addrspace(1)* %out, align 4
ret void
@@ -36,7 +36,7 @@ define void @test_unpack_byte2_to_float(float addrspace(1)* %out, i32 addrspace(
; SI-LABEL: {{^}}test_unpack_byte3_to_float:
; SI: v_cvt_f32_ubyte3
define void @test_unpack_byte3_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %val = load i32 addrspace(1)* %in, align 4
+ %val = load i32, i32 addrspace(1)* %in, align 4
%cvt = call float @llvm.AMDGPU.cvt.f32.ubyte3(i32 %val) nounwind readnone
store float %cvt, float addrspace(1)* %out, align 4
ret void
diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll b/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll
index 239fd53..bcb7f87 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll
@@ -78,7 +78,7 @@ define void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b,
}
; GCN-LABEL: {{^}}test_div_fmas_f32_cond_to_vcc:
-; SI: v_cmp_eq_i32_e64 vcc, s{{[0-9]+}}, 0
+; SI: v_cmp_eq_i32_e64 vcc, 0, s{{[0-9]+}}
; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define void @test_div_fmas_f32_cond_to_vcc(float addrspace(1)* %out, float %a, float %b, float %c, i32 %i) nounwind {
%cmp = icmp eq i32 %i, 0
@@ -110,21 +110,21 @@ define void @test_div_fmas_f32_imm_true_cond_to_vcc(float addrspace(1)* %out, fl
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
-; SI-DAG: v_cmp_eq_i32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0
-; SI-DAG: v_cmp_ne_i32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0
+; SI-DAG: v_cmp_eq_i32_e32 [[CMP0:vcc]], 0, v{{[0-9]+}}
+; SI-DAG: v_cmp_ne_i32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], 0, s{{[0-9]+}}
; SI: s_and_b64 vcc, [[CMP0]], [[CMP1]]
; SI: v_div_fmas_f32 {{v[0-9]+}}, [[A]], [[B]], [[C]]
; SI: s_endpgm
define void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace(1)* %out, float addrspace(1)* %in, i32 %d) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.a = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.b = getelementptr float addrspace(1)* %gep.a, i32 1
- %gep.c = getelementptr float addrspace(1)* %gep.a, i32 2
- %gep.out = getelementptr float addrspace(1)* %out, i32 2
+ %gep.a = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.b = getelementptr float, float addrspace(1)* %gep.a, i32 1
+ %gep.c = getelementptr float, float addrspace(1)* %gep.a, i32 2
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 2
- %a = load float addrspace(1)* %gep.a
- %b = load float addrspace(1)* %gep.b
- %c = load float addrspace(1)* %gep.c
+ %a = load float, float addrspace(1)* %gep.a
+ %b = load float, float addrspace(1)* %gep.b
+ %c = load float, float addrspace(1)* %gep.c
%cmp0 = icmp eq i32 %tid, 0
%cmp1 = icmp ne i32 %d, 0
@@ -136,17 +136,17 @@ define void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace(1)* %out, flo
}
; GCN-LABEL: {{^}}test_div_fmas_f32_i1_phi_vcc:
-; SI: v_cmp_eq_i32_e64 [[CMPTID:s\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0
-; SI: s_and_saveexec_b64 [[CMPTID]], [[CMPTID]]
-; SI: s_xor_b64 [[CMPTID]], exec, [[CMPTID]]
+; SI: v_cmp_eq_i32_e32 vcc, 0, v{{[0-9]+}}
+; SI: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], vcc
+; SI: s_xor_b64 [[SAVE]], exec, [[SAVE]]
; SI: buffer_load_dword [[LOAD:v[0-9]+]]
-; SI: v_cmp_ne_i32_e64 [[CMPLOAD:s\[[0-9]+:[0-9]+\]]], [[LOAD]], 0
-; SI: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, [[CMPLOAD]]
+; SI: v_cmp_ne_i32_e32 vcc, 0, [[LOAD]]
+; SI: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
; SI: BB9_2:
-; SI: s_or_b64 exec, exec, [[CMPTID]]
+; SI: s_or_b64 exec, exec, [[SAVE]]
; SI: v_cmp_ne_i32_e32 vcc, 0, v0
; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: buffer_store_dword
@@ -154,20 +154,20 @@ define void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace(1)* %out, flo
define void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out, float addrspace(1)* %in, i32 addrspace(1)* %dummy) nounwind {
entry:
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.out = getelementptr float addrspace(1)* %out, i32 2
- %gep.a = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.b = getelementptr float addrspace(1)* %gep.a, i32 1
- %gep.c = getelementptr float addrspace(1)* %gep.a, i32 2
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 2
+ %gep.a = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.b = getelementptr float, float addrspace(1)* %gep.a, i32 1
+ %gep.c = getelementptr float, float addrspace(1)* %gep.a, i32 2
- %a = load float addrspace(1)* %gep.a
- %b = load float addrspace(1)* %gep.b
- %c = load float addrspace(1)* %gep.c
+ %a = load float, float addrspace(1)* %gep.a
+ %b = load float, float addrspace(1)* %gep.b
+ %c = load float, float addrspace(1)* %gep.c
%cmp0 = icmp eq i32 %tid, 0
br i1 %cmp0, label %bb, label %exit
bb:
- %val = load i32 addrspace(1)* %dummy
+ %val = load i32, i32 addrspace(1)* %dummy
%cmp1 = icmp ne i32 %val, 0
br label %exit
diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll b/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll
index 5773da0..de830de 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll
@@ -13,11 +13,11 @@ declare float @llvm.fabs.f32(float) nounwind readnone
; SI: s_endpgm
define void @test_div_scale_f32_1(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float addrspace(1)* %gep.0, align 4
- %b = load float addrspace(1)* %gep.1, align 4
+ %a = load float, float addrspace(1)* %gep.0, align 4
+ %b = load float, float addrspace(1)* %gep.1, align 4
%result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
@@ -33,11 +33,11 @@ define void @test_div_scale_f32_1(float addrspace(1)* %out, float addrspace(1)*
; SI: s_endpgm
define void @test_div_scale_f32_2(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float addrspace(1)* %gep.0, align 4
- %b = load float addrspace(1)* %gep.1, align 4
+ %a = load float, float addrspace(1)* %gep.0, align 4
+ %b = load float, float addrspace(1)* %gep.1, align 4
%result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
@@ -53,11 +53,11 @@ define void @test_div_scale_f32_2(float addrspace(1)* %out, float addrspace(1)*
; SI: s_endpgm
define void @test_div_scale_f64_1(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
- %a = load double addrspace(1)* %gep.0, align 8
- %b = load double addrspace(1)* %gep.1, align 8
+ %a = load double, double addrspace(1)* %gep.0, align 8
+ %b = load double, double addrspace(1)* %gep.1, align 8
%result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
%result0 = extractvalue { double, i1 } %result, 0
@@ -73,11 +73,11 @@ define void @test_div_scale_f64_1(double addrspace(1)* %out, double addrspace(1)
; SI: s_endpgm
define void @test_div_scale_f64_2(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
- %a = load double addrspace(1)* %gep.0, align 8
- %b = load double addrspace(1)* %gep.1, align 8
+ %a = load double, double addrspace(1)* %gep.0, align 8
+ %b = load double, double addrspace(1)* %gep.1, align 8
%result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
%result0 = extractvalue { double, i1 } %result, 0
@@ -93,9 +93,9 @@ define void @test_div_scale_f64_2(double addrspace(1)* %out, double addrspace(1)
; SI: s_endpgm
define void @test_div_scale_f32_scalar_num_1(float addrspace(1)* %out, float addrspace(1)* %in, float %a) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep = getelementptr float addrspace(1)* %in, i32 %tid
+ %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
- %b = load float addrspace(1)* %gep, align 4
+ %b = load float, float addrspace(1)* %gep, align 4
%result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
@@ -111,9 +111,9 @@ define void @test_div_scale_f32_scalar_num_1(float addrspace(1)* %out, float add
; SI: s_endpgm
define void @test_div_scale_f32_scalar_num_2(float addrspace(1)* %out, float addrspace(1)* %in, float %a) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep = getelementptr float addrspace(1)* %in, i32 %tid
+ %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
- %b = load float addrspace(1)* %gep, align 4
+ %b = load float, float addrspace(1)* %gep, align 4
%result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
@@ -129,9 +129,9 @@ define void @test_div_scale_f32_scalar_num_2(float addrspace(1)* %out, float add
; SI: s_endpgm
define void @test_div_scale_f32_scalar_den_1(float addrspace(1)* %out, float addrspace(1)* %in, float %b) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep = getelementptr float addrspace(1)* %in, i32 %tid
+ %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
- %a = load float addrspace(1)* %gep, align 4
+ %a = load float, float addrspace(1)* %gep, align 4
%result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
@@ -147,9 +147,9 @@ define void @test_div_scale_f32_scalar_den_1(float addrspace(1)* %out, float add
; SI: s_endpgm
define void @test_div_scale_f32_scalar_den_2(float addrspace(1)* %out, float addrspace(1)* %in, float %b) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep = getelementptr float addrspace(1)* %in, i32 %tid
+ %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
- %a = load float addrspace(1)* %gep, align 4
+ %a = load float, float addrspace(1)* %gep, align 4
%result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
@@ -165,9 +165,9 @@ define void @test_div_scale_f32_scalar_den_2(float addrspace(1)* %out, float add
; SI: s_endpgm
define void @test_div_scale_f64_scalar_num_1(double addrspace(1)* %out, double addrspace(1)* %in, double %a) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep = getelementptr double addrspace(1)* %in, i32 %tid
+ %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
- %b = load double addrspace(1)* %gep, align 8
+ %b = load double, double addrspace(1)* %gep, align 8
%result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
%result0 = extractvalue { double, i1 } %result, 0
@@ -183,9 +183,9 @@ define void @test_div_scale_f64_scalar_num_1(double addrspace(1)* %out, double a
; SI: s_endpgm
define void @test_div_scale_f64_scalar_num_2(double addrspace(1)* %out, double addrspace(1)* %in, double %a) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep = getelementptr double addrspace(1)* %in, i32 %tid
+ %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
- %b = load double addrspace(1)* %gep, align 8
+ %b = load double, double addrspace(1)* %gep, align 8
%result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
%result0 = extractvalue { double, i1 } %result, 0
@@ -201,9 +201,9 @@ define void @test_div_scale_f64_scalar_num_2(double addrspace(1)* %out, double a
; SI: s_endpgm
define void @test_div_scale_f64_scalar_den_1(double addrspace(1)* %out, double addrspace(1)* %in, double %b) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep = getelementptr double addrspace(1)* %in, i32 %tid
+ %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
- %a = load double addrspace(1)* %gep, align 8
+ %a = load double, double addrspace(1)* %gep, align 8
%result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
%result0 = extractvalue { double, i1 } %result, 0
@@ -219,9 +219,9 @@ define void @test_div_scale_f64_scalar_den_1(double addrspace(1)* %out, double a
; SI: s_endpgm
define void @test_div_scale_f64_scalar_den_2(double addrspace(1)* %out, double addrspace(1)* %in, double %b) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep = getelementptr double addrspace(1)* %in, i32 %tid
+ %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
- %a = load double addrspace(1)* %gep, align 8
+ %a = load double, double addrspace(1)* %gep, align 8
%result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
%result0 = extractvalue { double, i1 } %result, 0
@@ -294,8 +294,8 @@ define void @test_div_scale_f64_all_scalar_2(double addrspace(1)* %out, double %
; SI: s_endpgm
define void @test_div_scale_f32_inline_imm_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %a = load float addrspace(1)* %gep.0, align 4
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %a = load float, float addrspace(1)* %gep.0, align 4
%result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float 1.0, float %a, i1 false) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
@@ -310,8 +310,8 @@ define void @test_div_scale_f32_inline_imm_num(float addrspace(1)* %out, float a
; SI: s_endpgm
define void @test_div_scale_f32_inline_imm_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %a = load float addrspace(1)* %gep.0, align 4
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %a = load float, float addrspace(1)* %gep.0, align 4
%result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float 2.0, i1 false) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
@@ -327,11 +327,11 @@ define void @test_div_scale_f32_inline_imm_den(float addrspace(1)* %out, float a
; SI: s_endpgm
define void @test_div_scale_f32_fabs_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float addrspace(1)* %gep.0, align 4
- %b = load float addrspace(1)* %gep.1, align 4
+ %a = load float, float addrspace(1)* %gep.0, align 4
+ %b = load float, float addrspace(1)* %gep.1, align 4
%a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone
@@ -349,11 +349,11 @@ define void @test_div_scale_f32_fabs_num(float addrspace(1)* %out, float addrspa
; SI: s_endpgm
define void @test_div_scale_f32_fabs_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float addrspace(1)* %gep.0, align 4
- %b = load float addrspace(1)* %gep.1, align 4
+ %a = load float, float addrspace(1)* %gep.0, align 4
+ %b = load float, float addrspace(1)* %gep.1, align 4
%b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone
diff --git a/test/CodeGen/R600/llvm.AMDGPU.flbit.i32.ll b/test/CodeGen/R600/llvm.AMDGPU.flbit.i32.ll
new file mode 100644
index 0000000..20c7af8
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.flbit.i32.ll
@@ -0,0 +1,28 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare i32 @llvm.AMDGPU.flbit.i32(i32) nounwind readnone
+
+; FUNC-LABEL: {{^}}s_flbit:
+; SI: s_load_dword [[VAL:s[0-9]+]],
+; SI: s_flbit_i32 [[SRESULT:s[0-9]+]], [[VAL]]
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; SI: buffer_store_dword [[VRESULT]],
+; SI: s_endpgm
+define void @s_flbit(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
+ %r = call i32 @llvm.AMDGPU.flbit.i32(i32 %val) nounwind readnone
+ store i32 %r, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_flbit:
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI: v_ffbh_i32_e32 [[RESULT:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
+define void @v_flbit(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+ %val = load i32, i32 addrspace(1)* %valptr, align 4
+ %r = call i32 @llvm.AMDGPU.flbit.i32(i32 %val) nounwind readnone
+ store i32 %r, i32 addrspace(1)* %out, align 4
+ ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.fract.f64.ll b/test/CodeGen/R600/llvm.AMDGPU.fract.f64.ll
new file mode 100644
index 0000000..e098dd3
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.fract.f64.ll
@@ -0,0 +1,60 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=FUNC %s
+
+declare double @llvm.fabs.f64(double %Val)
+declare double @llvm.AMDGPU.fract.f64(double) nounwind readnone
+
+; FUNC-LABEL: {{^}}fract_f64:
+; GCN: v_fract_f64_e32 [[FRC:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]]
+; SI: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1
+; SI: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff
+; SI: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]]
+; SI: v_cmp_class_f64_e64 [[COND:s\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]], 3
+; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[LO]], v[[MINLO]], [[COND]]
+; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[HI]], v[[MINHI]], [[COND]]
+; SI: buffer_store_dwordx2 v{{\[}}[[RESLO]]:[[RESHI]]]
+; CI: buffer_store_dwordx2 [[FRC]]
+define void @fract_f64(double addrspace(1)* %out, double addrspace(1)* %src) nounwind {
+ %val = load double, double addrspace(1)* %src, align 4
+ %fract = call double @llvm.AMDGPU.fract.f64(double %val) nounwind readnone
+ store double %fract, double addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fract_f64_neg:
+; GCN: v_fract_f64_e64 [[FRC:v\[[0-9]+:[0-9]+\]]], -v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]]
+; SI: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1
+; SI: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff
+; SI: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]]
+; SI: v_cmp_class_f64_e64 [[COND:s\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]], 3
+; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[LO]], v[[MINLO]], [[COND]]
+; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[HI]], v[[MINHI]], [[COND]]
+; SI: buffer_store_dwordx2 v{{\[}}[[RESLO]]:[[RESHI]]]
+; CI: buffer_store_dwordx2 [[FRC]]
+define void @fract_f64_neg(double addrspace(1)* %out, double addrspace(1)* %src) nounwind {
+ %val = load double, double addrspace(1)* %src, align 4
+ %neg = fsub double 0.0, %val
+ %fract = call double @llvm.AMDGPU.fract.f64(double %neg) nounwind readnone
+ store double %fract, double addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fract_f64_neg_abs:
+; GCN: v_fract_f64_e64 [[FRC:v\[[0-9]+:[0-9]+\]]], -|v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]]|
+; SI: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1
+; SI: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff
+; SI: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]]
+; SI: v_cmp_class_f64_e64 [[COND:s\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]], 3
+; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[LO]], v[[MINLO]], [[COND]]
+; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[HI]], v[[MINHI]], [[COND]]
+; SI: buffer_store_dwordx2 v{{\[}}[[RESLO]]:[[RESHI]]]
+; CI: buffer_store_dwordx2 [[FRC]]
+define void @fract_f64_neg_abs(double addrspace(1)* %out, double addrspace(1)* %src) nounwind {
+ %val = load double, double addrspace(1)* %src, align 4
+ %abs = call double @llvm.fabs.f64(double %val)
+ %neg = fsub double 0.0, %abs
+ %fract = call double @llvm.AMDGPU.fract.f64(double %neg) nounwind readnone
+ store double %fract, double addrspace(1)* %out, align 4
+ ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.fract.ll b/test/CodeGen/R600/llvm.AMDGPU.fract.ll
index 7d15300..7501b4b 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.fract.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.fract.ll
@@ -1,28 +1,65 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+declare float @llvm.fabs.f32(float %Val)
declare float @llvm.AMDGPU.fract.f32(float) nounwind readnone
; Legacy name
declare float @llvm.AMDIL.fraction.f32(float) nounwind readnone
; FUNC-LABEL: {{^}}fract_f32:
-; SI: v_fract_f32
+; CI: v_fract_f32_e32 [[RESULT:v[0-9]+]], [[INPUT:v[0-9]+]]
+; SI: v_floor_f32_e32 [[FLR:v[0-9]+]], [[INPUT:v[0-9]+]]
+; SI: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[FLR]], [[INPUT]]
+; GCN: buffer_store_dword [[RESULT]]
; EG: FRACT
define void @fract_f32(float addrspace(1)* %out, float addrspace(1)* %src) nounwind {
- %val = load float addrspace(1)* %src, align 4
+ %val = load float, float addrspace(1)* %src, align 4
%fract = call float @llvm.AMDGPU.fract.f32(float %val) nounwind readnone
store float %fract, float addrspace(1)* %out, align 4
ret void
}
; FUNC-LABEL: {{^}}fract_f32_legacy_amdil:
-; SI: v_fract_f32
+; CI: v_fract_f32_e32 [[RESULT:v[0-9]+]], [[INPUT:v[0-9]+]]
+; SI: v_floor_f32_e32 [[FLR:v[0-9]+]], [[INPUT:v[0-9]+]]
+; SI: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[FLR]], [[INPUT]]
+; GCN: buffer_store_dword [[RESULT]]
; EG: FRACT
define void @fract_f32_legacy_amdil(float addrspace(1)* %out, float addrspace(1)* %src) nounwind {
- %val = load float addrspace(1)* %src, align 4
+ %val = load float, float addrspace(1)* %src, align 4
%fract = call float @llvm.AMDIL.fraction.f32(float %val) nounwind readnone
store float %fract, float addrspace(1)* %out, align 4
ret void
}
+
+; FUNC-LABEL: {{^}}fract_f32_neg:
+; CI: v_fract_f32_e64 [[RESULT:v[0-9]+]], -[[INPUT:v[0-9]+]]
+; SI: v_floor_f32_e64 [[FLR:v[0-9]+]], -[[INPUT:v[0-9]+]]
+; SI: v_sub_f32_e64 [[RESULT:v[0-9]+]], -[[INPUT]], [[FLR]]
+; GCN: buffer_store_dword [[RESULT]]
+; EG: FRACT
+define void @fract_f32_neg(float addrspace(1)* %out, float addrspace(1)* %src) nounwind {
+ %val = load float, float addrspace(1)* %src, align 4
+ %neg = fsub float 0.0, %val
+ %fract = call float @llvm.AMDGPU.fract.f32(float %neg) nounwind readnone
+ store float %fract, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fract_f32_neg_abs:
+; CI: v_fract_f32_e64 [[RESULT:v[0-9]+]], -|[[INPUT:v[0-9]+]]|
+; SI: v_floor_f32_e64 [[FLR:v[0-9]+]], -|[[INPUT:v[0-9]+]]|
+; SI: v_sub_f32_e64 [[RESULT:v[0-9]+]], -|[[INPUT]]|, [[FLR]]
+; GCN: buffer_store_dword [[RESULT]]
+; EG: FRACT
+define void @fract_f32_neg_abs(float addrspace(1)* %out, float addrspace(1)* %src) nounwind {
+ %val = load float, float addrspace(1)* %src, align 4
+ %abs = call float @llvm.fabs.f32(float %val)
+ %neg = fsub float 0.0, %abs
+ %fract = call float @llvm.AMDGPU.fract.f32(float %neg) nounwind readnone
+ store float %fract, float addrspace(1)* %out, align 4
+ ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.imax.ll b/test/CodeGen/R600/llvm.AMDGPU.imax.ll
index ce7fca0..46662f9 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.imax.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.imax.ll
@@ -5,7 +5,7 @@
; SI: v_max_i32_e32
define void @vector_imax(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 {
main_body:
- %load = load i32 addrspace(1)* %in, align 4
+ %load = load i32, i32 addrspace(1)* %in, align 4
%max = call i32 @llvm.AMDGPU.imax(i32 %p0, i32 %load)
%bc = bitcast i32 %max to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc)
diff --git a/test/CodeGen/R600/llvm.AMDGPU.imin.ll b/test/CodeGen/R600/llvm.AMDGPU.imin.ll
index 15cd38b..34b454e 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.imin.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.imin.ll
@@ -5,7 +5,7 @@
; SI: v_min_i32_e32
define void @vector_imin(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 {
main_body:
- %load = load i32 addrspace(1)* %in, align 4
+ %load = load i32, i32 addrspace(1)* %in, align 4
%min = call i32 @llvm.AMDGPU.imin(i32 %p0, i32 %load)
%bc = bitcast i32 %min to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc)
diff --git a/test/CodeGen/R600/llvm.AMDGPU.tex.ll b/test/CodeGen/R600/llvm.AMDGPU.tex.ll
index aac014b..1020660 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.tex.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.tex.ll
@@ -18,7 +18,7 @@
;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNUN
define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
- %addr = load <4 x float> addrspace(1)* %in
+ %addr = load <4 x float>, <4 x float> addrspace(1)* %in
%res1 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %addr, i32 0, i32 0, i32 1)
%res2 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res1, i32 0, i32 0, i32 2)
%res3 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res2, i32 0, i32 0, i32 3)
diff --git a/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll b/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll
index 5829f73..6b546a7 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll
@@ -10,8 +10,8 @@ declare double @llvm.AMDGPU.trig.preop.f64(double, i32) nounwind readnone
; SI: buffer_store_dwordx2 [[RESULT]],
; SI: s_endpgm
define void @test_trig_preop_f64(double addrspace(1)* %out, double addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
- %a = load double addrspace(1)* %aptr, align 8
- %b = load i32 addrspace(1)* %bptr, align 4
+ %a = load double, double addrspace(1)* %aptr, align 8
+ %b = load i32, i32 addrspace(1)* %bptr, align 4
%result = call double @llvm.AMDGPU.trig.preop.f64(double %a, i32 %b) nounwind readnone
store double %result, double addrspace(1)* %out, align 8
ret void
@@ -23,7 +23,7 @@ define void @test_trig_preop_f64(double addrspace(1)* %out, double addrspace(1)*
; SI: buffer_store_dwordx2 [[RESULT]],
; SI: s_endpgm
define void @test_trig_preop_f64_imm_segment(double addrspace(1)* %out, double addrspace(1)* %aptr) nounwind {
- %a = load double addrspace(1)* %aptr, align 8
+ %a = load double, double addrspace(1)* %aptr, align 8
%result = call double @llvm.AMDGPU.trig.preop.f64(double %a, i32 7) nounwind readnone
store double %result, double addrspace(1)* %out, align 8
ret void
diff --git a/test/CodeGen/R600/llvm.AMDGPU.umad24.ll b/test/CodeGen/R600/llvm.AMDGPU.umad24.ll
index 88613db..77a073b 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.umad24.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.umad24.ll
@@ -25,12 +25,12 @@ define void @test_umad24(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2
; SI: buffer_store_dword [[RESULT]]
define void @commute_umad24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %out.gep = getelementptr i32 addrspace(1)* %out, i32 %tid
- %src0.gep = getelementptr i32 addrspace(1)* %out, i32 %tid
- %src2.gep = getelementptr i32 addrspace(1)* %src0.gep, i32 1
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %src0.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %src2.gep = getelementptr i32, i32 addrspace(1)* %src0.gep, i32 1
- %src0 = load i32 addrspace(1)* %src0.gep, align 4
- %src2 = load i32 addrspace(1)* %src2.gep, align 4
+ %src0 = load i32, i32 addrspace(1)* %src0.gep, align 4
+ %src2 = load i32, i32 addrspace(1)* %src2.gep, align 4
%mad = call i32 @llvm.AMDGPU.umad24(i32 %src0, i32 4, i32 %src2) nounwind readnone
store i32 %mad, i32 addrspace(1)* %out.gep, align 4
ret void
diff --git a/test/CodeGen/R600/llvm.AMDGPU.umax.ll b/test/CodeGen/R600/llvm.AMDGPU.umax.ll
index 4320dfe..a97d103 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.umax.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.umax.ll
@@ -5,7 +5,7 @@
; SI: v_max_u32_e32
define void @vector_umax(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 {
main_body:
- %load = load i32 addrspace(1)* %in, align 4
+ %load = load i32, i32 addrspace(1)* %in, align 4
%max = call i32 @llvm.AMDGPU.umax(i32 %p0, i32 %load)
%bc = bitcast i32 %max to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc)
@@ -28,7 +28,7 @@ entry:
; SI-NOT: and
; SI: buffer_store_short [[RESULT]],
define void @trunc_zext_umax(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
- %tmp5 = load i8 addrspace(1)* %src, align 1
+ %tmp5 = load i8, i8 addrspace(1)* %src, align 1
%tmp2 = zext i8 %tmp5 to i32
%tmp3 = tail call i32 @llvm.AMDGPU.umax(i32 %tmp2, i32 0) nounwind readnone
%tmp4 = trunc i32 %tmp3 to i8
diff --git a/test/CodeGen/R600/llvm.AMDGPU.umin.ll b/test/CodeGen/R600/llvm.AMDGPU.umin.ll
index e4cac33..2acd10e 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.umin.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.umin.ll
@@ -5,7 +5,7 @@
; SI: v_min_u32_e32
define void @vector_umin(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 {
main_body:
- %load = load i32 addrspace(1)* %in, align 4
+ %load = load i32, i32 addrspace(1)* %in, align 4
%min = call i32 @llvm.AMDGPU.umin(i32 %p0, i32 %load)
%bc = bitcast i32 %min to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc)
@@ -28,7 +28,7 @@ entry:
; SI-NOT: and
; SI: buffer_store_short [[RESULT]],
define void @trunc_zext_umin(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
- %tmp5 = load i8 addrspace(1)* %src, align 1
+ %tmp5 = load i8, i8 addrspace(1)* %src, align 1
%tmp2 = zext i8 %tmp5 to i32
%tmp3 = tail call i32 @llvm.AMDGPU.umin(i32 %tmp2, i32 0) nounwind readnone
%tmp4 = trunc i32 %tmp3 to i8
diff --git a/test/CodeGen/R600/llvm.SI.imageload.ll b/test/CodeGen/R600/llvm.SI.imageload.ll
index 35e4591..b67716c 100644
--- a/test/CodeGen/R600/llvm.SI.imageload.ll
+++ b/test/CodeGen/R600/llvm.SI.imageload.ll
@@ -88,16 +88,16 @@ define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
; CHECK: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}
define void @vgpr_coords(float addrspace(2)* addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
- %20 = getelementptr float addrspace(2)* addrspace(2)* %0, i32 0
- %21 = load float addrspace(2)* addrspace(2)* %20, !tbaa !2
- %22 = getelementptr float addrspace(2)* %21, i32 0
- %23 = load float addrspace(2)* %22, !tbaa !2, !invariant.load !1
- %24 = getelementptr float addrspace(2)* %21, i32 1
- %25 = load float addrspace(2)* %24, !tbaa !2, !invariant.load !1
- %26 = getelementptr float addrspace(2)* %21, i32 4
- %27 = load float addrspace(2)* %26, !tbaa !2, !invariant.load !1
- %28 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
- %29 = load <32 x i8> addrspace(2)* %28, !tbaa !2
+ %20 = getelementptr float addrspace(2)*, float addrspace(2)* addrspace(2)* %0, i32 0
+ %21 = load float addrspace(2)*, float addrspace(2)* addrspace(2)* %20, !tbaa !2
+ %22 = getelementptr float, float addrspace(2)* %21, i32 0
+ %23 = load float, float addrspace(2)* %22, !tbaa !2, !invariant.load !1
+ %24 = getelementptr float, float addrspace(2)* %21, i32 1
+ %25 = load float, float addrspace(2)* %24, !tbaa !2, !invariant.load !1
+ %26 = getelementptr float, float addrspace(2)* %21, i32 4
+ %27 = load float, float addrspace(2)* %26, !tbaa !2, !invariant.load !1
+ %28 = getelementptr <32 x i8>, <32 x i8> addrspace(2)* %2, i32 0
+ %29 = load <32 x i8>, <32 x i8> addrspace(2)* %28, !tbaa !2
%30 = bitcast float %27 to i32
%31 = bitcast float %23 to i32
%32 = bitcast float %25 to i32
diff --git a/test/CodeGen/R600/llvm.SI.load.dword.ll b/test/CodeGen/R600/llvm.SI.load.dword.ll
index d2e6a8e..f6c2585 100644
--- a/test/CodeGen/R600/llvm.SI.load.dword.ll
+++ b/test/CodeGen/R600/llvm.SI.load.dword.ll
@@ -16,8 +16,8 @@
define void @main([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <32 x i8>] addrspace(2)* byval %arg2, [2 x <16 x i8>] addrspace(2)* byval %arg3, [17 x <16 x i8>] addrspace(2)* inreg %arg4, [17 x <16 x i8>] addrspace(2)* inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9) #0 {
main_body:
- %tmp = getelementptr [2 x <16 x i8>] addrspace(2)* %arg3, i64 0, i32 1
- %tmp10 = load <16 x i8> addrspace(2)* %tmp, !tbaa !0
+ %tmp = getelementptr [2 x <16 x i8>], [2 x <16 x i8>] addrspace(2)* %arg3, i64 0, i32 1
+ %tmp10 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
%tmp11 = shl i32 %arg6, 2
%tmp12 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0)
%tmp13 = bitcast i32 %tmp12 to float
diff --git a/test/CodeGen/R600/llvm.amdgpu.dp4.ll b/test/CodeGen/R600/llvm.amdgpu.dp4.ll
index 812b6a4..036cd2c 100644
--- a/test/CodeGen/R600/llvm.amdgpu.dp4.ll
+++ b/test/CodeGen/R600/llvm.amdgpu.dp4.ll
@@ -3,8 +3,8 @@
declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) nounwind readnone
define void @test_dp4(float addrspace(1)* %out, <4 x float> addrspace(1)* %a, <4 x float> addrspace(1)* %b) nounwind {
- %src0 = load <4 x float> addrspace(1)* %a, align 16
- %src1 = load <4 x float> addrspace(1)* %b, align 16
+ %src0 = load <4 x float>, <4 x float> addrspace(1)* %a, align 16
+ %src1 = load <4 x float>, <4 x float> addrspace(1)* %b, align 16
%dp4 = call float @llvm.AMDGPU.dp4(<4 x float> %src0, <4 x float> %src1) nounwind readnone
store float %dp4, float addrspace(1)* %out, align 4
ret void
diff --git a/test/CodeGen/R600/llvm.round.f64.ll b/test/CodeGen/R600/llvm.round.f64.ll
index 920dbb3..3d0f57e 100644
--- a/test/CodeGen/R600/llvm.round.f64.ll
+++ b/test/CodeGen/R600/llvm.round.f64.ll
@@ -21,7 +21,7 @@ define void @round_f64(double addrspace(1)* %out, double %x) #0 {
; SI-DAG: v_cmp_eq_i32
; SI-DAG: s_mov_b32 [[BFIMASK:s[0-9]+]], 0x7fffffff
-; SI-DAG: v_cmp_lt_i32_e64
+; SI-DAG: v_cmp_gt_i32_e64
; SI-DAG: v_bfi_b32 [[COPYSIGN:v[0-9]+]], [[BFIMASK]]
; SI-DAG: v_cmp_gt_i32_e64
@@ -31,9 +31,9 @@ define void @round_f64(double addrspace(1)* %out, double %x) #0 {
; SI: s_endpgm
define void @v_round_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep = getelementptr double addrspace(1)* %in, i32 %tid
- %out.gep = getelementptr double addrspace(1)* %out, i32 %tid
- %x = load double addrspace(1)* %gep
+ %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
+ %x = load double, double addrspace(1)* %gep
%result = call double @llvm.round.f64(double %x) #1
store double %result, double addrspace(1)* %out.gep
ret void
diff --git a/test/CodeGen/R600/llvm.round.ll b/test/CodeGen/R600/llvm.round.ll
index 8d1cfb6..f5f124d 100644
--- a/test/CodeGen/R600/llvm.round.ll
+++ b/test/CodeGen/R600/llvm.round.ll
@@ -9,7 +9,7 @@
; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], [[SX]], [[TRUNC]]
; SI: v_mov_b32_e32 [[VX:v[0-9]+]], [[SX]]
; SI: v_bfi_b32 [[COPYSIGN:v[0-9]+]], [[K]], 1.0, [[VX]]
-; SI: v_cmp_ge_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], |[[SUB]]|, 0.5
+; SI: v_cmp_le_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], 0.5, |[[SUB]]|
; SI: v_cndmask_b32_e64 [[SEL:v[0-9]+]], 0, [[VX]], [[CMP]]
; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], [[SEL]], [[TRUNC]]
; SI: buffer_store_dword [[RESULT]]
diff --git a/test/CodeGen/R600/load-i1.ll b/test/CodeGen/R600/load-i1.ll
index 315c0a3..0ca49fd 100644
--- a/test/CodeGen/R600/load-i1.ll
+++ b/test/CodeGen/R600/load-i1.ll
@@ -11,7 +11,7 @@
; EG: VTX_READ_8
; EG: AND_INT
define void @global_copy_i1_to_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1 addrspace(1)* %in
+ %load = load i1, i1 addrspace(1)* %in
store i1 %load, i1 addrspace(1)* %out, align 1
ret void
}
@@ -26,7 +26,7 @@ define void @global_copy_i1_to_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) n
; EG: AND_INT
; EG: LDS_BYTE_WRITE
define void @local_copy_i1_to_i1(i1 addrspace(3)* %out, i1 addrspace(3)* %in) nounwind {
- %load = load i1 addrspace(3)* %in
+ %load = load i1, i1 addrspace(3)* %in
store i1 %load, i1 addrspace(3)* %out, align 1
ret void
}
@@ -40,7 +40,7 @@ define void @local_copy_i1_to_i1(i1 addrspace(3)* %out, i1 addrspace(3)* %in) no
; EG: VTX_READ_8
; EG: AND_INT
define void @constant_copy_i1_to_i1(i1 addrspace(1)* %out, i1 addrspace(2)* %in) nounwind {
- %load = load i1 addrspace(2)* %in
+ %load = load i1, i1 addrspace(2)* %in
store i1 %load, i1 addrspace(1)* %out, align 1
ret void
}
@@ -54,7 +54,7 @@ define void @constant_copy_i1_to_i1(i1 addrspace(1)* %out, i1 addrspace(2)* %in)
; EG: VTX_READ_8
; EG: BFE_INT
define void @global_sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1 addrspace(1)* %in
+ %load = load i1, i1 addrspace(1)* %in
%ext = sext i1 %load to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
ret void
@@ -66,7 +66,7 @@ define void @global_sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)*
; SI: s_endpgm
define void @global_zextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1 addrspace(1)* %in
+ %load = load i1, i1 addrspace(1)* %in
%ext = zext i1 %load to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
ret void
@@ -78,7 +78,7 @@ define void @global_zextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)*
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @global_sextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1 addrspace(1)* %in
+ %load = load i1, i1 addrspace(1)* %in
%ext = sext i1 %load to i64
store i64 %ext, i64 addrspace(1)* %out, align 4
ret void
@@ -90,7 +90,7 @@ define void @global_sextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)*
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @global_zextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1 addrspace(1)* %in
+ %load = load i1, i1 addrspace(1)* %in
%ext = zext i1 %load to i64
store i64 %ext, i64 addrspace(1)* %out, align 4
ret void
diff --git a/test/CodeGen/R600/load-input-fold.ll b/test/CodeGen/R600/load-input-fold.ll
index 265fa9b..1daf0e6 100644
--- a/test/CodeGen/R600/load-input-fold.ll
+++ b/test/CodeGen/R600/load-input-fold.ll
@@ -14,71 +14,71 @@ main_body:
%9 = extractelement <4 x float> %reg3, i32 1
%10 = extractelement <4 x float> %reg3, i32 2
%11 = extractelement <4 x float> %reg3, i32 3
- %12 = load <4 x float> addrspace(8)* null
+ %12 = load <4 x float>, <4 x float> addrspace(8)* null
%13 = extractelement <4 x float> %12, i32 0
%14 = fmul float %0, %13
- %15 = load <4 x float> addrspace(8)* null
+ %15 = load <4 x float>, <4 x float> addrspace(8)* null
%16 = extractelement <4 x float> %15, i32 1
%17 = fmul float %0, %16
- %18 = load <4 x float> addrspace(8)* null
+ %18 = load <4 x float>, <4 x float> addrspace(8)* null
%19 = extractelement <4 x float> %18, i32 2
%20 = fmul float %0, %19
- %21 = load <4 x float> addrspace(8)* null
+ %21 = load <4 x float>, <4 x float> addrspace(8)* null
%22 = extractelement <4 x float> %21, i32 3
%23 = fmul float %0, %22
- %24 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %24 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%25 = extractelement <4 x float> %24, i32 0
%26 = fmul float %1, %25
%27 = fadd float %26, %14
- %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %28 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%29 = extractelement <4 x float> %28, i32 1
%30 = fmul float %1, %29
%31 = fadd float %30, %17
- %32 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %32 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%33 = extractelement <4 x float> %32, i32 2
%34 = fmul float %1, %33
%35 = fadd float %34, %20
- %36 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %36 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%37 = extractelement <4 x float> %36, i32 3
%38 = fmul float %1, %37
%39 = fadd float %38, %23
- %40 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %40 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%41 = extractelement <4 x float> %40, i32 0
%42 = fmul float %2, %41
%43 = fadd float %42, %27
- %44 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %44 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%45 = extractelement <4 x float> %44, i32 1
%46 = fmul float %2, %45
%47 = fadd float %46, %31
- %48 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %48 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%49 = extractelement <4 x float> %48, i32 2
%50 = fmul float %2, %49
%51 = fadd float %50, %35
- %52 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %52 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%53 = extractelement <4 x float> %52, i32 3
%54 = fmul float %2, %53
%55 = fadd float %54, %39
- %56 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %56 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
%57 = extractelement <4 x float> %56, i32 0
%58 = fmul float %3, %57
%59 = fadd float %58, %43
- %60 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %60 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
%61 = extractelement <4 x float> %60, i32 1
%62 = fmul float %3, %61
%63 = fadd float %62, %47
- %64 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %64 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
%65 = extractelement <4 x float> %64, i32 2
%66 = fmul float %3, %65
%67 = fadd float %66, %51
- %68 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %68 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
%69 = extractelement <4 x float> %68, i32 3
%70 = fmul float %3, %69
%71 = fadd float %70, %55
- %72 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %72 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
%73 = extractelement <4 x float> %72, i32 0
- %74 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %74 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
%75 = extractelement <4 x float> %74, i32 1
- %76 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %76 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
%77 = extractelement <4 x float> %76, i32 2
%78 = insertelement <4 x float> undef, float %4, i32 0
%79 = insertelement <4 x float> %78, float %5, i32 1
diff --git a/test/CodeGen/R600/load.ll b/test/CodeGen/R600/load.ll
index b71b7cb..e285831 100644
--- a/test/CodeGen/R600/load.ll
+++ b/test/CodeGen/R600/load.ll
@@ -13,7 +13,7 @@
; SI: buffer_load_ubyte v{{[0-9]+}},
define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
- %1 = load i8 addrspace(1)* %in
+ %1 = load i8, i8 addrspace(1)* %in
%2 = zext i8 %1 to i32
store i32 %2, i32 addrspace(1)* %out
ret void
@@ -28,7 +28,7 @@ define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
; SI: buffer_load_sbyte
define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
- %0 = load i8 addrspace(1)* %in
+ %0 = load i8, i8 addrspace(1)* %in
%1 = sext i8 %0 to i32
store i32 %1, i32 addrspace(1)* %out
ret void
@@ -41,7 +41,7 @@ entry:
; SI: buffer_load_ubyte
define void @load_v2i8(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
entry:
- %0 = load <2 x i8> addrspace(1)* %in
+ %0 = load <2 x i8>, <2 x i8> addrspace(1)* %in
%1 = zext <2 x i8> %0 to <2 x i32>
store <2 x i32> %1, <2 x i32> addrspace(1)* %out
ret void
@@ -62,7 +62,7 @@ entry:
; SI: buffer_load_sbyte
define void @load_v2i8_sext(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
entry:
- %0 = load <2 x i8> addrspace(1)* %in
+ %0 = load <2 x i8>, <2 x i8> addrspace(1)* %in
%1 = sext <2 x i8> %0 to <2 x i32>
store <2 x i32> %1, <2 x i32> addrspace(1)* %out
ret void
@@ -79,7 +79,7 @@ entry:
; SI: buffer_load_ubyte
define void @load_v4i8(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
entry:
- %0 = load <4 x i8> addrspace(1)* %in
+ %0 = load <4 x i8>, <4 x i8> addrspace(1)* %in
%1 = zext <4 x i8> %0 to <4 x i32>
store <4 x i32> %1, <4 x i32> addrspace(1)* %out
ret void
@@ -112,7 +112,7 @@ entry:
; SI: buffer_load_sbyte
define void @load_v4i8_sext(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
entry:
- %0 = load <4 x i8> addrspace(1)* %in
+ %0 = load <4 x i8>, <4 x i8> addrspace(1)* %in
%1 = sext <4 x i8> %0 to <4 x i32>
store <4 x i32> %1, <4 x i32> addrspace(1)* %out
ret void
@@ -124,7 +124,7 @@ entry:
; SI: buffer_load_ushort
define void @load_i16(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
entry:
- %0 = load i16 addrspace(1)* %in
+ %0 = load i16, i16 addrspace(1)* %in
%1 = zext i16 %0 to i32
store i32 %1, i32 addrspace(1)* %out
ret void
@@ -139,7 +139,7 @@ entry:
; SI: buffer_load_sshort
define void @load_i16_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
entry:
- %0 = load i16 addrspace(1)* %in
+ %0 = load i16, i16 addrspace(1)* %in
%1 = sext i16 %0 to i32
store i32 %1, i32 addrspace(1)* %out
ret void
@@ -152,7 +152,7 @@ entry:
; SI: buffer_load_ushort
define void @load_v2i16(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
entry:
- %0 = load <2 x i16> addrspace(1)* %in
+ %0 = load <2 x i16>, <2 x i16> addrspace(1)* %in
%1 = zext <2 x i16> %0 to <2 x i32>
store <2 x i32> %1, <2 x i32> addrspace(1)* %out
ret void
@@ -173,7 +173,7 @@ entry:
; SI: buffer_load_sshort
define void @load_v2i16_sext(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
entry:
- %0 = load <2 x i16> addrspace(1)* %in
+ %0 = load <2 x i16>, <2 x i16> addrspace(1)* %in
%1 = sext <2 x i16> %0 to <2 x i32>
store <2 x i32> %1, <2 x i32> addrspace(1)* %out
ret void
@@ -190,7 +190,7 @@ entry:
; SI: buffer_load_ushort
define void @load_v4i16(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
entry:
- %0 = load <4 x i16> addrspace(1)* %in
+ %0 = load <4 x i16>, <4 x i16> addrspace(1)* %in
%1 = zext <4 x i16> %0 to <4 x i32>
store <4 x i32> %1, <4 x i32> addrspace(1)* %out
ret void
@@ -223,7 +223,7 @@ entry:
; SI: buffer_load_sshort
define void @load_v4i16_sext(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
entry:
- %0 = load <4 x i16> addrspace(1)* %in
+ %0 = load <4 x i16>, <4 x i16> addrspace(1)* %in
%1 = sext <4 x i16> %0 to <4 x i32>
store <4 x i32> %1, <4 x i32> addrspace(1)* %out
ret void
@@ -236,7 +236,7 @@ entry:
; SI: buffer_load_dword v{{[0-9]+}}
define void @load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
- %0 = load i32 addrspace(1)* %in
+ %0 = load i32, i32 addrspace(1)* %in
store i32 %0, i32 addrspace(1)* %out
ret void
}
@@ -248,7 +248,7 @@ entry:
; SI: buffer_load_dword v{{[0-9]+}}
define void @load_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
entry:
- %0 = load float addrspace(1)* %in
+ %0 = load float, float addrspace(1)* %in
store float %0, float addrspace(1)* %out
ret void
}
@@ -260,7 +260,7 @@ entry:
; SI: buffer_load_dwordx2
define void @load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
entry:
- %0 = load <2 x float> addrspace(1)* %in
+ %0 = load <2 x float>, <2 x float> addrspace(1)* %in
store <2 x float> %0, <2 x float> addrspace(1)* %out
ret void
}
@@ -270,7 +270,7 @@ entry:
; SI: buffer_load_dwordx2
define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
entry:
- %0 = load i64 addrspace(1)* %in
+ %0 = load i64, i64 addrspace(1)* %in
store i64 %0, i64 addrspace(1)* %out
ret void
}
@@ -284,7 +284,7 @@ entry:
define void @load_i64_sext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
- %0 = load i32 addrspace(1)* %in
+ %0 = load i32, i32 addrspace(1)* %in
%1 = sext i32 %0 to i64
store i64 %1, i64 addrspace(1)* %out
ret void
@@ -295,7 +295,7 @@ entry:
; R600: MEM_RAT
define void @load_i64_zext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
- %0 = load i32 addrspace(1)* %in
+ %0 = load i32, i32 addrspace(1)* %in
%1 = zext i32 %0 to i64
store i64 %1, i64 addrspace(1)* %out
ret void
@@ -315,7 +315,7 @@ entry:
; SI: buffer_load_dword
define void @load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) {
entry:
- %0 = load <8 x i32> addrspace(1)* %in
+ %0 = load <8 x i32>, <8 x i32> addrspace(1)* %in
store <8 x i32> %0, <8 x i32> addrspace(1)* %out
ret void
}
@@ -344,7 +344,7 @@ entry:
; SI: buffer_load_dword
define void @load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) {
entry:
- %0 = load <16 x i32> addrspace(1)* %in
+ %0 = load <16 x i32>, <16 x i32> addrspace(1)* %in
store <16 x i32> %0, <16 x i32> addrspace(1)* %out
ret void
}
@@ -363,7 +363,7 @@ entry:
; SI: buffer_load_sbyte v{{[0-9]+}},
define void @load_const_i8_sext(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
- %0 = load i8 addrspace(2)* %in
+ %0 = load i8, i8 addrspace(2)* %in
%1 = sext i8 %0 to i32
store i32 %1, i32 addrspace(1)* %out
ret void
@@ -375,7 +375,7 @@ entry:
; SI: buffer_load_ubyte v{{[0-9]+}},
define void @load_const_i8_aligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
- %0 = load i8 addrspace(2)* %in
+ %0 = load i8, i8 addrspace(2)* %in
%1 = zext i8 %0 to i32
store i32 %1, i32 addrspace(1)* %out
ret void
@@ -387,8 +387,8 @@ entry:
; SI: buffer_load_ubyte v{{[0-9]+}},
define void @load_const_i8_unaligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
- %0 = getelementptr i8 addrspace(2)* %in, i32 1
- %1 = load i8 addrspace(2)* %0
+ %0 = getelementptr i8, i8 addrspace(2)* %in, i32 1
+ %1 = load i8, i8 addrspace(2)* %0
%2 = zext i8 %1 to i32
store i32 %2, i32 addrspace(1)* %out
ret void
@@ -404,7 +404,7 @@ entry:
; SI: buffer_load_sshort
define void @load_const_i16_sext(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
- %0 = load i16 addrspace(2)* %in
+ %0 = load i16, i16 addrspace(2)* %in
%1 = sext i16 %0 to i32
store i32 %1, i32 addrspace(1)* %out
ret void
@@ -416,7 +416,7 @@ entry:
; SI: buffer_load_ushort
define void @load_const_i16_aligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
- %0 = load i16 addrspace(2)* %in
+ %0 = load i16, i16 addrspace(2)* %in
%1 = zext i16 %0 to i32
store i32 %1, i32 addrspace(1)* %out
ret void
@@ -428,8 +428,8 @@ entry:
; SI: buffer_load_ushort
define void @load_const_i16_unaligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
- %0 = getelementptr i16 addrspace(2)* %in, i32 1
- %1 = load i16 addrspace(2)* %0
+ %0 = getelementptr i16, i16 addrspace(2)* %in, i32 1
+ %1 = load i16, i16 addrspace(2)* %0
%2 = zext i16 %1 to i32
store i32 %2, i32 addrspace(1)* %out
ret void
@@ -442,7 +442,7 @@ entry:
; SI: s_load_dword s{{[0-9]+}}
define void @load_const_addrspace_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
- %0 = load i32 addrspace(2)* %in
+ %0 = load i32, i32 addrspace(2)* %in
store i32 %0, i32 addrspace(1)* %out
ret void
}
@@ -453,7 +453,7 @@ entry:
; SI: s_load_dword s{{[0-9]+}}
define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) {
- %1 = load float addrspace(2)* %in
+ %1 = load float, float addrspace(2)* %in
store float %1, float addrspace(1)* %out
ret void
}
@@ -469,7 +469,7 @@ define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(
; SI: s_mov_b32 m0
; SI: ds_read_u8
define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
- %1 = load i8 addrspace(3)* %in
+ %1 = load i8, i8 addrspace(3)* %in
%2 = zext i8 %1 to i32
store i32 %2, i32 addrspace(1)* %out
ret void
@@ -483,7 +483,7 @@ define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
; SI: ds_read_i8
define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
entry:
- %0 = load i8 addrspace(3)* %in
+ %0 = load i8, i8 addrspace(3)* %in
%1 = sext i8 %0 to i32
store i32 %1, i32 addrspace(1)* %out
ret void
@@ -498,7 +498,7 @@ entry:
; SI: ds_read_u8
define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
entry:
- %0 = load <2 x i8> addrspace(3)* %in
+ %0 = load <2 x i8>, <2 x i8> addrspace(3)* %in
%1 = zext <2 x i8> %0 to <2 x i32>
store <2 x i32> %1, <2 x i32> addrspace(1)* %out
ret void
@@ -515,7 +515,7 @@ entry:
; SI: ds_read_i8
define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
entry:
- %0 = load <2 x i8> addrspace(3)* %in
+ %0 = load <2 x i8>, <2 x i8> addrspace(3)* %in
%1 = sext <2 x i8> %0 to <2 x i32>
store <2 x i32> %1, <2 x i32> addrspace(1)* %out
ret void
@@ -534,7 +534,7 @@ entry:
; SI: ds_read_u8
define void @load_v4i8_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
entry:
- %0 = load <4 x i8> addrspace(3)* %in
+ %0 = load <4 x i8>, <4 x i8> addrspace(3)* %in
%1 = zext <4 x i8> %0 to <4 x i32>
store <4 x i32> %1, <4 x i32> addrspace(1)* %out
ret void
@@ -557,7 +557,7 @@ entry:
; SI: ds_read_i8
define void @load_v4i8_sext_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
entry:
- %0 = load <4 x i8> addrspace(3)* %in
+ %0 = load <4 x i8>, <4 x i8> addrspace(3)* %in
%1 = sext <4 x i8> %0 to <4 x i32>
store <4 x i32> %1, <4 x i32> addrspace(1)* %out
ret void
@@ -571,7 +571,7 @@ entry:
; SI: ds_read_u16
define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
entry:
- %0 = load i16 addrspace(3)* %in
+ %0 = load i16, i16 addrspace(3)* %in
%1 = zext i16 %0 to i32
store i32 %1, i32 addrspace(1)* %out
ret void
@@ -585,7 +585,7 @@ entry:
; SI: ds_read_i16
define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
entry:
- %0 = load i16 addrspace(3)* %in
+ %0 = load i16, i16 addrspace(3)* %in
%1 = sext i16 %0 to i32
store i32 %1, i32 addrspace(1)* %out
ret void
@@ -600,7 +600,7 @@ entry:
; SI: ds_read_u16
define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
entry:
- %0 = load <2 x i16> addrspace(3)* %in
+ %0 = load <2 x i16>, <2 x i16> addrspace(3)* %in
%1 = zext <2 x i16> %0 to <2 x i32>
store <2 x i32> %1, <2 x i32> addrspace(1)* %out
ret void
@@ -617,7 +617,7 @@ entry:
; SI: ds_read_i16
define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
entry:
- %0 = load <2 x i16> addrspace(3)* %in
+ %0 = load <2 x i16>, <2 x i16> addrspace(3)* %in
%1 = sext <2 x i16> %0 to <2 x i32>
store <2 x i32> %1, <2 x i32> addrspace(1)* %out
ret void
@@ -636,7 +636,7 @@ entry:
; SI: ds_read_u16
define void @load_v4i16_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
entry:
- %0 = load <4 x i16> addrspace(3)* %in
+ %0 = load <4 x i16>, <4 x i16> addrspace(3)* %in
%1 = zext <4 x i16> %0 to <4 x i32>
store <4 x i32> %1, <4 x i32> addrspace(1)* %out
ret void
@@ -659,7 +659,7 @@ entry:
; SI: ds_read_i16
define void @load_v4i16_sext_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
entry:
- %0 = load <4 x i16> addrspace(3)* %in
+ %0 = load <4 x i16>, <4 x i16> addrspace(3)* %in
%1 = sext <4 x i16> %0 to <4 x i32>
store <4 x i32> %1, <4 x i32> addrspace(1)* %out
ret void
@@ -673,7 +673,7 @@ entry:
; SI: ds_read_b32
define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
- %0 = load i32 addrspace(3)* %in
+ %0 = load i32, i32 addrspace(3)* %in
store i32 %0, i32 addrspace(1)* %out
ret void
}
@@ -685,7 +685,7 @@ entry:
; SI: ds_read_b32
define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
entry:
- %0 = load float addrspace(3)* %in
+ %0 = load float, float addrspace(3)* %in
store float %0, float addrspace(1)* %out
ret void
}
@@ -698,7 +698,7 @@ entry:
; SI: ds_read_b64
define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
entry:
- %0 = load <2 x float> addrspace(3)* %in
+ %0 = load <2 x float>, <2 x float> addrspace(3)* %in
store <2 x float> %0, <2 x float> addrspace(1)* %out
ret void
}
@@ -711,10 +711,10 @@ entry:
; SI-DAG: ds_read_b32
; SI-DAG: ds_read2_b32
define void @load_i32_v2i32_local(<2 x i32> addrspace(1)* %out, i32 addrspace(3)* %in) {
- %scalar = load i32 addrspace(3)* %in
+ %scalar = load i32, i32 addrspace(3)* %in
%tmp0 = bitcast i32 addrspace(3)* %in to <2 x i32> addrspace(3)*
- %vec_ptr = getelementptr <2 x i32> addrspace(3)* %tmp0, i32 2
- %vec0 = load <2 x i32> addrspace(3)* %vec_ptr, align 4
+ %vec_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(3)* %tmp0, i32 2
+ %vec0 = load <2 x i32>, <2 x i32> addrspace(3)* %vec_ptr, align 4
%vec1 = insertelement <2 x i32> <i32 0, i32 0>, i32 %scalar, i32 0
%vec = add <2 x i32> %vec0, %vec1
store <2 x i32> %vec, <2 x i32> addrspace(1)* %out
@@ -732,9 +732,9 @@ define void @load_i32_v2i32_local(<2 x i32> addrspace(1)* %out, i32 addrspace(3)
; R600: LDS_READ_RET
define void @load_i32_local_const_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
- %tmp0 = getelementptr [512 x i32] addrspace(3)* @lds, i32 0, i32 1
- %tmp1 = load i32 addrspace(3)* %tmp0
- %tmp2 = getelementptr i32 addrspace(1)* %out, i32 1
+ %tmp0 = getelementptr [512 x i32], [512 x i32] addrspace(3)* @lds, i32 0, i32 1
+ %tmp1 = load i32, i32 addrspace(3)* %tmp0
+ %tmp2 = getelementptr i32, i32 addrspace(1)* %out, i32 1
store i32 %tmp1, i32 addrspace(1)* %tmp2
ret void
}
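
Every hunk in this patch applies the same mechanical rewrite: load and getelementptr now spell the pointee type out as an explicit first operand instead of deriving it from the pointer operand, groundwork for the move to opaque pointer types. A minimal sketch of the pattern, with illustrative names that are not taken from any particular test:

  ; before: the result/element type is implied by the pointer type
  %p   = getelementptr i32 addrspace(1)* %base, i32 1
  %val = load i32 addrspace(1)* %p

  ; after: the pointee type is named explicitly, so the instruction stays
  ; parseable even if the pointer type stops carrying one
  %p   = getelementptr i32, i32 addrspace(1)* %base, i32 1
  %val = load i32, i32 addrspace(1)* %p
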
diff --git a/test/CodeGen/R600/load.vec.ll b/test/CodeGen/R600/load.vec.ll
index 346d8dc..02f883c 100644
--- a/test/CodeGen/R600/load.vec.ll
+++ b/test/CodeGen/R600/load.vec.ll
@@ -8,7 +8,7 @@
; SI: {{^}}load_v2i32:
; SI: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}]
define void @load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %a = load <2 x i32> addrspace(1) * %in
+ %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
store <2 x i32> %a, <2 x i32> addrspace(1)* %out
ret void
}
@@ -19,7 +19,7 @@ define void @load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %i
; SI: {{^}}load_v4i32:
; SI: buffer_load_dwordx4 v[{{[0-9]+:[0-9]+}}]
define void @load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %a = load <4 x i32> addrspace(1) * %in
+ %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
store <4 x i32> %a, <4 x i32> addrspace(1)* %out
ret void
}
diff --git a/test/CodeGen/R600/load64.ll b/test/CodeGen/R600/load64.ll
index cb3d654..74beabd 100644
--- a/test/CodeGen/R600/load64.ll
+++ b/test/CodeGen/R600/load64.ll
@@ -6,7 +6,7 @@
; CHECK: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}]
; CHECK: buffer_store_dwordx2 v[{{[0-9]+:[0-9]+}}]
define void @load_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
- %1 = load double addrspace(1)* %in
+ %1 = load double, double addrspace(1)* %in
store double %1, double addrspace(1)* %out
ret void
}
@@ -15,7 +15,7 @@ define void @load_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
; CHECK: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}]
; CHECK: buffer_store_dwordx2 v[{{[0-9]+:[0-9]+}}]
define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %tmp = load i64 addrspace(1)* %in
+ %tmp = load i64, i64 addrspace(1)* %in
store i64 %tmp, i64 addrspace(1)* %out, align 8
ret void
}
@@ -25,7 +25,7 @@ define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
; CHECK: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}]
; CHECK: buffer_store_dwordx2 v[{{[0-9]+:[0-9]+}}]
define void @load_const_addrspace_f64(double addrspace(1)* %out, double addrspace(2)* %in) {
- %1 = load double addrspace(2)* %in
+ %1 = load double, double addrspace(2)* %in
store double %1, double addrspace(1)* %out
ret void
}
diff --git a/test/CodeGen/R600/local-64.ll b/test/CodeGen/R600/local-64.ll
index 4b45169..33f3159 100644
--- a/test/CodeGen/R600/local-64.ll
+++ b/test/CodeGen/R600/local-64.ll
@@ -6,8 +6,8 @@
; BOTH: ds_read_b32 [[REG:v[0-9]+]], v{{[0-9]+}} offset:28
; BOTH: buffer_store_dword [[REG]],
define void @local_i32_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind {
- %gep = getelementptr i32 addrspace(3)* %in, i32 7
- %val = load i32 addrspace(3)* %gep, align 4
+ %gep = getelementptr i32, i32 addrspace(3)* %in, i32 7
+ %val = load i32, i32 addrspace(3)* %gep, align 4
store i32 %val, i32 addrspace(1)* %out, align 4
ret void
}
@@ -16,7 +16,7 @@ define void @local_i32_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounw
; BOTH: ds_read_b32 [[REG:v[0-9]+]], v{{[0-9]+}}
; BOTH: buffer_store_dword [[REG]],
define void @local_i32_load_0_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind {
- %val = load i32 addrspace(3)* %in, align 4
+ %val = load i32, i32 addrspace(3)* %in, align 4
store i32 %val, i32 addrspace(1)* %out, align 4
ret void
}
@@ -26,8 +26,8 @@ define void @local_i32_load_0_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %
; BOTH: ds_read_u8 [[REG:v[0-9]+]], {{v[0-9]+}} offset:65535
; BOTH: buffer_store_byte [[REG]],
define void @local_i8_load_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind {
- %gep = getelementptr i8 addrspace(3)* %in, i32 65535
- %val = load i8 addrspace(3)* %gep, align 4
+ %gep = getelementptr i8, i8 addrspace(3)* %in, i32 65535
+ %val = load i8, i8 addrspace(3)* %gep, align 4
store i8 %val, i8 addrspace(1)* %out, align 4
ret void
}
@@ -41,8 +41,8 @@ define void @local_i8_load_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)
; BOTH: ds_read_u8 [[REG:v[0-9]+]], [[VREGADDR]]
; BOTH: buffer_store_byte [[REG]],
define void @local_i8_load_over_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind {
- %gep = getelementptr i8 addrspace(3)* %in, i32 65536
- %val = load i8 addrspace(3)* %gep, align 4
+ %gep = getelementptr i8, i8 addrspace(3)* %in, i32 65536
+ %val = load i8, i8 addrspace(3)* %gep, align 4
store i8 %val, i8 addrspace(1)* %out, align 4
ret void
}
@@ -52,8 +52,8 @@ define void @local_i8_load_over_i16_max_offset(i8 addrspace(1)* %out, i8 addrspa
; BOTH: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} offset:56
; BOTH: buffer_store_dwordx2 [[REG]],
define void @local_i64_load(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind {
- %gep = getelementptr i64 addrspace(3)* %in, i32 7
- %val = load i64 addrspace(3)* %gep, align 8
+ %gep = getelementptr i64, i64 addrspace(3)* %in, i32 7
+ %val = load i64, i64 addrspace(3)* %gep, align 8
store i64 %val, i64 addrspace(1)* %out, align 8
ret void
}
@@ -62,7 +62,7 @@ define void @local_i64_load(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounw
; BOTH: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}
; BOTH: buffer_store_dwordx2 [[REG]],
define void @local_i64_load_0_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind {
- %val = load i64 addrspace(3)* %in, align 8
+ %val = load i64, i64 addrspace(3)* %in, align 8
store i64 %val, i64 addrspace(1)* %out, align 8
ret void
}
@@ -72,8 +72,8 @@ define void @local_i64_load_0_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %
; BOTH: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} offset:56
; BOTH: buffer_store_dwordx2 [[REG]],
define void @local_f64_load(double addrspace(1)* %out, double addrspace(3)* %in) nounwind {
- %gep = getelementptr double addrspace(3)* %in, i32 7
- %val = load double addrspace(3)* %gep, align 8
+ %gep = getelementptr double, double addrspace(3)* %in, i32 7
+ %val = load double, double addrspace(3)* %gep, align 8
store double %val, double addrspace(1)* %out, align 8
ret void
}
@@ -82,7 +82,7 @@ define void @local_f64_load(double addrspace(1)* %out, double addrspace(3)* %in)
; BOTH: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}
; BOTH: buffer_store_dwordx2 [[REG]],
define void @local_f64_load_0_offset(double addrspace(1)* %out, double addrspace(3)* %in) nounwind {
- %val = load double addrspace(3)* %in, align 8
+ %val = load double, double addrspace(3)* %in, align 8
store double %val, double addrspace(1)* %out, align 8
ret void
}
@@ -91,7 +91,7 @@ define void @local_f64_load_0_offset(double addrspace(1)* %out, double addrspace
; BOTH-NOT: ADD
; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:56
define void @local_i64_store(i64 addrspace(3)* %out) nounwind {
- %gep = getelementptr i64 addrspace(3)* %out, i32 7
+ %gep = getelementptr i64, i64 addrspace(3)* %out, i32 7
store i64 5678, i64 addrspace(3)* %gep, align 8
ret void
}
@@ -108,7 +108,7 @@ define void @local_i64_store_0_offset(i64 addrspace(3)* %out) nounwind {
; BOTH-NOT: ADD
; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:56
define void @local_f64_store(double addrspace(3)* %out) nounwind {
- %gep = getelementptr double addrspace(3)* %out, i32 7
+ %gep = getelementptr double, double addrspace(3)* %out, i32 7
store double 16.0, double addrspace(3)* %gep, align 8
ret void
}
@@ -126,7 +126,7 @@ define void @local_f64_store_0_offset(double addrspace(3)* %out) nounwind {
; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:120
; BOTH: s_endpgm
define void @local_v2i64_store(<2 x i64> addrspace(3)* %out) nounwind {
- %gep = getelementptr <2 x i64> addrspace(3)* %out, i32 7
+ %gep = getelementptr <2 x i64>, <2 x i64> addrspace(3)* %out, i32 7
store <2 x i64> <i64 5678, i64 5678>, <2 x i64> addrspace(3)* %gep, align 16
ret void
}
@@ -149,7 +149,7 @@ define void @local_v2i64_store_0_offset(<2 x i64> addrspace(3)* %out) nounwind {
; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:248
; BOTH: s_endpgm
define void @local_v4i64_store(<4 x i64> addrspace(3)* %out) nounwind {
- %gep = getelementptr <4 x i64> addrspace(3)* %out, i32 7
+ %gep = getelementptr <4 x i64>, <4 x i64> addrspace(3)* %out, i32 7
store <4 x i64> <i64 5678, i64 5678, i64 5678, i64 5678>, <4 x i64> addrspace(3)* %gep, align 16
ret void
}
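
The offset:N values checked in local-64.ll fall out of simple byte arithmetic: a constant GEP index is scaled by the element size and, when the result fits the 16-bit DS offset field, folded into the instruction instead of being emitted as a separate address add. A sketch under that reading (loose fragments, not a complete function):

  %gep.a = getelementptr i32, i32 addrspace(3)* %in, i32 7    ; 7 * 4 = 28
  ; expected: ds_read_b32 {{.*}} offset:28
  %gep.b = getelementptr i64, i64 addrspace(3)* %in, i32 7    ; 7 * 8 = 56
  ; expected: ds_read_b64 {{.*}} offset:56
  ; an i8 index of 65535 still fits (offset:65535); 65536 does not, which is
  ; why that test expects the address in a VGPR with no offset field.
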
diff --git a/test/CodeGen/R600/local-atomics.ll b/test/CodeGen/R600/local-atomics.ll
index 29921b6..2aaf977 100644
--- a/test/CodeGen/R600/local-atomics.ll
+++ b/test/CodeGen/R600/local-atomics.ll
@@ -22,7 +22,7 @@ define void @lds_atomic_xchg_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %
; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -48,7 +48,7 @@ define void @lds_atomic_add_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -62,7 +62,7 @@ define void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
define void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
%sub = sub i32 %a, %b
%add = add i32 %sub, 4
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add
%result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -85,7 +85,7 @@ define void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16
; GCN: s_endpgm
define void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -99,7 +99,7 @@ define void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
define void @lds_atomic_inc_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
%sub = sub i32 %a, %b
%add = add i32 %sub, 4
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add
%result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -120,7 +120,7 @@ define void @lds_atomic_sub_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -143,7 +143,7 @@ define void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16
; GCN: s_endpgm
define void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -164,7 +164,7 @@ define void @lds_atomic_and_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
; GCN: ds_and_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_and_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -185,7 +185,7 @@ define void @lds_atomic_or_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %pt
; GCN: ds_or_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_or_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -206,7 +206,7 @@ define void @lds_atomic_xor_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
; GCN: ds_xor_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_xor_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -235,7 +235,7 @@ define void @lds_atomic_min_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
; GCN: ds_min_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_min_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -256,7 +256,7 @@ define void @lds_atomic_max_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
; GCN: ds_max_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_max_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -277,7 +277,7 @@ define void @lds_atomic_umin_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %
; GCN: ds_min_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_umin_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -298,7 +298,7 @@ define void @lds_atomic_umax_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %
; GCN: ds_max_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_umax_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -319,7 +319,7 @@ define void @lds_atomic_xchg_noret_i32(i32 addrspace(3)* %ptr) nounwind {
; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_xchg_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst
ret void
}
@@ -340,7 +340,7 @@ define void @lds_atomic_add_noret_i32(i32 addrspace(3)* %ptr) nounwind {
; GCN: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_add_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
ret void
}
@@ -352,7 +352,7 @@ define void @lds_atomic_add_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
define void @lds_atomic_add_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
%sub = sub i32 %a, %b
%add = add i32 %sub, 4
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add
%result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
ret void
}
@@ -371,7 +371,7 @@ define void @lds_atomic_inc_noret_i32(i32 addrspace(3)* %ptr) nounwind {
; GCN: ds_inc_u32 v{{[0-9]+}}, [[NEGONE]] offset:16
; GCN: s_endpgm
define void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
ret void
}
@@ -383,7 +383,7 @@ define void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
define void @lds_atomic_inc_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
%sub = sub i32 %a, %b
%add = add i32 %sub, 4
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add
%result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
ret void
}
@@ -400,7 +400,7 @@ define void @lds_atomic_sub_noret_i32(i32 addrspace(3)* %ptr) nounwind {
; GCN: ds_sub_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_sub_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst
ret void
}
@@ -419,7 +419,7 @@ define void @lds_atomic_dec_noret_i32(i32 addrspace(3)* %ptr) nounwind {
; GCN: ds_dec_u32 v{{[0-9]+}}, [[NEGONE]] offset:16
; GCN: s_endpgm
define void @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst
ret void
}
@@ -436,7 +436,7 @@ define void @lds_atomic_and_noret_i32(i32 addrspace(3)* %ptr) nounwind {
; GCN: ds_and_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_and_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst
ret void
}
@@ -453,7 +453,7 @@ define void @lds_atomic_or_noret_i32(i32 addrspace(3)* %ptr) nounwind {
; GCN: ds_or_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_or_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst
ret void
}
@@ -470,7 +470,7 @@ define void @lds_atomic_xor_noret_i32(i32 addrspace(3)* %ptr) nounwind {
; GCN: ds_xor_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_xor_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst
ret void
}
@@ -494,7 +494,7 @@ define void @lds_atomic_min_noret_i32(i32 addrspace(3)* %ptr) nounwind {
; GCN: ds_min_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_min_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst
ret void
}
@@ -511,7 +511,7 @@ define void @lds_atomic_max_noret_i32(i32 addrspace(3)* %ptr) nounwind {
; GCN: ds_max_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_max_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst
ret void
}
@@ -528,7 +528,7 @@ define void @lds_atomic_umin_noret_i32(i32 addrspace(3)* %ptr) nounwind {
; GCN: ds_min_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_umin_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst
ret void
}
@@ -545,7 +545,7 @@ define void @lds_atomic_umax_noret_i32(i32 addrspace(3)* %ptr) nounwind {
; GCN: ds_max_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define void @lds_atomic_umax_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst
ret void
}
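
The atomics tests rely on the same folding: every _offset variant here indexes by 4, i.e. byte offset 16 for i32 (and offset 32 in the i64 file that follows). The bad_si_offset variants instead compute the index from kernel arguments, which, as the names suggest, SI cannot fold into the offset field. Compressed sketch with hypothetical registers:

  %gep    = getelementptr i32, i32 addrspace(3)* %ptr, i32 4    ; 4 * 4 = 16
  %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
  ; expected: ds_add_rtn_u32 v2, v0, v1 offset:16
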
diff --git a/test/CodeGen/R600/local-atomics64.ll b/test/CodeGen/R600/local-atomics64.ll
index 50d039f..0ffa5e7 100644
--- a/test/CodeGen/R600/local-atomics64.ll
+++ b/test/CodeGen/R600/local-atomics64.ll
@@ -14,7 +14,7 @@ define void @lds_atomic_xchg_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %
; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_xchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw xchg i64 addrspace(3)* %gep, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
@@ -39,7 +39,7 @@ define void @lds_atomic_add_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
; GCN: buffer_store_dwordx2 [[RESULT]],
; GCN: s_endpgm
define void @lds_atomic_add_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i64 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i64 4
%result = atomicrmw add i64 addrspace(3)* %gep, i64 9 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
@@ -61,7 +61,7 @@ define void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
; GCN: ds_inc_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw add i64 addrspace(3)* %gep, i64 1 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
@@ -80,7 +80,7 @@ define void @lds_atomic_sub_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
; GCN: ds_sub_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_sub_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i64 addrspace(3)* %gep, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
@@ -102,7 +102,7 @@ define void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
; GCN: ds_dec_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i64 addrspace(3)* %gep, i64 1 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
@@ -121,7 +121,7 @@ define void @lds_atomic_and_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
; GCN: ds_and_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_and_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw and i64 addrspace(3)* %gep, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
@@ -140,7 +140,7 @@ define void @lds_atomic_or_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %pt
; GCN: ds_or_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_or_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw or i64 addrspace(3)* %gep, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
@@ -159,7 +159,7 @@ define void @lds_atomic_xor_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
; GCN: ds_xor_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_xor_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw xor i64 addrspace(3)* %gep, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
@@ -186,7 +186,7 @@ define void @lds_atomic_min_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
; GCN: ds_min_rtn_i64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_min_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw min i64 addrspace(3)* %gep, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
@@ -205,7 +205,7 @@ define void @lds_atomic_max_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
; GCN: ds_max_rtn_i64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_max_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw max i64 addrspace(3)* %gep, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
@@ -224,7 +224,7 @@ define void @lds_atomic_umin_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %
; GCN: ds_min_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_umin_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw umin i64 addrspace(3)* %gep, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
@@ -243,7 +243,7 @@ define void @lds_atomic_umax_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %
; GCN: ds_max_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_umax_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw umax i64 addrspace(3)* %gep, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
@@ -261,7 +261,7 @@ define void @lds_atomic_xchg_noret_i64(i64 addrspace(3)* %ptr) nounwind {
; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_xchg_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw xchg i64 addrspace(3)* %gep, i64 4 seq_cst
ret void
}
@@ -283,7 +283,7 @@ define void @lds_atomic_add_noret_i64(i64 addrspace(3)* %ptr) nounwind {
; GCN: ds_add_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32
; GCN: s_endpgm
define void @lds_atomic_add_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i64 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i64 4
%result = atomicrmw add i64 addrspace(3)* %gep, i64 9 seq_cst
ret void
}
@@ -302,7 +302,7 @@ define void @lds_atomic_inc_noret_i64(i64 addrspace(3)* %ptr) nounwind {
; GCN: ds_inc_u64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw add i64 addrspace(3)* %gep, i64 1 seq_cst
ret void
}
@@ -319,7 +319,7 @@ define void @lds_atomic_sub_noret_i64(i64 addrspace(3)* %ptr) nounwind {
; GCN: ds_sub_u64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_sub_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i64 addrspace(3)* %gep, i64 4 seq_cst
ret void
}
@@ -338,7 +338,7 @@ define void @lds_atomic_dec_noret_i64(i64 addrspace(3)* %ptr) nounwind {
; GCN: ds_dec_u64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i64 addrspace(3)* %gep, i64 1 seq_cst
ret void
}
@@ -355,7 +355,7 @@ define void @lds_atomic_and_noret_i64(i64 addrspace(3)* %ptr) nounwind {
; GCN: ds_and_b64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_and_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw and i64 addrspace(3)* %gep, i64 4 seq_cst
ret void
}
@@ -372,7 +372,7 @@ define void @lds_atomic_or_noret_i64(i64 addrspace(3)* %ptr) nounwind {
; GCN: ds_or_b64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_or_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw or i64 addrspace(3)* %gep, i64 4 seq_cst
ret void
}
@@ -389,7 +389,7 @@ define void @lds_atomic_xor_noret_i64(i64 addrspace(3)* %ptr) nounwind {
; GCN: ds_xor_b64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_xor_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw xor i64 addrspace(3)* %gep, i64 4 seq_cst
ret void
}
@@ -413,7 +413,7 @@ define void @lds_atomic_min_noret_i64(i64 addrspace(3)* %ptr) nounwind {
; GCN: ds_min_i64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_min_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw min i64 addrspace(3)* %gep, i64 4 seq_cst
ret void
}
@@ -430,7 +430,7 @@ define void @lds_atomic_max_noret_i64(i64 addrspace(3)* %ptr) nounwind {
; GCN: ds_max_i64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_max_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw max i64 addrspace(3)* %gep, i64 4 seq_cst
ret void
}
@@ -447,7 +447,7 @@ define void @lds_atomic_umin_noret_i64(i64 addrspace(3)* %ptr) nounwind {
; GCN: ds_min_u64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_umin_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw umin i64 addrspace(3)* %gep, i64 4 seq_cst
ret void
}
@@ -464,7 +464,7 @@ define void @lds_atomic_umax_noret_i64(i64 addrspace(3)* %ptr) nounwind {
; GCN: ds_max_u64 {{.*}} offset:32
; GCN: s_endpgm
define void @lds_atomic_umax_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw umax i64 addrspace(3)* %gep, i64 4 seq_cst
ret void
}
diff --git a/test/CodeGen/R600/local-memory-two-objects.ll b/test/CodeGen/R600/local-memory-two-objects.ll
index 3d90ab1..caa4b19 100644
--- a/test/CodeGen/R600/local-memory-two-objects.ll
+++ b/test/CodeGen/R600/local-memory-two-objects.ll
@@ -5,7 +5,6 @@
@local_memory_two_objects.local_mem0 = internal unnamed_addr addrspace(3) global [4 x i32] undef, align 4
@local_memory_two_objects.local_mem1 = internal unnamed_addr addrspace(3) global [4 x i32] undef, align 4
-; EG: {{^}}local_memory_two_objects:
; Check that the LDS size is emitted correctly
; EG: .long 166120
@@ -13,6 +12,8 @@
; GCN: .long 47180
; GCN-NEXT: .long 38792
+; EG: {{^}}local_memory_two_objects:
+
; We would like to check that the lds writes are using different
; addresses, but due to variations in the scheduler, we can't do
; this consistently on evergreen GPUs.
@@ -37,21 +38,21 @@
define void @local_memory_two_objects(i32 addrspace(1)* %out) {
entry:
%x.i = call i32 @llvm.r600.read.tidig.x() #0
- %arrayidx = getelementptr inbounds [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem0, i32 0, i32 %x.i
+ %arrayidx = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem0, i32 0, i32 %x.i
store i32 %x.i, i32 addrspace(3)* %arrayidx, align 4
%mul = shl nsw i32 %x.i, 1
- %arrayidx1 = getelementptr inbounds [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem1, i32 0, i32 %x.i
+ %arrayidx1 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem1, i32 0, i32 %x.i
store i32 %mul, i32 addrspace(3)* %arrayidx1, align 4
%sub = sub nsw i32 3, %x.i
call void @llvm.AMDGPU.barrier.local()
- %arrayidx2 = getelementptr inbounds [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem0, i32 0, i32 %sub
- %0 = load i32 addrspace(3)* %arrayidx2, align 4
- %arrayidx3 = getelementptr inbounds i32 addrspace(1)* %out, i32 %x.i
+ %arrayidx2 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem0, i32 0, i32 %sub
+ %0 = load i32, i32 addrspace(3)* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %x.i
store i32 %0, i32 addrspace(1)* %arrayidx3, align 4
- %arrayidx4 = getelementptr inbounds [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem1, i32 0, i32 %sub
- %1 = load i32 addrspace(3)* %arrayidx4, align 4
+ %arrayidx4 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem1, i32 0, i32 %sub
+ %1 = load i32, i32 addrspace(3)* %arrayidx4, align 4
%add = add nsw i32 %x.i, 4
- %arrayidx5 = getelementptr inbounds i32 addrspace(1)* %out, i32 %add
+ %arrayidx5 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %add
store i32 %1, i32 addrspace(1)* %arrayidx5, align 4
ret void
}
diff --git a/test/CodeGen/R600/local-memory.ll b/test/CodeGen/R600/local-memory.ll
index 68e72c5..9494ed7 100644
--- a/test/CodeGen/R600/local-memory.ll
+++ b/test/CodeGen/R600/local-memory.ll
@@ -4,7 +4,6 @@
@local_memory.local_mem = internal unnamed_addr addrspace(3) global [128 x i32] undef, align 4
-; FUNC-LABEL: {{^}}local_memory:
; Check that the LDS size is emitted correctly
; EG: .long 166120
@@ -14,6 +13,8 @@
; CI: .long 47180
; CI-NEXT: .long 38792
+; FUNC-LABEL: {{^}}local_memory:
+
; EG: LDS_WRITE
; SI-NOT: s_wqm_b64
; SI: ds_write_b32
@@ -29,15 +30,15 @@
define void @local_memory(i32 addrspace(1)* %out) {
entry:
%y.i = call i32 @llvm.r600.read.tidig.x() #0
- %arrayidx = getelementptr inbounds [128 x i32] addrspace(3)* @local_memory.local_mem, i32 0, i32 %y.i
+ %arrayidx = getelementptr inbounds [128 x i32], [128 x i32] addrspace(3)* @local_memory.local_mem, i32 0, i32 %y.i
store i32 %y.i, i32 addrspace(3)* %arrayidx, align 4
%add = add nsw i32 %y.i, 1
%cmp = icmp eq i32 %add, 16
%.add = select i1 %cmp, i32 0, i32 %add
call void @llvm.AMDGPU.barrier.local()
- %arrayidx1 = getelementptr inbounds [128 x i32] addrspace(3)* @local_memory.local_mem, i32 0, i32 %.add
- %0 = load i32 addrspace(3)* %arrayidx1, align 4
- %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %out, i32 %y.i
+ %arrayidx1 = getelementptr inbounds [128 x i32], [128 x i32] addrspace(3)* @local_memory.local_mem, i32 0, i32 %.add
+ %0 = load i32, i32 addrspace(3)* %arrayidx1, align 4
+ %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %y.i
store i32 %0, i32 addrspace(1)* %arrayidx2, align 4
ret void
}
diff --git a/test/CodeGen/R600/loop-address.ll b/test/CodeGen/R600/loop-address.ll
index 03e0f01..7fadb8d 100644
--- a/test/CodeGen/R600/loop-address.ll
+++ b/test/CodeGen/R600/loop-address.ll
@@ -17,7 +17,7 @@ for.body: ; preds = %for.body, %entry
%i.07.in = phi i32 [ %i.07, %for.body ], [ %iterations, %entry ]
%ai.06 = phi i32 [ %add, %for.body ], [ 0, %entry ]
%i.07 = add nsw i32 %i.07.in, -1
- %arrayidx = getelementptr inbounds i32 addrspace(1)* %out, i32 %ai.06
+ %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %ai.06
store i32 %i.07, i32 addrspace(1)* %arrayidx, align 4
%add = add nsw i32 %ai.06, 1
%exitcond = icmp eq i32 %add, %iterations
diff --git a/test/CodeGen/R600/loop-idiom.ll b/test/CodeGen/R600/loop-idiom.ll
index a0b00ab..810b34f 100644
--- a/test/CodeGen/R600/loop-idiom.ll
+++ b/test/CodeGen/R600/loop-idiom.ll
@@ -20,9 +20,9 @@ entry:
for.body:
%0 = phi i32 [0, %entry], [%4, %for.body]
- %1 = getelementptr i8 addrspace(3)* %in, i32 %0
- %2 = getelementptr i8* %dest, i32 %0
- %3 = load i8 addrspace(3)* %1
+ %1 = getelementptr i8, i8 addrspace(3)* %in, i32 %0
+ %2 = getelementptr i8, i8* %dest, i32 %0
+ %3 = load i8, i8 addrspace(3)* %1
store i8 %3, i8* %2
%4 = add i32 %0, 1
%5 = icmp eq i32 %4, %size
@@ -44,7 +44,7 @@ entry:
for.body:
%0 = phi i32 [0, %entry], [%2, %for.body]
- %1 = getelementptr i8* %dest, i32 %0
+ %1 = getelementptr i8, i8* %dest, i32 %0
store i8 0, i8* %1
%2 = add i32 %0, 1
%3 = icmp eq i32 %2, %size
diff --git a/test/CodeGen/R600/m0-spill.ll b/test/CodeGen/R600/m0-spill.ll
index 4dade82..1dddc85 100644
--- a/test/CodeGen/R600/m0-spill.ll
+++ b/test/CodeGen/R600/m0-spill.ll
@@ -12,8 +12,8 @@ main_body:
br i1 %cmp, label %if, label %else
if:
- %lds_ptr = getelementptr [64 x float] addrspace(3)* @lds, i32 0, i32 0
- %lds_data = load float addrspace(3)* %lds_ptr
+ %lds_ptr = getelementptr [64 x float], [64 x float] addrspace(3)* @lds, i32 0, i32 0
+ %lds_data = load float, float addrspace(3)* %lds_ptr
br label %endif
else:
diff --git a/test/CodeGen/R600/mad-combine.ll b/test/CodeGen/R600/mad-combine.ll
index 8c4e09b..bc07162 100644
--- a/test/CodeGen/R600/mad-combine.ll
+++ b/test/CodeGen/R600/mad-combine.ll
@@ -32,14 +32,14 @@ declare float @llvm.fmuladd.f32(float, float, float) #0
; SI: buffer_store_dword [[RESULT]]
define void @combine_to_mad_f32_0(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2
- %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %a = load float addrspace(1)* %gep.0
- %b = load float addrspace(1)* %gep.1
- %c = load float addrspace(1)* %gep.2
+ %a = load float, float addrspace(1)* %gep.0
+ %b = load float, float addrspace(1)* %gep.1
+ %c = load float, float addrspace(1)* %gep.2
%mul = fmul float %a, %b
%fma = fadd float %mul, %c
@@ -69,17 +69,17 @@ define void @combine_to_mad_f32_0(float addrspace(1)* noalias %out, float addrsp
; SI: s_endpgm
define void @combine_to_mad_f32_0_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2
- %gep.3 = getelementptr float addrspace(1)* %gep.0, i32 3
- %gep.out.0 = getelementptr float addrspace(1)* %out, i32 %tid
- %gep.out.1 = getelementptr float addrspace(1)* %gep.out.0, i32 1
-
- %a = load float addrspace(1)* %gep.0
- %b = load float addrspace(1)* %gep.1
- %c = load float addrspace(1)* %gep.2
- %d = load float addrspace(1)* %gep.3
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
+ %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
+ %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1
+
+ %a = load float, float addrspace(1)* %gep.0
+ %b = load float, float addrspace(1)* %gep.1
+ %c = load float, float addrspace(1)* %gep.2
+ %d = load float, float addrspace(1)* %gep.3
%mul = fmul float %a, %b
%fma0 = fadd float %mul, %c
@@ -105,14 +105,14 @@ define void @combine_to_mad_f32_0_2use(float addrspace(1)* noalias %out, float a
; SI: buffer_store_dword [[RESULT]]
define void @combine_to_mad_f32_1(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2
- %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %a = load float addrspace(1)* %gep.0
- %b = load float addrspace(1)* %gep.1
- %c = load float addrspace(1)* %gep.2
+ %a = load float, float addrspace(1)* %gep.0
+ %b = load float, float addrspace(1)* %gep.1
+ %c = load float, float addrspace(1)* %gep.2
%mul = fmul float %a, %b
%fma = fadd float %c, %mul
@@ -135,14 +135,14 @@ define void @combine_to_mad_f32_1(float addrspace(1)* noalias %out, float addrsp
; SI: buffer_store_dword [[RESULT]]
define void @combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2
- %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %a = load float addrspace(1)* %gep.0
- %b = load float addrspace(1)* %gep.1
- %c = load float addrspace(1)* %gep.2
+ %a = load float, float addrspace(1)* %gep.0
+ %b = load float, float addrspace(1)* %gep.1
+ %c = load float, float addrspace(1)* %gep.2
%mul = fmul float %a, %b
%fma = fsub float %mul, %c
@@ -172,17 +172,17 @@ define void @combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float a
; SI: s_endpgm
define void @combine_to_mad_fsub_0_f32_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2
- %gep.3 = getelementptr float addrspace(1)* %gep.0, i32 3
- %gep.out.0 = getelementptr float addrspace(1)* %out, i32 %tid
- %gep.out.1 = getelementptr float addrspace(1)* %gep.out.0, i32 1
-
- %a = load float addrspace(1)* %gep.0
- %b = load float addrspace(1)* %gep.1
- %c = load float addrspace(1)* %gep.2
- %d = load float addrspace(1)* %gep.3
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
+ %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
+ %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1
+
+ %a = load float, float addrspace(1)* %gep.0
+ %b = load float, float addrspace(1)* %gep.1
+ %c = load float, float addrspace(1)* %gep.2
+ %d = load float, float addrspace(1)* %gep.3
%mul = fmul float %a, %b
%fma0 = fsub float %mul, %c
@@ -207,14 +207,14 @@ define void @combine_to_mad_fsub_0_f32_2use(float addrspace(1)* noalias %out, fl
; SI: buffer_store_dword [[RESULT]]
define void @combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2
- %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %a = load float addrspace(1)* %gep.0
- %b = load float addrspace(1)* %gep.1
- %c = load float addrspace(1)* %gep.2
+ %a = load float, float addrspace(1)* %gep.0
+ %b = load float, float addrspace(1)* %gep.1
+ %c = load float, float addrspace(1)* %gep.2
%mul = fmul float %a, %b
%fma = fsub float %c, %mul
@@ -243,17 +243,17 @@ define void @combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %out, float a
; SI: s_endpgm
define void @combine_to_mad_fsub_1_f32_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2
- %gep.3 = getelementptr float addrspace(1)* %gep.0, i32 3
- %gep.out.0 = getelementptr float addrspace(1)* %out, i32 %tid
- %gep.out.1 = getelementptr float addrspace(1)* %gep.out.0, i32 1
-
- %a = load float addrspace(1)* %gep.0
- %b = load float addrspace(1)* %gep.1
- %c = load float addrspace(1)* %gep.2
- %d = load float addrspace(1)* %gep.3
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
+ %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
+ %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1
+
+ %a = load float, float addrspace(1)* %gep.0
+ %b = load float, float addrspace(1)* %gep.1
+ %c = load float, float addrspace(1)* %gep.2
+ %d = load float, float addrspace(1)* %gep.3
%mul = fmul float %a, %b
%fma0 = fsub float %c, %mul
@@ -279,14 +279,14 @@ define void @combine_to_mad_fsub_1_f32_2use(float addrspace(1)* noalias %out, fl
; SI: buffer_store_dword [[RESULT]]
define void @combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2
- %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %a = load float addrspace(1)* %gep.0
- %b = load float addrspace(1)* %gep.1
- %c = load float addrspace(1)* %gep.2
+ %a = load float, float addrspace(1)* %gep.0
+ %b = load float, float addrspace(1)* %gep.1
+ %c = load float, float addrspace(1)* %gep.2
%mul = fmul float %a, %b
%mul.neg = fsub float -0.0, %mul
@@ -317,17 +317,17 @@ define void @combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float a
; SI: s_endpgm
define void @combine_to_mad_fsub_2_f32_2uses_neg(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2
- %gep.3 = getelementptr float addrspace(1)* %gep.0, i32 3
- %gep.out.0 = getelementptr float addrspace(1)* %out, i32 %tid
- %gep.out.1 = getelementptr float addrspace(1)* %gep.out.0, i32 1
-
- %a = load float addrspace(1)* %gep.0
- %b = load float addrspace(1)* %gep.1
- %c = load float addrspace(1)* %gep.2
- %d = load float addrspace(1)* %gep.3
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
+ %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
+ %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1
+
+ %a = load float, float addrspace(1)* %gep.0
+ %b = load float, float addrspace(1)* %gep.1
+ %c = load float, float addrspace(1)* %gep.2
+ %d = load float, float addrspace(1)* %gep.3
%mul = fmul float %a, %b
%mul.neg = fsub float -0.0, %mul
@@ -360,17 +360,17 @@ define void @combine_to_mad_fsub_2_f32_2uses_neg(float addrspace(1)* noalias %ou
; SI: s_endpgm
define void @combine_to_mad_fsub_2_f32_2uses_mul(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2
- %gep.3 = getelementptr float addrspace(1)* %gep.0, i32 3
- %gep.out.0 = getelementptr float addrspace(1)* %out, i32 %tid
- %gep.out.1 = getelementptr float addrspace(1)* %gep.out.0, i32 1
-
- %a = load float addrspace(1)* %gep.0
- %b = load float addrspace(1)* %gep.1
- %c = load float addrspace(1)* %gep.2
- %d = load float addrspace(1)* %gep.3
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
+ %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
+ %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1
+
+ %a = load float, float addrspace(1)* %gep.0
+ %b = load float, float addrspace(1)* %gep.1
+ %c = load float, float addrspace(1)* %gep.2
+ %d = load float, float addrspace(1)* %gep.3
%mul = fmul float %a, %b
%mul.neg = fsub float -0.0, %mul
@@ -405,18 +405,18 @@ define void @combine_to_mad_fsub_2_f32_2uses_mul(float addrspace(1)* noalias %ou
; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
define void @aggressive_combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2
- %gep.3 = getelementptr float addrspace(1)* %gep.0, i32 3
- %gep.4 = getelementptr float addrspace(1)* %gep.0, i32 4
- %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
-
- %x = load float addrspace(1)* %gep.0
- %y = load float addrspace(1)* %gep.1
- %z = load float addrspace(1)* %gep.2
- %u = load float addrspace(1)* %gep.3
- %v = load float addrspace(1)* %gep.4
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
+ %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
+ %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
+
+ %x = load float, float addrspace(1)* %gep.0
+ %y = load float, float addrspace(1)* %gep.1
+ %z = load float, float addrspace(1)* %gep.2
+ %u = load float, float addrspace(1)* %gep.3
+ %v = load float, float addrspace(1)* %gep.4
%tmp0 = fmul float %u, %v
%tmp1 = call float @llvm.fma.f32(float %x, float %y, float %tmp0) #0
@@ -451,18 +451,18 @@ define void @aggressive_combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %o
; SI: s_endpgm
define void @aggressive_combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2
- %gep.3 = getelementptr float addrspace(1)* %gep.0, i32 3
- %gep.4 = getelementptr float addrspace(1)* %gep.0, i32 4
- %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
-
- %x = load float addrspace(1)* %gep.0
- %y = load float addrspace(1)* %gep.1
- %z = load float addrspace(1)* %gep.2
- %u = load float addrspace(1)* %gep.3
- %v = load float addrspace(1)* %gep.4
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
+ %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
+ %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
+
+ %x = load float, float addrspace(1)* %gep.0
+ %y = load float, float addrspace(1)* %gep.1
+ %z = load float, float addrspace(1)* %gep.2
+ %u = load float, float addrspace(1)* %gep.3
+ %v = load float, float addrspace(1)* %gep.4
%tmp0 = fmul float %u, %v
%tmp1 = call float @llvm.fma.f32(float %y, float %z, float %tmp0) #0
@@ -496,18 +496,18 @@ define void @aggressive_combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %o
; SI: s_endpgm
define void @aggressive_combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2
- %gep.3 = getelementptr float addrspace(1)* %gep.0, i32 3
- %gep.4 = getelementptr float addrspace(1)* %gep.0, i32 4
- %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
-
- %x = load float addrspace(1)* %gep.0
- %y = load float addrspace(1)* %gep.1
- %z = load float addrspace(1)* %gep.2
- %u = load float addrspace(1)* %gep.3
- %v = load float addrspace(1)* %gep.4
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
+ %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
+ %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
+
+ %x = load float, float addrspace(1)* %gep.0
+ %y = load float, float addrspace(1)* %gep.1
+ %z = load float, float addrspace(1)* %gep.2
+ %u = load float, float addrspace(1)* %gep.3
+ %v = load float, float addrspace(1)* %gep.4
%tmp0 = fmul float %u, %v
%tmp1 = call float @llvm.fmuladd.f32(float %x, float %y, float %tmp0) #0
@@ -542,18 +542,18 @@ define void @aggressive_combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %o
; SI: s_endpgm
define void @aggressive_combine_to_mad_fsub_3_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2
- %gep.3 = getelementptr float addrspace(1)* %gep.0, i32 3
- %gep.4 = getelementptr float addrspace(1)* %gep.0, i32 4
- %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
-
- %x = load float addrspace(1)* %gep.0
- %y = load float addrspace(1)* %gep.1
- %z = load float addrspace(1)* %gep.2
- %u = load float addrspace(1)* %gep.3
- %v = load float addrspace(1)* %gep.4
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
+ %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
+ %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
+
+ %x = load float, float addrspace(1)* %gep.0
+ %y = load float, float addrspace(1)* %gep.1
+ %z = load float, float addrspace(1)* %gep.2
+ %u = load float, float addrspace(1)* %gep.3
+ %v = load float, float addrspace(1)* %gep.4
%tmp0 = fmul float %u, %v
%tmp1 = call float @llvm.fmuladd.f32(float %y, float %z, float %tmp0) #0
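Every hunk in this file is the same mechanical migration: getelementptr and load now spell the pointee type as an explicit first operand instead of inferring it from the pointer argument, and the old single-type form no longer parses. A minimal sketch of the two spellings, using a hypothetical function that is not part of the tests above:

; Illustration only: old vs. new explicit-type syntax.
define float @typed_syntax_sketch(float addrspace(1)* %ptr) {
  ; Old form, now rejected by the IR parser:
  ;   %gep = getelementptr float addrspace(1)* %ptr, i32 1
  ;   %v = load float addrspace(1)* %gep
  %gep = getelementptr float, float addrspace(1)* %ptr, i32 1
  %v = load float, float addrspace(1)* %gep
  ret float %v
}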
diff --git a/test/CodeGen/R600/mad-sub.ll b/test/CodeGen/R600/mad-sub.ll
index 7b4020d..aa4194f 100644
--- a/test/CodeGen/R600/mad-sub.ll
+++ b/test/CodeGen/R600/mad-sub.ll
@@ -12,15 +12,15 @@ declare float @llvm.fabs.f32(float) #0
define void @mad_sub_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
%tid.ext = sext i32 %tid to i64
- %gep0 = getelementptr float addrspace(1)* %ptr, i64 %tid.ext
+ %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
%add1 = add i64 %tid.ext, 1
- %gep1 = getelementptr float addrspace(1)* %ptr, i64 %add1
+ %gep1 = getelementptr float, float addrspace(1)* %ptr, i64 %add1
%add2 = add i64 %tid.ext, 2
- %gep2 = getelementptr float addrspace(1)* %ptr, i64 %add2
- %outgep = getelementptr float addrspace(1)* %out, i64 %tid.ext
- %a = load float addrspace(1)* %gep0, align 4
- %b = load float addrspace(1)* %gep1, align 4
- %c = load float addrspace(1)* %gep2, align 4
+ %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
+ %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
+ %a = load float, float addrspace(1)* %gep0, align 4
+ %b = load float, float addrspace(1)* %gep1, align 4
+ %c = load float, float addrspace(1)* %gep2, align 4
%mul = fmul float %a, %b
%sub = fsub float %mul, %c
store float %sub, float addrspace(1)* %outgep, align 4
@@ -36,15 +36,15 @@ define void @mad_sub_f32(float addrspace(1)* noalias nocapture %out, float addrs
define void @mad_sub_inv_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
%tid.ext = sext i32 %tid to i64
- %gep0 = getelementptr float addrspace(1)* %ptr, i64 %tid.ext
+ %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
%add1 = add i64 %tid.ext, 1
- %gep1 = getelementptr float addrspace(1)* %ptr, i64 %add1
+ %gep1 = getelementptr float, float addrspace(1)* %ptr, i64 %add1
%add2 = add i64 %tid.ext, 2
- %gep2 = getelementptr float addrspace(1)* %ptr, i64 %add2
- %outgep = getelementptr float addrspace(1)* %out, i64 %tid.ext
- %a = load float addrspace(1)* %gep0, align 4
- %b = load float addrspace(1)* %gep1, align 4
- %c = load float addrspace(1)* %gep2, align 4
+ %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
+ %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
+ %a = load float, float addrspace(1)* %gep0, align 4
+ %b = load float, float addrspace(1)* %gep1, align 4
+ %c = load float, float addrspace(1)* %gep2, align 4
%mul = fmul float %a, %b
%sub = fsub float %c, %mul
store float %sub, float addrspace(1)* %outgep, align 4
@@ -57,15 +57,15 @@ define void @mad_sub_inv_f32(float addrspace(1)* noalias nocapture %out, float a
define void @mad_sub_f64(double addrspace(1)* noalias nocapture %out, double addrspace(1)* noalias nocapture readonly %ptr) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
%tid.ext = sext i32 %tid to i64
- %gep0 = getelementptr double addrspace(1)* %ptr, i64 %tid.ext
+ %gep0 = getelementptr double, double addrspace(1)* %ptr, i64 %tid.ext
%add1 = add i64 %tid.ext, 1
- %gep1 = getelementptr double addrspace(1)* %ptr, i64 %add1
+ %gep1 = getelementptr double, double addrspace(1)* %ptr, i64 %add1
%add2 = add i64 %tid.ext, 2
- %gep2 = getelementptr double addrspace(1)* %ptr, i64 %add2
- %outgep = getelementptr double addrspace(1)* %out, i64 %tid.ext
- %a = load double addrspace(1)* %gep0, align 8
- %b = load double addrspace(1)* %gep1, align 8
- %c = load double addrspace(1)* %gep2, align 8
+ %gep2 = getelementptr double, double addrspace(1)* %ptr, i64 %add2
+ %outgep = getelementptr double, double addrspace(1)* %out, i64 %tid.ext
+ %a = load double, double addrspace(1)* %gep0, align 8
+ %b = load double, double addrspace(1)* %gep1, align 8
+ %c = load double, double addrspace(1)* %gep2, align 8
%mul = fmul double %a, %b
%sub = fsub double %mul, %c
store double %sub, double addrspace(1)* %outgep, align 8
@@ -81,15 +81,15 @@ define void @mad_sub_f64(double addrspace(1)* noalias nocapture %out, double add
define void @mad_sub_fabs_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
%tid.ext = sext i32 %tid to i64
- %gep0 = getelementptr float addrspace(1)* %ptr, i64 %tid.ext
+ %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
%add1 = add i64 %tid.ext, 1
- %gep1 = getelementptr float addrspace(1)* %ptr, i64 %add1
+ %gep1 = getelementptr float, float addrspace(1)* %ptr, i64 %add1
%add2 = add i64 %tid.ext, 2
- %gep2 = getelementptr float addrspace(1)* %ptr, i64 %add2
- %outgep = getelementptr float addrspace(1)* %out, i64 %tid.ext
- %a = load float addrspace(1)* %gep0, align 4
- %b = load float addrspace(1)* %gep1, align 4
- %c = load float addrspace(1)* %gep2, align 4
+ %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
+ %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
+ %a = load float, float addrspace(1)* %gep0, align 4
+ %b = load float, float addrspace(1)* %gep1, align 4
+ %c = load float, float addrspace(1)* %gep2, align 4
%c.abs = call float @llvm.fabs.f32(float %c) #0
%mul = fmul float %a, %b
%sub = fsub float %mul, %c.abs
@@ -106,15 +106,15 @@ define void @mad_sub_fabs_f32(float addrspace(1)* noalias nocapture %out, float
define void @mad_sub_fabs_inv_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
%tid.ext = sext i32 %tid to i64
- %gep0 = getelementptr float addrspace(1)* %ptr, i64 %tid.ext
+ %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
%add1 = add i64 %tid.ext, 1
- %gep1 = getelementptr float addrspace(1)* %ptr, i64 %add1
+ %gep1 = getelementptr float, float addrspace(1)* %ptr, i64 %add1
%add2 = add i64 %tid.ext, 2
- %gep2 = getelementptr float addrspace(1)* %ptr, i64 %add2
- %outgep = getelementptr float addrspace(1)* %out, i64 %tid.ext
- %a = load float addrspace(1)* %gep0, align 4
- %b = load float addrspace(1)* %gep1, align 4
- %c = load float addrspace(1)* %gep2, align 4
+ %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
+ %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
+ %a = load float, float addrspace(1)* %gep0, align 4
+ %b = load float, float addrspace(1)* %gep1, align 4
+ %c = load float, float addrspace(1)* %gep2, align 4
%c.abs = call float @llvm.fabs.f32(float %c) #0
%mul = fmul float %a, %b
%sub = fsub float %c.abs, %mul
@@ -127,15 +127,15 @@ define void @mad_sub_fabs_inv_f32(float addrspace(1)* noalias nocapture %out, fl
define void @neg_neg_mad_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
%tid.ext = sext i32 %tid to i64
- %gep0 = getelementptr float addrspace(1)* %ptr, i64 %tid.ext
+ %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
%add1 = add i64 %tid.ext, 1
- %gep1 = getelementptr float addrspace(1)* %ptr, i64 %add1
+ %gep1 = getelementptr float, float addrspace(1)* %ptr, i64 %add1
%add2 = add i64 %tid.ext, 2
- %gep2 = getelementptr float addrspace(1)* %ptr, i64 %add2
- %outgep = getelementptr float addrspace(1)* %out, i64 %tid.ext
- %a = load float addrspace(1)* %gep0, align 4
- %b = load float addrspace(1)* %gep1, align 4
- %c = load float addrspace(1)* %gep2, align 4
+ %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
+ %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
+ %a = load float, float addrspace(1)* %gep0, align 4
+ %b = load float, float addrspace(1)* %gep1, align 4
+ %c = load float, float addrspace(1)* %gep2, align 4
%nega = fsub float -0.000000e+00, %a
%negb = fsub float -0.000000e+00, %b
%mul = fmul float %nega, %negb
@@ -153,15 +153,15 @@ define void @neg_neg_mad_f32(float addrspace(1)* noalias nocapture %out, float a
define void @mad_fabs_sub_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
%tid.ext = sext i32 %tid to i64
- %gep0 = getelementptr float addrspace(1)* %ptr, i64 %tid.ext
+ %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
%add1 = add i64 %tid.ext, 1
- %gep1 = getelementptr float addrspace(1)* %ptr, i64 %add1
+ %gep1 = getelementptr float, float addrspace(1)* %ptr, i64 %add1
%add2 = add i64 %tid.ext, 2
- %gep2 = getelementptr float addrspace(1)* %ptr, i64 %add2
- %outgep = getelementptr float addrspace(1)* %out, i64 %tid.ext
- %a = load float addrspace(1)* %gep0, align 4
- %b = load float addrspace(1)* %gep1, align 4
- %c = load float addrspace(1)* %gep2, align 4
+ %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
+ %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
+ %a = load float, float addrspace(1)* %gep0, align 4
+ %b = load float, float addrspace(1)* %gep1, align 4
+ %c = load float, float addrspace(1)* %gep2, align 4
%b.abs = call float @llvm.fabs.f32(float %b) #0
%mul = fmul float %a, %b.abs
%sub = fsub float %mul, %c
@@ -176,12 +176,12 @@ define void @mad_fabs_sub_f32(float addrspace(1)* noalias nocapture %out, float
; SI: buffer_store_dword [[RESULT]]
define void @fsub_c_fadd_a_a(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

- %r1 = load float addrspace(1)* %gep.0
- %r2 = load float addrspace(1)* %gep.1
+ %r1 = load float, float addrspace(1)* %gep.0
+ %r2 = load float, float addrspace(1)* %gep.1
%add = fadd float %r1, %r1
%r3 = fsub float %r2, %add
@@ -197,12 +197,12 @@ define void @fsub_c_fadd_a_a(float addrspace(1)* %out, float addrspace(1)* %in)
; SI: buffer_store_dword [[RESULT]]
define void @fsub_fadd_a_a_c(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

- %r1 = load float addrspace(1)* %gep.0
- %r2 = load float addrspace(1)* %gep.1
+ %r1 = load float, float addrspace(1)* %gep.0
+ %r2 = load float, float addrspace(1)* %gep.1
%add = fadd float %r1, %r1
%r3 = fsub float %add, %r2
diff --git a/test/CodeGen/R600/madak.ll b/test/CodeGen/R600/madak.ll
index 505a49b..933bb01 100644
--- a/test/CodeGen/R600/madak.ll
+++ b/test/CodeGen/R600/madak.ll
@@ -9,15 +9,15 @@ declare float @llvm.fabs.f32(float) nounwind readnone
; GCN-LABEL: {{^}}madak_f32:
; GCN: buffer_load_dword [[VA:v[0-9]+]]
; GCN: buffer_load_dword [[VB:v[0-9]+]]
-; GCN: v_madak_f32 {{v[0-9]+}}, [[VB]], [[VA]], 0x41200000
+; GCN: v_madak_f32_e32 {{v[0-9]+}}, [[VB]], [[VA]], 0x41200000
define void @madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %in.a.gep = getelementptr float addrspace(1)* %in.a, i32 %tid
- %in.b.gep = getelementptr float addrspace(1)* %in.b, i32 %tid
- %out.gep = getelementptr float addrspace(1)* %out, i32 %tid
+ %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
+ %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid

- %a = load float addrspace(1)* %in.a.gep, align 4
- %b = load float addrspace(1)* %in.b.gep, align 4
+ %a = load float, float addrspace(1)* %in.a.gep, align 4
+ %b = load float, float addrspace(1)* %in.b.gep, align 4
%mul = fmul float %a, %b
%madak = fadd float %mul, 10.0
@@ -40,16 +40,16 @@ define void @madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noa
define void @madak_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %in.gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %in.gep.1 = getelementptr float addrspace(1)* %in.gep.0, i32 1
- %in.gep.2 = getelementptr float addrspace(1)* %in.gep.0, i32 2
+ %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
+ %in.gep.2 = getelementptr float, float addrspace(1)* %in.gep.0, i32 2

- %out.gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
- %out.gep.1 = getelementptr float addrspace(1)* %in.gep.0, i32 1
+ %out.gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %out.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1

- %a = load float addrspace(1)* %in.gep.0, align 4
- %b = load float addrspace(1)* %in.gep.1, align 4
- %c = load float addrspace(1)* %in.gep.2, align 4
+ %a = load float, float addrspace(1)* %in.gep.0, align 4
+ %b = load float, float addrspace(1)* %in.gep.1, align 4
+ %c = load float, float addrspace(1)* %in.gep.2, align 4
%mul0 = fmul float %a, %b
%mul1 = fmul float %a, %c
@@ -63,13 +63,13 @@ define void @madak_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1
; GCN-LABEL: {{^}}madak_m_inline_imm_f32:
; GCN: buffer_load_dword [[VA:v[0-9]+]]
-; GCN: v_madak_f32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000
+; GCN: v_madak_f32_e32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000
define void @madak_m_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a) nounwind {
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %in.a.gep = getelementptr float addrspace(1)* %in.a, i32 %tid
- %out.gep = getelementptr float addrspace(1)* %out, i32 %tid
+ %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid

- %a = load float addrspace(1)* %in.a.gep, align 4
+ %a = load float, float addrspace(1)* %in.a.gep, align 4
%mul = fmul float 4.0, %a
%madak = fadd float %mul, 10.0
@@ -86,12 +86,12 @@ define void @madak_m_inline_imm_f32(float addrspace(1)* noalias %out, float addr
; GCN: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0
define void @madak_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %in.a.gep = getelementptr float addrspace(1)* %in.a, i32 %tid
- %in.b.gep = getelementptr float addrspace(1)* %in.b, i32 %tid
- %out.gep = getelementptr float addrspace(1)* %out, i32 %tid
+ %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
+ %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid

- %a = load float addrspace(1)* %in.a.gep, align 4
- %b = load float addrspace(1)* %in.b.gep, align 4
+ %a = load float, float addrspace(1)* %in.a.gep, align 4
+ %b = load float, float addrspace(1)* %in.b.gep, align 4
%mul = fmul float %a, %b
%madak = fadd float %mul, 4.0
@@ -108,10 +108,10 @@ define void @madak_inline_imm_f32(float addrspace(1)* noalias %out, float addrsp
; GCN: v_mad_f32 {{v[0-9]+}}, [[SB]], [[VA]], [[VK]]
define void @s_v_madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float %b) nounwind {
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %in.a.gep = getelementptr float addrspace(1)* %in.a, i32 %tid
- %out.gep = getelementptr float addrspace(1)* %out, i32 %tid
+ %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid

- %a = load float addrspace(1)* %in.a.gep, align 4
+ %a = load float, float addrspace(1)* %in.a.gep, align 4
%mul = fmul float %a, %b
%madak = fadd float %mul, 10.0
@@ -127,10 +127,10 @@ define void @s_v_madak_f32(float addrspace(1)* noalias %out, float addrspace(1)*
; GCN: v_mad_f32 {{v[0-9]+}}, [[VA]], [[SB]], [[VK]]
define void @v_s_madak_f32(float addrspace(1)* noalias %out, float %a, float addrspace(1)* noalias %in.b) nounwind {
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %in.b.gep = getelementptr float addrspace(1)* %in.b, i32 %tid
- %out.gep = getelementptr float addrspace(1)* %out, i32 %tid
+ %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid

- %b = load float addrspace(1)* %in.b.gep, align 4
+ %b = load float, float addrspace(1)* %in.b.gep, align 4
%mul = fmul float %a, %b
%madak = fadd float %mul, 10.0
@@ -155,12 +155,12 @@ define void @s_s_madak_f32(float addrspace(1)* %out, float %a, float %b) nounwin
; GCN: s_endpgm
define void @no_madak_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %in.a.gep = getelementptr float addrspace(1)* %in.a, i32 %tid
- %in.b.gep = getelementptr float addrspace(1)* %in.b, i32 %tid
- %out.gep = getelementptr float addrspace(1)* %out, i32 %tid
+ %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
+ %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid

- %a = load float addrspace(1)* %in.a.gep, align 4
- %b = load float addrspace(1)* %in.b.gep, align 4
+ %a = load float, float addrspace(1)* %in.a.gep, align 4
+ %b = load float, float addrspace(1)* %in.b.gep, align 4
%a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone
@@ -177,12 +177,12 @@ define void @no_madak_src0_modifier_f32(float addrspace(1)* noalias %out, float
; GCN: s_endpgm
define void @no_madak_src1_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %in.a.gep = getelementptr float addrspace(1)* %in.a, i32 %tid
- %in.b.gep = getelementptr float addrspace(1)* %in.b, i32 %tid
- %out.gep = getelementptr float addrspace(1)* %out, i32 %tid
+ %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
+ %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid

- %a = load float addrspace(1)* %in.a.gep, align 4
- %b = load float addrspace(1)* %in.b.gep, align 4
+ %a = load float, float addrspace(1)* %in.a.gep, align 4
+ %b = load float, float addrspace(1)* %in.b.gep, align 4
%b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone
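Beyond the pointer-syntax churn, the madak hunks also update the expected mnemonic from v_madak_f32 to v_madak_f32_e32; the _e32 suffix marks the 32-bit VOP2 encoding, as opposed to the _e64 VOP3 forms. v_madak_f32 computes d = a * b + K with a 32-bit literal K carried in the instruction (0x41200000 is 10.0f), while v_madmk_f32 in the next file is the sibling d = a * K + b. A condensed, hypothetical distillation of the pattern these tests match (illustration only, not a lit test; the fold needs VGPR operands, hence the loads):

; GCN: v_madak_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, 0x41200000
define void @madak_sketch(float addrspace(1)* %out, float addrspace(1)* %in) {
  %gep = getelementptr float, float addrspace(1)* %in, i32 1
  %a = load float, float addrspace(1)* %in, align 4
  %b = load float, float addrspace(1)* %gep, align 4
  %mul = fmul float %a, %b
  %madak = fadd float %mul, 10.0  ; 10.0 = 0x41200000, folded into the madak literal
  store float %madak, float addrspace(1)* %out, align 4
  ret void
}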
diff --git a/test/CodeGen/R600/madmk.ll b/test/CodeGen/R600/madmk.ll
index 249e48e..71615e9 100644
--- a/test/CodeGen/R600/madmk.ll
+++ b/test/CodeGen/R600/madmk.ll
@@ -7,15 +7,15 @@ declare float @llvm.fabs.f32(float) nounwind readnone
; GCN-LABEL: {{^}}madmk_f32:
; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; GCN: v_madmk_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
+; GCN: v_madmk_f32_e32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
define void @madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %out.gep = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid

- %a = load float addrspace(1)* %gep.0, align 4
- %b = load float addrspace(1)* %gep.1, align 4
+ %a = load float, float addrspace(1)* %gep.0, align 4
+ %b = load float, float addrspace(1)* %gep.1, align 4
%mul = fmul float %a, 10.0
%madmk = fadd float %mul, %b
@@ -34,16 +34,16 @@ define void @madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noa
define void @madmk_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %in.gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %in.gep.1 = getelementptr float addrspace(1)* %in.gep.0, i32 1
- %in.gep.2 = getelementptr float addrspace(1)* %in.gep.0, i32 2
+ %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
+ %in.gep.2 = getelementptr float, float addrspace(1)* %in.gep.0, i32 2

- %out.gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
- %out.gep.1 = getelementptr float addrspace(1)* %in.gep.0, i32 1
+ %out.gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %out.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1

- %a = load float addrspace(1)* %in.gep.0, align 4
- %b = load float addrspace(1)* %in.gep.1, align 4
- %c = load float addrspace(1)* %in.gep.2, align 4
+ %a = load float, float addrspace(1)* %in.gep.0, align 4
+ %b = load float, float addrspace(1)* %in.gep.1, align 4
+ %c = load float, float addrspace(1)* %in.gep.2, align 4
%mul0 = fmul float %a, 10.0
%mul1 = fmul float %a, 10.0
@@ -62,12 +62,12 @@ define void @madmk_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1
; GCN: v_mad_f32 {{v[0-9]+}}, 4.0, [[VA]], [[VB]]
define void @madmk_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %out.gep = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid

- %a = load float addrspace(1)* %gep.0, align 4
- %b = load float addrspace(1)* %gep.1, align 4
+ %a = load float, float addrspace(1)* %gep.0, align 4
+ %b = load float, float addrspace(1)* %gep.1, align 4
%mul = fmul float %a, 4.0
%madmk = fadd float %mul, %b
@@ -81,7 +81,7 @@ define void @madmk_inline_imm_f32(float addrspace(1)* noalias %out, float addrsp
; GCN: s_endpgm
define void @s_s_madmk_f32(float addrspace(1)* noalias %out, float %a, float %b) nounwind {
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %out.gep = getelementptr float addrspace(1)* %out, i32 %tid
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
%mul = fmul float %a, 10.0
%madmk = fadd float %mul, %b
@@ -95,9 +95,9 @@ define void @s_s_madmk_f32(float addrspace(1)* noalias %out, float %a, float %b)
; GCN: s_endpgm
define void @v_s_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %b) nounwind {
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %out.gep = getelementptr float addrspace(1)* %out, i32 %tid
- %a = load float addrspace(1)* %gep.0, align 4
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %a = load float, float addrspace(1)* %gep.0, align 4
%mul = fmul float %a, 10.0
%madmk = fadd float %mul, %b
@@ -111,9 +111,9 @@ define void @v_s_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)*
; GCN: s_endpgm
define void @scalar_vector_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %a) nounwind {
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %out.gep = getelementptr float addrspace(1)* %out, i32 %tid
- %b = load float addrspace(1)* %gep.0, align 4
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %b = load float, float addrspace(1)* %gep.0, align 4
%mul = fmul float %a, 10.0
%madmk = fadd float %mul, %b
@@ -127,12 +127,12 @@ define void @scalar_vector_madmk_f32(float addrspace(1)* noalias %out, float add
; GCN: v_mad_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, {{[sv][0-9]+}}
define void @no_madmk_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %out.gep = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid

- %a = load float addrspace(1)* %gep.0, align 4
- %b = load float addrspace(1)* %gep.1, align 4
+ %a = load float, float addrspace(1)* %gep.0, align 4
+ %b = load float, float addrspace(1)* %gep.1, align 4
%a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone
@@ -148,12 +148,12 @@ define void @no_madmk_src0_modifier_f32(float addrspace(1)* noalias %out, float
; GCN: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, |{{[sv][0-9]+}}|
define void @no_madmk_src2_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %out.gep = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid

- %a = load float addrspace(1)* %gep.0, align 4
- %b = load float addrspace(1)* %gep.1, align 4
+ %a = load float, float addrspace(1)* %gep.0, align 4
+ %b = load float, float addrspace(1)* %gep.1, align 4
%b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone
@@ -169,10 +169,10 @@ define void @no_madmk_src2_modifier_f32(float addrspace(1)* noalias %out, float
; GCN: v_mad_f32 {{v[0-9]+}}, [[VK]], [[A]], 2.0
define void @madmk_add_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %out.gep = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid

- %a = load float addrspace(1)* %gep.0, align 4
+ %a = load float, float addrspace(1)* %gep.0, align 4
%mul = fmul float %a, 10.0
%madmk = fadd float %mul, 2.0
diff --git a/test/CodeGen/R600/max.ll b/test/CodeGen/R600/max.ll
index 20af993..1aa9e68 100644
--- a/test/CodeGen/R600/max.ll
+++ b/test/CodeGen/R600/max.ll
@@ -6,11 +6,11 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; SI: v_max_i32_e32
define void @v_test_imax_sge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
- %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load i32 addrspace(1)* %gep0, align 4
- %b = load i32 addrspace(1)* %gep1, align 4
+ %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
+ %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load i32, i32 addrspace(1)* %gep0, align 4
+ %b = load i32, i32 addrspace(1)* %gep1, align 4
%cmp = icmp sge i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %outgep, align 4
@@ -26,15 +26,33 @@ define void @s_test_imax_sge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin
ret void
}
+; FUNC-LABEL: {{^}}s_test_imax_sge_imm_i32:
+; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9
+define void @s_test_imax_sge_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
+ %cmp = icmp sge i32 %a, 9
+ %val = select i1 %cmp, i32 %a, i32 9
+ store i32 %val, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}s_test_imax_sgt_imm_i32:
+; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9
+define void @s_test_imax_sgt_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
+ %cmp = icmp sgt i32 %a, 9
+ %val = select i1 %cmp, i32 %a, i32 9
+ store i32 %val, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
; FUNC-LABEL: @v_test_imax_sgt_i32
; SI: v_max_i32_e32
define void @v_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
- %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load i32 addrspace(1)* %gep0, align 4
- %b = load i32 addrspace(1)* %gep1, align 4
+ %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
+ %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load i32, i32 addrspace(1)* %gep0, align 4
+ %b = load i32, i32 addrspace(1)* %gep1, align 4
%cmp = icmp sgt i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %outgep, align 4
@@ -54,11 +72,11 @@ define void @s_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin
; SI: v_max_u32_e32
define void @v_test_umax_uge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
- %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load i32 addrspace(1)* %gep0, align 4
- %b = load i32 addrspace(1)* %gep1, align 4
+ %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
+ %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load i32, i32 addrspace(1)* %gep0, align 4
+ %b = load i32, i32 addrspace(1)* %gep1, align 4
%cmp = icmp uge i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %outgep, align 4
@@ -78,11 +96,11 @@ define void @s_test_umax_uge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin
; SI: v_max_u32_e32
define void @v_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
- %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load i32 addrspace(1)* %gep0, align 4
- %b = load i32 addrspace(1)* %gep1, align 4
+ %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
+ %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load i32, i32 addrspace(1)* %gep0, align 4
+ %b = load i32, i32 addrspace(1)* %gep1, align 4
%cmp = icmp ugt i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %outgep, align 4
diff --git a/test/CodeGen/R600/max3.ll b/test/CodeGen/R600/max3.ll
index f905e17..cfb94b2 100644
--- a/test/CodeGen/R600/max3.ll
+++ b/test/CodeGen/R600/max3.ll
@@ -6,13 +6,13 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; SI: v_max3_i32
define void @v_test_imax3_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
- %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
- %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load i32 addrspace(1)* %gep0, align 4
- %b = load i32 addrspace(1)* %gep1, align 4
- %c = load i32 addrspace(1)* %gep2, align 4
+ %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load i32, i32 addrspace(1)* %gep0, align 4
+ %b = load i32, i32 addrspace(1)* %gep1, align 4
+ %c = load i32, i32 addrspace(1)* %gep2, align 4
%icmp0 = icmp sgt i32 %a, %b
%i0 = select i1 %icmp0, i32 %a, i32 %b
%icmp1 = icmp sgt i32 %i0, %c
@@ -25,13 +25,13 @@ define void @v_test_imax3_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %apt
; SI: v_max3_u32
define void @v_test_umax3_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
- %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
- %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load i32 addrspace(1)* %gep0, align 4
- %b = load i32 addrspace(1)* %gep1, align 4
- %c = load i32 addrspace(1)* %gep2, align 4
+ %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load i32, i32 addrspace(1)* %gep0, align 4
+ %b = load i32, i32 addrspace(1)* %gep1, align 4
+ %c = load i32, i32 addrspace(1)* %gep2, align 4
%icmp0 = icmp ugt i32 %a, %b
%i0 = select i1 %icmp0, i32 %a, i32 %b
%icmp1 = icmp ugt i32 %i0, %c
diff --git a/test/CodeGen/R600/min.ll b/test/CodeGen/R600/min.ll
index 00ba5c6..275e9a7 100644
--- a/test/CodeGen/R600/min.ll
+++ b/test/CodeGen/R600/min.ll
@@ -6,11 +6,11 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; SI: v_min_i32_e32
define void @v_test_imin_sle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
- %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load i32 addrspace(1)* %gep0, align 4
- %b = load i32 addrspace(1)* %gep1, align 4
+ %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
+ %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load i32, i32 addrspace(1)* %gep0, align 4
+ %b = load i32, i32 addrspace(1)* %gep1, align 4
%cmp = icmp sle i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %outgep, align 4
@@ -30,11 +30,11 @@ define void @s_test_imin_sle_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin
; SI: v_min_i32_e32
define void @v_test_imin_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
- %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load i32 addrspace(1)* %gep0, align 4
- %b = load i32 addrspace(1)* %gep1, align 4
+ %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
+ %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load i32, i32 addrspace(1)* %gep0, align 4
+ %b = load i32, i32 addrspace(1)* %gep1, align 4
%cmp = icmp slt i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %outgep, align 4
@@ -50,15 +50,33 @@ define void @s_test_imin_slt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin
ret void
}
+; FUNC-LABEL: {{^}}s_test_imin_slt_imm_i32:
+; SI: s_min_i32 {{s[0-9]+}}, {{s[0-9]+}}, 8
+define void @s_test_imin_slt_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
+ %cmp = icmp slt i32 %a, 8
+ %val = select i1 %cmp, i32 %a, i32 8
+ store i32 %val, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}s_test_imin_sle_imm_i32:
+; SI: s_min_i32 {{s[0-9]+}}, {{s[0-9]+}}, 8
+define void @s_test_imin_sle_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
+ %cmp = icmp sle i32 %a, 8
+ %val = select i1 %cmp, i32 %a, i32 8
+ store i32 %val, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
; FUNC-LABEL: @v_test_umin_ule_i32
; SI: v_min_u32_e32
define void @v_test_umin_ule_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
- %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load i32 addrspace(1)* %gep0, align 4
- %b = load i32 addrspace(1)* %gep1, align 4
+ %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
+ %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load i32, i32 addrspace(1)* %gep0, align 4
+ %b = load i32, i32 addrspace(1)* %gep1, align 4
%cmp = icmp ule i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %outgep, align 4
@@ -78,11 +96,11 @@ define void @s_test_umin_ule_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin
; SI: v_min_u32_e32
define void @v_test_umin_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
- %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load i32 addrspace(1)* %gep0, align 4
- %b = load i32 addrspace(1)* %gep1, align 4
+ %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
+ %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load i32, i32 addrspace(1)* %gep0, align 4
+ %b = load i32, i32 addrspace(1)* %gep1, align 4
%cmp = icmp ult i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %outgep, align 4
@@ -106,12 +124,12 @@ define void @s_test_umin_ult_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin
; SI: s_endpgm
define void @v_test_umin_ult_i32_multi_use(i32 addrspace(1)* %out0, i1 addrspace(1)* %out1, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
- %outgep0 = getelementptr i32 addrspace(1)* %out0, i32 %tid
- %outgep1 = getelementptr i1 addrspace(1)* %out1, i32 %tid
- %a = load i32 addrspace(1)* %gep0, align 4
- %b = load i32 addrspace(1)* %gep1, align 4
+ %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
+ %outgep0 = getelementptr i32, i32 addrspace(1)* %out0, i32 %tid
+ %outgep1 = getelementptr i1, i1 addrspace(1)* %out1, i32 %tid
+ %a = load i32, i32 addrspace(1)* %gep0, align 4
+ %b = load i32, i32 addrspace(1)* %gep1, align 4
%cmp = icmp ult i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %outgep0, align 4
diff --git a/test/CodeGen/R600/min3.ll b/test/CodeGen/R600/min3.ll
index 6c11a65..38ef46d 100644
--- a/test/CodeGen/R600/min3.ll
+++ b/test/CodeGen/R600/min3.ll
@@ -6,13 +6,13 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; SI: v_min3_i32
define void @v_test_imin3_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
- %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
- %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load i32 addrspace(1)* %gep0, align 4
- %b = load i32 addrspace(1)* %gep1, align 4
- %c = load i32 addrspace(1)* %gep2, align 4
+ %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load i32, i32 addrspace(1)* %gep0, align 4
+ %b = load i32, i32 addrspace(1)* %gep1, align 4
+ %c = load i32, i32 addrspace(1)* %gep2, align 4
%icmp0 = icmp slt i32 %a, %b
%i0 = select i1 %icmp0, i32 %a, i32 %b
%icmp1 = icmp slt i32 %i0, %c
@@ -25,13 +25,13 @@ define void @v_test_imin3_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %apt
; SI: v_min3_u32
define void @v_test_umin3_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
- %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
- %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load i32 addrspace(1)* %gep0, align 4
- %b = load i32 addrspace(1)* %gep1, align 4
- %c = load i32 addrspace(1)* %gep2, align 4
+ %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load i32, i32 addrspace(1)* %gep0, align 4
+ %b = load i32, i32 addrspace(1)* %gep1, align 4
+ %c = load i32, i32 addrspace(1)* %gep2, align 4
%icmp0 = icmp ult i32 %a, %b
%i0 = select i1 %icmp0, i32 %a, i32 %b
%icmp1 = icmp ult i32 %i0, %c
@@ -46,21 +46,21 @@ define void @v_test_umin3_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %apt
define void @v_test_umin_umin_umin(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%tid2 = mul i32 %tid, 2
- %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
- %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
+ %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid

- %gep3 = getelementptr i32 addrspace(1)* %aptr, i32 %tid2
- %gep4 = getelementptr i32 addrspace(1)* %bptr, i32 %tid2
- %gep5 = getelementptr i32 addrspace(1)* %cptr, i32 %tid2
+ %gep3 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid2
+ %gep4 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid2
+ %gep5 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid2

- %outgep0 = getelementptr i32 addrspace(1)* %out, i32 %tid
- %outgep1 = getelementptr i32 addrspace(1)* %out, i32 %tid2
+ %outgep0 = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %outgep1 = getelementptr i32, i32 addrspace(1)* %out, i32 %tid2

- %a = load i32 addrspace(1)* %gep0, align 4
- %b = load i32 addrspace(1)* %gep1, align 4
- %c = load i32 addrspace(1)* %gep2, align 4
- %d = load i32 addrspace(1)* %gep3, align 4
+ %a = load i32, i32 addrspace(1)* %gep0, align 4
+ %b = load i32, i32 addrspace(1)* %gep1, align 4
+ %c = load i32, i32 addrspace(1)* %gep2, align 4
+ %d = load i32, i32 addrspace(1)* %gep3, align 4
%icmp0 = icmp slt i32 %a, %b
%i0 = select i1 %icmp0, i32 %a, i32 %b
@@ -80,21 +80,21 @@ define void @v_test_umin_umin_umin(i32 addrspace(1)* %out, i32 addrspace(1)* %ap
define void @v_test_umin3_2_uses(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%tid2 = mul i32 %tid, 2
- %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
- %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
+ %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid

- %gep3 = getelementptr i32 addrspace(1)* %aptr, i32 %tid2
- %gep4 = getelementptr i32 addrspace(1)* %bptr, i32 %tid2
- %gep5 = getelementptr i32 addrspace(1)* %cptr, i32 %tid2
+ %gep3 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid2
+ %gep4 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid2
+ %gep5 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid2

- %outgep0 = getelementptr i32 addrspace(1)* %out, i32 %tid
- %outgep1 = getelementptr i32 addrspace(1)* %out, i32 %tid2
+ %outgep0 = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %outgep1 = getelementptr i32, i32 addrspace(1)* %out, i32 %tid2

- %a = load i32 addrspace(1)* %gep0, align 4
- %b = load i32 addrspace(1)* %gep1, align 4
- %c = load i32 addrspace(1)* %gep2, align 4
- %d = load i32 addrspace(1)* %gep3, align 4
+ %a = load i32, i32 addrspace(1)* %gep0, align 4
+ %b = load i32, i32 addrspace(1)* %gep1, align 4
+ %c = load i32, i32 addrspace(1)* %gep2, align 4
+ %d = load i32, i32 addrspace(1)* %gep3, align 4
%icmp0 = icmp slt i32 %a, %b
%i0 = select i1 %icmp0, i32 %a, i32 %b
diff --git a/test/CodeGen/R600/missing-store.ll b/test/CodeGen/R600/missing-store.ll
index 8ddef35..4af9cdf 100644
--- a/test/CodeGen/R600/missing-store.ll
+++ b/test/CodeGen/R600/missing-store.ll
@@ -12,11 +12,11 @@
; SI: buffer_store_dword
; SI: s_endpgm
define void @missing_store_reduced(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
- %ptr0 = load i32 addrspace(2)* addrspace(3)* @ptr_load, align 8
- %ptr2 = getelementptr inbounds i32 addrspace(2)* %ptr0, i64 2
+ %ptr0 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(3)* @ptr_load, align 8
+ %ptr2 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 2
store i32 99, i32 addrspace(1)* %gptr, align 4
- %tmp2 = load i32 addrspace(2)* %ptr2, align 4
+ %tmp2 = load i32, i32 addrspace(2)* %ptr2, align 4
store i32 %tmp2, i32 addrspace(1)* %out, align 4
ret void
diff --git a/test/CodeGen/R600/mubuf.ll b/test/CodeGen/R600/mubuf.ll
index 988e5c1..b19163f 100644
--- a/test/CodeGen/R600/mubuf.ll
+++ b/test/CodeGen/R600/mubuf.ll
@@ -11,8 +11,8 @@ declare i32 @llvm.r600.read.tidig.x() readnone
; CHECK: buffer_load_dword v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:4 ; encoding: [0x04,0x00,0x30,0xe0
define void @mubuf_load0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
- %0 = getelementptr i32 addrspace(1)* %in, i64 1
- %1 = load i32 addrspace(1)* %0
+ %0 = getelementptr i32, i32 addrspace(1)* %in, i64 1
+ %1 = load i32, i32 addrspace(1)* %0
store i32 %1, i32 addrspace(1)* %out
ret void
}
@@ -22,8 +22,8 @@ entry:
; CHECK: buffer_load_ubyte v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe0
define void @mubuf_load1(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
- %0 = getelementptr i8 addrspace(1)* %in, i64 4095
- %1 = load i8 addrspace(1)* %0
+ %0 = getelementptr i8, i8 addrspace(1)* %in, i64 4095
+ %1 = load i8, i8 addrspace(1)* %0
store i8 %1, i8 addrspace(1)* %out
ret void
}
@@ -34,8 +34,8 @@ entry:
; CHECK: buffer_load_dword v{{[0-9]}}, s[{{[0-9]+:[0-9]+}}], [[SOFFSET]] ; encoding: [0x00,0x00,0x30,0xe0
define void @mubuf_load2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
- %0 = getelementptr i32 addrspace(1)* %in, i64 1024
- %1 = load i32 addrspace(1)* %0
+ %0 = getelementptr i32, i32 addrspace(1)* %in, i64 1024
+ %1 = load i32, i32 addrspace(1)* %0
store i32 %1, i32 addrspace(1)* %out
ret void
}
@@ -46,9 +46,9 @@ entry:
; CHECK: buffer_load_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:4 ; encoding: [0x04,0x80,0x30,0xe0
define void @mubuf_load3(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i64 %offset) {
entry:
- %0 = getelementptr i32 addrspace(1)* %in, i64 %offset
- %1 = getelementptr i32 addrspace(1)* %0, i64 1
- %2 = load i32 addrspace(1)* %1
+ %0 = getelementptr i32, i32 addrspace(1)* %in, i64 %offset
+ %1 = getelementptr i32, i32 addrspace(1)* %0, i64 1
+ %2 = load i32, i32 addrspace(1)* %1
store i32 %2, i32 addrspace(1)* %out
ret void
}
@@ -57,8 +57,8 @@ entry:
; CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 64 offen glc
define void @soffset_max_imm([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) #1 {
main_body:
- %tmp0 = getelementptr [6 x <16 x i8>] addrspace(2)* %0, i32 0, i32 0
- %tmp1 = load <16 x i8> addrspace(2)* %tmp0
+ %tmp0 = getelementptr [6 x <16 x i8>], [6 x <16 x i8>] addrspace(2)* %0, i32 0, i32 0
+ %tmp1 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp0
%tmp2 = shl i32 %6, 2
%tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp1, i32 %tmp2, i32 64, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
%tmp4 = add i32 %6, 16
@@ -76,8 +76,8 @@ main_body:
; CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], [[SOFFSET]] offen glc
define void @soffset_no_fold([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) #1 {
main_body:
- %tmp0 = getelementptr [6 x <16 x i8>] addrspace(2)* %0, i32 0, i32 0
- %tmp1 = load <16 x i8> addrspace(2)* %tmp0
+ %tmp0 = getelementptr [6 x <16 x i8>], [6 x <16 x i8>] addrspace(2)* %0, i32 0, i32 0
+ %tmp1 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp0
%tmp2 = shl i32 %6, 2
%tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp1, i32 %tmp2, i32 65, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
%tmp4 = add i32 %6, 16
@@ -95,7 +95,7 @@ main_body:
; CHECK: buffer_store_dword v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:4 ; encoding: [0x04,0x00,0x70,0xe0
define void @mubuf_store0(i32 addrspace(1)* %out) {
entry:
- %0 = getelementptr i32 addrspace(1)* %out, i64 1
+ %0 = getelementptr i32, i32 addrspace(1)* %out, i64 1
store i32 0, i32 addrspace(1)* %0
ret void
}
@@ -106,7 +106,7 @@ entry:
define void @mubuf_store1(i8 addrspace(1)* %out) {
entry:
- %0 = getelementptr i8 addrspace(1)* %out, i64 4095
+ %0 = getelementptr i8, i8 addrspace(1)* %out, i64 4095
store i8 0, i8 addrspace(1)* %0
ret void
}
@@ -117,7 +117,7 @@ entry:
; CHECK: buffer_store_dword v{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[SOFFSET]] ; encoding: [0x00,0x00,0x70,0xe0
define void @mubuf_store2(i32 addrspace(1)* %out) {
entry:
- %0 = getelementptr i32 addrspace(1)* %out, i64 1024
+ %0 = getelementptr i32, i32 addrspace(1)* %out, i64 1024
store i32 0, i32 addrspace(1)* %0
ret void
}
@@ -128,8 +128,8 @@ entry:
; CHECK: buffer_store_dword v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 offset:4 ; encoding: [0x04,0x80,0x70,0xe0
define void @mubuf_store3(i32 addrspace(1)* %out, i64 %offset) {
entry:
- %0 = getelementptr i32 addrspace(1)* %out, i64 %offset
- %1 = getelementptr i32 addrspace(1)* %0, i64 1
+ %0 = getelementptr i32, i32 addrspace(1)* %out, i64 %offset
+ %1 = getelementptr i32, i32 addrspace(1)* %0, i64 1
store i32 0, i32 addrspace(1)* %1
ret void
}
@@ -144,7 +144,7 @@ define void @store_sgpr_ptr(i32 addrspace(1)* %out) #0 {
; CHECK-LABEL: {{^}}store_sgpr_ptr_offset:
; CHECK: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:40
define void @store_sgpr_ptr_offset(i32 addrspace(1)* %out) #0 {
- %out.gep = getelementptr i32 addrspace(1)* %out, i32 10
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 10
store i32 99, i32 addrspace(1)* %out.gep, align 4
ret void
}
@@ -153,7 +153,7 @@ define void @store_sgpr_ptr_offset(i32 addrspace(1)* %out) #0 {
; CHECK: s_mov_b32 [[SOFFSET:s[0-9]+]], 0x20000
; CHECK: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, [[SOFFSET]]
define void @store_sgpr_ptr_large_offset(i32 addrspace(1)* %out) #0 {
- %out.gep = getelementptr i32 addrspace(1)* %out, i32 32768
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 32768
store i32 99, i32 addrspace(1)* %out.gep, align 4
ret void
}
@@ -162,7 +162,7 @@ define void @store_sgpr_ptr_large_offset(i32 addrspace(1)* %out) #0 {
; CHECK: s_mov_b32 [[SOFFSET:s[0-9]+]], 0x20000
; CHECK: buffer_atomic_add v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, [[SOFFSET]]
define void @store_sgpr_ptr_large_offset_atomic(i32 addrspace(1)* %out) #0 {
- %gep = getelementptr i32 addrspace(1)* %out, i32 32768
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 32768
%val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 5 seq_cst
ret void
}
@@ -171,7 +171,7 @@ define void @store_sgpr_ptr_large_offset_atomic(i32 addrspace(1)* %out) #0 {
; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
define void @store_vgpr_ptr(i32 addrspace(1)* %out) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() readnone
- %out.gep = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
store i32 99, i32 addrspace(1)* %out.gep, align 4
ret void
}
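A worked sketch of the offset arithmetic the CHECK lines in this file encode; the element sizes come from the tests above, and the 12-bit limit is the MUBUF unsigned immediate-offset field:

; byte offset = GEP index * element size; it folds into the MUBUF
; encoding only while it fits the 12-bit unsigned immediate (0..4095):
;   mubuf_load0:  1    * 4 = 4      -> offset:4
;   mubuf_load1:  4095 * 1 = 4095   -> offset:4095 (field maximum)
;   mubuf_load2:  1024 * 4 = 4096   -> too wide, lives in [[SOFFSET]]
;   store_sgpr_ptr_offset:       10    * 4 = 40      -> offset:40
;   store_sgpr_ptr_large_offset: 32768 * 4 = 0x20000 -> s_mov_b32 [[SOFFSET]], 0x20000
; soffset_max_imm passes 64, the largest inline soffset constant;
; soffset_no_fold passes 65, which must be moved into a register first.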
diff --git a/test/CodeGen/R600/mul.ll b/test/CodeGen/R600/mul.ll
index 6f15e70..94e0f96 100644
--- a/test/CodeGen/R600/mul.ll
+++ b/test/CodeGen/R600/mul.ll
@@ -12,9 +12,9 @@
; SI: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test_mul_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32> addrspace(1) * %in
- %b = load <2 x i32> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
+ %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
+ %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
%result = mul <2 x i32> %a, %b
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
ret void
@@ -32,9 +32,9 @@ define void @test_mul_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)
; SI: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @v_mul_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32> addrspace(1) * %in
- %b = load <4 x i32> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
+ %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
+ %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
%result = mul <4 x i32> %a, %b
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
@@ -58,8 +58,8 @@ define void @s_trunc_i64_mul_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
; SI: v_mul_lo_i32
; SI: buffer_store_dword
define void @v_trunc_i64_mul_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
- %a = load i64 addrspace(1)* %aptr, align 8
- %b = load i64 addrspace(1)* %bptr, align 8
+ %a = load i64, i64 addrspace(1)* %aptr, align 8
+ %b = load i64, i64 addrspace(1)* %bptr, align 8
%mul = mul i64 %b, %a
%trunc = trunc i64 %mul to i32
store i32 %trunc, i32 addrspace(1)* %out, align 8
@@ -88,7 +88,7 @@ entry:
; SI-DAG: v_mul_hi_i32
; SI: s_endpgm
define void @v_mul64_sext_c(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %val = load i32 addrspace(1)* %in, align 4
+ %val = load i32, i32 addrspace(1)* %in, align 4
%ext = sext i32 %val to i64
%mul = mul i64 %ext, 80
store i64 %mul, i64 addrspace(1)* %out, align 8
@@ -100,7 +100,7 @@ define void @v_mul64_sext_c(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
; SI-DAG: v_mul_hi_i32 v{{[0-9]+}}, 9, v{{[0-9]+}}
; SI: s_endpgm
define void @v_mul64_sext_inline_imm(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %val = load i32 addrspace(1)* %in, align 4
+ %val = load i32, i32 addrspace(1)* %in, align 4
%ext = sext i32 %val to i64
%mul = mul i64 %ext, 9
store i64 %mul, i64 addrspace(1)* %out, align 8
@@ -123,9 +123,9 @@ define void @s_mul_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
; FUNC-LABEL: {{^}}v_mul_i32:
; SI: v_mul_lo_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define void @v_mul_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %a = load i32 addrspace(1)* %in
- %b = load i32 addrspace(1)* %b_ptr
+ %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %a = load i32, i32 addrspace(1)* %in
+ %b = load i32, i32 addrspace(1)* %b_ptr
%result = mul i32 %a, %b
store i32 %result, i32 addrspace(1)* %out
ret void
@@ -148,8 +148,8 @@ define void @s_mul_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
; FUNC-LABEL: {{^}}v_mul_i64:
; SI: v_mul_lo_i32
define void @v_mul_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
- %a = load i64 addrspace(1)* %aptr, align 8
- %b = load i64 addrspace(1)* %bptr, align 8
+ %a = load i64, i64 addrspace(1)* %aptr, align 8
+ %b = load i64, i64 addrspace(1)* %bptr, align 8
%mul = mul i64 %a, %b
store i64 %mul, i64 addrspace(1)* %out, align 8
ret void
@@ -163,7 +163,7 @@ entry:
br i1 %0, label %if, label %else
if:
- %1 = load i32 addrspace(1)* %in
+ %1 = load i32, i32 addrspace(1)* %in
br label %endif
else:
@@ -186,7 +186,7 @@ entry:
br i1 %0, label %if, label %else
if:
- %1 = load i64 addrspace(1)* %in
+ %1 = load i64, i64 addrspace(1)* %in
br label %endif
else:
diff --git a/test/CodeGen/R600/no-initializer-constant-addrspace.ll b/test/CodeGen/R600/no-initializer-constant-addrspace.ll
index 532edf0..9a814b5 100644
--- a/test/CodeGen/R600/no-initializer-constant-addrspace.ll
+++ b/test/CodeGen/R600/no-initializer-constant-addrspace.ll
@@ -6,7 +6,7 @@
; FUNC-LABEL: {{^}}load_extern_const_init:
define void @load_extern_const_init(i32 addrspace(1)* %out) nounwind {
- %val = load i32 addrspace(2)* getelementptr ([5 x i32] addrspace(2)* @extern_const_addrspace, i64 0, i64 3), align 4
+ %val = load i32, i32 addrspace(2)* getelementptr ([5 x i32], [5 x i32] addrspace(2)* @extern_const_addrspace, i64 0, i64 3), align 4
store i32 %val, i32 addrspace(1)* %out, align 4
ret void
}
@@ -15,7 +15,7 @@ define void @load_extern_const_init(i32 addrspace(1)* %out) nounwind {
; FUNC-LABEL: {{^}}load_undef_const_init:
define void @load_undef_const_init(i32 addrspace(1)* %out) nounwind {
- %val = load i32 addrspace(2)* getelementptr ([5 x i32] addrspace(2)* @undef_const_addrspace, i64 0, i64 3), align 4
+ %val = load i32, i32 addrspace(2)* getelementptr ([5 x i32], [5 x i32] addrspace(2)* @undef_const_addrspace, i64 0, i64 3), align 4
store i32 %val, i32 addrspace(1)* %out, align 4
ret void
}
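Constant-expression GEPs take the same explicit source type as instruction GEPs, which is why the change above lands twice inside a single load operand. A standalone sketch against a hypothetical external global @g:

@g = external addrspace(2) global [5 x i32]

define i32 @constgep_sketch() {
  ; source type [5 x i32] spelled out, then the usual 0 index plus element index
  %v = load i32, i32 addrspace(2)* getelementptr ([5 x i32], [5 x i32] addrspace(2)* @g, i64 0, i64 3), align 4
  ret i32 %v
}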
diff --git a/test/CodeGen/R600/no-shrink-extloads.ll b/test/CodeGen/R600/no-shrink-extloads.ll
index 3079492..e4328ec 100644
--- a/test/CodeGen/R600/no-shrink-extloads.ll
+++ b/test/CodeGen/R600/no-shrink-extloads.ll
@@ -23,9 +23,9 @@ define void @truncate_kernarg_i32_to_i16(i16 addrspace(1)* %out, i32 %arg) nounw
; SI: buffer_store_short v
define void @truncate_buffer_load_i32_to_i16(i16 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.in = getelementptr i32 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i16 addrspace(1)* %out, i32 %tid
- %load = load i32 addrspace(1)* %gep.in
+ %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
+ %load = load i32, i32 addrspace(1)* %gep.in
%trunc = trunc i32 %load to i16
store i16 %trunc, i16 addrspace(1)* %gep.out
ret void
@@ -45,9 +45,9 @@ define void @truncate_kernarg_i32_to_i8(i8 addrspace(1)* %out, i32 %arg) nounwin
; SI: buffer_store_byte v
define void @truncate_buffer_load_i32_to_i8(i8 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.in = getelementptr i32 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i8 addrspace(1)* %out, i32 %tid
- %load = load i32 addrspace(1)* %gep.in
+ %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
+ %load = load i32, i32 addrspace(1)* %gep.in
%trunc = trunc i32 %load to i8
store i8 %trunc, i8 addrspace(1)* %gep.out
ret void
@@ -67,9 +67,9 @@ define void @truncate_kernarg_i32_to_i1(i1 addrspace(1)* %out, i32 %arg) nounwin
; SI: buffer_store_byte v
define void @truncate_buffer_load_i32_to_i1(i1 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.in = getelementptr i32 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i1 addrspace(1)* %out, i32 %tid
- %load = load i32 addrspace(1)* %gep.in
+ %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i1, i1 addrspace(1)* %out, i32 %tid
+ %load = load i32, i32 addrspace(1)* %gep.in
%trunc = trunc i32 %load to i1
store i1 %trunc, i1 addrspace(1)* %gep.out
ret void
@@ -89,9 +89,9 @@ define void @truncate_kernarg_i64_to_i32(i32 addrspace(1)* %out, i64 %arg) nounw
; SI: buffer_store_dword v
define void @truncate_buffer_load_i64_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.in = getelementptr i64 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
- %load = load i64 addrspace(1)* %gep.in
+ %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %load = load i64, i64 addrspace(1)* %gep.in
%trunc = trunc i64 %load to i32
store i32 %trunc, i32 addrspace(1)* %gep.out
ret void
@@ -112,9 +112,9 @@ define void @srl_kernarg_i64_to_i32(i32 addrspace(1)* %out, i64 %arg) nounwind {
; SI: buffer_store_dword v
define void @srl_buffer_load_i64_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.in = getelementptr i64 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
- %load = load i64 addrspace(1)* %gep.in
+ %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %load = load i64, i64 addrspace(1)* %gep.in
%srl = lshr i64 %load, 32
%trunc = trunc i64 %srl to i32
store i32 %trunc, i32 addrspace(1)* %gep.out
@@ -136,9 +136,9 @@ define void @truncate_kernarg_i16_to_i8(i8 addrspace(1)* %out, i16 %arg) nounwin
; SI: buffer_store_byte v
define void @truncate_buffer_load_i16_to_i8(i8 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.in = getelementptr i16 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i8 addrspace(1)* %out, i32 %tid
- %load = load i16 addrspace(1)* %gep.in
+ %gep.in = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
+ %load = load i16, i16 addrspace(1)* %gep.in
%trunc = trunc i16 %load to i8
store i8 %trunc, i8 addrspace(1)* %gep.out
ret void
@@ -159,9 +159,9 @@ define void @srl_kernarg_i64_to_i8(i8 addrspace(1)* %out, i64 %arg) nounwind {
; SI: buffer_store_byte v
define void @srl_buffer_load_i64_to_i8(i8 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.in = getelementptr i64 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i8 addrspace(1)* %out, i32 %tid
- %load = load i64 addrspace(1)* %gep.in
+ %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
+ %load = load i64, i64 addrspace(1)* %gep.in
%srl = lshr i64 %load, 32
%trunc = trunc i64 %srl to i8
store i8 %trunc, i8 addrspace(1)* %gep.out
@@ -182,9 +182,9 @@ define void @truncate_kernarg_i64_to_i8(i8 addrspace(1)* %out, i64 %arg) nounwin
; SI: buffer_store_byte v
define void @truncate_buffer_load_i64_to_i8(i8 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.in = getelementptr i64 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i8 addrspace(1)* %out, i32 %tid
- %load = load i64 addrspace(1)* %gep.in
+ %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
+ %load = load i64, i64 addrspace(1)* %gep.in
%trunc = trunc i64 %load to i8
store i8 %trunc, i8 addrspace(1)* %gep.out
ret void
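The srl variants in this file read only the upper half of the i64 before truncating; the shift count is what selects the surviving dword. Sketch (hypothetical function @hi32_sketch):

define i32 @hi32_sketch(i64 %x) {
  %srl = lshr i64 %x, 32         ; bits [63:32] shift down to [31:0]
  %hi = trunc i64 %srl to i32    ; keep the former high dword
  ret i32 %hi
}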
diff --git a/test/CodeGen/R600/operand-folding.ll b/test/CodeGen/R600/operand-folding.ll
index 88a8145..4bf748e 100644
--- a/test/CodeGen/R600/operand-folding.ll
+++ b/test/CodeGen/R600/operand-folding.ll
@@ -10,7 +10,7 @@ entry:
if:
%id = call i32 @llvm.r600.read.tidig.x()
%offset = add i32 %fold, %id
- %tmp1 = getelementptr i32 addrspace(1)* %out, i32 %offset
+ %tmp1 = getelementptr i32, i32 addrspace(1)* %out, i32 %offset
store i32 0, i32 addrspace(1)* %tmp1
br label %endif
diff --git a/test/CodeGen/R600/or.ll b/test/CodeGen/R600/or.ll
index 78879a8..1c04090 100644
--- a/test/CodeGen/R600/or.ll
+++ b/test/CodeGen/R600/or.ll
@@ -10,9 +10,9 @@
; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @or_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32> addrspace(1) * %in
- %b = load <2 x i32> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
+ %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
+ %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
%result = or <2 x i32> %a, %b
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
ret void
@@ -29,9 +29,9 @@ define void @or_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in)
; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @or_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32> addrspace(1) * %in
- %b = load <4 x i32> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
+ %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
+ %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
%result = or <4 x i32> %a, %b
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
@@ -48,7 +48,7 @@ define void @scalar_or_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
; FUNC-LABEL: {{^}}vector_or_i32:
; SI: v_or_b32_e32 v{{[0-9]}}
define void @vector_or_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 %b) {
- %loada = load i32 addrspace(1)* %a
+ %loada = load i32, i32 addrspace(1)* %a
%or = or i32 %loada, %b
store i32 %or, i32 addrspace(1)* %out
ret void
@@ -65,7 +65,7 @@ define void @scalar_or_literal_i32(i32 addrspace(1)* %out, i32 %a) {
; FUNC-LABEL: {{^}}vector_or_literal_i32:
; SI: v_or_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
define void @vector_or_literal_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
- %loada = load i32 addrspace(1)* %a, align 4
+ %loada = load i32, i32 addrspace(1)* %a, align 4
%or = or i32 %loada, 65535
store i32 %or, i32 addrspace(1)* %out, align 4
ret void
@@ -74,7 +74,7 @@ define void @vector_or_literal_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a,
; FUNC-LABEL: {{^}}vector_or_inline_immediate_i32:
; SI: v_or_b32_e32 v{{[0-9]+}}, 4, v{{[0-9]+}}
define void @vector_or_inline_immediate_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
- %loada = load i32 addrspace(1)* %a, align 4
+ %loada = load i32, i32 addrspace(1)* %a, align 4
%or = or i32 %loada, 4
store i32 %or, i32 addrspace(1)* %out, align 4
ret void
@@ -95,8 +95,8 @@ define void @scalar_or_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
; SI: v_or_b32_e32 v{{[0-9]}}
; SI: v_or_b32_e32 v{{[0-9]}}
define void @vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64 addrspace(1)* %a, align 8
- %loadb = load i64 addrspace(1)* %a, align 8
+ %loada = load i64, i64 addrspace(1)* %a, align 8
+ %loadb = load i64, i64 addrspace(1)* %a, align 8
%or = or i64 %loada, %loadb
store i64 %or, i64 addrspace(1)* %out
ret void
@@ -106,7 +106,7 @@ define void @vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
; SI: v_or_b32_e32 v{{[0-9]}}
; SI: v_or_b32_e32 v{{[0-9]}}
define void @scalar_vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 %b) {
- %loada = load i64 addrspace(1)* %a
+ %loada = load i64, i64 addrspace(1)* %a
%or = or i64 %loada, %b
store i64 %or, i64 addrspace(1)* %out
ret void
@@ -120,7 +120,7 @@ define void @scalar_vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a,
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
define void @vector_or_i64_loadimm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64 addrspace(1)* %a, align 8
+ %loada = load i64, i64 addrspace(1)* %a, align 8
%or = or i64 %loada, 22470723082367
store i64 %or, i64 addrspace(1)* %out
ret void
@@ -133,7 +133,7 @@ define void @vector_or_i64_loadimm(i64 addrspace(1)* %out, i64 addrspace(1)* %a,
; SI: v_or_b32_e32 {{v[0-9]+}}, 0, {{.*}}
; SI: s_endpgm
define void @vector_or_i64_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64 addrspace(1)* %a, align 8
+ %loada = load i64, i64 addrspace(1)* %a, align 8
%or = or i64 %loada, 8
store i64 %or, i64 addrspace(1)* %out
ret void
@@ -155,15 +155,15 @@ define void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
; FUNC-LABEL: {{^}}or_i1:
; EG: OR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}}
-; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
-define void @or_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
- %a = load float addrspace(1)* %in0
- %b = load float addrspace(1)* %in1
+; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], vcc, s[{{[0-9]+:[0-9]+}}]
+define void @or_i1(i32 addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
+ %a = load float, float addrspace(1)* %in0
+ %b = load float, float addrspace(1)* %in1
%acmp = fcmp oge float %a, 0.000000e+00
%bcmp = fcmp oge float %b, 0.000000e+00
%or = or i1 %acmp, %bcmp
- %result = select i1 %or, float %a, float %b
- store float %result, float addrspace(1)* %out
+ %result = zext i1 %or to i32
+ store i32 %result, i32 addrspace(1)* %out
ret void
}
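On SI a compare result of type i1 lives in a 64-bit lane mask (vcc or an SGPR pair), so the i1 or above is expected to stay scalar as s_or_b64; the rewritten test stores the bit as an i32 instead of selecting floats, so the mask itself is what gets verified. A sketch under those assumptions (hypothetical @ori1_sketch):

define i32 @ori1_sketch(float %a, float %b) {
  %ac = fcmp oge float %a, 0.000000e+00   ; lane mask 1 (lands in vcc)
  %bc = fcmp oge float %b, 0.000000e+00   ; lane mask 2
  %or = or i1 %ac, %bc                    ; expected to select s_or_b64
  %r = zext i1 %or to i32                 ; materialize the bit as 0/1
  ret i32 %r
}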
diff --git a/test/CodeGen/R600/parallelandifcollapse.ll b/test/CodeGen/R600/parallelandifcollapse.ll
index 82b1150..f32b044 100644
--- a/test/CodeGen/R600/parallelandifcollapse.ll
+++ b/test/CodeGen/R600/parallelandifcollapse.ll
@@ -23,14 +23,14 @@ entry:
%c1 = alloca i32, align 4
%d1 = alloca i32, align 4
%data = alloca i32, align 4
- %0 = load i32* %a0, align 4
- %1 = load i32* %b0, align 4
+ %0 = load i32, i32* %a0, align 4
+ %1 = load i32, i32* %b0, align 4
%cmp = icmp ne i32 %0, %1
br i1 %cmp, label %land.lhs.true, label %if.end
land.lhs.true: ; preds = %entry
- %2 = load i32* %c0, align 4
- %3 = load i32* %d0, align 4
+ %2 = load i32, i32* %c0, align 4
+ %3 = load i32, i32* %d0, align 4
%cmp1 = icmp ne i32 %2, %3
br i1 %cmp1, label %if.then, label %if.end
@@ -39,14 +39,14 @@ if.then: ; preds = %land.lhs.true
br label %if.end
if.end: ; preds = %if.then, %land.lhs.true, %entry
- %4 = load i32* %a1, align 4
- %5 = load i32* %b1, align 4
+ %4 = load i32, i32* %a1, align 4
+ %5 = load i32, i32* %b1, align 4
%cmp2 = icmp ne i32 %4, %5
br i1 %cmp2, label %land.lhs.true3, label %if.end6
land.lhs.true3: ; preds = %if.end
- %6 = load i32* %c1, align 4
- %7 = load i32* %d1, align 4
+ %6 = load i32, i32* %c1, align 4
+ %7 = load i32, i32* %d1, align 4
%cmp4 = icmp ne i32 %6, %7
br i1 %cmp4, label %if.then5, label %if.end6
diff --git a/test/CodeGen/R600/parallelorifcollapse.ll b/test/CodeGen/R600/parallelorifcollapse.ll
index feca688..1da1e91b8 100644
--- a/test/CodeGen/R600/parallelorifcollapse.ll
+++ b/test/CodeGen/R600/parallelorifcollapse.ll
@@ -23,14 +23,14 @@ entry:
%c1 = alloca i32, align 4
%d1 = alloca i32, align 4
%data = alloca i32, align 4
- %0 = load i32* %a0, align 4
- %1 = load i32* %b0, align 4
+ %0 = load i32, i32* %a0, align 4
+ %1 = load i32, i32* %b0, align 4
%cmp = icmp ne i32 %0, %1
br i1 %cmp, label %land.lhs.true, label %if.else
land.lhs.true: ; preds = %entry
- %2 = load i32* %c0, align 4
- %3 = load i32* %d0, align 4
+ %2 = load i32, i32* %c0, align 4
+ %3 = load i32, i32* %d0, align 4
%cmp1 = icmp ne i32 %2, %3
br i1 %cmp1, label %if.then, label %if.else
@@ -42,14 +42,14 @@ if.else: ; preds = %land.lhs.true, %ent
br label %if.end
if.end: ; preds = %if.else, %if.then
- %4 = load i32* %a1, align 4
- %5 = load i32* %b1, align 4
+ %4 = load i32, i32* %a1, align 4
+ %5 = load i32, i32* %b1, align 4
%cmp2 = icmp ne i32 %4, %5
br i1 %cmp2, label %land.lhs.true3, label %if.else6
land.lhs.true3: ; preds = %if.end
- %6 = load i32* %c1, align 4
- %7 = load i32* %d1, align 4
+ %6 = load i32, i32* %c1, align 4
+ %7 = load i32, i32* %d1, align 4
%cmp4 = icmp ne i32 %6, %7
br i1 %cmp4, label %if.then5, label %if.else6
diff --git a/test/CodeGen/R600/private-memory-atomics.ll b/test/CodeGen/R600/private-memory-atomics.ll
index 3ceb0c0..a008ac9 100644
--- a/test/CodeGen/R600/private-memory-atomics.ll
+++ b/test/CodeGen/R600/private-memory-atomics.ll
@@ -7,11 +7,11 @@
define void @atomicrmw_private(i32 addrspace(1)* %out, i32 %in) nounwind {
entry:
%tmp = alloca [2 x i32]
- %tmp1 = getelementptr [2 x i32]* %tmp, i32 0, i32 0
- %tmp2 = getelementptr [2 x i32]* %tmp, i32 0, i32 1
+ %tmp1 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 0
+ %tmp2 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 1
store i32 0, i32* %tmp1
store i32 1, i32* %tmp2
- %tmp3 = getelementptr [2 x i32]* %tmp, i32 0, i32 %in
+ %tmp3 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 %in
%tmp4 = atomicrmw add i32* %tmp3, i32 7 acq_rel
store i32 %tmp4, i32 addrspace(1)* %out
ret void
@@ -20,11 +20,11 @@ entry:
define void @cmpxchg_private(i32 addrspace(1)* %out, i32 %in) nounwind {
entry:
%tmp = alloca [2 x i32]
- %tmp1 = getelementptr [2 x i32]* %tmp, i32 0, i32 0
- %tmp2 = getelementptr [2 x i32]* %tmp, i32 0, i32 1
+ %tmp1 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 0
+ %tmp2 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 1
store i32 0, i32* %tmp1
store i32 1, i32* %tmp2
- %tmp3 = getelementptr [2 x i32]* %tmp, i32 0, i32 %in
+ %tmp3 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 %in
%tmp4 = cmpxchg i32* %tmp3, i32 0, i32 1 acq_rel monotonic
%val = extractvalue { i32, i1 } %tmp4, 0
store i32 %val, i32 addrspace(1)* %out
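cmpxchg returns a { value, success } pair with the loaded value first, which is what the extractvalue above unpacks; acq_rel is the success ordering and monotonic the failure ordering. Minimal standalone sketch (hypothetical @cmpxchg_sketch):

define i32 @cmpxchg_sketch(i32* %p) {
  ; compare against 0, write 1 if it matches
  %pair = cmpxchg i32* %p, i32 0, i32 1 acq_rel monotonic
  %old = extractvalue { i32, i1 } %pair, 0   ; the value that was loaded
  ret i32 %old
}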
diff --git a/test/CodeGen/R600/private-memory-broken.ll b/test/CodeGen/R600/private-memory-broken.ll
index 10590a9..6b18a19 100644
--- a/test/CodeGen/R600/private-memory-broken.ll
+++ b/test/CodeGen/R600/private-memory-broken.ll
@@ -10,11 +10,11 @@ declare i32 @foo(i32*) nounwind
define void @call_private(i32 addrspace(1)* %out, i32 %in) nounwind {
entry:
%tmp = alloca [2 x i32]
- %tmp1 = getelementptr [2 x i32]* %tmp, i32 0, i32 0
- %tmp2 = getelementptr [2 x i32]* %tmp, i32 0, i32 1
+ %tmp1 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 0
+ %tmp2 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 1
store i32 0, i32* %tmp1
store i32 1, i32* %tmp2
- %tmp3 = getelementptr [2 x i32]* %tmp, i32 0, i32 %in
+ %tmp3 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 %in
%val = call i32 @foo(i32* %tmp3) nounwind
store i32 %val, i32 addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/private-memory.ll b/test/CodeGen/R600/private-memory.ll
index b03029c..1c56297 100644
--- a/test/CodeGen/R600/private-memory.ll
+++ b/test/CodeGen/R600/private-memory.ll
@@ -23,19 +23,19 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
entry:
%stack = alloca [5 x i32], align 4
- %0 = load i32 addrspace(1)* %in, align 4
- %arrayidx1 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 %0
+ %0 = load i32, i32 addrspace(1)* %in, align 4
+ %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0
store i32 4, i32* %arrayidx1, align 4
- %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %in, i32 1
- %1 = load i32 addrspace(1)* %arrayidx2, align 4
- %arrayidx3 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 %1
+ %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
+ %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1
store i32 5, i32* %arrayidx3, align 4
- %arrayidx10 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 0
- %2 = load i32* %arrayidx10, align 4
+ %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
+ %2 = load i32, i32* %arrayidx10, align 4
store i32 %2, i32 addrspace(1)* %out, align 4
- %arrayidx12 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 1
- %3 = load i32* %arrayidx12
- %arrayidx13 = getelementptr inbounds i32 addrspace(1)* %out, i32 1
+ %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
+ %3 = load i32, i32* %arrayidx12
+ %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
store i32 %3, i32 addrspace(1)* %arrayidx13
ret void
}
@@ -57,18 +57,18 @@ define void @multiple_structs(i32 addrspace(1)* %out) {
entry:
%a = alloca %struct.point
%b = alloca %struct.point
- %a.x.ptr = getelementptr %struct.point* %a, i32 0, i32 0
- %a.y.ptr = getelementptr %struct.point* %a, i32 0, i32 1
- %b.x.ptr = getelementptr %struct.point* %b, i32 0, i32 0
- %b.y.ptr = getelementptr %struct.point* %b, i32 0, i32 1
+ %a.x.ptr = getelementptr %struct.point, %struct.point* %a, i32 0, i32 0
+ %a.y.ptr = getelementptr %struct.point, %struct.point* %a, i32 0, i32 1
+ %b.x.ptr = getelementptr %struct.point, %struct.point* %b, i32 0, i32 0
+ %b.y.ptr = getelementptr %struct.point, %struct.point* %b, i32 0, i32 1
store i32 0, i32* %a.x.ptr
store i32 1, i32* %a.y.ptr
store i32 2, i32* %b.x.ptr
store i32 3, i32* %b.y.ptr
- %a.indirect.ptr = getelementptr %struct.point* %a, i32 0, i32 0
- %b.indirect.ptr = getelementptr %struct.point* %b, i32 0, i32 0
- %a.indirect = load i32* %a.indirect.ptr
- %b.indirect = load i32* %b.indirect.ptr
+ %a.indirect.ptr = getelementptr %struct.point, %struct.point* %a, i32 0, i32 0
+ %b.indirect.ptr = getelementptr %struct.point, %struct.point* %b, i32 0, i32 0
+ %a.indirect = load i32, i32* %a.indirect.ptr
+ %b.indirect = load i32, i32* %b.indirect.ptr
%0 = add i32 %a.indirect, %b.indirect
store i32 %0, i32 addrspace(1)* %out
ret void
@@ -86,21 +86,21 @@ define void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
%prv_array_const = alloca [2 x i32]
%prv_array = alloca [2 x i32]
- %a = load i32 addrspace(1)* %in
- %b_src_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %b = load i32 addrspace(1)* %b_src_ptr
- %a_dst_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 0
+ %a = load i32, i32 addrspace(1)* %in
+ %b_src_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %b = load i32, i32 addrspace(1)* %b_src_ptr
+ %a_dst_ptr = getelementptr [2 x i32], [2 x i32]* %prv_array_const, i32 0, i32 0
store i32 %a, i32* %a_dst_ptr
- %b_dst_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 1
+ %b_dst_ptr = getelementptr [2 x i32], [2 x i32]* %prv_array_const, i32 0, i32 1
store i32 %b, i32* %b_dst_ptr
br label %for.body
for.body:
%inc = phi i32 [0, %entry], [%count, %for.body]
- %x_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 0
- %x = load i32* %x_ptr
- %y_ptr = getelementptr [2 x i32]* %prv_array, i32 0, i32 0
- %y = load i32* %y_ptr
+ %x_ptr = getelementptr [2 x i32], [2 x i32]* %prv_array_const, i32 0, i32 0
+ %x = load i32, i32* %x_ptr
+ %y_ptr = getelementptr [2 x i32], [2 x i32]* %prv_array, i32 0, i32 0
+ %y = load i32, i32* %y_ptr
%xy = add i32 %x, %y
store i32 %xy, i32* %y_ptr
%count = add i32 %inc, 1
@@ -108,8 +108,8 @@ for.body:
br i1 %done, label %for.end, label %for.body
for.end:
- %value_ptr = getelementptr [2 x i32]* %prv_array, i32 0, i32 0
- %value = load i32* %value_ptr
+ %value_ptr = getelementptr [2 x i32], [2 x i32]* %prv_array, i32 0, i32 0
+ %value = load i32, i32* %value_ptr
store i32 %value, i32 addrspace(1)* %out
ret void
}
@@ -124,12 +124,12 @@ for.end:
define void @short_array(i32 addrspace(1)* %out, i32 %index) {
entry:
%0 = alloca [2 x i16]
- %1 = getelementptr [2 x i16]* %0, i32 0, i32 0
- %2 = getelementptr [2 x i16]* %0, i32 0, i32 1
+ %1 = getelementptr [2 x i16], [2 x i16]* %0, i32 0, i32 0
+ %2 = getelementptr [2 x i16], [2 x i16]* %0, i32 0, i32 1
store i16 0, i16* %1
store i16 1, i16* %2
- %3 = getelementptr [2 x i16]* %0, i32 0, i32 %index
- %4 = load i16* %3
+ %3 = getelementptr [2 x i16], [2 x i16]* %0, i32 0, i32 %index
+ %4 = load i16, i16* %3
%5 = sext i16 %4 to i32
store i32 %5, i32 addrspace(1)* %out
ret void
@@ -144,12 +144,12 @@ entry:
define void @char_array(i32 addrspace(1)* %out, i32 %index) {
entry:
%0 = alloca [2 x i8]
- %1 = getelementptr [2 x i8]* %0, i32 0, i32 0
- %2 = getelementptr [2 x i8]* %0, i32 0, i32 1
+ %1 = getelementptr [2 x i8], [2 x i8]* %0, i32 0, i32 0
+ %2 = getelementptr [2 x i8], [2 x i8]* %0, i32 0, i32 1
store i8 0, i8* %1
store i8 1, i8* %2
- %3 = getelementptr [2 x i8]* %0, i32 0, i32 %index
- %4 = load i8* %3
+ %3 = getelementptr [2 x i8], [2 x i8]* %0, i32 0, i32 %index
+ %4 = load i8, i8* %3
%5 = sext i8 %4 to i32
store i32 %5, i32 addrspace(1)* %out
ret void
@@ -167,12 +167,12 @@ entry:
define void @work_item_info(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = alloca [2 x i32]
- %1 = getelementptr [2 x i32]* %0, i32 0, i32 0
- %2 = getelementptr [2 x i32]* %0, i32 0, i32 1
+ %1 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 0
+ %2 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 1
store i32 0, i32* %1
store i32 1, i32* %2
- %3 = getelementptr [2 x i32]* %0, i32 0, i32 %in
- %4 = load i32* %3
+ %3 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 %in
+ %4 = load i32, i32* %3
%5 = call i32 @llvm.r600.read.tidig.x()
%6 = add i32 %4, %5
store i32 %6, i32 addrspace(1)* %out
@@ -190,20 +190,20 @@ define void @no_overlap(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = alloca [3 x i8], align 1
%1 = alloca [2 x i8], align 1
- %2 = getelementptr [3 x i8]* %0, i32 0, i32 0
- %3 = getelementptr [3 x i8]* %0, i32 0, i32 1
- %4 = getelementptr [3 x i8]* %0, i32 0, i32 2
- %5 = getelementptr [2 x i8]* %1, i32 0, i32 0
- %6 = getelementptr [2 x i8]* %1, i32 0, i32 1
+ %2 = getelementptr [3 x i8], [3 x i8]* %0, i32 0, i32 0
+ %3 = getelementptr [3 x i8], [3 x i8]* %0, i32 0, i32 1
+ %4 = getelementptr [3 x i8], [3 x i8]* %0, i32 0, i32 2
+ %5 = getelementptr [2 x i8], [2 x i8]* %1, i32 0, i32 0
+ %6 = getelementptr [2 x i8], [2 x i8]* %1, i32 0, i32 1
store i8 0, i8* %2
store i8 1, i8* %3
store i8 2, i8* %4
store i8 1, i8* %5
store i8 0, i8* %6
- %7 = getelementptr [3 x i8]* %0, i32 0, i32 %in
- %8 = getelementptr [2 x i8]* %1, i32 0, i32 %in
- %9 = load i8* %7
- %10 = load i8* %8
+ %7 = getelementptr [3 x i8], [3 x i8]* %0, i32 0, i32 %in
+ %8 = getelementptr [2 x i8], [2 x i8]* %1, i32 0, i32 %in
+ %9 = load i8, i8* %7
+ %10 = load i8, i8* %8
%11 = add i8 %9, %10
%12 = sext i8 %11 to i32
store i32 %12, i32 addrspace(1)* %out
@@ -213,12 +213,12 @@ entry:
define void @char_array_array(i32 addrspace(1)* %out, i32 %index) {
entry:
%alloca = alloca [2 x [2 x i8]]
- %gep0 = getelementptr [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 0
- %gep1 = getelementptr [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 1
+ %gep0 = getelementptr [2 x [2 x i8]], [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 0
+ %gep1 = getelementptr [2 x [2 x i8]], [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 1
store i8 0, i8* %gep0
store i8 1, i8* %gep1
- %gep2 = getelementptr [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 %index
- %load = load i8* %gep2
+ %gep2 = getelementptr [2 x [2 x i8]], [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 %index
+ %load = load i8, i8* %gep2
%sext = sext i8 %load to i32
store i32 %sext, i32 addrspace(1)* %out
ret void
@@ -227,12 +227,12 @@ entry:
define void @i32_array_array(i32 addrspace(1)* %out, i32 %index) {
entry:
%alloca = alloca [2 x [2 x i32]]
- %gep0 = getelementptr [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 0
- %gep1 = getelementptr [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 1
+ %gep0 = getelementptr [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 0
+ %gep1 = getelementptr [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 1
store i32 0, i32* %gep0
store i32 1, i32* %gep1
- %gep2 = getelementptr [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 %index
- %load = load i32* %gep2
+ %gep2 = getelementptr [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 %index
+ %load = load i32, i32* %gep2
store i32 %load, i32 addrspace(1)* %out
ret void
}
@@ -240,12 +240,12 @@ entry:
define void @i64_array_array(i64 addrspace(1)* %out, i32 %index) {
entry:
%alloca = alloca [2 x [2 x i64]]
- %gep0 = getelementptr [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 0
- %gep1 = getelementptr [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 1
+ %gep0 = getelementptr [2 x [2 x i64]], [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 0
+ %gep1 = getelementptr [2 x [2 x i64]], [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 1
store i64 0, i64* %gep0
store i64 1, i64* %gep1
- %gep2 = getelementptr [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 %index
- %load = load i64* %gep2
+ %gep2 = getelementptr [2 x [2 x i64]], [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 %index
+ %load = load i64, i64* %gep2
store i64 %load, i64 addrspace(1)* %out
ret void
}
@@ -255,12 +255,12 @@ entry:
define void @struct_array_array(i32 addrspace(1)* %out, i32 %index) {
entry:
%alloca = alloca [2 x [2 x %struct.pair32]]
- %gep0 = getelementptr [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 0, i32 1
- %gep1 = getelementptr [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 1, i32 1
+ %gep0 = getelementptr [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 0, i32 1
+ %gep1 = getelementptr [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 1, i32 1
store i32 0, i32* %gep0
store i32 1, i32* %gep1
- %gep2 = getelementptr [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 %index, i32 0
- %load = load i32* %gep2
+ %gep2 = getelementptr [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 %index, i32 0
+ %load = load i32, i32* %gep2
store i32 %load, i32 addrspace(1)* %out
ret void
}
@@ -268,12 +268,12 @@ entry:
define void @struct_pair32_array(i32 addrspace(1)* %out, i32 %index) {
entry:
%alloca = alloca [2 x %struct.pair32]
- %gep0 = getelementptr [2 x %struct.pair32]* %alloca, i32 0, i32 0, i32 1
- %gep1 = getelementptr [2 x %struct.pair32]* %alloca, i32 0, i32 1, i32 0
+ %gep0 = getelementptr [2 x %struct.pair32], [2 x %struct.pair32]* %alloca, i32 0, i32 0, i32 1
+ %gep1 = getelementptr [2 x %struct.pair32], [2 x %struct.pair32]* %alloca, i32 0, i32 1, i32 0
store i32 0, i32* %gep0
store i32 1, i32* %gep1
- %gep2 = getelementptr [2 x %struct.pair32]* %alloca, i32 0, i32 %index, i32 0
- %load = load i32* %gep2
+ %gep2 = getelementptr [2 x %struct.pair32], [2 x %struct.pair32]* %alloca, i32 0, i32 %index, i32 0
+ %load = load i32, i32* %gep2
store i32 %load, i32 addrspace(1)* %out
ret void
}
@@ -281,13 +281,13 @@ entry:
define void @select_private(i32 addrspace(1)* %out, i32 %in) nounwind {
entry:
%tmp = alloca [2 x i32]
- %tmp1 = getelementptr [2 x i32]* %tmp, i32 0, i32 0
- %tmp2 = getelementptr [2 x i32]* %tmp, i32 0, i32 1
+ %tmp1 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 0
+ %tmp2 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 1
store i32 0, i32* %tmp1
store i32 1, i32* %tmp2
%cmp = icmp eq i32 %in, 0
%sel = select i1 %cmp, i32* %tmp1, i32* %tmp2
- %load = load i32* %sel
+ %load = load i32, i32* %sel
store i32 %load, i32 addrspace(1)* %out
ret void
}
@@ -301,13 +301,13 @@ entry:
; SI: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:5
define void @ptrtoint(i32 addrspace(1)* %out, i32 %a, i32 %b) {
%alloca = alloca [16 x i32]
- %tmp0 = getelementptr [16 x i32]* %alloca, i32 0, i32 %a
+ %tmp0 = getelementptr [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
store i32 5, i32* %tmp0
%tmp1 = ptrtoint [16 x i32]* %alloca to i32
%tmp2 = add i32 %tmp1, 5
%tmp3 = inttoptr i32 %tmp2 to i32*
- %tmp4 = getelementptr i32* %tmp3, i32 %b
- %tmp5 = load i32* %tmp4
+ %tmp4 = getelementptr i32, i32* %tmp3, i32 %b
+ %tmp5 = load i32, i32* %tmp4
store i32 %tmp5, i32 addrspace(1)* %out
ret void
}
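The ptrtoint test above does its arithmetic in the integer domain, so the +5 stays a byte-granular bias (the checked offen offset:5) while %b is still scaled by 4 through the GEP. Sketch of the address computation (hypothetical @ptrbias_sketch; i32-sized pointers assumed purely for illustration):

define i32 @ptrbias_sketch(i32* %base, i32 %b) {
  %i = ptrtoint i32* %base to i32          ; leave pointer land
  %i5 = add i32 %i, 5                      ; byte offset, not element offset
  %p = inttoptr i32 %i5 to i32*            ; back to a pointer
  %q = getelementptr i32, i32* %p, i32 %b  ; element offset, scaled by 4
  %v = load i32, i32* %q
  ret i32 %v
}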
diff --git a/test/CodeGen/R600/pv-packing.ll b/test/CodeGen/R600/pv-packing.ll
index e5615b9..abeae56 100644
--- a/test/CodeGen/R600/pv-packing.ll
+++ b/test/CodeGen/R600/pv-packing.ll
@@ -14,8 +14,8 @@ main_body:
%6 = extractelement <4 x float> %reg3, i32 0
%7 = extractelement <4 x float> %reg3, i32 1
%8 = extractelement <4 x float> %reg3, i32 2
- %9 = load <4 x float> addrspace(8)* null
- %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %9 = load <4 x float>, <4 x float> addrspace(8)* null
+ %10 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%11 = call float @llvm.AMDGPU.dp4(<4 x float> %9, <4 x float> %9)
%12 = fmul float %0, %3
%13 = fadd float %12, %6
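In these R600 shader tests addrspace(8) is the backend's first constant-buffer address space, so a [1024 x <4 x float>] GEP based at null simply names constant slot N; under the new syntax the array type appears twice, once as the GEP source type and once in the pointer operand. Standalone sketch of a slot-1 read (hypothetical @cbuf_sketch):

define <4 x float> @cbuf_sketch() {
  ; null base + (0, 1) indices = constant-buffer slot 1
  %slot1 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
  ret <4 x float> %slot1
}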
diff --git a/test/CodeGen/R600/pv.ll b/test/CodeGen/R600/pv.ll
index 1908f15..9a57dd1 100644
--- a/test/CodeGen/R600/pv.ll
+++ b/test/CodeGen/R600/pv.ll
@@ -33,63 +33,63 @@ main_body:
%25 = extractelement <4 x float> %reg7, i32 1
%26 = extractelement <4 x float> %reg7, i32 2
%27 = extractelement <4 x float> %reg7, i32 3
- %28 = load <4 x float> addrspace(8)* null
+ %28 = load <4 x float>, <4 x float> addrspace(8)* null
%29 = extractelement <4 x float> %28, i32 0
%30 = fmul float %0, %29
- %31 = load <4 x float> addrspace(8)* null
+ %31 = load <4 x float>, <4 x float> addrspace(8)* null
%32 = extractelement <4 x float> %31, i32 1
%33 = fmul float %0, %32
- %34 = load <4 x float> addrspace(8)* null
+ %34 = load <4 x float>, <4 x float> addrspace(8)* null
%35 = extractelement <4 x float> %34, i32 2
%36 = fmul float %0, %35
- %37 = load <4 x float> addrspace(8)* null
+ %37 = load <4 x float>, <4 x float> addrspace(8)* null
%38 = extractelement <4 x float> %37, i32 3
%39 = fmul float %0, %38
- %40 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %40 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%41 = extractelement <4 x float> %40, i32 0
%42 = fmul float %1, %41
%43 = fadd float %42, %30
- %44 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %44 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%45 = extractelement <4 x float> %44, i32 1
%46 = fmul float %1, %45
%47 = fadd float %46, %33
- %48 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %48 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%49 = extractelement <4 x float> %48, i32 2
%50 = fmul float %1, %49
%51 = fadd float %50, %36
- %52 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %52 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%53 = extractelement <4 x float> %52, i32 3
%54 = fmul float %1, %53
%55 = fadd float %54, %39
- %56 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %56 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%57 = extractelement <4 x float> %56, i32 0
%58 = fmul float %2, %57
%59 = fadd float %58, %43
- %60 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %60 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%61 = extractelement <4 x float> %60, i32 1
%62 = fmul float %2, %61
%63 = fadd float %62, %47
- %64 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %64 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%65 = extractelement <4 x float> %64, i32 2
%66 = fmul float %2, %65
%67 = fadd float %66, %51
- %68 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %68 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%69 = extractelement <4 x float> %68, i32 3
%70 = fmul float %2, %69
%71 = fadd float %70, %55
- %72 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %72 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
%73 = extractelement <4 x float> %72, i32 0
%74 = fmul float %3, %73
%75 = fadd float %74, %59
- %76 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %76 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
%77 = extractelement <4 x float> %76, i32 1
%78 = fmul float %3, %77
%79 = fadd float %78, %63
- %80 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %80 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
%81 = extractelement <4 x float> %80, i32 2
%82 = fmul float %3, %81
%83 = fadd float %82, %67
- %84 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %84 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
%85 = extractelement <4 x float> %84, i32 3
%86 = fmul float %3, %85
%87 = fadd float %86, %71
@@ -107,15 +107,15 @@ main_body:
%99 = fmul float %4, %98
%100 = fmul float %5, %98
%101 = fmul float %6, %98
- %102 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %102 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
%103 = extractelement <4 x float> %102, i32 0
%104 = fmul float %103, %8
%105 = fadd float %104, %20
- %106 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %106 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
%107 = extractelement <4 x float> %106, i32 1
%108 = fmul float %107, %9
%109 = fadd float %108, %21
- %110 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %110 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
%111 = extractelement <4 x float> %110, i32 2
%112 = fmul float %111, %10
%113 = fadd float %112, %22
@@ -123,11 +123,11 @@ main_body:
%115 = call float @llvm.AMDIL.clamp.(float %109, float 0.000000e+00, float 1.000000e+00)
%116 = call float @llvm.AMDIL.clamp.(float %113, float 0.000000e+00, float 1.000000e+00)
%117 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00)
- %118 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+ %118 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
%119 = extractelement <4 x float> %118, i32 0
- %120 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+ %120 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
%121 = extractelement <4 x float> %120, i32 1
- %122 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+ %122 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
%123 = extractelement <4 x float> %122, i32 2
%124 = insertelement <4 x float> undef, float %99, i32 0
%125 = insertelement <4 x float> %124, float %100, i32 1
@@ -138,11 +138,11 @@ main_body:
%130 = insertelement <4 x float> %129, float %123, i32 2
%131 = insertelement <4 x float> %130, float 0.000000e+00, i32 3
%132 = call float @llvm.AMDGPU.dp4(<4 x float> %127, <4 x float> %131)
- %133 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+ %133 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
%134 = extractelement <4 x float> %133, i32 0
- %135 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+ %135 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
%136 = extractelement <4 x float> %135, i32 1
- %137 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+ %137 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
%138 = extractelement <4 x float> %137, i32 2
%139 = insertelement <4 x float> undef, float %99, i32 0
%140 = insertelement <4 x float> %139, float %100, i32 1
@@ -153,31 +153,31 @@ main_body:
%145 = insertelement <4 x float> %144, float %138, i32 2
%146 = insertelement <4 x float> %145, float 0.000000e+00, i32 3
%147 = call float @llvm.AMDGPU.dp4(<4 x float> %142, <4 x float> %146)
- %148 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+ %148 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
%149 = extractelement <4 x float> %148, i32 0
%150 = fmul float %149, %8
- %151 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+ %151 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
%152 = extractelement <4 x float> %151, i32 1
%153 = fmul float %152, %9
- %154 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+ %154 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
%155 = extractelement <4 x float> %154, i32 2
%156 = fmul float %155, %10
- %157 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+ %157 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
%158 = extractelement <4 x float> %157, i32 0
%159 = fmul float %158, %12
- %160 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+ %160 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
%161 = extractelement <4 x float> %160, i32 1
%162 = fmul float %161, %13
- %163 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+ %163 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
%164 = extractelement <4 x float> %163, i32 2
%165 = fmul float %164, %14
- %166 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+ %166 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
%167 = extractelement <4 x float> %166, i32 0
%168 = fmul float %167, %16
- %169 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+ %169 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
%170 = extractelement <4 x float> %169, i32 1
%171 = fmul float %170, %17
- %172 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+ %172 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
%173 = extractelement <4 x float> %172, i32 2
%174 = fmul float %173, %18
%175 = fcmp uge float %132, 0.000000e+00
diff --git a/test/CodeGen/R600/r600-export-fix.ll b/test/CodeGen/R600/r600-export-fix.ll
index 7d72856..7cb8019 100644
--- a/test/CodeGen/R600/r600-export-fix.ll
+++ b/test/CodeGen/R600/r600-export-fix.ll
@@ -16,83 +16,83 @@ main_body:
%1 = extractelement <4 x float> %reg1, i32 1
%2 = extractelement <4 x float> %reg1, i32 2
%3 = extractelement <4 x float> %reg1, i32 3
- %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
%5 = extractelement <4 x float> %4, i32 0
%6 = fmul float %5, %0
- %7 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %7 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
%8 = extractelement <4 x float> %7, i32 1
%9 = fmul float %8, %0
- %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %10 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
%11 = extractelement <4 x float> %10, i32 2
%12 = fmul float %11, %0
- %13 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %13 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
%14 = extractelement <4 x float> %13, i32 3
%15 = fmul float %14, %0
- %16 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+ %16 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
%17 = extractelement <4 x float> %16, i32 0
%18 = fmul float %17, %1
%19 = fadd float %18, %6
- %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+ %20 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
%21 = extractelement <4 x float> %20, i32 1
%22 = fmul float %21, %1
%23 = fadd float %22, %9
- %24 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+ %24 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
%25 = extractelement <4 x float> %24, i32 2
%26 = fmul float %25, %1
%27 = fadd float %26, %12
- %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+ %28 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
%29 = extractelement <4 x float> %28, i32 3
%30 = fmul float %29, %1
%31 = fadd float %30, %15
- %32 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+ %32 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
%33 = extractelement <4 x float> %32, i32 0
%34 = fmul float %33, %2
%35 = fadd float %34, %19
- %36 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+ %36 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
%37 = extractelement <4 x float> %36, i32 1
%38 = fmul float %37, %2
%39 = fadd float %38, %23
- %40 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+ %40 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
%41 = extractelement <4 x float> %40, i32 2
%42 = fmul float %41, %2
%43 = fadd float %42, %27
- %44 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+ %44 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
%45 = extractelement <4 x float> %44, i32 3
%46 = fmul float %45, %2
%47 = fadd float %46, %31
- %48 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+ %48 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
%49 = extractelement <4 x float> %48, i32 0
%50 = fmul float %49, %3
%51 = fadd float %50, %35
- %52 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+ %52 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
%53 = extractelement <4 x float> %52, i32 1
%54 = fmul float %53, %3
%55 = fadd float %54, %39
- %56 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+ %56 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
%57 = extractelement <4 x float> %56, i32 2
%58 = fmul float %57, %3
%59 = fadd float %58, %43
- %60 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+ %60 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
%61 = extractelement <4 x float> %60, i32 3
%62 = fmul float %61, %3
%63 = fadd float %62, %47
- %64 = load <4 x float> addrspace(8)* null
+ %64 = load <4 x float>, <4 x float> addrspace(8)* null
%65 = extractelement <4 x float> %64, i32 0
- %66 = load <4 x float> addrspace(8)* null
+ %66 = load <4 x float>, <4 x float> addrspace(8)* null
%67 = extractelement <4 x float> %66, i32 1
- %68 = load <4 x float> addrspace(8)* null
+ %68 = load <4 x float>, <4 x float> addrspace(8)* null
%69 = extractelement <4 x float> %68, i32 2
- %70 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %70 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%71 = extractelement <4 x float> %70, i32 0
- %72 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %72 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%73 = extractelement <4 x float> %72, i32 1
- %74 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %74 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%75 = extractelement <4 x float> %74, i32 2
- %76 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %76 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
%77 = extractelement <4 x float> %76, i32 0
- %78 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %78 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
%79 = extractelement <4 x float> %78, i32 1
- %80 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %80 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
%81 = extractelement <4 x float> %80, i32 2
%82 = insertelement <4 x float> undef, float %51, i32 0
%83 = insertelement <4 x float> %82, float %55, i32 1
diff --git a/test/CodeGen/R600/r600cfg.ll b/test/CodeGen/R600/r600cfg.ll
index dddc9de..c7b9d65 100644
--- a/test/CodeGen/R600/r600cfg.ll
+++ b/test/CodeGen/R600/r600cfg.ll
@@ -83,7 +83,7 @@ ELSE45: ; preds = %ENDIF40
ENDIF43: ; preds = %ELSE45, %IF44
%.sink = phi i32 [ %49, %IF44 ], [ %51, %ELSE45 ]
%52 = bitcast i32 %.sink to float
- %53 = load <4 x float> addrspace(8)* null
+ %53 = load <4 x float>, <4 x float> addrspace(8)* null
%54 = extractelement <4 x float> %53, i32 0
%55 = bitcast float %54 to i32
br label %LOOP47
diff --git a/test/CodeGen/R600/register-count-comments.ll b/test/CodeGen/R600/register-count-comments.ll
index 2b49f97..de6bfb3 100644
--- a/test/CodeGen/R600/register-count-comments.ll
+++ b/test/CodeGen/R600/register-count-comments.ll
@@ -9,11 +9,11 @@ declare i32 @llvm.SI.tid() nounwind readnone
; SI: ; NumVgprs: {{[0-9]+}}
define void @foo(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %abase, i32 addrspace(1)* %bbase) nounwind {
%tid = call i32 @llvm.SI.tid() nounwind readnone
- %aptr = getelementptr i32 addrspace(1)* %abase, i32 %tid
- %bptr = getelementptr i32 addrspace(1)* %bbase, i32 %tid
- %outptr = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load i32 addrspace(1)* %aptr, align 4
- %b = load i32 addrspace(1)* %bptr, align 4
+ %aptr = getelementptr i32, i32 addrspace(1)* %abase, i32 %tid
+ %bptr = getelementptr i32, i32 addrspace(1)* %bbase, i32 %tid
+ %outptr = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load i32, i32 addrspace(1)* %aptr, align 4
+ %b = load i32, i32 addrspace(1)* %bptr, align 4
%result = add i32 %a, %b
store i32 %result, i32 addrspace(1)* %outptr, align 4
ret void
diff --git a/test/CodeGen/R600/reorder-stores.ll b/test/CodeGen/R600/reorder-stores.ll
index ea50d5e..187650f 100644
--- a/test/CodeGen/R600/reorder-stores.ll
+++ b/test/CodeGen/R600/reorder-stores.ll
@@ -12,8 +12,8 @@
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @no_reorder_v2f64_global_load_store(<2 x double> addrspace(1)* nocapture %x, <2 x double> addrspace(1)* nocapture %y) nounwind {
- %tmp1 = load <2 x double> addrspace(1)* %x, align 16
- %tmp4 = load <2 x double> addrspace(1)* %y, align 16
+ %tmp1 = load <2 x double>, <2 x double> addrspace(1)* %x, align 16
+ %tmp4 = load <2 x double>, <2 x double> addrspace(1)* %y, align 16
store <2 x double> %tmp4, <2 x double> addrspace(1)* %x, align 16
store <2 x double> %tmp1, <2 x double> addrspace(1)* %y, align 16
ret void
@@ -26,8 +26,8 @@ define void @no_reorder_v2f64_global_load_store(<2 x double> addrspace(1)* nocap
; SI: ds_write_b64
; SI: s_endpgm
define void @no_reorder_scalarized_v2f64_local_load_store(<2 x double> addrspace(3)* nocapture %x, <2 x double> addrspace(3)* nocapture %y) nounwind {
- %tmp1 = load <2 x double> addrspace(3)* %x, align 16
- %tmp4 = load <2 x double> addrspace(3)* %y, align 16
+ %tmp1 = load <2 x double>, <2 x double> addrspace(3)* %x, align 16
+ %tmp4 = load <2 x double>, <2 x double> addrspace(3)* %y, align 16
store <2 x double> %tmp4, <2 x double> addrspace(3)* %x, align 16
store <2 x double> %tmp1, <2 x double> addrspace(3)* %y, align 16
ret void
@@ -76,8 +76,8 @@ define void @no_reorder_scalarized_v2f64_local_load_store(<2 x double> addrspace
; SI: buffer_store_dword
; SI: s_endpgm
define void @no_reorder_split_v8i32_global_load_store(<8 x i32> addrspace(1)* nocapture %x, <8 x i32> addrspace(1)* nocapture %y) nounwind {
- %tmp1 = load <8 x i32> addrspace(1)* %x, align 32
- %tmp4 = load <8 x i32> addrspace(1)* %y, align 32
+ %tmp1 = load <8 x i32>, <8 x i32> addrspace(1)* %x, align 32
+ %tmp4 = load <8 x i32>, <8 x i32> addrspace(1)* %y, align 32
store <8 x i32> %tmp4, <8 x i32> addrspace(1)* %x, align 32
store <8 x i32> %tmp1, <8 x i32> addrspace(1)* %y, align 32
ret void
@@ -91,8 +91,8 @@ define void @no_reorder_split_v8i32_global_load_store(<8 x i32> addrspace(1)* no
; SI: ds_write_b64
; SI: s_endpgm
define void @no_reorder_extload_64(<2 x i32> addrspace(3)* nocapture %x, <2 x i32> addrspace(3)* nocapture %y) nounwind {
- %tmp1 = load <2 x i32> addrspace(3)* %x, align 8
- %tmp4 = load <2 x i32> addrspace(3)* %y, align 8
+ %tmp1 = load <2 x i32>, <2 x i32> addrspace(3)* %x, align 8
+ %tmp4 = load <2 x i32>, <2 x i32> addrspace(3)* %y, align 8
%tmp1ext = zext <2 x i32> %tmp1 to <2 x i64>
%tmp4ext = zext <2 x i32> %tmp4 to <2 x i64>
%tmp7 = add <2 x i64> %tmp1ext, <i64 1, i64 1>
diff --git a/test/CodeGen/R600/rotl.i64.ll b/test/CodeGen/R600/rotl.i64.ll
index 6da17a4..3f4ceb7 100644
--- a/test/CodeGen/R600/rotl.i64.ll
+++ b/test/CodeGen/R600/rotl.i64.ll
@@ -28,8 +28,8 @@ entry:
; BOTH: s_endpgm
define void @v_rotl_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %xptr, i64 addrspace(1)* %yptr) {
entry:
- %x = load i64 addrspace(1)* %xptr, align 8
- %y = load i64 addrspace(1)* %yptr, align 8
+ %x = load i64, i64 addrspace(1)* %xptr, align 8
+ %y = load i64, i64 addrspace(1)* %yptr, align 8
%tmp0 = shl i64 %x, %y
%tmp1 = sub i64 64, %y
%tmp2 = lshr i64 %x, %tmp1
diff --git a/test/CodeGen/R600/rotr.i64.ll b/test/CodeGen/R600/rotr.i64.ll
index f1d1d26..586de44 100644
--- a/test/CodeGen/R600/rotr.i64.ll
+++ b/test/CodeGen/R600/rotr.i64.ll
@@ -26,8 +26,8 @@ entry:
; BOTH: v_or_b32
define void @v_rotr_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %xptr, i64 addrspace(1)* %yptr) {
entry:
- %x = load i64 addrspace(1)* %xptr, align 8
- %y = load i64 addrspace(1)* %yptr, align 8
+ %x = load i64, i64 addrspace(1)* %xptr, align 8
+ %y = load i64, i64 addrspace(1)* %yptr, align 8
%tmp0 = sub i64 64, %y
%tmp1 = shl i64 %x, %tmp0
%tmp2 = lshr i64 %x, %y
@@ -50,8 +50,8 @@ entry:
; BOTH-LABEL: {{^}}v_rotr_v2i64:
define void @v_rotr_v2i64(<2 x i64> addrspace(1)* %in, <2 x i64> addrspace(1)* %xptr, <2 x i64> addrspace(1)* %yptr) {
entry:
- %x = load <2 x i64> addrspace(1)* %xptr, align 8
- %y = load <2 x i64> addrspace(1)* %yptr, align 8
+ %x = load <2 x i64>, <2 x i64> addrspace(1)* %xptr, align 8
+ %y = load <2 x i64>, <2 x i64> addrspace(1)* %yptr, align 8
%tmp0 = sub <2 x i64> <i64 64, i64 64>, %y
%tmp1 = shl <2 x i64> %x, %tmp0
%tmp2 = lshr <2 x i64> %x, %y
diff --git a/test/CodeGen/R600/rsq.ll b/test/CodeGen/R600/rsq.ll
index b8a23df..b67b800 100644
--- a/test/CodeGen/R600/rsq.ll
+++ b/test/CodeGen/R600/rsq.ll
@@ -9,7 +9,7 @@ declare double @llvm.sqrt.f64(double) nounwind readnone
; SI: v_rsq_f32_e32
; SI: s_endpgm
define void @rsq_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
- %val = load float addrspace(1)* %in, align 4
+ %val = load float, float addrspace(1)* %in, align 4
%sqrt = call float @llvm.sqrt.f32(float %val) nounwind readnone
%div = fdiv float 1.0, %sqrt
store float %div, float addrspace(1)* %out, align 4
@@ -21,7 +21,7 @@ define void @rsq_f32(float addrspace(1)* noalias %out, float addrspace(1)* noali
; SI-SAFE: v_sqrt_f64_e32
; SI: s_endpgm
define void @rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) nounwind {
- %val = load double addrspace(1)* %in, align 4
+ %val = load double, double addrspace(1)* %in, align 4
%sqrt = call double @llvm.sqrt.f64(double %val) nounwind readnone
%div = fdiv double 1.0, %sqrt
store double %div, double addrspace(1)* %out, align 4
@@ -57,14 +57,14 @@ define void @rsq_f32_sgpr(float addrspace(1)* noalias %out, float %val) nounwind
; SI: s_endpgm
define void @rsqrt_fmul(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %out.gep = getelementptr float addrspace(1)* %out, i32 %tid
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2

- %a = load float addrspace(1)* %gep.0
- %b = load float addrspace(1)* %gep.1
- %c = load float addrspace(1)* %gep.2
+ %a = load float, float addrspace(1)* %gep.0
+ %b = load float, float addrspace(1)* %gep.1
+ %c = load float, float addrspace(1)* %gep.2

 %x = call float @llvm.sqrt.f32(float %a)
%y = fmul float %x, %b
diff --git a/test/CodeGen/R600/s_movk_i32.ll b/test/CodeGen/R600/s_movk_i32.ll
index 8be2d1d..6b1a36c 100644
--- a/test/CodeGen/R600/s_movk_i32.ll
+++ b/test/CodeGen/R600/s_movk_i32.ll
@@ -9,7 +9,7 @@
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
define void @s_movk_i32_k0(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64 addrspace(1)* %a, align 4
+ %loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 4295032831 ; ((1 << 16) - 1) | (1 << 32)
store i64 %or, i64 addrspace(1)* %out
ret void
@@ -23,7 +23,7 @@ define void @s_movk_i32_k0(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
define void @s_movk_i32_k1(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64 addrspace(1)* %a, align 4
+ %loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 4295000063 ; ((1 << 15) - 1) | (1 << 32)
store i64 %or, i64 addrspace(1)* %out
ret void
@@ -37,7 +37,7 @@ define void @s_movk_i32_k1(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
define void @s_movk_i32_k2(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64 addrspace(1)* %a, align 4
+ %loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 274877939711 ; ((1 << 15) - 1) | (64 << 32)
store i64 %or, i64 addrspace(1)* %out
ret void
@@ -51,7 +51,7 @@ define void @s_movk_i32_k2(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
define void @s_movk_i32_k3(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64 addrspace(1)* %a, align 4
+ %loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 4295000064 ; (1 << 15) | (1 << 32)
store i64 %or, i64 addrspace(1)* %out
ret void
@@ -65,7 +65,7 @@ define void @s_movk_i32_k3(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
define void @s_movk_i32_k4(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64 addrspace(1)* %a, align 4
+ %loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 4295098368 ; (1 << 17) | (1 << 32)
store i64 %or, i64 addrspace(1)* %out
ret void
@@ -79,7 +79,7 @@ define void @s_movk_i32_k4(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
define void @s_movk_i32_k5(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64 addrspace(1)* %a, align 4
+ %loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 18374967954648334319 ; -17 & 0xff00ffffffffffff
store i64 %or, i64 addrspace(1)* %out
ret void
@@ -93,7 +93,7 @@ define void @s_movk_i32_k5(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
define void @s_movk_i32_k6(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64 addrspace(1)* %a, align 4
+ %loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 270582939713 ; 65 | (63 << 32)
store i64 %or, i64 addrspace(1)* %out
ret void
@@ -107,7 +107,7 @@ define void @s_movk_i32_k6(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
define void @s_movk_i32_k7(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64 addrspace(1)* %a, align 4
+ %loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 70368744185856; ((1 << 13)) | ((1 << 14) << 32)
store i64 %or, i64 addrspace(1)* %out
ret void
@@ -122,7 +122,7 @@ define void @s_movk_i32_k7(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
define void @s_movk_i32_k8(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64 addrspace(1)* %a, align 4
+ %loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 1229782942255906816 ; 0x11111111ffff8000
store i64 %or, i64 addrspace(1)* %out
ret void
@@ -136,7 +136,7 @@ define void @s_movk_i32_k8(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
define void @s_movk_i32_k9(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64 addrspace(1)* %a, align 4
+ %loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 1229782942255906817 ; 0x11111111ffff8001
store i64 %or, i64 addrspace(1)* %out
ret void
@@ -150,7 +150,7 @@ define void @s_movk_i32_k9(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
define void @s_movk_i32_k10(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64 addrspace(1)* %a, align 4
+ %loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 1229782942255909000 ; 0x11111111ffff8888
store i64 %or, i64 addrspace(1)* %out
ret void
@@ -164,7 +164,7 @@ define void @s_movk_i32_k10(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 ad
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
define void @s_movk_i32_k11(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64 addrspace(1)* %a, align 4
+ %loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 1229782942255910911 ; 0x11111111ffff8fff
store i64 %or, i64 addrspace(1)* %out
ret void
@@ -178,7 +178,7 @@ define void @s_movk_i32_k11(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 ad
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
define void @s_movk_i32_k12(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64 addrspace(1)* %a, align 4
+ %loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 1229782942255902721 ; 0x11111111ffff7001
store i64 %or, i64 addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/saddo.ll b/test/CodeGen/R600/saddo.ll
index 8e625c1..f8ced79 100644
--- a/test/CodeGen/R600/saddo.ll
+++ b/test/CodeGen/R600/saddo.ll
@@ -28,8 +28,8 @@ define void @s_saddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32
; FUNC-LABEL: {{^}}v_saddo_i32:
define void @v_saddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
- %a = load i32 addrspace(1)* %aptr, align 4
- %b = load i32 addrspace(1)* %bptr, align 4
+ %a = load i32, i32 addrspace(1)* %aptr, align 4
+ %b = load i32, i32 addrspace(1)* %bptr, align 4
%sadd = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) nounwind
%val = extractvalue { i32, i1 } %sadd, 0
%carry = extractvalue { i32, i1 } %sadd, 1
@@ -52,8 +52,8 @@ define void @s_saddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64
; SI: v_add_i32
; SI: v_addc_u32
define void @v_saddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
- %a = load i64 addrspace(1)* %aptr, align 4
- %b = load i64 addrspace(1)* %bptr, align 4
+ %a = load i64, i64 addrspace(1)* %aptr, align 4
+ %b = load i64, i64 addrspace(1)* %bptr, align 4
%sadd = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) nounwind
%val = extractvalue { i64, i1 } %sadd, 0
%carry = extractvalue { i64, i1 } %sadd, 1
diff --git a/test/CodeGen/R600/salu-to-valu.ll b/test/CodeGen/R600/salu-to-valu.ll
index dfb181d..0b96495 100644
--- a/test/CodeGen/R600/salu-to-valu.ll
+++ b/test/CodeGen/R600/salu-to-valu.ll
@@ -27,11 +27,11 @@ entry:
loop:
%4 = phi i64 [0, %entry], [%5, %loop]
%5 = add i64 %2, %4
- %6 = getelementptr i8 addrspace(1)* %in, i64 %5
- %7 = load i8 addrspace(1)* %6, align 1
+ %6 = getelementptr i8, i8 addrspace(1)* %in, i64 %5
+ %7 = load i8, i8 addrspace(1)* %6, align 1
%8 = or i64 %5, 1
- %9 = getelementptr i8 addrspace(1)* %in, i64 %8
- %10 = load i8 addrspace(1)* %9, align 1
+ %9 = getelementptr i8, i8 addrspace(1)* %in, i64 %8
+ %10 = load i8, i8 addrspace(1)* %9, align 1
%11 = add i8 %7, %10
%12 = sext i8 %11 to i32
store i32 %12, i32 addrspace(1)* %out
@@ -59,18 +59,18 @@ entry:
br i1 %0, label %if, label %else
if:
- %1 = load i32 addrspace(2)* addrspace(1)* %in
+ %1 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %in
br label %endif
else:
- %2 = getelementptr i32 addrspace(2)* addrspace(1)* %in
- %3 = load i32 addrspace(2)* addrspace(1)* %2
+ %2 = getelementptr i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %in
+ %3 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %2
br label %endif
endif:
%4 = phi i32 addrspace(2)* [%1, %if], [%3, %else]
- %5 = getelementptr i32 addrspace(2)* %4, i32 3000
- %6 = load i32 addrspace(2)* %5
+ %5 = getelementptr i32, i32 addrspace(2)* %4, i32 3000
+ %6 = load i32, i32 addrspace(2)* %5
store i32 %6, i32 addrspace(1)* %out
ret void
}
@@ -83,8 +83,8 @@ define void @smrd_valu2(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in) {
entry:
%0 = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%1 = add i32 %0, 4
- %2 = getelementptr [8 x i32] addrspace(2)* %in, i32 %0, i32 4
- %3 = load i32 addrspace(2)* %2
+ %2 = getelementptr [8 x i32], [8 x i32] addrspace(2)* %in, i32 %0, i32 4
+ %3 = load i32, i32 addrspace(2)* %2
store i32 %3, i32 addrspace(1)* %out
ret void
}
@@ -95,9 +95,9 @@ entry:
define void @s_load_imm_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) {
entry:
%tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1
- %tmp1 = getelementptr inbounds i32 addrspace(2)* %in, i32 %tmp0
+ %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
%tmp2 = bitcast i32 addrspace(2)* %tmp1 to <8 x i32> addrspace(2)*
- %tmp3 = load <8 x i32> addrspace(2)* %tmp2, align 4
+ %tmp3 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp2, align 4
store <8 x i32> %tmp3, <8 x i32> addrspace(1)* %out, align 32
ret void
}
@@ -110,9 +110,9 @@ entry:
define void @s_load_imm_v16i32(<16 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) {
entry:
%tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1
- %tmp1 = getelementptr inbounds i32 addrspace(2)* %in, i32 %tmp0
+ %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
%tmp2 = bitcast i32 addrspace(2)* %tmp1 to <16 x i32> addrspace(2)*
- %tmp3 = load <16 x i32> addrspace(2)* %tmp2, align 4
+ %tmp3 = load <16 x i32>, <16 x i32> addrspace(2)* %tmp2, align 4
store <16 x i32> %tmp3, <16 x i32> addrspace(1)* %out, align 32
ret void
}
diff --git a/test/CodeGen/R600/scalar_to_vector.ll b/test/CodeGen/R600/scalar_to_vector.ll
index b82e552..0970e5d 100644
--- a/test/CodeGen/R600/scalar_to_vector.ll
+++ b/test/CodeGen/R600/scalar_to_vector.ll
@@ -11,7 +11,7 @@
; SI: buffer_store_short [[RESULT]]
; SI: s_endpgm
define void @scalar_to_vector_v2i32(<4 x i16> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %tmp1 = load i32 addrspace(1)* %in, align 4
+ %tmp1 = load i32, i32 addrspace(1)* %in, align 4
%bc = bitcast i32 %tmp1 to <2 x i16>
%tmp2 = shufflevector <2 x i16> %bc, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
store <4 x i16> %tmp2, <4 x i16> addrspace(1)* %out, align 8
@@ -27,7 +27,7 @@ define void @scalar_to_vector_v2i32(<4 x i16> addrspace(1)* %out, i32 addrspace(
; SI: buffer_store_short [[RESULT]]
; SI: s_endpgm
define void @scalar_to_vector_v2f32(<4 x i16> addrspace(1)* %out, float addrspace(1)* %in) nounwind {
- %tmp1 = load float addrspace(1)* %in, align 4
+ %tmp1 = load float, float addrspace(1)* %in, align 4
%bc = bitcast float %tmp1 to <2 x i16>
%tmp2 = shufflevector <2 x i16> %bc, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
store <4 x i16> %tmp2, <4 x i16> addrspace(1)* %out, align 8
@@ -39,7 +39,7 @@ define void @scalar_to_vector_v2f32(<4 x i16> addrspace(1)* %out, float addrspac
; define void @scalar_to_vector_test2(<8 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-; %tmp1 = load i32 addrspace(1)* %in, align 4
+; %tmp1 = load i32, i32 addrspace(1)* %in, align 4
; %bc = bitcast i32 %tmp1 to <4 x i8>
; %tmp2 = shufflevector <4 x i8> %bc, <4 x i8> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
diff --git a/test/CodeGen/R600/schedule-fs-loop-nested.ll b/test/CodeGen/R600/schedule-fs-loop-nested.ll
index b917ec6..759197c 100644
--- a/test/CodeGen/R600/schedule-fs-loop-nested.ll
+++ b/test/CodeGen/R600/schedule-fs-loop-nested.ll
@@ -3,7 +3,7 @@
define void @main() {
main_body:
- %0 = load <4 x float> addrspace(9)* null
+ %0 = load <4 x float>, <4 x float> addrspace(9)* null
%1 = extractelement <4 x float> %0, i32 3
%2 = fptosi float %1 to i32
%3 = bitcast i32 %2 to float
@@ -20,11 +20,11 @@ main_body:
%14 = bitcast float %12 to i32
%15 = add i32 %13, %14
%16 = bitcast i32 %15 to float
- %17 = load <4 x float> addrspace(9)* null
+ %17 = load <4 x float>, <4 x float> addrspace(9)* null
%18 = extractelement <4 x float> %17, i32 0
- %19 = load <4 x float> addrspace(9)* null
+ %19 = load <4 x float>, <4 x float> addrspace(9)* null
%20 = extractelement <4 x float> %19, i32 1
- %21 = load <4 x float> addrspace(9)* null
+ %21 = load <4 x float>, <4 x float> addrspace(9)* null
%22 = extractelement <4 x float> %21, i32 2
br label %LOOP
diff --git a/test/CodeGen/R600/schedule-fs-loop.ll b/test/CodeGen/R600/schedule-fs-loop.ll
index d6c194b..28cc08a 100644
--- a/test/CodeGen/R600/schedule-fs-loop.ll
+++ b/test/CodeGen/R600/schedule-fs-loop.ll
@@ -3,15 +3,15 @@
define void @main() {
main_body:
- %0 = load <4 x float> addrspace(9)* null
+ %0 = load <4 x float>, <4 x float> addrspace(9)* null
%1 = extractelement <4 x float> %0, i32 3
%2 = fptosi float %1 to i32
%3 = bitcast i32 %2 to float
- %4 = load <4 x float> addrspace(9)* null
+ %4 = load <4 x float>, <4 x float> addrspace(9)* null
%5 = extractelement <4 x float> %4, i32 0
- %6 = load <4 x float> addrspace(9)* null
+ %6 = load <4 x float>, <4 x float> addrspace(9)* null
%7 = extractelement <4 x float> %6, i32 1
- %8 = load <4 x float> addrspace(9)* null
+ %8 = load <4 x float>, <4 x float> addrspace(9)* null
%9 = extractelement <4 x float> %8, i32 2
br label %LOOP
diff --git a/test/CodeGen/R600/schedule-global-loads.ll b/test/CodeGen/R600/schedule-global-loads.ll
index b6437d2..3f728fd 100644
--- a/test/CodeGen/R600/schedule-global-loads.ll
+++ b/test/CodeGen/R600/schedule-global-loads.ll
@@ -14,9 +14,9 @@ declare i32 @llvm.r600.read.tidig.x() #1
; SI: buffer_store_dword [[REG0]]
; SI: buffer_store_dword [[REG1]]
define void @cluster_global_arg_loads(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %ptr) #0 {
- %load0 = load i32 addrspace(1)* %ptr, align 4
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 1
- %load1 = load i32 addrspace(1)* %gep, align 4
+ %load0 = load i32, i32 addrspace(1)* %ptr, align 4
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 1
+ %load1 = load i32, i32 addrspace(1)* %gep, align 4
store i32 %load0, i32 addrspace(1)* %out0, align 4
store i32 %load1, i32 addrspace(1)* %out1, align 4
ret void
@@ -29,9 +29,9 @@ define void @cluster_global_arg_loads(i32 addrspace(1)* %out0, i32 addrspace(1)*
; SI: buffer_load_dword
define void @same_base_ptr_crash(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset) {
entry:
- %out1 = getelementptr i32 addrspace(1)* %out, i32 %offset
- %tmp0 = load i32 addrspace(1)* %out
- %tmp1 = load i32 addrspace(1)* %out1
+ %out1 = getelementptr i32, i32 addrspace(1)* %out, i32 %offset
+ %tmp0 = load i32, i32 addrspace(1)* %out
+ %tmp1 = load i32, i32 addrspace(1)* %out1
%tmp2 = add i32 %tmp0, %tmp1
store i32 %tmp2, i32 addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/schedule-if-2.ll b/test/CodeGen/R600/schedule-if-2.ll
index 38aad18..5494650 100644
--- a/test/CodeGen/R600/schedule-if-2.ll
+++ b/test/CodeGen/R600/schedule-if-2.ll
@@ -3,10 +3,10 @@
define void @main() {
main_body:
- %0 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %0 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%1 = extractelement <4 x float> %0, i32 0
%2 = fadd float 1.000000e+03, %1
- %3 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %3 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%4 = extractelement <4 x float> %3, i32 0
%5 = bitcast float %4 to i32
%6 = icmp eq i32 %5, 0
@@ -47,7 +47,7 @@ IF: ; preds = %main_body
br label %ENDIF
ELSE: ; preds = %main_body
- %36 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %36 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%37 = extractelement <4 x float> %36, i32 0
%38 = bitcast float %37 to i32
%39 = icmp eq i32 %38, 1
@@ -80,7 +80,7 @@ IF23: ; preds = %ELSE
%.28 = select i1 %54, float 0x36A0000000000000, float 0.000000e+00
%55 = bitcast float %.28 to i32
%56 = sitofp i32 %55 to float
- %57 = load <4 x float> addrspace(8)* null
+ %57 = load <4 x float>, <4 x float> addrspace(8)* null
%58 = extractelement <4 x float> %57, i32 0
%59 = fsub float -0.000000e+00, %58
%60 = fadd float %2, %59
diff --git a/test/CodeGen/R600/schedule-if.ll b/test/CodeGen/R600/schedule-if.ll
index f960c93..94c653c 100644
--- a/test/CodeGen/R600/schedule-if.ll
+++ b/test/CodeGen/R600/schedule-if.ll
@@ -3,7 +3,7 @@
define void @main() {
main_body:
- %0 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %0 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%1 = extractelement <4 x float> %0, i32 0
%2 = bitcast float %1 to i32
%3 = icmp eq i32 %2, 0
@@ -14,7 +14,7 @@ main_body:
br i1 %7, label %ENDIF, label %ELSE
ELSE: ; preds = %main_body
- %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %8 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%9 = extractelement <4 x float> %8, i32 0
%10 = bitcast float %9 to i32
%11 = icmp eq i32 %10, 1
@@ -36,7 +36,7 @@ ENDIF: ; preds = %IF13, %ELSE, %main_
ret void
IF13: ; preds = %ELSE
- %20 = load <4 x float> addrspace(8)* null
+ %20 = load <4 x float>, <4 x float> addrspace(8)* null
%21 = extractelement <4 x float> %20, i32 0
%22 = fsub float -0.000000e+00, %21
%23 = fadd float 1.000000e+03, %22
diff --git a/test/CodeGen/R600/schedule-kernel-arg-loads.ll b/test/CodeGen/R600/schedule-kernel-arg-loads.ll
index f9641fa..6b3e081 100644
--- a/test/CodeGen/R600/schedule-kernel-arg-loads.ll
+++ b/test/CodeGen/R600/schedule-kernel-arg-loads.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=VI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI --check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=VI --check-prefix=GCN %s
; FUNC-LABEL: {{^}}cluster_arg_loads:
; SI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x9
@@ -18,3 +18,34 @@ define void @cluster_arg_loads(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1,
store i32 %y, i32 addrspace(1)* %out1, align 4
ret void
}
+
+; Test for a crash in SIInstrInfo::areLoadsFromSameBasePtr() when
+; s_load_dwordx2 has a register offset
+
+; FUNC-LABEL: @same_base_ptr_crash
+; GCN: s_load_dwordx2
+; GCN: s_load_dwordx2
+; GCN: s_load_dwordx2
+; GCN: s_endpgm
+define void @same_base_ptr_crash(i64 addrspace(1)* %out,
+ i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %arg5, i64 %arg6, i64 %arg7,
+ i64 %arg8, i64 %arg9, i64 %arg10, i64 %arg11, i64 %arg12, i64 %arg13, i64 %arg14, i64 %arg15,
+ i64 %arg16, i64 %arg17, i64 %arg18, i64 %arg19, i64 %arg20, i64 %arg21, i64 %arg22, i64 %arg23,
+ i64 %arg24, i64 %arg25, i64 %arg26, i64 %arg27, i64 %arg28, i64 %arg29, i64 %arg30, i64 %arg31,
+ i64 %arg32, i64 %arg33, i64 %arg34, i64 %arg35, i64 %arg36, i64 %arg37, i64 %arg38, i64 %arg39,
+ i64 %arg40, i64 %arg41, i64 %arg42, i64 %arg43, i64 %arg44, i64 %arg45, i64 %arg46, i64 %arg47,
+ i64 %arg48, i64 %arg49, i64 %arg50, i64 %arg51, i64 %arg52, i64 %arg53, i64 %arg54, i64 %arg55,
+ i64 %arg56, i64 %arg57, i64 %arg58, i64 %arg59, i64 %arg60, i64 %arg61, i64 %arg62, i64 %arg63,
+ i64 %arg64, i64 %arg65, i64 %arg66, i64 %arg67, i64 %arg68, i64 %arg69, i64 %arg70, i64 %arg71,
+ i64 %arg72, i64 %arg73, i64 %arg74, i64 %arg75, i64 %arg76, i64 %arg77, i64 %arg78, i64 %arg79,
+ i64 %arg80, i64 %arg81, i64 %arg82, i64 %arg83, i64 %arg84, i64 %arg85, i64 %arg86, i64 %arg87,
+ i64 %arg88, i64 %arg89, i64 %arg90, i64 %arg91, i64 %arg92, i64 %arg93, i64 %arg94, i64 %arg95,
+ i64 %arg96, i64 %arg97, i64 %arg98, i64 %arg99, i64 %arg100, i64 %arg101, i64 %arg102, i64 %arg103,
+ i64 %arg104, i64 %arg105, i64 %arg106, i64 %arg107, i64 %arg108, i64 %arg109, i64 %arg110, i64 %arg111,
+ i64 %arg112, i64 %arg113, i64 %arg114, i64 %arg115, i64 %arg116, i64 %arg117, i64 %arg118, i64 %arg119,
+ i64 %arg120, i64 %arg121, i64 %arg122, i64 %arg123, i64 %arg124, i64 %arg125, i64 %arg126) {
+entry:
+ %value = add i64 %arg125, %arg126
+ store i64 %value, i64 addrspace(1)* %out, align 8
+ ret void
+}
diff --git a/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll b/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll
index 76b655d..3863afd 100644
--- a/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll
+++ b/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll
@@ -39,63 +39,63 @@ ENDIF: ; preds = %main_body, %Flow2
%temp3.0 = phi float [ 0.000000e+00, %main_body ], [ %101, %Flow2 ]
%15 = extractelement <4 x float> %reg1, i32 1
%16 = extractelement <4 x float> %reg1, i32 3
- %17 = load <4 x float> addrspace(9)* null
+ %17 = load <4 x float>, <4 x float> addrspace(9)* null
%18 = extractelement <4 x float> %17, i32 0
%19 = fmul float %18, %0
- %20 = load <4 x float> addrspace(9)* null
+ %20 = load <4 x float>, <4 x float> addrspace(9)* null
%21 = extractelement <4 x float> %20, i32 1
%22 = fmul float %21, %0
- %23 = load <4 x float> addrspace(9)* null
+ %23 = load <4 x float>, <4 x float> addrspace(9)* null
%24 = extractelement <4 x float> %23, i32 2
%25 = fmul float %24, %0
- %26 = load <4 x float> addrspace(9)* null
+ %26 = load <4 x float>, <4 x float> addrspace(9)* null
%27 = extractelement <4 x float> %26, i32 3
%28 = fmul float %27, %0
- %29 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+ %29 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
%30 = extractelement <4 x float> %29, i32 0
%31 = fmul float %30, %15
%32 = fadd float %31, %19
- %33 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+ %33 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
%34 = extractelement <4 x float> %33, i32 1
%35 = fmul float %34, %15
%36 = fadd float %35, %22
- %37 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+ %37 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
%38 = extractelement <4 x float> %37, i32 2
%39 = fmul float %38, %15
%40 = fadd float %39, %25
- %41 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+ %41 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
%42 = extractelement <4 x float> %41, i32 3
%43 = fmul float %42, %15
%44 = fadd float %43, %28
- %45 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+ %45 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
%46 = extractelement <4 x float> %45, i32 0
%47 = fmul float %46, %1
%48 = fadd float %47, %32
- %49 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+ %49 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
%50 = extractelement <4 x float> %49, i32 1
%51 = fmul float %50, %1
%52 = fadd float %51, %36
- %53 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+ %53 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
%54 = extractelement <4 x float> %53, i32 2
%55 = fmul float %54, %1
%56 = fadd float %55, %40
- %57 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+ %57 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
%58 = extractelement <4 x float> %57, i32 3
%59 = fmul float %58, %1
%60 = fadd float %59, %44
- %61 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+ %61 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
%62 = extractelement <4 x float> %61, i32 0
%63 = fmul float %62, %16
%64 = fadd float %63, %48
- %65 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+ %65 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
%66 = extractelement <4 x float> %65, i32 1
%67 = fmul float %66, %16
%68 = fadd float %67, %52
- %69 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+ %69 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
%70 = extractelement <4 x float> %69, i32 2
%71 = fmul float %70, %16
%72 = fadd float %71, %56
- %73 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+ %73 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
%74 = extractelement <4 x float> %73, i32 3
%75 = fmul float %74, %16
%76 = fadd float %75, %60
diff --git a/test/CodeGen/R600/schedule-vs-if-nested-loop.ll b/test/CodeGen/R600/schedule-vs-if-nested-loop.ll
index 33b20d3..8d980db 100644
--- a/test/CodeGen/R600/schedule-vs-if-nested-loop.ll
+++ b/test/CodeGen/R600/schedule-vs-if-nested-loop.ll
@@ -21,63 +21,63 @@ ENDIF: ; preds = %ENDIF16, %LOOP, %ma
%temp1.0 = phi float [ 1.000000e+00, %main_body ], [ %temp1.1, %LOOP ], [ %temp1.1, %ENDIF16 ]
%temp2.0 = phi float [ 0.000000e+00, %main_body ], [ %temp2.1, %LOOP ], [ %temp2.1, %ENDIF16 ]
%temp3.0 = phi float [ 0.000000e+00, %main_body ], [ %temp3.1, %LOOP ], [ %temp3.1, %ENDIF16 ]
- %11 = load <4 x float> addrspace(9)* null
+ %11 = load <4 x float>, <4 x float> addrspace(9)* null
%12 = extractelement <4 x float> %11, i32 0
%13 = fmul float %12, %0
- %14 = load <4 x float> addrspace(9)* null
+ %14 = load <4 x float>, <4 x float> addrspace(9)* null
%15 = extractelement <4 x float> %14, i32 1
%16 = fmul float %15, %0
- %17 = load <4 x float> addrspace(9)* null
+ %17 = load <4 x float>, <4 x float> addrspace(9)* null
%18 = extractelement <4 x float> %17, i32 2
%19 = fmul float %18, %0
- %20 = load <4 x float> addrspace(9)* null
+ %20 = load <4 x float>, <4 x float> addrspace(9)* null
%21 = extractelement <4 x float> %20, i32 3
%22 = fmul float %21, %0
- %23 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+ %23 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
%24 = extractelement <4 x float> %23, i32 0
%25 = fmul float %24, %1
%26 = fadd float %25, %13
- %27 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+ %27 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
%28 = extractelement <4 x float> %27, i32 1
%29 = fmul float %28, %1
%30 = fadd float %29, %16
- %31 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+ %31 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
%32 = extractelement <4 x float> %31, i32 2
%33 = fmul float %32, %1
%34 = fadd float %33, %19
- %35 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+ %35 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
%36 = extractelement <4 x float> %35, i32 3
%37 = fmul float %36, %1
%38 = fadd float %37, %22
- %39 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+ %39 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
%40 = extractelement <4 x float> %39, i32 0
%41 = fmul float %40, %2
%42 = fadd float %41, %26
- %43 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+ %43 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
%44 = extractelement <4 x float> %43, i32 1
%45 = fmul float %44, %2
%46 = fadd float %45, %30
- %47 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+ %47 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
%48 = extractelement <4 x float> %47, i32 2
%49 = fmul float %48, %2
%50 = fadd float %49, %34
- %51 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+ %51 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
%52 = extractelement <4 x float> %51, i32 3
%53 = fmul float %52, %2
%54 = fadd float %53, %38
- %55 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+ %55 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
%56 = extractelement <4 x float> %55, i32 0
%57 = fmul float %56, %3
%58 = fadd float %57, %42
- %59 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+ %59 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
%60 = extractelement <4 x float> %59, i32 1
%61 = fmul float %60, %3
%62 = fadd float %61, %46
- %63 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+ %63 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
%64 = extractelement <4 x float> %63, i32 2
%65 = fmul float %64, %3
%66 = fadd float %65, %50
- %67 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+ %67 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
%68 = extractelement <4 x float> %67, i32 3
%69 = fmul float %68, %3
%70 = fadd float %69, %54
diff --git a/test/CodeGen/R600/scratch-buffer.ll b/test/CodeGen/R600/scratch-buffer.ll
index 8c5a990..5608871 100644
--- a/test/CodeGen/R600/scratch-buffer.ll
+++ b/test/CodeGen/R600/scratch-buffer.ll
@@ -19,23 +19,23 @@ entry:
%scratch0 = alloca [8192 x i32]
%scratch1 = alloca [8192 x i32]
- %scratchptr0 = getelementptr [8192 x i32]* %scratch0, i32 0, i32 0
+ %scratchptr0 = getelementptr [8192 x i32], [8192 x i32]* %scratch0, i32 0, i32 0
store i32 1, i32* %scratchptr0
- %scratchptr1 = getelementptr [8192 x i32]* %scratch1, i32 0, i32 0
+ %scratchptr1 = getelementptr [8192 x i32], [8192 x i32]* %scratch1, i32 0, i32 0
store i32 2, i32* %scratchptr1
%cmp = icmp eq i32 %cond, 0
br i1 %cmp, label %if, label %else
if:
- %if_ptr = getelementptr [8192 x i32]* %scratch0, i32 0, i32 %if_offset
- %if_value = load i32* %if_ptr
+ %if_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch0, i32 0, i32 %if_offset
+ %if_value = load i32, i32* %if_ptr
br label %done
else:
- %else_ptr = getelementptr [8192 x i32]* %scratch1, i32 0, i32 %else_offset
- %else_value = load i32* %else_ptr
+ %else_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch1, i32 0, i32 %else_offset
+ %else_value = load i32, i32* %else_ptr
br label %done
done:
@@ -57,26 +57,26 @@ entry:
%scratch0 = alloca [8192 x i32]
%scratch1 = alloca [8192 x i32]
- %offset0 = load i32 addrspace(1)* %offsets
- %scratchptr0 = getelementptr [8192 x i32]* %scratch0, i32 0, i32 %offset0
+ %offset0 = load i32, i32 addrspace(1)* %offsets
+ %scratchptr0 = getelementptr [8192 x i32], [8192 x i32]* %scratch0, i32 0, i32 %offset0
store i32 %offset0, i32* %scratchptr0
- %offsetptr1 = getelementptr i32 addrspace(1)* %offsets, i32 1
- %offset1 = load i32 addrspace(1)* %offsetptr1
- %scratchptr1 = getelementptr [8192 x i32]* %scratch1, i32 0, i32 %offset1
+ %offsetptr1 = getelementptr i32, i32 addrspace(1)* %offsets, i32 1
+ %offset1 = load i32, i32 addrspace(1)* %offsetptr1
+ %scratchptr1 = getelementptr [8192 x i32], [8192 x i32]* %scratch1, i32 0, i32 %offset1
store i32 %offset1, i32* %scratchptr1
%cmp = icmp eq i32 %cond, 0
br i1 %cmp, label %if, label %else
if:
- %if_ptr = getelementptr [8192 x i32]* %scratch0, i32 0, i32 %if_offset
- %if_value = load i32* %if_ptr
+ %if_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch0, i32 0, i32 %if_offset
+ %if_value = load i32, i32* %if_ptr
br label %done
else:
- %else_ptr = getelementptr [8192 x i32]* %scratch1, i32 0, i32 %else_offset
- %else_value = load i32* %else_ptr
+ %else_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch1, i32 0, i32 %else_offset
+ %else_value = load i32, i32* %else_ptr
br label %done
done:
diff --git a/test/CodeGen/R600/sdiv.ll b/test/CodeGen/R600/sdiv.ll
index 07bb417..de64535 100644
--- a/test/CodeGen/R600/sdiv.ll
+++ b/test/CodeGen/R600/sdiv.ll
@@ -14,9 +14,9 @@
; FUNC-LABEL: {{^}}sdiv_i32:
; EG: CF_END
define void @sdiv_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %num = load i32 addrspace(1) * %in
- %den = load i32 addrspace(1) * %den_ptr
+ %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %num = load i32, i32 addrspace(1) * %in
+ %den = load i32, i32 addrspace(1) * %den_ptr
%result = sdiv i32 %num, %den
store i32 %result, i32 addrspace(1)* %out
ret void
@@ -24,7 +24,7 @@ define void @sdiv_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
; FUNC-LABEL: {{^}}sdiv_i32_4:
define void @sdiv_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %num = load i32 addrspace(1) * %in
+ %num = load i32, i32 addrspace(1) * %in
%result = sdiv i32 %num, 4
store i32 %result, i32 addrspace(1)* %out
ret void
@@ -44,39 +44,39 @@ define void @sdiv_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
; SI: buffer_store_dword
; SI: s_endpgm
define void @slow_sdiv_i32_3435(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %num = load i32 addrspace(1) * %in
+ %num = load i32, i32 addrspace(1) * %in
%result = sdiv i32 %num, 3435
store i32 %result, i32 addrspace(1)* %out
ret void
}
define void @sdiv_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %den_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
- %num = load <2 x i32> addrspace(1) * %in
- %den = load <2 x i32> addrspace(1) * %den_ptr
+ %den_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
+ %num = load <2 x i32>, <2 x i32> addrspace(1) * %in
+ %den = load <2 x i32>, <2 x i32> addrspace(1) * %den_ptr
%result = sdiv <2 x i32> %num, %den
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
ret void
}
define void @sdiv_v2i32_4(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %num = load <2 x i32> addrspace(1) * %in
+ %num = load <2 x i32>, <2 x i32> addrspace(1) * %in
%result = sdiv <2 x i32> %num, <i32 4, i32 4>
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
ret void
}
define void @sdiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %den_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
- %num = load <4 x i32> addrspace(1) * %in
- %den = load <4 x i32> addrspace(1) * %den_ptr
+ %den_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
+ %num = load <4 x i32>, <4 x i32> addrspace(1) * %in
+ %den = load <4 x i32>, <4 x i32> addrspace(1) * %den_ptr
%result = sdiv <4 x i32> %num, %den
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
}
define void @sdiv_v4i32_4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %num = load <4 x i32> addrspace(1) * %in
+ %num = load <4 x i32>, <4 x i32> addrspace(1) * %in
%result = sdiv <4 x i32> %num, <i32 4, i32 4, i32 4, i32 4>
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/sdivrem24.ll b/test/CodeGen/R600/sdivrem24.ll
index e8c5c25..ad5df39 100644
--- a/test/CodeGen/R600/sdivrem24.ll
+++ b/test/CodeGen/R600/sdivrem24.ll
@@ -13,9 +13,9 @@
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_INT
define void @sdiv24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
- %den_ptr = getelementptr i8 addrspace(1)* %in, i8 1
- %num = load i8 addrspace(1) * %in
- %den = load i8 addrspace(1) * %den_ptr
+ %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
+ %num = load i8, i8 addrspace(1) * %in
+ %den = load i8, i8 addrspace(1) * %den_ptr
%result = sdiv i8 %num, %den
store i8 %result, i8 addrspace(1)* %out
ret void
@@ -32,9 +32,9 @@ define void @sdiv24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_INT
define void @sdiv24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
- %den_ptr = getelementptr i16 addrspace(1)* %in, i16 1
- %num = load i16 addrspace(1) * %in, align 2
- %den = load i16 addrspace(1) * %den_ptr, align 2
+ %den_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
+ %num = load i16, i16 addrspace(1) * %in, align 2
+ %den = load i16, i16 addrspace(1) * %den_ptr, align 2
%result = sdiv i16 %num, %den
store i16 %result, i16 addrspace(1)* %out, align 2
ret void
@@ -51,9 +51,9 @@ define void @sdiv24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_INT
define void @sdiv24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %num = load i32 addrspace(1) * %in, align 4
- %den = load i32 addrspace(1) * %den_ptr, align 4
+ %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %num = load i32, i32 addrspace(1) * %in, align 4
+ %den = load i32, i32 addrspace(1) * %den_ptr, align 4
%num.i24.0 = shl i32 %num, 8
%den.i24.0 = shl i32 %den, 8
%num.i24 = ashr i32 %num.i24.0, 8
@@ -70,9 +70,9 @@ define void @sdiv24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @sdiv25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %num = load i32 addrspace(1) * %in, align 4
- %den = load i32 addrspace(1) * %den_ptr, align 4
+ %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %num = load i32, i32 addrspace(1) * %in, align 4
+ %den = load i32, i32 addrspace(1) * %den_ptr, align 4
%num.i24.0 = shl i32 %num, 7
%den.i24.0 = shl i32 %den, 7
%num.i24 = ashr i32 %num.i24.0, 7
@@ -89,9 +89,9 @@ define void @sdiv25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @test_no_sdiv24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %num = load i32 addrspace(1) * %in, align 4
- %den = load i32 addrspace(1) * %den_ptr, align 4
+ %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %num = load i32, i32 addrspace(1) * %in, align 4
+ %den = load i32, i32 addrspace(1) * %den_ptr, align 4
%num.i24.0 = shl i32 %num, 8
%den.i24.0 = shl i32 %den, 7
%num.i24 = ashr i32 %num.i24.0, 8
@@ -108,9 +108,9 @@ define void @test_no_sdiv24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in)
; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @test_no_sdiv24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %num = load i32 addrspace(1) * %in, align 4
- %den = load i32 addrspace(1) * %den_ptr, align 4
+ %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %num = load i32, i32 addrspace(1) * %in, align 4
+ %den = load i32, i32 addrspace(1) * %den_ptr, align 4
%num.i24.0 = shl i32 %num, 7
%den.i24.0 = shl i32 %den, 8
%num.i24 = ashr i32 %num.i24.0, 7
@@ -131,9 +131,9 @@ define void @test_no_sdiv24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in)
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_INT
define void @srem24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
- %den_ptr = getelementptr i8 addrspace(1)* %in, i8 1
- %num = load i8 addrspace(1) * %in
- %den = load i8 addrspace(1) * %den_ptr
+ %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
+ %num = load i8, i8 addrspace(1) * %in
+ %den = load i8, i8 addrspace(1) * %den_ptr
%result = srem i8 %num, %den
store i8 %result, i8 addrspace(1)* %out
ret void
@@ -150,9 +150,9 @@ define void @srem24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_INT
define void @srem24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
- %den_ptr = getelementptr i16 addrspace(1)* %in, i16 1
- %num = load i16 addrspace(1) * %in, align 2
- %den = load i16 addrspace(1) * %den_ptr, align 2
+ %den_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
+ %num = load i16, i16 addrspace(1) * %in, align 2
+ %den = load i16, i16 addrspace(1) * %den_ptr, align 2
%result = srem i16 %num, %den
store i16 %result, i16 addrspace(1)* %out, align 2
ret void
@@ -169,9 +169,9 @@ define void @srem24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_INT
define void @srem24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %num = load i32 addrspace(1) * %in, align 4
- %den = load i32 addrspace(1) * %den_ptr, align 4
+ %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %num = load i32, i32 addrspace(1) * %in, align 4
+ %den = load i32, i32 addrspace(1) * %den_ptr, align 4
%num.i24.0 = shl i32 %num, 8
%den.i24.0 = shl i32 %den, 8
%num.i24 = ashr i32 %num.i24.0, 8
@@ -188,9 +188,9 @@ define void @srem24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @srem25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %num = load i32 addrspace(1) * %in, align 4
- %den = load i32 addrspace(1) * %den_ptr, align 4
+ %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %num = load i32, i32 addrspace(1) * %in, align 4
+ %den = load i32, i32 addrspace(1) * %den_ptr, align 4
%num.i24.0 = shl i32 %num, 7
%den.i24.0 = shl i32 %den, 7
%num.i24 = ashr i32 %num.i24.0, 7
@@ -207,9 +207,9 @@ define void @srem25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @test_no_srem24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %num = load i32 addrspace(1) * %in, align 4
- %den = load i32 addrspace(1) * %den_ptr, align 4
+ %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %num = load i32, i32 addrspace(1) * %in, align 4
+ %den = load i32, i32 addrspace(1) * %den_ptr, align 4
%num.i24.0 = shl i32 %num, 8
%den.i24.0 = shl i32 %den, 7
%num.i24 = ashr i32 %num.i24.0, 8
@@ -226,9 +226,9 @@ define void @test_no_srem24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in)
; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @test_no_srem24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %num = load i32 addrspace(1) * %in, align 4
- %den = load i32 addrspace(1) * %den_ptr, align 4
+ %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %num = load i32, i32 addrspace(1) * %in, align 4
+ %den = load i32, i32 addrspace(1) * %den_ptr, align 4
%num.i24.0 = shl i32 %num, 7
%den.i24.0 = shl i32 %den, 8
%num.i24 = ashr i32 %num.i24.0, 7
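
These hunks, like most of this change, mechanically port the tests from the old typed-pointer syntax to the explicit-type forms of load and getelementptr that upstream LLVM now requires. A minimal sketch of the rewrite pattern (illustrative only, not copied from this commit; the register names are placeholders):

    ; old: the pointee type is implied by the pointer operand
    %p = getelementptr i32 addrspace(1)* %in, i32 1
    %v = load i32 addrspace(1)* %p, align 4

    ; new: the pointee type is an explicit first operand
    %p = getelementptr i32, i32 addrspace(1)* %in, i32 1
    %v = load i32, i32 addrspace(1)* %p, align 4
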
diff --git a/test/CodeGen/R600/select64.ll b/test/CodeGen/R600/select64.ll
index 0245dae..5cebb30 100644
--- a/test/CodeGen/R600/select64.ll
+++ b/test/CodeGen/R600/select64.ll
@@ -42,8 +42,8 @@ define void @select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 %a, i64 %
; CHECK-NOT: v_cndmask_b32
define void @v_select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
%cmp = icmp ugt i32 %cond, 5
- %a = load i64 addrspace(1)* %aptr, align 8
- %b = load i64 addrspace(1)* %bptr, align 8
+ %a = load i64, i64 addrspace(1)* %aptr, align 8
+ %b = load i64, i64 addrspace(1)* %bptr, align 8
%sel = select i1 %cmp, i64 %a, i64 %b
%trunc = trunc i64 %sel to i32
store i32 %trunc, i32 addrspace(1)* %out, align 4
@@ -60,8 +60,8 @@ define void @v_select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 addrspa
; CHECK: s_endpgm
define void @v_select_i64_split_imm(i64 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
%cmp = icmp ugt i32 %cond, 5
- %a = load i64 addrspace(1)* %aptr, align 8
- %b = load i64 addrspace(1)* %bptr, align 8
+ %a = load i64, i64 addrspace(1)* %aptr, align 8
+ %b = load i64, i64 addrspace(1)* %bptr, align 8
%sel = select i1 %cmp, i64 %a, i64 270582939648 ; 63 << 32
store i64 %sel, i64 addrspace(1)* %out, align 8
ret void
diff --git a/test/CodeGen/R600/selectcc-cnd.ll b/test/CodeGen/R600/selectcc-cnd.ll
index 0bfca69..94d0ace 100644
--- a/test/CodeGen/R600/selectcc-cnd.ll
+++ b/test/CodeGen/R600/selectcc-cnd.ll
@@ -4,7 +4,7 @@
;CHECK: CNDE {{\*?}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1.0, literal.x,
;CHECK: 1073741824
define void @test(float addrspace(1)* %out, float addrspace(1)* %in) {
- %1 = load float addrspace(1)* %in
+ %1 = load float, float addrspace(1)* %in
%2 = fcmp oeq float %1, 0.0
%3 = select i1 %2, float 1.0, float 2.0
store float %3, float addrspace(1)* %out
diff --git a/test/CodeGen/R600/selectcc-cnde-int.ll b/test/CodeGen/R600/selectcc-cnde-int.ll
index d568888..58a4ee7 100644
--- a/test/CodeGen/R600/selectcc-cnde-int.ll
+++ b/test/CodeGen/R600/selectcc-cnde-int.ll
@@ -4,7 +4,7 @@
;CHECK: CNDE_INT {{\*?}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, literal.x,
;CHECK-NEXT: 2
define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %1 = load i32 addrspace(1)* %in
+ %1 = load i32, i32 addrspace(1)* %in
%2 = icmp eq i32 %1, 0
%3 = select i1 %2, i32 1, i32 2
store i32 %3, i32 addrspace(1)* %out
diff --git a/test/CodeGen/R600/selectcc-icmp-select-float.ll b/test/CodeGen/R600/selectcc-icmp-select-float.ll
index 6743800..e870ee8 100644
--- a/test/CodeGen/R600/selectcc-icmp-select-float.ll
+++ b/test/CodeGen/R600/selectcc-icmp-select-float.ll
@@ -8,7 +8,7 @@
define void @test(float addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
- %0 = load i32 addrspace(1)* %in
+ %0 = load i32, i32 addrspace(1)* %in
%1 = icmp sge i32 %0, 0
%2 = select i1 %1, float 1.0, float 0.0
store float %2, float addrspace(1)* %out
diff --git a/test/CodeGen/R600/selectcc-opt.ll b/test/CodeGen/R600/selectcc-opt.ll
index 7780371..65be4a6 100644
--- a/test/CodeGen/R600/selectcc-opt.ll
+++ b/test/CodeGen/R600/selectcc-opt.ll
@@ -19,7 +19,7 @@ entry:
br i1 %6, label %IF, label %ENDIF
IF:
- %7 = getelementptr i32 addrspace(1)* %out, i32 1
+ %7 = getelementptr i32, i32 addrspace(1)* %out, i32 1
store i32 0, i32 addrspace(1)* %7
br label %ENDIF
@@ -47,7 +47,7 @@ entry:
br i1 %6, label %ENDIF, label %IF
IF:
- %7 = getelementptr i32 addrspace(1)* %out, i32 1
+ %7 = getelementptr i32, i32 addrspace(1)* %out, i32 1
store i32 0, i32 addrspace(1)* %7
br label %ENDIF
diff --git a/test/CodeGen/R600/setcc-opt.ll b/test/CodeGen/R600/setcc-opt.ll
index 93860f5..4e6a10d 100644
--- a/test/CodeGen/R600/setcc-opt.ll
+++ b/test/CodeGen/R600/setcc-opt.ll
@@ -40,7 +40,7 @@ define void @sext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
; FUNC-LABEL: {{^}}sext_bool_icmp_eq_1:
; GCN: v_cmp_eq_i32_e32 vcc,
; GCN-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, -1, vcc
-; GCN-NEXT: v_cmp_eq_i32_e64 {{s\[[0-9]+:[0-9]+\]}}, [[TMP]], 1{{$}}
+; GCN-NEXT: v_cmp_eq_i32_e32 vcc, 1, [[TMP]]{{$}}
; GCN-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, 1,
; GCN-NEXT: buffer_store_byte [[TMP]]
; GCN-NEXT: s_endpgm
@@ -56,7 +56,7 @@ define void @sext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
; FUNC-LABEL: {{^}}sext_bool_icmp_ne_1:
; GCN: v_cmp_ne_i32_e32 vcc,
; GCN-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, -1, vcc
-; GCN-NEXT: v_cmp_ne_i32_e64 {{s\[[0-9]+:[0-9]+\]}}, [[TMP]], 1{{$}}
+; GCN-NEXT: v_cmp_ne_i32_e32 vcc, 1, [[TMP]]{{$}}
; GCN-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, 1,
; GCN-NEXT: buffer_store_byte [[TMP]]
; GCN-NEXT: s_endpgm
@@ -129,8 +129,8 @@ define void @zext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
; VI-DAG: s_load_dword [[A:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; VI-DAG: s_load_dword [[B:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
; GCN: v_mov_b32_e32 [[VB:v[0-9]+]], [[B]]
-; GCN: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[VB]], 2{{$}}
-; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]]
+; GCN: v_cmp_ne_i32_e32 vcc, 2, [[VB]]{{$}}
+; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN: buffer_store_byte
; GCN: s_endpgm
define void @sext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
@@ -144,7 +144,7 @@ define void @sext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
; FUNC-LABEL: {{^}}cmp_zext_k_i8max:
; GCN: buffer_load_ubyte [[B:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
; GCN: v_mov_b32_e32 [[K255:v[0-9]+]], 0xff{{$}}
-; GCN: v_cmp_ne_i32_e32 vcc, [[B]], [[K255]]
+; GCN: v_cmp_ne_i32_e32 vcc, [[K255]], [[B]]
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
@@ -157,12 +157,12 @@ define void @cmp_zext_k_i8max(i1 addrspace(1)* %out, i8 %b) nounwind {
; FUNC-LABEL: {{^}}cmp_sext_k_neg1:
; GCN: buffer_load_sbyte [[B:v[0-9]+]]
-; GCN: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[B]], -1{{$}}
-; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]]
+; GCN: v_cmp_ne_i32_e32 vcc, -1, [[B]]{{$}}
+; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
define void @cmp_sext_k_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %b.ptr) nounwind {
- %b = load i8 addrspace(1)* %b.ptr
+ %b = load i8, i8 addrspace(1)* %b.ptr
%b.ext = sext i8 %b to i32
%icmp0 = icmp ne i32 %b.ext, -1
store i1 %icmp0, i1 addrspace(1)* %out
@@ -171,7 +171,7 @@ define void @cmp_sext_k_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %b.ptr) nou
; FUNC-LABEL: {{^}}cmp_sext_k_neg1_i8_sext_arg:
; GCN: s_load_dword [[B:s[0-9]+]]
-; GCN: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[B]], -1{{$}}
+; GCN: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -1, [[B]]
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]]
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
@@ -189,7 +189,7 @@ define void @cmp_sext_k_neg1_i8_sext_arg(i1 addrspace(1)* %out, i8 signext %b) n
; FUNC-LABEL: {{^}}cmp_sext_k_neg1_i8_arg:
; GCN-DAG: buffer_load_ubyte [[B:v[0-9]+]]
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xff{{$}}
-; GCN: v_cmp_ne_i32_e32 vcc, [[B]], [[K]]{{$}}
+; GCN: v_cmp_ne_i32_e32 vcc, [[K]], [[B]]{{$}}
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
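
The CHECK-line updates in this file reflect the new compare commuting: when a constant operand can be moved into src0, the compiler now selects the 32-bit VOP2 compare encoding, which implicitly writes vcc, instead of the VOP3 _e64 form that writes an arbitrary SGPR pair. A hedged before/after sketch (registers are placeholders, not taken from the tests):

    ; before: constant in src1 forces the VOP3 encoding
    v_cmp_ne_i32_e64 s[0:1], v0, -1
    ; after: commuting puts the constant in src0, enabling VOP2 + vcc
    v_cmp_ne_i32_e32 vcc, -1, v0
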
diff --git a/test/CodeGen/R600/setcc.ll b/test/CodeGen/R600/setcc.ll
index f9c7e4f..f33a82d 100644
--- a/test/CodeGen/R600/setcc.ll
+++ b/test/CodeGen/R600/setcc.ll
@@ -21,9 +21,9 @@ define void @setcc_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %
; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define void @setcc_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32> addrspace(1) * %in
- %b = load <4 x i32> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
+ %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
+ %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
%result = icmp eq <4 x i32> %a, %b
%sext = sext <4 x i1> %result to <4 x i32>
store <4 x i32> %sext, <4 x i32> addrspace(1)* %out
@@ -344,11 +344,11 @@ entry:
; SI: s_endpgm
define void @v3i32_eq(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %ptra, <3 x i32> addrspace(1)* %ptrb) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.a = getelementptr <3 x i32> addrspace(1)* %ptra, i32 %tid
- %gep.b = getelementptr <3 x i32> addrspace(1)* %ptrb, i32 %tid
- %gep.out = getelementptr <3 x i32> addrspace(1)* %out, i32 %tid
- %a = load <3 x i32> addrspace(1)* %gep.a
- %b = load <3 x i32> addrspace(1)* %gep.b
+ %gep.a = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %ptra, i32 %tid
+ %gep.b = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %ptrb, i32 %tid
+ %gep.out = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %out, i32 %tid
+ %a = load <3 x i32>, <3 x i32> addrspace(1)* %gep.a
+ %b = load <3 x i32>, <3 x i32> addrspace(1)* %gep.b
%cmp = icmp eq <3 x i32> %a, %b
%ext = sext <3 x i1> %cmp to <3 x i32>
store <3 x i32> %ext, <3 x i32> addrspace(1)* %gep.out
@@ -365,11 +365,11 @@ define void @v3i32_eq(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %ptr
; SI: s_endpgm
define void @v3i8_eq(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %ptra, <3 x i8> addrspace(1)* %ptrb) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.a = getelementptr <3 x i8> addrspace(1)* %ptra, i32 %tid
- %gep.b = getelementptr <3 x i8> addrspace(1)* %ptrb, i32 %tid
- %gep.out = getelementptr <3 x i8> addrspace(1)* %out, i32 %tid
- %a = load <3 x i8> addrspace(1)* %gep.a
- %b = load <3 x i8> addrspace(1)* %gep.b
+ %gep.a = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %ptra, i32 %tid
+ %gep.b = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %ptrb, i32 %tid
+ %gep.out = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %out, i32 %tid
+ %a = load <3 x i8>, <3 x i8> addrspace(1)* %gep.a
+ %b = load <3 x i8>, <3 x i8> addrspace(1)* %gep.b
%cmp = icmp eq <3 x i8> %a, %b
%ext = sext <3 x i1> %cmp to <3 x i8>
store <3 x i8> %ext, <3 x i8> addrspace(1)* %gep.out
diff --git a/test/CodeGen/R600/sext-in-reg.ll b/test/CodeGen/R600/sext-in-reg.ll
index 3260179..d9ad493 100644
--- a/test/CodeGen/R600/sext-in-reg.ll
+++ b/test/CodeGen/R600/sext-in-reg.ll
@@ -187,11 +187,11 @@ define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) noun
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @v_sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x()
- %a.gep = getelementptr i64 addrspace(1)* %aptr, i32 %tid
- %b.gep = getelementptr i64 addrspace(1)* %aptr, i32 %tid
- %out.gep = getelementptr i64 addrspace(1)* %out, i32 %tid
- %a = load i64 addrspace(1)* %a.gep, align 8
- %b = load i64 addrspace(1)* %b.gep, align 8
+ %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
+ %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
+ %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
+ %a = load i64, i64 addrspace(1)* %a.gep, align 8
+ %b = load i64, i64 addrspace(1)* %b.gep, align 8
%c = shl i64 %a, %b
%shl = shl i64 %c, 63
@@ -208,11 +208,11 @@ define void @v_sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @v_sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x()
- %a.gep = getelementptr i64 addrspace(1)* %aptr, i32 %tid
- %b.gep = getelementptr i64 addrspace(1)* %aptr, i32 %tid
- %out.gep = getelementptr i64 addrspace(1)* %out, i32 %tid
- %a = load i64 addrspace(1)* %a.gep, align 8
- %b = load i64 addrspace(1)* %b.gep, align 8
+ %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
+ %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
+ %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
+ %a = load i64, i64 addrspace(1)* %a.gep, align 8
+ %b = load i64, i64 addrspace(1)* %b.gep, align 8
%c = shl i64 %a, %b
%shl = shl i64 %c, 56
@@ -229,11 +229,11 @@ define void @v_sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @v_sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x()
- %a.gep = getelementptr i64 addrspace(1)* %aptr, i32 %tid
- %b.gep = getelementptr i64 addrspace(1)* %aptr, i32 %tid
- %out.gep = getelementptr i64 addrspace(1)* %out, i32 %tid
- %a = load i64 addrspace(1)* %a.gep, align 8
- %b = load i64 addrspace(1)* %b.gep, align 8
+ %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
+ %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
+ %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
+ %a = load i64, i64 addrspace(1)* %a.gep, align 8
+ %b = load i64, i64 addrspace(1)* %b.gep, align 8
%c = shl i64 %a, %b
%shl = shl i64 %c, 48
@@ -249,11 +249,11 @@ define void @v_sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)*
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[SHR]]{{\]}}
define void @v_sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x()
- %a.gep = getelementptr i64 addrspace(1)* %aptr, i32 %tid
- %b.gep = getelementptr i64 addrspace(1)* %aptr, i32 %tid
- %out.gep = getelementptr i64 addrspace(1)* %out, i32 %tid
- %a = load i64 addrspace(1)* %a.gep, align 8
- %b = load i64 addrspace(1)* %b.gep, align 8
+ %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
+ %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
+ %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
+ %a = load i64, i64 addrspace(1)* %a.gep, align 8
+ %b = load i64, i64 addrspace(1)* %b.gep, align 8
%c = shl i64 %a, %b
%shl = shl i64 %c, 32
@@ -263,9 +263,9 @@ define void @v_sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)*
}
; FUNC-LABEL: {{^}}sext_in_reg_i1_in_i32_other_amount:
-; SI-NOT: {{[^@]}}bfe
-; SI: s_lshl_b32 [[REG:s[0-9]+]], {{s[0-9]+}}, 6
-; SI: s_ashr_i32 {{s[0-9]+}}, [[REG]], 7
+; SI-NOT: s_lshl
+; SI-NOT: s_ashr
+; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x190001
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG-NOT: BFE
@@ -282,10 +282,10 @@ define void @sext_in_reg_i1_in_i32_other_amount(i32 addrspace(1)* %out, i32 %a,
}
; FUNC-LABEL: {{^}}sext_in_reg_v2i1_in_v2i32_other_amount:
-; SI-DAG: s_lshl_b32 [[REG0:s[0-9]+]], {{s[0-9]}}, 6
-; SI-DAG: s_ashr_i32 {{s[0-9]+}}, [[REG0]], 7
-; SI-DAG: s_lshl_b32 [[REG1:s[0-9]+]], {{s[0-9]}}, 6
-; SI-DAG: s_ashr_i32 {{s[0-9]+}}, [[REG1]], 7
+; SI-NOT: s_lshl
+; SI-NOT: s_ashr
+; SI-DAG: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x190001
+; SI-DAG: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x190001
; SI: s_endpgm
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
@@ -428,8 +428,8 @@ define void @testcase_3(i8 addrspace(1)* %out, i8 %a) nounwind {
; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
define void @vgpr_sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
- %loada = load <4 x i32> addrspace(1)* %a, align 16
- %loadb = load <4 x i32> addrspace(1)* %b, align 16
+ %loada = load <4 x i32>, <4 x i32> addrspace(1)* %a, align 16
+ %loadb = load <4 x i32>, <4 x i32> addrspace(1)* %b, align 16
%c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
%shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24>
%ashr = ashr <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
@@ -441,8 +441,8 @@ define void @vgpr_sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i
; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
define void @vgpr_sext_in_reg_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
- %loada = load <4 x i32> addrspace(1)* %a, align 16
- %loadb = load <4 x i32> addrspace(1)* %b, align 16
+ %loada = load <4 x i32>, <4 x i32> addrspace(1)* %a, align 16
+ %loadb = load <4 x i32>, <4 x i32> addrspace(1)* %b, align 16
%c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
%shl = shl <4 x i32> %c, <i32 16, i32 16, i32 16, i32 16>
%ashr = ashr <4 x i32> %shl, <i32 16, i32 16, i32 16, i32 16>
@@ -459,7 +459,7 @@ define void @vgpr_sext_in_reg_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x
; SI: v_bfe_i32
; SI: buffer_store_short
define void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
- %tmp5 = load i8 addrspace(1)* %src, align 1
+ %tmp5 = load i8, i8 addrspace(1)* %src, align 1
%tmp2 = sext i8 %tmp5 to i32
%tmp3 = tail call i32 @llvm.AMDGPU.imax(i32 %tmp2, i32 0) nounwind readnone
%tmp4 = trunc i32 %tmp3 to i8
@@ -474,7 +474,7 @@ declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
- %load = load i32 addrspace(1)* %ptr, align 4
+ %load = load i32, i32 addrspace(1)* %ptr, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 8, i32 0) nounwind readnone
store i32 %bfe, i32 addrspace(1)* %out, align 4
ret void
@@ -485,7 +485,7 @@ define void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwin
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
- %load = load i32 addrspace(1)* %ptr, align 4
+ %load = load i32, i32 addrspace(1)* %ptr, align 4
%bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
%bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone
store i32 %bfe1, i32 addrspace(1)* %out, align 4
@@ -496,7 +496,7 @@ define void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwin
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
; SI: s_endpgm
define void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
- %load = load i32 addrspace(1)* %ptr, align 4
+ %load = load i32, i32 addrspace(1)* %ptr, align 4
%bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
%bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 16) nounwind readnone
store i32 %bfe1, i32 addrspace(1)* %out, align 4
@@ -509,7 +509,7 @@ define void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwi
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
- %load = load i32 addrspace(1)* %ptr, align 4
+ %load = load i32, i32 addrspace(1)* %ptr, align 4
%bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 16) nounwind readnone
%bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone
store i32 %bfe1, i32 addrspace(1)* %out, align 4
@@ -545,7 +545,7 @@ define void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %out, i32 %a, i32
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
- %load = load i8 addrspace(1)* %ptr, align 1
+ %load = load i8, i8 addrspace(1)* %ptr, align 1
%sext = sext i8 %load to i32
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 0, i32 8) nounwind readnone
%shl = shl i32 %bfe, 24
@@ -554,12 +554,12 @@ define void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %pt
ret void
}
-; FUNC-LABEL: {{^}}sextload_i8_to_i32_bfe_0:
; SI: .text
+; FUNC-LABEL: {{^}}sextload_i8_to_i32_bfe_0:{{.*$}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
- %load = load i8 addrspace(1)* %ptr, align 1
+ %load = load i8, i8 addrspace(1)* %ptr, align 1
%sext = sext i8 %load to i32
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 8, i32 0) nounwind readnone
%shl = shl i32 %bfe, 24
@@ -574,7 +574,7 @@ define void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
; SI: s_endpgm
define void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
%shr = ashr i32 %shl, 31
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 0, i32 1)
@@ -589,7 +589,7 @@ define void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 1
; SI: s_endpgm
define void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 30
%shr = ashr i32 %shl, 30
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 1)
@@ -599,12 +599,13 @@ define void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1
; FUNC-LABEL: {{^}}sext_in_reg_i2_bfe_offset_1:
; SI: buffer_load_dword
-; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}}
-; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}}
+; SI-NOT: v_lshl
+; SI-NOT: v_ashr
+; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 2
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2
; SI: s_endpgm
define void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 30
%shr = ashr i32 %shl, 30
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 2)
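
Several hunks above also retighten the sext-in-reg checks: a shl/ashr pair that sign-extends a narrow bitfield is now expected to select to a single bitfield-extract instruction rather than two shifts. A rough sketch of the idiom being matched (width and registers are illustrative assumptions):

    ; IR idiom for sign-extending the low 2 bits in-register
    %shl = shl i32 %x, 30
    %shr = ashr i32 %shl, 30
    ; now expected to match a single BFE, e.g.
    ; v_bfe_i32 v0, v0, 0, 2
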
diff --git a/test/CodeGen/R600/sgpr-control-flow.ll b/test/CodeGen/R600/sgpr-control-flow.ll
index f0236ac..38289ce 100644
--- a/test/CodeGen/R600/sgpr-control-flow.ll
+++ b/test/CodeGen/R600/sgpr-control-flow.ll
@@ -64,15 +64,15 @@ endif:
; SI-LABEL: {{^}}sgpr_if_else_valu_cmp_phi_br:
; SI: buffer_load_dword [[AVAL:v[0-9]+]]
-; SI: v_cmp_lt_i32_e64 [[CMP_IF:s\[[0-9]+:[0-9]+\]]], [[AVAL]], 0
+; SI: v_cmp_gt_i32_e32 [[CMP_IF:vcc]], 0, [[AVAL]]
; SI: v_cndmask_b32_e64 [[V_CMP:v[0-9]+]], 0, -1, [[CMP_IF]]
; SI: BB2_1:
; SI: buffer_load_dword [[AVAL:v[0-9]+]]
-; SI: v_cmp_eq_i32_e64 [[CMP_ELSE:s\[[0-9]+:[0-9]+\]]], [[AVAL]], 0
+; SI: v_cmp_eq_i32_e32 [[CMP_ELSE:vcc]], 0, [[AVAL]]
; SI: v_cndmask_b32_e64 [[V_CMP]], 0, -1, [[CMP_ELSE]]
-; SI: v_cmp_ne_i32_e64 [[CMP_CMP:s\[[0-9]+:[0-9]+\]]], [[V_CMP]], 0
+; SI: v_cmp_ne_i32_e32 [[CMP_CMP:vcc]], 0, [[V_CMP]]
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP_CMP]]
; SI: buffer_store_dword [[RESULT]]
define void @sgpr_if_else_valu_cmp_phi_br(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
@@ -82,14 +82,14 @@ entry:
br i1 %tmp1, label %if, label %else
if:
- %gep.if = getelementptr i32 addrspace(1)* %a, i32 %tid
- %a.val = load i32 addrspace(1)* %gep.if
+ %gep.if = getelementptr i32, i32 addrspace(1)* %a, i32 %tid
+ %a.val = load i32, i32 addrspace(1)* %gep.if
%cmp.if = icmp eq i32 %a.val, 0
br label %endif
else:
- %gep.else = getelementptr i32 addrspace(1)* %b, i32 %tid
- %b.val = load i32 addrspace(1)* %gep.else
+ %gep.else = getelementptr i32, i32 addrspace(1)* %b, i32 %tid
+ %b.val = load i32, i32 addrspace(1)* %gep.else
%cmp.else = icmp slt i32 %b.val, 0
br label %endif
diff --git a/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll b/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll
index 893f5a3..df67fcc 100644
--- a/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll
+++ b/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll
@@ -7,7 +7,7 @@
; SI-LABEL: {{^}}test_dup_operands:
; SI: v_add_i32_e32
define void @test_dup_operands(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) {
- %a = load <2 x i32> addrspace(1)* %in
+ %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
%lo = extractelement <2 x i32> %a, i32 0
%hi = extractelement <2 x i32> %a, i32 1
%add = add i32 %lo, %lo
diff --git a/test/CodeGen/R600/sgpr-copy.ll b/test/CodeGen/R600/sgpr-copy.ll
index 57cbadd..b849c40 100644
--- a/test/CodeGen/R600/sgpr-copy.ll
+++ b/test/CodeGen/R600/sgpr-copy.ll
@@ -9,8 +9,8 @@
define void @phi1(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
- %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
- %21 = load <16 x i8> addrspace(2)* %20, !tbaa !1
+ %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
+ %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20, !tbaa !1
%22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 0)
%23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
%24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
@@ -33,8 +33,8 @@ ENDIF: ; preds = %main_body, %ELSE
; CHECK-LABEL: {{^}}phi2:
define void @phi2(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
- %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
- %21 = load <16 x i8> addrspace(2)* %20, !tbaa !1
+ %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
+ %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20, !tbaa !1
%22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
%23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
%24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36)
@@ -50,10 +50,10 @@ main_body:
%34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
%35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 88)
%36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 92)
- %37 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
- %38 = load <32 x i8> addrspace(2)* %37, !tbaa !1
- %39 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
- %40 = load <16 x i8> addrspace(2)* %39, !tbaa !1
+ %37 = getelementptr <32 x i8>, <32 x i8> addrspace(2)* %2, i32 0
+ %38 = load <32 x i8>, <32 x i8> addrspace(2)* %37, !tbaa !1
+ %39 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %1, i32 0
+ %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, !tbaa !1
%41 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
%42 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
%43 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
@@ -154,8 +154,8 @@ ENDIF24: ; preds = %ENDIF, %IF25
define void @loop(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
- %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
- %21 = load <16 x i8> addrspace(2)* %20, !tbaa !1
+ %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
+ %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20, !tbaa !1
%22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 0)
%23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 4)
%24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 8)
@@ -236,13 +236,13 @@ declare i32 @llvm.SI.packf16(float, float) #1
define void @sample_v3([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
entry:
- %21 = getelementptr [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0
- %22 = load <16 x i8> addrspace(2)* %21, !tbaa !2
+ %21 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0
+ %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, !tbaa !2
%23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 16)
- %24 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0
- %25 = load <32 x i8> addrspace(2)* %24, !tbaa !2
- %26 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0
- %27 = load <16 x i8> addrspace(2)* %26, !tbaa !2
+ %24 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0
+ %25 = load <32 x i8>, <32 x i8> addrspace(2)* %24, !tbaa !2
+ %26 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0
+ %27 = load <16 x i8>, <16 x i8> addrspace(2)* %26, !tbaa !2
%28 = fcmp oeq float %23, 0.0
br i1 %28, label %if, label %else
@@ -276,7 +276,7 @@ endif:
; CHECK: s_endpgm
define void @copy1(float addrspace(1)* %out, float addrspace(1)* %in0) {
entry:
- %0 = load float addrspace(1)* %in0
+ %0 = load float, float addrspace(1)* %in0
%1 = fcmp oeq float %0, 0.0
br i1 %1, label %if0, label %endif
@@ -334,13 +334,13 @@ attributes #0 = { "ShaderType"="0" }
; CHECK: s_endpgm
define void @sample_rsrc([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <4 x i32>] addrspace(2)* byval %arg2, [32 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
bb:
- %tmp = getelementptr [17 x <16 x i8>] addrspace(2)* %arg1, i32 0, i32 0
- %tmp22 = load <16 x i8> addrspace(2)* %tmp, !tbaa !0
+ %tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg1, i32 0, i32 0
+ %tmp22 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
%tmp23 = call float @llvm.SI.load.const(<16 x i8> %tmp22, i32 16)
- %tmp25 = getelementptr [32 x <8 x i32>] addrspace(2)* %arg3, i32 0, i32 0
- %tmp26 = load <8 x i32> addrspace(2)* %tmp25, !tbaa !0
- %tmp27 = getelementptr [16 x <4 x i32>] addrspace(2)* %arg2, i32 0, i32 0
- %tmp28 = load <4 x i32> addrspace(2)* %tmp27, !tbaa !0
+ %tmp25 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %arg3, i32 0, i32 0
+ %tmp26 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp25, !tbaa !0
+ %tmp27 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %arg2, i32 0, i32 0
+ %tmp28 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp27, !tbaa !0
%tmp29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %arg5, <2 x i32> %arg7)
%tmp30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %arg5, <2 x i32> %arg7)
%tmp31 = bitcast float %tmp23 to i32
diff --git a/test/CodeGen/R600/shl.ll b/test/CodeGen/R600/shl.ll
index f89353b..53b63dc 100644
--- a/test/CodeGen/R600/shl.ll
+++ b/test/CodeGen/R600/shl.ll
@@ -15,9 +15,9 @@
;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32> addrspace(1) * %in
- %b = load <2 x i32> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
+ %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
+ %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
%result = shl <2 x i32> %a, %b
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
ret void
@@ -42,9 +42,9 @@ define void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in
;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32> addrspace(1) * %in
- %b = load <4 x i32> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
+ %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
+ %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
%result = shl <4 x i32> %a, %b
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
@@ -69,9 +69,9 @@ define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in
;VI: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1
- %a = load i64 addrspace(1) * %in
- %b = load i64 addrspace(1) * %b_ptr
+ %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
+ %a = load i64, i64 addrspace(1) * %in
+ %b = load i64, i64 addrspace(1) * %b_ptr
%result = shl i64 %a, %b
store i64 %result, i64 addrspace(1)* %out
ret void
@@ -108,9 +108,9 @@ define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
;VI: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
define void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1
- %a = load <2 x i64> addrspace(1) * %in
- %b = load <2 x i64> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1
+ %a = load <2 x i64>, <2 x i64> addrspace(1) * %in
+ %b = load <2 x i64>, <2 x i64> addrspace(1) * %b_ptr
%result = shl <2 x i64> %a, %b
store <2 x i64> %result, <2 x i64> addrspace(1)* %out
ret void
@@ -171,9 +171,9 @@ define void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in
;VI: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
define void @shl_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1
- %a = load <4 x i64> addrspace(1) * %in
- %b = load <4 x i64> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1
+ %a = load <4 x i64>, <4 x i64> addrspace(1) * %in
+ %b = load <4 x i64>, <4 x i64> addrspace(1) * %b_ptr
%result = shl <4 x i64> %a, %b
store <4 x i64> %result, <4 x i64> addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/shl_add_constant.ll b/test/CodeGen/R600/shl_add_constant.ll
index 6915495..b1485bf 100644
--- a/test/CodeGen/R600/shl_add_constant.ll
+++ b/test/CodeGen/R600/shl_add_constant.ll
@@ -11,8 +11,8 @@ declare i32 @llvm.r600.read.tidig.x() #1
; SI: s_endpgm
define void @shl_2_add_9_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
- %ptr = getelementptr i32 addrspace(1)* %in, i32 %tid.x
- %val = load i32 addrspace(1)* %ptr, align 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid.x
+ %val = load i32, i32 addrspace(1)* %ptr, align 4
%add = add i32 %val, 9
%result = shl i32 %add, 2
store i32 %result, i32 addrspace(1)* %out, align 4
@@ -27,8 +27,8 @@ define void @shl_2_add_9_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; SI: s_endpgm
define void @shl_2_add_9_i32_2_add_uses(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %in) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
- %ptr = getelementptr i32 addrspace(1)* %in, i32 %tid.x
- %val = load i32 addrspace(1)* %ptr, align 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid.x
+ %val = load i32, i32 addrspace(1)* %ptr, align 4
%add = add i32 %val, 9
%result = shl i32 %add, 2
store i32 %result, i32 addrspace(1)* %out0, align 4
@@ -45,8 +45,8 @@ define void @shl_2_add_9_i32_2_add_uses(i32 addrspace(1)* %out0, i32 addrspace(1
; SI: s_endpgm
define void @shl_2_add_999_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
- %ptr = getelementptr i32 addrspace(1)* %in, i32 %tid.x
- %val = load i32 addrspace(1)* %ptr, align 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid.x
+ %val = load i32, i32 addrspace(1)* %ptr, align 4
%shl = add i32 %val, 999
%result = shl i32 %shl, 2
store i32 %result, i32 addrspace(1)* %out, align 4
diff --git a/test/CodeGen/R600/shl_add_ptr.ll b/test/CodeGen/R600/shl_add_ptr.ll
index d423153..066dafb 100644
--- a/test/CodeGen/R600/shl_add_ptr.ll
+++ b/test/CodeGen/R600/shl_add_ptr.ll
@@ -22,8 +22,8 @@ declare i32 @llvm.r600.read.tidig.x() #1
define void @load_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
- %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
+ %val0 = load float, float addrspace(3)* %arrayidx0, align 4
store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
store float %val0, float addrspace(1)* %out
ret void
@@ -42,8 +42,8 @@ define void @load_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %ad
define void @load_shl_base_lds_1(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
- %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
+ %val0 = load float, float addrspace(3)* %arrayidx0, align 4
%shl_add_use = shl i32 %idx.0, 2
store i32 %shl_add_use, i32 addrspace(1)* %add_use, align 4
store float %val0, float addrspace(1)* %out
@@ -58,8 +58,8 @@ define void @load_shl_base_lds_1(float addrspace(1)* %out, i32 addrspace(1)* %ad
define void @load_shl_base_lds_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %lds, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 65535
- %arrayidx0 = getelementptr inbounds [65536 x i8] addrspace(3)* @maxlds, i32 0, i32 %idx.0
- %val0 = load i8 addrspace(3)* %arrayidx0
+ %arrayidx0 = getelementptr inbounds [65536 x i8], [65536 x i8] addrspace(3)* @maxlds, i32 0, i32 %idx.0
+ %val0 = load i8, i8 addrspace(3)* %arrayidx0
store i32 %idx.0, i32 addrspace(1)* %add_use
store i8 %val0, i8 addrspace(1)* %out
ret void
@@ -76,10 +76,10 @@ define void @load_shl_base_lds_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)
define void @load_shl_base_lds_2(float addrspace(1)* %out) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 64
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
- %val0 = load float addrspace(3)* %arrayidx0, align 4
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds1, i32 0, i32 %idx.0
- %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
+ %val0 = load float, float addrspace(3)* %arrayidx0, align 4
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds1, i32 0, i32 %idx.0
+ %val1 = load float, float addrspace(3)* %arrayidx1, align 4
%sum = fadd float %val0, %val1
store float %sum, float addrspace(1)* %out, align 4
ret void
@@ -92,7 +92,7 @@ define void @load_shl_base_lds_2(float addrspace(1)* %out) #0 {
define void @store_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
store float 1.0, float addrspace(3)* %arrayidx0, align 4
store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
ret void
@@ -107,8 +107,8 @@ define void @store_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %a
; define void @atomic_load_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
; %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
; %idx.0 = add nsw i32 %tid.x, 2
-; %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
-; %val = load atomic i32 addrspace(3)* %arrayidx0 seq_cst, align 4
+; %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+; %val = load atomic i32, i32 addrspace(3)* %arrayidx0 seq_cst, align 4
; store i32 %val, i32 addrspace(1)* %out, align 4
; store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
; ret void
@@ -122,7 +122,7 @@ define void @store_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %a
define void @atomic_cmpxchg_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use, i32 %swap) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+ %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
%pair = cmpxchg i32 addrspace(3)* %arrayidx0, i32 7, i32 %swap seq_cst monotonic
%result = extractvalue { i32, i1 } %pair, 0
store i32 %result, i32 addrspace(1)* %out, align 4
@@ -137,7 +137,7 @@ define void @atomic_cmpxchg_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace
define void @atomic_swap_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+ %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
%val = atomicrmw xchg i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
store i32 %val, i32 addrspace(1)* %out, align 4
store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
@@ -151,7 +151,7 @@ define void @atomic_swap_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)
define void @atomic_add_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+ %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
%val = atomicrmw add i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
store i32 %val, i32 addrspace(1)* %out, align 4
store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
@@ -165,7 +165,7 @@ define void @atomic_add_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)*
define void @atomic_sub_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+ %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
%val = atomicrmw sub i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
store i32 %val, i32 addrspace(1)* %out, align 4
store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
@@ -179,7 +179,7 @@ define void @atomic_sub_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)*
define void @atomic_and_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+ %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
%val = atomicrmw and i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
store i32 %val, i32 addrspace(1)* %out, align 4
store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
@@ -193,7 +193,7 @@ define void @atomic_and_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)*
define void @atomic_or_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+ %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
%val = atomicrmw or i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
store i32 %val, i32 addrspace(1)* %out, align 4
store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
@@ -207,7 +207,7 @@ define void @atomic_or_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)*
define void @atomic_xor_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+ %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
%val = atomicrmw xor i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
store i32 %val, i32 addrspace(1)* %out, align 4
store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
@@ -217,7 +217,7 @@ define void @atomic_xor_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)*
; define void @atomic_nand_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
; %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
; %idx.0 = add nsw i32 %tid.x, 2
-; %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+; %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
; %val = atomicrmw nand i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
; store i32 %val, i32 addrspace(1)* %out, align 4
; store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
@@ -231,7 +231,7 @@ define void @atomic_xor_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)*
define void @atomic_min_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+ %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
%val = atomicrmw min i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
store i32 %val, i32 addrspace(1)* %out, align 4
store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
@@ -245,7 +245,7 @@ define void @atomic_min_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)*
define void @atomic_max_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+ %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
%val = atomicrmw max i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
store i32 %val, i32 addrspace(1)* %out, align 4
store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
@@ -259,7 +259,7 @@ define void @atomic_max_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)*
define void @atomic_umin_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+ %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
%val = atomicrmw umin i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
store i32 %val, i32 addrspace(1)* %out, align 4
store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
@@ -273,7 +273,7 @@ define void @atomic_umin_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)
define void @atomic_umax_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+ %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
%val = atomicrmw umax i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
store i32 %val, i32 addrspace(1)* %out, align 4
store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
diff --git a/test/CodeGen/R600/si-lod-bias.ll b/test/CodeGen/R600/si-lod-bias.ll
index d6cbd0f..944499a 100644
--- a/test/CodeGen/R600/si-lod-bias.ll
+++ b/test/CodeGen/R600/si-lod-bias.ll
@@ -9,13 +9,13 @@
define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
- %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
- %21 = load <16 x i8> addrspace(2)* %20, !tbaa !1
+ %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
+ %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20, !tbaa !1
%22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
- %23 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
- %24 = load <32 x i8> addrspace(2)* %23, !tbaa !1
- %25 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
- %26 = load <16 x i8> addrspace(2)* %25, !tbaa !1
+ %23 = getelementptr <32 x i8>, <32 x i8> addrspace(2)* %2, i32 0
+ %24 = load <32 x i8>, <32 x i8> addrspace(2)* %23, !tbaa !1
+ %25 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %1, i32 0
+ %26 = load <16 x i8>, <16 x i8> addrspace(2)* %25, !tbaa !1
%27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
%28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
%29 = bitcast float %22 to i32
diff --git a/test/CodeGen/R600/si-sgpr-spill.ll b/test/CodeGen/R600/si-sgpr-spill.ll
index 18fda20..8465270 100644
--- a/test/CodeGen/R600/si-sgpr-spill.ll
+++ b/test/CodeGen/R600/si-sgpr-spill.ll
@@ -13,8 +13,8 @@
define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
- %21 = getelementptr [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0
- %22 = load <16 x i8> addrspace(2)* %21, !tbaa !0
+ %21 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0
+ %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, !tbaa !0
%23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 96)
%24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 100)
%25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 104)
@@ -53,38 +53,38 @@ main_body:
%58 = call float @llvm.SI.load.const(<16 x i8> %22, i32 372)
%59 = call float @llvm.SI.load.const(<16 x i8> %22, i32 376)
%60 = call float @llvm.SI.load.const(<16 x i8> %22, i32 384)
- %61 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0
- %62 = load <32 x i8> addrspace(2)* %61, !tbaa !0
- %63 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0
- %64 = load <16 x i8> addrspace(2)* %63, !tbaa !0
- %65 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 1
- %66 = load <32 x i8> addrspace(2)* %65, !tbaa !0
- %67 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 1
- %68 = load <16 x i8> addrspace(2)* %67, !tbaa !0
- %69 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 2
- %70 = load <32 x i8> addrspace(2)* %69, !tbaa !0
- %71 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 2
- %72 = load <16 x i8> addrspace(2)* %71, !tbaa !0
- %73 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 3
- %74 = load <32 x i8> addrspace(2)* %73, !tbaa !0
- %75 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 3
- %76 = load <16 x i8> addrspace(2)* %75, !tbaa !0
- %77 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 4
- %78 = load <32 x i8> addrspace(2)* %77, !tbaa !0
- %79 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 4
- %80 = load <16 x i8> addrspace(2)* %79, !tbaa !0
- %81 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 5
- %82 = load <32 x i8> addrspace(2)* %81, !tbaa !0
- %83 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 5
- %84 = load <16 x i8> addrspace(2)* %83, !tbaa !0
- %85 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 6
- %86 = load <32 x i8> addrspace(2)* %85, !tbaa !0
- %87 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 6
- %88 = load <16 x i8> addrspace(2)* %87, !tbaa !0
- %89 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 7
- %90 = load <32 x i8> addrspace(2)* %89, !tbaa !0
- %91 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 7
- %92 = load <16 x i8> addrspace(2)* %91, !tbaa !0
+ %61 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0
+ %62 = load <32 x i8>, <32 x i8> addrspace(2)* %61, !tbaa !0
+ %63 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0
+ %64 = load <16 x i8>, <16 x i8> addrspace(2)* %63, !tbaa !0
+ %65 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 1
+ %66 = load <32 x i8>, <32 x i8> addrspace(2)* %65, !tbaa !0
+ %67 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 1
+ %68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, !tbaa !0
+ %69 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 2
+ %70 = load <32 x i8>, <32 x i8> addrspace(2)* %69, !tbaa !0
+ %71 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 2
+ %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, !tbaa !0
+ %73 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 3
+ %74 = load <32 x i8>, <32 x i8> addrspace(2)* %73, !tbaa !0
+ %75 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 3
+ %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, !tbaa !0
+ %77 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 4
+ %78 = load <32 x i8>, <32 x i8> addrspace(2)* %77, !tbaa !0
+ %79 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 4
+ %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, !tbaa !0
+ %81 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 5
+ %82 = load <32 x i8>, <32 x i8> addrspace(2)* %81, !tbaa !0
+ %83 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 5
+ %84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, !tbaa !0
+ %85 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 6
+ %86 = load <32 x i8>, <32 x i8> addrspace(2)* %85, !tbaa !0
+ %87 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 6
+ %88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, !tbaa !0
+ %89 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 7
+ %90 = load <32 x i8>, <32 x i8> addrspace(2)* %89, !tbaa !0
+ %91 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 7
+ %92 = load <16 x i8>, <16 x i8> addrspace(2)* %91, !tbaa !0
%93 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %4, <2 x i32> %6)
%94 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %4, <2 x i32> %6)
%95 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %4, <2 x i32> %6)
@@ -103,29 +103,29 @@ main_body:
%108 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %4, <2 x i32> %6)
%109 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %4, <2 x i32> %6)
%110 = call i32 @llvm.SI.tid()
- %111 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %110
+ %111 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %110
%112 = bitcast float %93 to i32
store i32 %112, i32 addrspace(3)* %111
%113 = bitcast float %94 to i32
store i32 %113, i32 addrspace(3)* %111
%114 = call i32 @llvm.SI.tid()
- %115 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %114
+ %115 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %114
%116 = and i32 %114, -4
- %117 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %116
+ %117 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %116
%118 = add i32 %116, 1
- %119 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %118
+ %119 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %118
%120 = bitcast float %93 to i32
store i32 %120, i32 addrspace(3)* %115
- %121 = load i32 addrspace(3)* %117
+ %121 = load i32, i32 addrspace(3)* %117
%122 = bitcast i32 %121 to float
- %123 = load i32 addrspace(3)* %119
+ %123 = load i32, i32 addrspace(3)* %119
%124 = bitcast i32 %123 to float
%125 = fsub float %124, %122
%126 = bitcast float %94 to i32
store i32 %126, i32 addrspace(3)* %115
- %127 = load i32 addrspace(3)* %117
+ %127 = load i32, i32 addrspace(3)* %117
%128 = bitcast i32 %127 to float
- %129 = load i32 addrspace(3)* %119
+ %129 = load i32, i32 addrspace(3)* %119
%130 = bitcast i32 %129 to float
%131 = fsub float %130, %128
%132 = insertelement <4 x float> undef, float %125, i32 0
@@ -139,7 +139,7 @@ main_body:
%140 = fmul float %60, %94
%141 = fmul float %60, %94
%142 = call i32 @llvm.SI.tid()
- %143 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %142
+ %143 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %142
%144 = bitcast float %138 to i32
store i32 %144, i32 addrspace(3)* %143
%145 = bitcast float %139 to i32
@@ -149,37 +149,37 @@ main_body:
%147 = bitcast float %141 to i32
store i32 %147, i32 addrspace(3)* %143
%148 = call i32 @llvm.SI.tid()
- %149 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %148
+ %149 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %148
%150 = and i32 %148, -4
- %151 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %150
+ %151 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %150
%152 = add i32 %150, 2
- %153 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %152
+ %153 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %152
%154 = bitcast float %138 to i32
store i32 %154, i32 addrspace(3)* %149
- %155 = load i32 addrspace(3)* %151
+ %155 = load i32, i32 addrspace(3)* %151
%156 = bitcast i32 %155 to float
- %157 = load i32 addrspace(3)* %153
+ %157 = load i32, i32 addrspace(3)* %153
%158 = bitcast i32 %157 to float
%159 = fsub float %158, %156
%160 = bitcast float %139 to i32
store i32 %160, i32 addrspace(3)* %149
- %161 = load i32 addrspace(3)* %151
+ %161 = load i32, i32 addrspace(3)* %151
%162 = bitcast i32 %161 to float
- %163 = load i32 addrspace(3)* %153
+ %163 = load i32, i32 addrspace(3)* %153
%164 = bitcast i32 %163 to float
%165 = fsub float %164, %162
%166 = bitcast float %140 to i32
store i32 %166, i32 addrspace(3)* %149
- %167 = load i32 addrspace(3)* %151
+ %167 = load i32, i32 addrspace(3)* %151
%168 = bitcast i32 %167 to float
- %169 = load i32 addrspace(3)* %153
+ %169 = load i32, i32 addrspace(3)* %153
%170 = bitcast i32 %169 to float
%171 = fsub float %170, %168
%172 = bitcast float %141 to i32
store i32 %172, i32 addrspace(3)* %149
- %173 = load i32 addrspace(3)* %151
+ %173 = load i32, i32 addrspace(3)* %151
%174 = bitcast i32 %173 to float
- %175 = load i32 addrspace(3)* %153
+ %175 = load i32, i32 addrspace(3)* %153
%176 = bitcast i32 %175 to float
%177 = fsub float %176, %174
%178 = insertelement <4 x float> undef, float %159, i32 0
@@ -694,8 +694,8 @@ attributes #4 = { nounwind readonly }
; CHECK: s_endpgm
define void @main1([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
- %21 = getelementptr [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0
- %22 = load <16 x i8> addrspace(2)* %21, !tbaa !0
+ %21 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0
+ %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, !tbaa !0
%23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 0)
%24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 4)
%25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 8)
@@ -799,42 +799,42 @@ main_body:
%123 = call float @llvm.SI.load.const(<16 x i8> %22, i32 716)
%124 = call float @llvm.SI.load.const(<16 x i8> %22, i32 864)
%125 = call float @llvm.SI.load.const(<16 x i8> %22, i32 868)
- %126 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0
- %127 = load <32 x i8> addrspace(2)* %126, !tbaa !0
- %128 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0
- %129 = load <16 x i8> addrspace(2)* %128, !tbaa !0
- %130 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 1
- %131 = load <32 x i8> addrspace(2)* %130, !tbaa !0
- %132 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 1
- %133 = load <16 x i8> addrspace(2)* %132, !tbaa !0
- %134 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 2
- %135 = load <32 x i8> addrspace(2)* %134, !tbaa !0
- %136 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 2
- %137 = load <16 x i8> addrspace(2)* %136, !tbaa !0
- %138 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 3
- %139 = load <32 x i8> addrspace(2)* %138, !tbaa !0
- %140 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 3
- %141 = load <16 x i8> addrspace(2)* %140, !tbaa !0
- %142 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 4
- %143 = load <32 x i8> addrspace(2)* %142, !tbaa !0
- %144 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 4
- %145 = load <16 x i8> addrspace(2)* %144, !tbaa !0
- %146 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 5
- %147 = load <32 x i8> addrspace(2)* %146, !tbaa !0
- %148 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 5
- %149 = load <16 x i8> addrspace(2)* %148, !tbaa !0
- %150 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 6
- %151 = load <32 x i8> addrspace(2)* %150, !tbaa !0
- %152 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 6
- %153 = load <16 x i8> addrspace(2)* %152, !tbaa !0
- %154 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 7
- %155 = load <32 x i8> addrspace(2)* %154, !tbaa !0
- %156 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 7
- %157 = load <16 x i8> addrspace(2)* %156, !tbaa !0
- %158 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 8
- %159 = load <32 x i8> addrspace(2)* %158, !tbaa !0
- %160 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 8
- %161 = load <16 x i8> addrspace(2)* %160, !tbaa !0
+ %126 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0
+ %127 = load <32 x i8>, <32 x i8> addrspace(2)* %126, !tbaa !0
+ %128 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0
+ %129 = load <16 x i8>, <16 x i8> addrspace(2)* %128, !tbaa !0
+ %130 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 1
+ %131 = load <32 x i8>, <32 x i8> addrspace(2)* %130, !tbaa !0
+ %132 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 1
+ %133 = load <16 x i8>, <16 x i8> addrspace(2)* %132, !tbaa !0
+ %134 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 2
+ %135 = load <32 x i8>, <32 x i8> addrspace(2)* %134, !tbaa !0
+ %136 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 2
+ %137 = load <16 x i8>, <16 x i8> addrspace(2)* %136, !tbaa !0
+ %138 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 3
+ %139 = load <32 x i8>, <32 x i8> addrspace(2)* %138, !tbaa !0
+ %140 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 3
+ %141 = load <16 x i8>, <16 x i8> addrspace(2)* %140, !tbaa !0
+ %142 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 4
+ %143 = load <32 x i8>, <32 x i8> addrspace(2)* %142, !tbaa !0
+ %144 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 4
+ %145 = load <16 x i8>, <16 x i8> addrspace(2)* %144, !tbaa !0
+ %146 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 5
+ %147 = load <32 x i8>, <32 x i8> addrspace(2)* %146, !tbaa !0
+ %148 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 5
+ %149 = load <16 x i8>, <16 x i8> addrspace(2)* %148, !tbaa !0
+ %150 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 6
+ %151 = load <32 x i8>, <32 x i8> addrspace(2)* %150, !tbaa !0
+ %152 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 6
+ %153 = load <16 x i8>, <16 x i8> addrspace(2)* %152, !tbaa !0
+ %154 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 7
+ %155 = load <32 x i8>, <32 x i8> addrspace(2)* %154, !tbaa !0
+ %156 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 7
+ %157 = load <16 x i8>, <16 x i8> addrspace(2)* %156, !tbaa !0
+ %158 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 8
+ %159 = load <32 x i8>, <32 x i8> addrspace(2)* %158, !tbaa !0
+ %160 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 8
+ %161 = load <16 x i8>, <16 x i8> addrspace(2)* %160, !tbaa !0
%162 = fcmp ugt float %17, 0.000000e+00
%163 = select i1 %162, float 1.000000e+00, float 0.000000e+00
%164 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %4, <2 x i32> %6)
diff --git a/test/CodeGen/R600/si-triv-disjoint-mem-access.ll b/test/CodeGen/R600/si-triv-disjoint-mem-access.ll
index a4475c0..5a6129a 100644
--- a/test/CodeGen/R600/si-triv-disjoint-mem-access.ll
+++ b/test/CodeGen/R600/si-triv-disjoint-mem-access.ll
@@ -14,14 +14,14 @@ declare void @llvm.AMDGPU.barrier.local() #2
; CI-NEXT: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:8
; CI: buffer_store_dword
define void @reorder_local_load_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
- %ptr0 = load i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
- %ptr1 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 1
- %ptr2 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 2
- %tmp1 = load i32 addrspace(3)* %ptr1, align 4
+ %ptr0 = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
+ %ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 1
+ %ptr2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 2
+ %tmp1 = load i32, i32 addrspace(3)* %ptr1, align 4
store i32 99, i32 addrspace(1)* %gptr, align 4
- %tmp2 = load i32 addrspace(3)* %ptr2, align 4
+ %tmp2 = load i32, i32 addrspace(3)* %ptr2, align 4
%add = add nsw i32 %tmp1, %tmp2
@@ -34,14 +34,14 @@ define void @reorder_local_load_global_store_local_load(i32 addrspace(1)* %out,
; CI: buffer_store_dword
; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:8
define void @no_reorder_local_load_volatile_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
- %ptr0 = load i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
- %ptr1 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 1
- %ptr2 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 2
- %tmp1 = load i32 addrspace(3)* %ptr1, align 4
+ %ptr0 = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
+ %ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 1
+ %ptr2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 2
+ %tmp1 = load i32, i32 addrspace(3)* %ptr1, align 4
store volatile i32 99, i32 addrspace(1)* %gptr, align 4
- %tmp2 = load i32 addrspace(3)* %ptr2, align 4
+ %tmp2 = load i32, i32 addrspace(3)* %ptr2, align 4
%add = add nsw i32 %tmp1, %tmp2
@@ -54,15 +54,15 @@ define void @no_reorder_local_load_volatile_global_store_local_load(i32 addrspac
; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:8
; CI: buffer_store_dword
define void @no_reorder_barrier_local_load_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
- %ptr0 = load i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
- %ptr1 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 1
- %ptr2 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 2
- %tmp1 = load i32 addrspace(3)* %ptr1, align 4
+ %ptr0 = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
+ %ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 1
+ %ptr2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 2
+ %tmp1 = load i32, i32 addrspace(3)* %ptr1, align 4
store i32 99, i32 addrspace(1)* %gptr, align 4
call void @llvm.AMDGPU.barrier.local() #2
- %tmp2 = load i32 addrspace(3)* %ptr2, align 4
+ %tmp2 = load i32, i32 addrspace(3)* %ptr2, align 4
%add = add nsw i32 %tmp1, %tmp2
@@ -79,14 +79,14 @@ define void @no_reorder_barrier_local_load_global_store_local_load(i32 addrspace
; CI: buffer_load_dword
; CI: buffer_store_dword
define void @no_reorder_constant_load_global_store_constant_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
- %ptr0 = load i32 addrspace(2)* addrspace(3)* @stored_constant_ptr, align 8
- %ptr1 = getelementptr inbounds i32 addrspace(2)* %ptr0, i64 1
- %ptr2 = getelementptr inbounds i32 addrspace(2)* %ptr0, i64 2
- %tmp1 = load i32 addrspace(2)* %ptr1, align 4
+ %ptr0 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(3)* @stored_constant_ptr, align 8
+ %ptr1 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 1
+ %ptr2 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 2
+ %tmp1 = load i32, i32 addrspace(2)* %ptr1, align 4
store i32 99, i32 addrspace(1)* %gptr, align 4
- %tmp2 = load i32 addrspace(2)* %ptr2, align 4
+ %tmp2 = load i32, i32 addrspace(2)* %ptr2, align 4
%add = add nsw i32 %tmp1, %tmp2
@@ -100,14 +100,14 @@ define void @no_reorder_constant_load_global_store_constant_load(i32 addrspace(1
; CI: ds_write_b32
; CI: buffer_store_dword
define void @reorder_constant_load_local_store_constant_load(i32 addrspace(1)* %out, i32 addrspace(3)* %lptr) #0 {
- %ptr0 = load i32 addrspace(2)* addrspace(3)* @stored_constant_ptr, align 8
- %ptr1 = getelementptr inbounds i32 addrspace(2)* %ptr0, i64 1
- %ptr2 = getelementptr inbounds i32 addrspace(2)* %ptr0, i64 2
- %tmp1 = load i32 addrspace(2)* %ptr1, align 4
+ %ptr0 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(3)* @stored_constant_ptr, align 8
+ %ptr1 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 1
+ %ptr2 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 2
+ %tmp1 = load i32, i32 addrspace(2)* %ptr1, align 4
store i32 99, i32 addrspace(3)* %lptr, align 4
- %tmp2 = load i32 addrspace(2)* %ptr2, align 4
+ %tmp2 = load i32, i32 addrspace(2)* %ptr2, align 4
%add = add nsw i32 %tmp1, %tmp2
@@ -122,12 +122,12 @@ define void @reorder_constant_load_local_store_constant_load(i32 addrspace(1)* %
; CI: ds_write_b32
; CI: buffer_store_dword
define void @reorder_smrd_load_local_store_smrd_load(i32 addrspace(1)* %out, i32 addrspace(3)* noalias %lptr, i32 addrspace(2)* %ptr0) #0 {
- %ptr1 = getelementptr inbounds i32 addrspace(2)* %ptr0, i64 1
- %ptr2 = getelementptr inbounds i32 addrspace(2)* %ptr0, i64 2
- %tmp1 = load i32 addrspace(2)* %ptr1, align 4
+ %ptr1 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 1
+ %ptr2 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 2
+ %tmp1 = load i32, i32 addrspace(2)* %ptr1, align 4
store i32 99, i32 addrspace(3)* %lptr, align 4
- %tmp2 = load i32 addrspace(2)* %ptr2, align 4
+ %tmp2 = load i32, i32 addrspace(2)* %ptr2, align 4
%add = add nsw i32 %tmp1, %tmp2
@@ -141,12 +141,12 @@ define void @reorder_smrd_load_local_store_smrd_load(i32 addrspace(1)* %out, i32
; CI: ds_write_b32
; CI: buffer_store_dword
define void @reorder_global_load_local_store_global_load(i32 addrspace(1)* %out, i32 addrspace(3)* %lptr, i32 addrspace(1)* %ptr0) #0 {
- %ptr1 = getelementptr inbounds i32 addrspace(1)* %ptr0, i64 1
- %ptr2 = getelementptr inbounds i32 addrspace(1)* %ptr0, i64 2
- %tmp1 = load i32 addrspace(1)* %ptr1, align 4
+ %ptr1 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i64 1
+ %ptr2 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i64 2
+ %tmp1 = load i32, i32 addrspace(1)* %ptr1, align 4
store i32 99, i32 addrspace(3)* %lptr, align 4
- %tmp2 = load i32 addrspace(1)* %ptr2, align 4
+ %tmp2 = load i32, i32 addrspace(1)* %ptr2, align 4
%add = add nsw i32 %tmp1, %tmp2
@@ -163,15 +163,15 @@ define void @reorder_global_load_local_store_global_load(i32 addrspace(1)* %out,
; CI: buffer_store_dword
; CI: s_endpgm
define void @reorder_local_offsets(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* noalias nocapture readnone %gptr, i32 addrspace(3)* noalias nocapture %ptr0) #0 {
- %ptr1 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 3
- %ptr2 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 100
- %ptr3 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 101
+ %ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 3
+ %ptr2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 100
+ %ptr3 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 101
store i32 123, i32 addrspace(3)* %ptr1, align 4
- %tmp1 = load i32 addrspace(3)* %ptr2, align 4
- %tmp2 = load i32 addrspace(3)* %ptr3, align 4
+ %tmp1 = load i32, i32 addrspace(3)* %ptr2, align 4
+ %tmp2 = load i32, i32 addrspace(3)* %ptr3, align 4
store i32 123, i32 addrspace(3)* %ptr2, align 4
- %tmp3 = load i32 addrspace(3)* %ptr1, align 4
+ %tmp3 = load i32, i32 addrspace(3)* %ptr1, align 4
store i32 789, i32 addrspace(3)* %ptr3, align 4
%add.0 = add nsw i32 %tmp2, %tmp1
@@ -189,15 +189,15 @@ define void @reorder_local_offsets(i32 addrspace(1)* nocapture %out, i32 addrspa
; CI: buffer_store_dword
; CI: s_endpgm
define void @reorder_global_offsets(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* noalias nocapture readnone %gptr, i32 addrspace(1)* noalias nocapture %ptr0) #0 {
- %ptr1 = getelementptr inbounds i32 addrspace(1)* %ptr0, i32 3
- %ptr2 = getelementptr inbounds i32 addrspace(1)* %ptr0, i32 100
- %ptr3 = getelementptr inbounds i32 addrspace(1)* %ptr0, i32 101
+ %ptr1 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i32 3
+ %ptr2 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i32 100
+ %ptr3 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i32 101
store i32 123, i32 addrspace(1)* %ptr1, align 4
- %tmp1 = load i32 addrspace(1)* %ptr2, align 4
- %tmp2 = load i32 addrspace(1)* %ptr3, align 4
+ %tmp1 = load i32, i32 addrspace(1)* %ptr2, align 4
+ %tmp2 = load i32, i32 addrspace(1)* %ptr3, align 4
store i32 123, i32 addrspace(1)* %ptr2, align 4
- %tmp3 = load i32 addrspace(1)* %ptr1, align 4
+ %tmp3 = load i32, i32 addrspace(1)* %ptr1, align 4
store i32 789, i32 addrspace(1)* %ptr3, align 4
%add.0 = add nsw i32 %tmp2, %tmp1
@@ -211,19 +211,19 @@ define void @reorder_global_offsets(i32 addrspace(1)* nocapture %out, i32 addrsp
; XCI: TBUFFER_STORE_FORMAT
; XCI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}}, 0x8
; define void @reorder_local_load_tbuffer_store_local_load(i32 addrspace(1)* %out, i32 %a1, i32 %vaddr) #1 {
-; %ptr0 = load i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
-; %ptr1 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 1
-; %ptr2 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 2
-; %tmp1 = load i32 addrspace(3)* %ptr1, align 4
+; %ptr0 = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
+; %ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 1
+; %ptr2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 2
+; %tmp1 = load i32, i32 addrspace(3)* %ptr1, align 4
; %vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
; call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
; i32 4, i32 %vaddr, i32 0, i32 32, i32 14, i32 4, i32 1, i32 0, i32 1,
; i32 1, i32 0)
-; %tmp2 = load i32 addrspace(3)* %ptr2, align 4
+; %tmp2 = load i32, i32 addrspace(3)* %ptr2, align 4
; %add = add nsw i32 %tmp1, %tmp2
diff --git a/test/CodeGen/R600/si-vector-hang.ll b/test/CodeGen/R600/si-vector-hang.ll
index 61812c6..94c47fe 100644
--- a/test/CodeGen/R600/si-vector-hang.ll
+++ b/test/CodeGen/R600/si-vector-hang.ll
@@ -17,52 +17,52 @@ target triple = "r600--"
; Function Attrs: nounwind
define void @test_8_min_char(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture readonly %in0, i8 addrspace(1)* nocapture readonly %in1) #0 {
entry:
- %0 = load i8 addrspace(1)* %in0, align 1
+ %0 = load i8, i8 addrspace(1)* %in0, align 1
%1 = insertelement <8 x i8> undef, i8 %0, i32 0
- %arrayidx2.i.i = getelementptr inbounds i8 addrspace(1)* %in0, i64 1
- %2 = load i8 addrspace(1)* %arrayidx2.i.i, align 1
+ %arrayidx2.i.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 1
+ %2 = load i8, i8 addrspace(1)* %arrayidx2.i.i, align 1
%3 = insertelement <8 x i8> %1, i8 %2, i32 1
- %arrayidx6.i.i = getelementptr inbounds i8 addrspace(1)* %in0, i64 2
- %4 = load i8 addrspace(1)* %arrayidx6.i.i, align 1
+ %arrayidx6.i.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 2
+ %4 = load i8, i8 addrspace(1)* %arrayidx6.i.i, align 1
%5 = insertelement <8 x i8> %3, i8 %4, i32 2
- %arrayidx10.i.i = getelementptr inbounds i8 addrspace(1)* %in0, i64 3
- %6 = load i8 addrspace(1)* %arrayidx10.i.i, align 1
+ %arrayidx10.i.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 3
+ %6 = load i8, i8 addrspace(1)* %arrayidx10.i.i, align 1
%7 = insertelement <8 x i8> %5, i8 %6, i32 3
- %arrayidx.i.i = getelementptr inbounds i8 addrspace(1)* %in0, i64 4
- %8 = load i8 addrspace(1)* %arrayidx.i.i, align 1
+ %arrayidx.i.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 4
+ %8 = load i8, i8 addrspace(1)* %arrayidx.i.i, align 1
%9 = insertelement <8 x i8> undef, i8 %8, i32 0
- %arrayidx2.i9.i = getelementptr inbounds i8 addrspace(1)* %in0, i64 5
- %10 = load i8 addrspace(1)* %arrayidx2.i9.i, align 1
+ %arrayidx2.i9.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 5
+ %10 = load i8, i8 addrspace(1)* %arrayidx2.i9.i, align 1
%11 = insertelement <8 x i8> %9, i8 %10, i32 1
- %arrayidx6.i11.i = getelementptr inbounds i8 addrspace(1)* %in0, i64 6
- %12 = load i8 addrspace(1)* %arrayidx6.i11.i, align 1
+ %arrayidx6.i11.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 6
+ %12 = load i8, i8 addrspace(1)* %arrayidx6.i11.i, align 1
%13 = insertelement <8 x i8> %11, i8 %12, i32 2
- %arrayidx10.i13.i = getelementptr inbounds i8 addrspace(1)* %in0, i64 7
- %14 = load i8 addrspace(1)* %arrayidx10.i13.i, align 1
+ %arrayidx10.i13.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 7
+ %14 = load i8, i8 addrspace(1)* %arrayidx10.i13.i, align 1
%15 = insertelement <8 x i8> %13, i8 %14, i32 3
%vecinit5.i = shufflevector <8 x i8> %7, <8 x i8> %15, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
- %16 = load i8 addrspace(1)* %in1, align 1
+ %16 = load i8, i8 addrspace(1)* %in1, align 1
%17 = insertelement <8 x i8> undef, i8 %16, i32 0
- %arrayidx2.i.i4 = getelementptr inbounds i8 addrspace(1)* %in1, i64 1
- %18 = load i8 addrspace(1)* %arrayidx2.i.i4, align 1
+ %arrayidx2.i.i4 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 1
+ %18 = load i8, i8 addrspace(1)* %arrayidx2.i.i4, align 1
%19 = insertelement <8 x i8> %17, i8 %18, i32 1
- %arrayidx6.i.i5 = getelementptr inbounds i8 addrspace(1)* %in1, i64 2
- %20 = load i8 addrspace(1)* %arrayidx6.i.i5, align 1
+ %arrayidx6.i.i5 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 2
+ %20 = load i8, i8 addrspace(1)* %arrayidx6.i.i5, align 1
%21 = insertelement <8 x i8> %19, i8 %20, i32 2
- %arrayidx10.i.i6 = getelementptr inbounds i8 addrspace(1)* %in1, i64 3
- %22 = load i8 addrspace(1)* %arrayidx10.i.i6, align 1
+ %arrayidx10.i.i6 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 3
+ %22 = load i8, i8 addrspace(1)* %arrayidx10.i.i6, align 1
%23 = insertelement <8 x i8> %21, i8 %22, i32 3
- %arrayidx.i.i7 = getelementptr inbounds i8 addrspace(1)* %in1, i64 4
- %24 = load i8 addrspace(1)* %arrayidx.i.i7, align 1
+ %arrayidx.i.i7 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 4
+ %24 = load i8, i8 addrspace(1)* %arrayidx.i.i7, align 1
%25 = insertelement <8 x i8> undef, i8 %24, i32 0
- %arrayidx2.i9.i8 = getelementptr inbounds i8 addrspace(1)* %in1, i64 5
- %26 = load i8 addrspace(1)* %arrayidx2.i9.i8, align 1
+ %arrayidx2.i9.i8 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 5
+ %26 = load i8, i8 addrspace(1)* %arrayidx2.i9.i8, align 1
%27 = insertelement <8 x i8> %25, i8 %26, i32 1
- %arrayidx6.i11.i9 = getelementptr inbounds i8 addrspace(1)* %in1, i64 6
- %28 = load i8 addrspace(1)* %arrayidx6.i11.i9, align 1
+ %arrayidx6.i11.i9 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 6
+ %28 = load i8, i8 addrspace(1)* %arrayidx6.i11.i9, align 1
%29 = insertelement <8 x i8> %27, i8 %28, i32 2
- %arrayidx10.i13.i10 = getelementptr inbounds i8 addrspace(1)* %in1, i64 7
- %30 = load i8 addrspace(1)* %arrayidx10.i13.i10, align 1
+ %arrayidx10.i13.i10 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 7
+ %30 = load i8, i8 addrspace(1)* %arrayidx10.i13.i10, align 1
%31 = insertelement <8 x i8> %29, i8 %30, i32 3
%vecinit5.i11 = shufflevector <8 x i8> %23, <8 x i8> %31, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
%cmp.i = icmp slt <8 x i8> %vecinit5.i, %vecinit5.i11
@@ -70,25 +70,25 @@ entry:
%32 = extractelement <8 x i8> %cond.i, i32 0
store i8 %32, i8 addrspace(1)* %out, align 1
%33 = extractelement <8 x i8> %cond.i, i32 1
- %arrayidx2.i.i.i = getelementptr inbounds i8 addrspace(1)* %out, i64 1
+ %arrayidx2.i.i.i = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 1
store i8 %33, i8 addrspace(1)* %arrayidx2.i.i.i, align 1
%34 = extractelement <8 x i8> %cond.i, i32 2
- %arrayidx.i.i.i = getelementptr inbounds i8 addrspace(1)* %out, i64 2
+ %arrayidx.i.i.i = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 2
store i8 %34, i8 addrspace(1)* %arrayidx.i.i.i, align 1
%35 = extractelement <8 x i8> %cond.i, i32 3
- %arrayidx2.i6.i.i = getelementptr inbounds i8 addrspace(1)* %out, i64 3
+ %arrayidx2.i6.i.i = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 3
store i8 %35, i8 addrspace(1)* %arrayidx2.i6.i.i, align 1
- %arrayidx.i.i3 = getelementptr inbounds i8 addrspace(1)* %out, i64 4
+ %arrayidx.i.i3 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 4
%36 = extractelement <8 x i8> %cond.i, i32 4
store i8 %36, i8 addrspace(1)* %arrayidx.i.i3, align 1
%37 = extractelement <8 x i8> %cond.i, i32 5
- %arrayidx2.i.i6.i = getelementptr inbounds i8 addrspace(1)* %out, i64 5
+ %arrayidx2.i.i6.i = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 5
store i8 %37, i8 addrspace(1)* %arrayidx2.i.i6.i, align 1
%38 = extractelement <8 x i8> %cond.i, i32 6
- %arrayidx.i.i7.i = getelementptr inbounds i8 addrspace(1)* %out, i64 6
+ %arrayidx.i.i7.i = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 6
store i8 %38, i8 addrspace(1)* %arrayidx.i.i7.i, align 1
%39 = extractelement <8 x i8> %cond.i, i32 7
- %arrayidx2.i6.i8.i = getelementptr inbounds i8 addrspace(1)* %out, i64 7
+ %arrayidx2.i6.i8.i = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 7
store i8 %39, i8 addrspace(1)* %arrayidx2.i6.i8.i, align 1
ret void
}
diff --git a/test/CodeGen/R600/sign_extend.ll b/test/CodeGen/R600/sign_extend.ll
index f194759..06bee11 100644
--- a/test/CodeGen/R600/sign_extend.ll
+++ b/test/CodeGen/R600/sign_extend.ll
@@ -48,7 +48,7 @@ define void @s_sext_i32_to_i64(i64 addrspace(1)* %out, i32 %a) nounwind {
; SI: v_ashr
; SI: s_endpgm
define void @v_sext_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %val = load i32 addrspace(1)* %in, align 4
+ %val = load i32, i32 addrspace(1)* %in, align 4
%sext = sext i32 %val to i64
store i64 %sext, i64 addrspace(1)* %out, align 8
ret void
diff --git a/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll b/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll
index 28a413c..dffee70 100644
--- a/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll
+++ b/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll
@@ -22,16 +22,16 @@ define void @trunc_select_i64(i32 addrspace(1)* %out, i64 %a, i64 %b, i32 %c) {
define void @trunc_load_alloca_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) {
%idx = add i32 %a, %b
%alloca = alloca i64, i32 4
- %gep0 = getelementptr i64* %alloca, i64 0
- %gep1 = getelementptr i64* %alloca, i64 1
- %gep2 = getelementptr i64* %alloca, i64 2
- %gep3 = getelementptr i64* %alloca, i64 3
+ %gep0 = getelementptr i64, i64* %alloca, i64 0
+ %gep1 = getelementptr i64, i64* %alloca, i64 1
+ %gep2 = getelementptr i64, i64* %alloca, i64 2
+ %gep3 = getelementptr i64, i64* %alloca, i64 3
store i64 24, i64* %gep0, align 8
store i64 9334, i64* %gep1, align 8
store i64 3935, i64* %gep2, align 8
store i64 9342, i64* %gep3, align 8
- %gep = getelementptr i64* %alloca, i32 %idx
- %load = load i64* %gep, align 8
+ %gep = getelementptr i64, i64* %alloca, i32 %idx
+ %load = load i64, i64* %gep, align 8
%mask = and i64 %load, 4294967296
%add = add i64 %mask, -1
store i64 %add, i64 addrspace(1)* %out, align 4
diff --git a/test/CodeGen/R600/sint_to_fp.f64.ll b/test/CodeGen/R600/sint_to_fp.f64.ll
index 893cfb3..da4e91d 100644
--- a/test/CodeGen/R600/sint_to_fp.f64.ll
+++ b/test/CodeGen/R600/sint_to_fp.f64.ll
@@ -53,8 +53,8 @@ define void @s_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 %in) {
; SI: buffer_store_dwordx2 [[RESULT]]
define void @v_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep = getelementptr i64 addrspace(1)* %in, i32 %tid
- %val = load i64 addrspace(1)* %gep, align 8
+ %gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %val = load i64, i64 addrspace(1)* %gep, align 8
%result = sitofp i64 %val to double
store double %result, double addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/sint_to_fp.ll b/test/CodeGen/R600/sint_to_fp.ll
index 6a291cf..8506441 100644
--- a/test/CodeGen/R600/sint_to_fp.ll
+++ b/test/CodeGen/R600/sint_to_fp.ll
@@ -35,7 +35,7 @@ define void @sint_to_fp_v2i32(<2 x float> addrspace(1)* %out, <2 x i32> %in) {
; SI: v_cvt_f32_i32_e32
; SI: v_cvt_f32_i32_e32
define void @sint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %value = load <4 x i32> addrspace(1) * %in
+ %value = load <4 x i32>, <4 x i32> addrspace(1) * %in
%result = sitofp <4 x i32> %value to <4 x float>
store <4 x float> %result, <4 x float> addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/smrd.ll b/test/CodeGen/R600/smrd.ll
index bad1668..b0c18ca 100644
--- a/test/CodeGen/R600/smrd.ll
+++ b/test/CodeGen/R600/smrd.ll
@@ -7,8 +7,8 @@
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4
define void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
- %0 = getelementptr i32 addrspace(2)* %ptr, i64 1
- %1 = load i32 addrspace(2)* %0
+ %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 1
+ %1 = load i32, i32 addrspace(2)* %0
store i32 %1, i32 addrspace(1)* %out
ret void
}
@@ -19,8 +19,8 @@ entry:
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
define void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
- %0 = getelementptr i32 addrspace(2)* %ptr, i64 255
- %1 = load i32 addrspace(2)* %0
+ %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 255
+ %1 = load i32, i32 addrspace(2)* %0
store i32 %1, i32 addrspace(1)* %out
ret void
}
@@ -33,8 +33,8 @@ entry:
; GCN: s_endpgm
define void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
- %0 = getelementptr i32 addrspace(2)* %ptr, i64 256
- %1 = load i32 addrspace(2)* %0
+ %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 256
+ %1 = load i32, i32 addrspace(2)* %0
store i32 %1, i32 addrspace(1)* %out
ret void
}
@@ -54,8 +54,8 @@ entry:
; GCN: s_endpgm
define void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
- %0 = getelementptr i32 addrspace(2)* %ptr, i64 4294967296 ; 2 ^ 32
- %1 = load i32 addrspace(2)* %0
+ %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 4294967296 ; 2 ^ 32
+ %1 = load i32, i32 addrspace(2)* %0
store i32 %1, i32 addrspace(1)* %out
ret void
}
@@ -66,8 +66,8 @@ entry:
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x10
define void @smrd_load_const0(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
- %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
- %21 = load <16 x i8> addrspace(2)* %20
+ %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
+ %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
%22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
ret void
@@ -80,8 +80,8 @@ main_body:
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
define void @smrd_load_const1(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
- %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
- %21 = load <16 x i8> addrspace(2)* %20
+ %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
+ %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
%22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 1020)
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
ret void
@@ -95,8 +95,8 @@ main_body:
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
define void @smrd_load_const2(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
- %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
- %21 = load <16 x i8> addrspace(2)* %20
+ %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
+ %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
%22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 1024)
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
ret void
diff --git a/test/CodeGen/R600/split-scalar-i64-add.ll b/test/CodeGen/R600/split-scalar-i64-add.ll
index ec50fd9..46409cd 100644
--- a/test/CodeGen/R600/split-scalar-i64-add.ll
+++ b/test/CodeGen/R600/split-scalar-i64-add.ll
@@ -37,8 +37,8 @@ define void @imp_def_vcc_split_i64_add_1(i64 addrspace(1)* %out, i32 %val0, i64
; SI: v_addc_u32
define void @imp_def_vcc_split_i64_add_2(i64 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %val0, i64 %val1) {
%tid = call i32 @llvm.r600.read.tidig.x() readnone
- %gep = getelementptr i32 addrspace(1)* %in, i32 %tid
- %load = load i32 addrspace(1)* %gep
+ %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %load = load i32, i32 addrspace(1)* %gep
%vec.0 = insertelement <2 x i32> undef, i32 %val0, i32 0
%vec.1 = insertelement <2 x i32> %vec.0, i32 %load, i32 1
%bc = bitcast <2 x i32> %vec.1 to i64
diff --git a/test/CodeGen/R600/sra.ll b/test/CodeGen/R600/sra.ll
index d6c6ccd..bcbc32f 100644
--- a/test/CodeGen/R600/sra.ll
+++ b/test/CodeGen/R600/sra.ll
@@ -15,9 +15,9 @@
;VI: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @ashr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32> addrspace(1) * %in
- %b = load <2 x i32> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
+ %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
+ %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
%result = ashr <2 x i32> %a, %b
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
ret void
@@ -42,9 +42,9 @@ define void @ashr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %i
;VI: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32> addrspace(1) * %in
- %b = load <4 x i32> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
+ %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
+ %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
%result = ashr <4 x i32> %a, %b
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
@@ -89,9 +89,9 @@ entry:
define void @ashr_i64_2(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
entry:
- %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1
- %a = load i64 addrspace(1) * %in
- %b = load i64 addrspace(1) * %b_ptr
+ %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
+ %a = load i64, i64 addrspace(1) * %in
+ %b = load i64, i64 addrspace(1) * %b_ptr
%result = ashr i64 %a, %b
store i64 %result, i64 addrspace(1)* %out
ret void
@@ -132,9 +132,9 @@ entry:
;VI: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
define void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1
- %a = load <2 x i64> addrspace(1) * %in
- %b = load <2 x i64> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1
+ %a = load <2 x i64>, <2 x i64> addrspace(1) * %in
+ %b = load <2 x i64>, <2 x i64> addrspace(1) * %b_ptr
%result = ashr <2 x i64> %a, %b
store <2 x i64> %result, <2 x i64> addrspace(1)* %out
ret void
@@ -203,9 +203,9 @@ define void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %i
;VI: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
define void @ashr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1
- %a = load <4 x i64> addrspace(1) * %in
- %b = load <4 x i64> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1
+ %a = load <4 x i64>, <4 x i64> addrspace(1) * %in
+ %b = load <4 x i64>, <4 x i64> addrspace(1) * %b_ptr
%result = ashr <4 x i64> %a, %b
store <4 x i64> %result, <4 x i64> addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/srem.ll b/test/CodeGen/R600/srem.ll
index 510db0e..c78fd54 100644
--- a/test/CodeGen/R600/srem.ll
+++ b/test/CodeGen/R600/srem.ll
@@ -3,16 +3,16 @@
; RUN: llc -march=r600 -mcpu=redwood < %s
define void @srem_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %num = load i32 addrspace(1) * %in
- %den = load i32 addrspace(1) * %den_ptr
+ %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %num = load i32, i32 addrspace(1) * %in
+ %den = load i32, i32 addrspace(1) * %den_ptr
%result = srem i32 %num, %den
store i32 %result, i32 addrspace(1)* %out
ret void
}
define void @srem_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %num = load i32 addrspace(1) * %in
+ %num = load i32, i32 addrspace(1) * %in
%result = srem i32 %num, 4
store i32 %result, i32 addrspace(1)* %out
ret void
@@ -25,87 +25,87 @@ define void @srem_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
; SI: v_sub_i32
; SI: s_endpgm
define void @srem_i32_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %num = load i32 addrspace(1) * %in
+ %num = load i32, i32 addrspace(1) * %in
%result = srem i32 %num, 7
store i32 %result, i32 addrspace(1)* %out
ret void
}
define void @srem_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %den_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
- %num = load <2 x i32> addrspace(1) * %in
- %den = load <2 x i32> addrspace(1) * %den_ptr
+ %den_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
+ %num = load <2 x i32>, <2 x i32> addrspace(1) * %in
+ %den = load <2 x i32>, <2 x i32> addrspace(1) * %den_ptr
%result = srem <2 x i32> %num, %den
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
ret void
}
define void @srem_v2i32_4(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %num = load <2 x i32> addrspace(1) * %in
+ %num = load <2 x i32>, <2 x i32> addrspace(1) * %in
%result = srem <2 x i32> %num, <i32 4, i32 4>
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
ret void
}
define void @srem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %den_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
- %num = load <4 x i32> addrspace(1) * %in
- %den = load <4 x i32> addrspace(1) * %den_ptr
+ %den_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
+ %num = load <4 x i32>, <4 x i32> addrspace(1) * %in
+ %den = load <4 x i32>, <4 x i32> addrspace(1) * %den_ptr
%result = srem <4 x i32> %num, %den
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
}
define void @srem_v4i32_4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %num = load <4 x i32> addrspace(1) * %in
+ %num = load <4 x i32>, <4 x i32> addrspace(1) * %in
%result = srem <4 x i32> %num, <i32 4, i32 4, i32 4, i32 4>
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
}
define void @srem_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %den_ptr = getelementptr i64 addrspace(1)* %in, i64 1
- %num = load i64 addrspace(1) * %in
- %den = load i64 addrspace(1) * %den_ptr
+ %den_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
+ %num = load i64, i64 addrspace(1) * %in
+ %den = load i64, i64 addrspace(1) * %den_ptr
%result = srem i64 %num, %den
store i64 %result, i64 addrspace(1)* %out
ret void
}
define void @srem_i64_4(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %num = load i64 addrspace(1) * %in
+ %num = load i64, i64 addrspace(1) * %in
%result = srem i64 %num, 4
store i64 %result, i64 addrspace(1)* %out
ret void
}
define void @srem_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
- %den_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1
- %num = load <2 x i64> addrspace(1) * %in
- %den = load <2 x i64> addrspace(1) * %den_ptr
+ %den_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1
+ %num = load <2 x i64>, <2 x i64> addrspace(1) * %in
+ %den = load <2 x i64>, <2 x i64> addrspace(1) * %den_ptr
%result = srem <2 x i64> %num, %den
store <2 x i64> %result, <2 x i64> addrspace(1)* %out
ret void
}
define void @srem_v2i64_4(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
- %num = load <2 x i64> addrspace(1) * %in
+ %num = load <2 x i64>, <2 x i64> addrspace(1) * %in
%result = srem <2 x i64> %num, <i64 4, i64 4>
store <2 x i64> %result, <2 x i64> addrspace(1)* %out
ret void
}
define void @srem_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
- %den_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1
- %num = load <4 x i64> addrspace(1) * %in
- %den = load <4 x i64> addrspace(1) * %den_ptr
+ %den_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1
+ %num = load <4 x i64>, <4 x i64> addrspace(1) * %in
+ %den = load <4 x i64>, <4 x i64> addrspace(1) * %den_ptr
%result = srem <4 x i64> %num, %den
store <4 x i64> %result, <4 x i64> addrspace(1)* %out
ret void
}
define void @srem_v4i64_4(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
- %num = load <4 x i64> addrspace(1) * %in
+ %num = load <4 x i64>, <4 x i64> addrspace(1) * %in
%result = srem <4 x i64> %num, <i64 4, i64 4, i64 4, i64 4>
store <4 x i64> %result, <4 x i64> addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/srl.ll b/test/CodeGen/R600/srl.ll
index 1f9b620..4904d7f 100644
--- a/test/CodeGen/R600/srl.ll
+++ b/test/CodeGen/R600/srl.ll
@@ -7,9 +7,9 @@
; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define void @lshr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %a = load i32 addrspace(1)* %in
- %b = load i32 addrspace(1)* %b_ptr
+ %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %a = load i32, i32 addrspace(1)* %in
+ %b = load i32, i32 addrspace(1)* %b_ptr
%result = lshr i32 %a, %b
store i32 %result, i32 addrspace(1)* %out
ret void
@@ -25,9 +25,9 @@ define void @lshr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define void @lshr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32> addrspace(1)* %in
- %b = load <2 x i32> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
+ %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
+ %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr
%result = lshr <2 x i32> %a, %b
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
ret void
@@ -49,9 +49,9 @@ define void @lshr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %i
; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32> addrspace(1)* %in
- %b = load <4 x i32> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
+ %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
+ %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
%result = lshr <4 x i32> %a, %b
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
@@ -73,9 +73,9 @@ define void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %i
; EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}}
; EG-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0
define void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1
- %a = load i64 addrspace(1)* %in
- %b = load i64 addrspace(1)* %b_ptr
+ %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
+ %a = load i64, i64 addrspace(1)* %in
+ %b = load i64, i64 addrspace(1)* %b_ptr
%result = lshr i64 %a, %b
store i64 %result, i64 addrspace(1)* %out
ret void
@@ -111,9 +111,9 @@ define void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT
define void @lshr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1
- %a = load <2 x i64> addrspace(1)* %in
- %b = load <2 x i64> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1
+ %a = load <2 x i64>, <2 x i64> addrspace(1)* %in
+ %b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr
%result = lshr <2 x i64> %a, %b
store <2 x i64> %result, <2 x i64> addrspace(1)* %out
ret void
@@ -177,9 +177,9 @@ define void @lshr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %i
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT
define void @lshr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1
- %a = load <4 x i64> addrspace(1)* %in
- %b = load <4 x i64> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1
+ %a = load <4 x i64>, <4 x i64> addrspace(1)* %in
+ %b = load <4 x i64>, <4 x i64> addrspace(1)* %b_ptr
%result = lshr <4 x i64> %a, %b
store <4 x i64> %result, <4 x i64> addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/ssubo.ll b/test/CodeGen/R600/ssubo.ll
index 09d3959..26884a1 100644
--- a/test/CodeGen/R600/ssubo.ll
+++ b/test/CodeGen/R600/ssubo.ll
@@ -28,8 +28,8 @@ define void @s_ssubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32
; FUNC-LABEL: {{^}}v_ssubo_i32:
define void @v_ssubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
- %a = load i32 addrspace(1)* %aptr, align 4
- %b = load i32 addrspace(1)* %bptr, align 4
+ %a = load i32, i32 addrspace(1)* %aptr, align 4
+ %b = load i32, i32 addrspace(1)* %bptr, align 4
%ssub = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) nounwind
%val = extractvalue { i32, i1 } %ssub, 0
%carry = extractvalue { i32, i1 } %ssub, 1
@@ -54,8 +54,8 @@ define void @s_ssubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64
; SI: v_sub_i32_e32
; SI: v_subb_u32_e32
define void @v_ssubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
- %a = load i64 addrspace(1)* %aptr, align 4
- %b = load i64 addrspace(1)* %bptr, align 4
+ %a = load i64, i64 addrspace(1)* %aptr, align 4
+ %b = load i64, i64 addrspace(1)* %bptr, align 4
%ssub = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) nounwind
%val = extractvalue { i64, i1 } %ssub, 0
%carry = extractvalue { i64, i1 } %ssub, 1
diff --git a/test/CodeGen/R600/store-barrier.ll b/test/CodeGen/R600/store-barrier.ll
index ea65bb0..4a72b4d 100644
--- a/test/CodeGen/R600/store-barrier.ll
+++ b/test/CodeGen/R600/store-barrier.ll
@@ -14,24 +14,24 @@
; Function Attrs: nounwind
define void @test(<2 x i8> addrspace(3)* nocapture %arg, <2 x i8> addrspace(1)* nocapture readonly %arg1, i32 addrspace(1)* nocapture readonly %arg2, <2 x i8> addrspace(1)* nocapture %arg3, i32 %arg4, i64 %tmp9) {
bb:
- %tmp10 = getelementptr inbounds i32 addrspace(1)* %arg2, i64 %tmp9
- %tmp13 = load i32 addrspace(1)* %tmp10, align 2
- %tmp14 = getelementptr inbounds <2 x i8> addrspace(3)* %arg, i32 %tmp13
- %tmp15 = load <2 x i8> addrspace(3)* %tmp14, align 2
+ %tmp10 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp9
+ %tmp13 = load i32, i32 addrspace(1)* %tmp10, align 2
+ %tmp14 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(3)* %arg, i32 %tmp13
+ %tmp15 = load <2 x i8>, <2 x i8> addrspace(3)* %tmp14, align 2
%tmp16 = add i32 %tmp13, 1
- %tmp17 = getelementptr inbounds <2 x i8> addrspace(3)* %arg, i32 %tmp16
+ %tmp17 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(3)* %arg, i32 %tmp16
store <2 x i8> %tmp15, <2 x i8> addrspace(3)* %tmp17, align 2
tail call void @llvm.AMDGPU.barrier.local() #2
- %tmp25 = load i32 addrspace(1)* %tmp10, align 4
+ %tmp25 = load i32, i32 addrspace(1)* %tmp10, align 4
%tmp26 = sext i32 %tmp25 to i64
%tmp27 = sext i32 %arg4 to i64
- %tmp28 = getelementptr inbounds <2 x i8> addrspace(3)* %arg, i32 %tmp25, i32 %arg4
- %tmp29 = load i8 addrspace(3)* %tmp28, align 1
- %tmp30 = getelementptr inbounds <2 x i8> addrspace(1)* %arg3, i64 %tmp26, i64 %tmp27
+ %tmp28 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(3)* %arg, i32 %tmp25, i32 %arg4
+ %tmp29 = load i8, i8 addrspace(3)* %tmp28, align 1
+ %tmp30 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(1)* %arg3, i64 %tmp26, i64 %tmp27
store i8 %tmp29, i8 addrspace(1)* %tmp30, align 1
- %tmp32 = getelementptr inbounds <2 x i8> addrspace(3)* %arg, i32 %tmp25, i32 0
- %tmp33 = load i8 addrspace(3)* %tmp32, align 1
- %tmp35 = getelementptr inbounds <2 x i8> addrspace(1)* %arg3, i64 %tmp26, i64 0
+ %tmp32 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(3)* %arg, i32 %tmp25, i32 0
+ %tmp33 = load i8, i8 addrspace(3)* %tmp32, align 1
+ %tmp35 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(1)* %arg3, i64 %tmp26, i64 0
store i8 %tmp33, i8 addrspace(1)* %tmp35, align 1
ret void
}
diff --git a/test/CodeGen/R600/store-vector-ptrs.ll b/test/CodeGen/R600/store-vector-ptrs.ll
index ba4d94f..d5af3b2 100644
--- a/test/CodeGen/R600/store-vector-ptrs.ll
+++ b/test/CodeGen/R600/store-vector-ptrs.ll
@@ -6,7 +6,7 @@
; scratch loads and stores.
; CHECK-LABEL: {{^}}store_vector_ptrs:
define void @store_vector_ptrs(<4 x i32*>* %out, <4 x [1024 x i32]*> %array) nounwind {
- %p = getelementptr <4 x [1024 x i32]*> %array, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16>
+ %p = getelementptr [1024 x i32], <4 x [1024 x i32]*> %array, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16>
store <4 x i32*> %p, <4 x i32*>* %out
ret void
}
diff --git a/test/CodeGen/R600/store.ll b/test/CodeGen/R600/store.ll
index e4cb313..b51b9b7 100644
--- a/test/CodeGen/R600/store.ll
+++ b/test/CodeGen/R600/store.ll
@@ -16,7 +16,7 @@ entry:
}
; i8 store
-; EG-LABEL: {{^}}store_i8:
+; FUNC-LABEL: {{^}}store_i8:
; EG: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X
; IG 0: Get the byte index and truncate the value
@@ -37,7 +37,6 @@ entry:
; EG: MOV T[[RW_GPR]].Y, 0.0
; EG: MOV * T[[RW_GPR]].Z, 0.0
-; SI-LABEL: {{^}}store_i8:
; SI: buffer_store_byte
define void @store_i8(i8 addrspace(1)* %out, i8 %in) {
@@ -47,7 +46,7 @@ entry:
}
; i16 store
-; EG-LABEL: {{^}}store_i16:
+; FUNC-LABEL: {{^}}store_i16:
; EG: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X
; IG 0: Get the byte index and truncate the value
@@ -71,7 +70,6 @@ entry:
; EG: MOV T[[RW_GPR]].Y, 0.0
; EG: MOV * T[[RW_GPR]].Z, 0.0
-; SI-LABEL: {{^}}store_i16:
; SI: buffer_store_short
define void @store_i16(i16 addrspace(1)* %out, i16 %in) {
entry:
@@ -79,10 +77,10 @@ entry:
ret void
}
-; EG-LABEL: {{^}}store_v2i8:
+; FUNC-LABEL: {{^}}store_v2i8:
; EG: MEM_RAT MSKOR
; EG-NOT: MEM_RAT MSKOR
-; SI-LABEL: {{^}}store_v2i8:
+
; SI: buffer_store_byte
; SI: buffer_store_byte
define void @store_v2i8(<2 x i8> addrspace(1)* %out, <2 x i32> %in) {
@@ -93,11 +91,11 @@ entry:
}
-; EG-LABEL: {{^}}store_v2i16:
+; FUNC-LABEL: {{^}}store_v2i16:
; EG: MEM_RAT_CACHELESS STORE_RAW
-; CM-LABEL: {{^}}store_v2i16:
+
; CM: MEM_RAT_CACHELESS STORE_DWORD
-; SI-LABEL: {{^}}store_v2i16:
+
; SI: buffer_store_short
; SI: buffer_store_short
define void @store_v2i16(<2 x i16> addrspace(1)* %out, <2 x i32> %in) {
@@ -107,11 +105,11 @@ entry:
ret void
}
-; EG-LABEL: {{^}}store_v4i8:
+; FUNC-LABEL: {{^}}store_v4i8:
; EG: MEM_RAT_CACHELESS STORE_RAW
-; CM-LABEL: {{^}}store_v4i8:
+
; CM: MEM_RAT_CACHELESS STORE_DWORD
-; SI-LABEL: {{^}}store_v4i8:
+
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
@@ -124,11 +122,11 @@ entry:
}
; floating-point store
-; EG-LABEL: {{^}}store_f32:
+; FUNC-LABEL: {{^}}store_f32:
; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.X, T[0-9]+\.X}}, 1
-; CM-LABEL: {{^}}store_f32:
+
; CM: MEM_RAT_CACHELESS STORE_DWORD T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-LABEL: {{^}}store_f32:
+
; SI: buffer_store_dword
define void @store_f32(float addrspace(1)* %out, float %in) {
@@ -136,13 +134,13 @@ define void @store_f32(float addrspace(1)* %out, float %in) {
ret void
}
-; EG-LABEL: {{^}}store_v4i16:
+; FUNC-LABEL: {{^}}store_v4i16:
; EG: MEM_RAT MSKOR
; EG: MEM_RAT MSKOR
; EG: MEM_RAT MSKOR
; EG: MEM_RAT MSKOR
; EG-NOT: MEM_RAT MSKOR
-; SI-LABEL: {{^}}store_v4i16:
+
; SI: buffer_store_short
; SI: buffer_store_short
; SI: buffer_store_short
@@ -156,11 +154,11 @@ entry:
}
; vec2 floating-point stores
-; EG-LABEL: {{^}}store_v2f32:
+; FUNC-LABEL: {{^}}store_v2f32:
; EG: MEM_RAT_CACHELESS STORE_RAW
-; CM-LABEL: {{^}}store_v2f32:
+
; CM: MEM_RAT_CACHELESS STORE_DWORD
-; SI-LABEL: {{^}}store_v2f32:
+
; SI: buffer_store_dwordx2
define void @store_v2f32(<2 x float> addrspace(1)* %out, float %a, float %b) {
@@ -171,13 +169,13 @@ entry:
ret void
}
-; EG-LABEL: {{^}}store_v4i32:
+; FUNC-LABEL: {{^}}store_v4i32:
; EG: MEM_RAT_CACHELESS STORE_RAW
; EG-NOT: MEM_RAT_CACHELESS STORE_RAW
-; CM-LABEL: {{^}}store_v4i32:
+
; CM: MEM_RAT_CACHELESS STORE_DWORD
; CM-NOT: MEM_RAT_CACHELESS STORE_DWORD
-; SI-LABEL: {{^}}store_v4i32:
+
; SI: buffer_store_dwordx4
define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %in) {
entry:
@@ -218,29 +216,29 @@ entry:
ret void
}
-; EG-LABEL: {{^}}store_local_i8:
+; FUNC-LABEL: {{^}}store_local_i8:
; EG: LDS_BYTE_WRITE
-; SI-LABEL: {{^}}store_local_i8:
+
; SI: ds_write_b8
define void @store_local_i8(i8 addrspace(3)* %out, i8 %in) {
store i8 %in, i8 addrspace(3)* %out
ret void
}
-; EG-LABEL: {{^}}store_local_i16:
+; FUNC-LABEL: {{^}}store_local_i16:
; EG: LDS_SHORT_WRITE
-; SI-LABEL: {{^}}store_local_i16:
+
; SI: ds_write_b16
define void @store_local_i16(i16 addrspace(3)* %out, i16 %in) {
store i16 %in, i16 addrspace(3)* %out
ret void
}
-; EG-LABEL: {{^}}store_local_v2i16:
+; FUNC-LABEL: {{^}}store_local_v2i16:
; EG: LDS_WRITE
-; CM-LABEL: {{^}}store_local_v2i16:
+
; CM: LDS_WRITE
-; SI-LABEL: {{^}}store_local_v2i16:
+
; SI: ds_write_b16
; SI: ds_write_b16
define void @store_local_v2i16(<2 x i16> addrspace(3)* %out, <2 x i16> %in) {
@@ -249,11 +247,11 @@ entry:
ret void
}
-; EG-LABEL: {{^}}store_local_v4i8:
+; FUNC-LABEL: {{^}}store_local_v4i8:
; EG: LDS_WRITE
-; CM-LABEL: {{^}}store_local_v4i8:
+
; CM: LDS_WRITE
-; SI-LABEL: {{^}}store_local_v4i8:
+
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
@@ -264,13 +262,13 @@ entry:
ret void
}
-; EG-LABEL: {{^}}store_local_v2i32:
+; FUNC-LABEL: {{^}}store_local_v2i32:
; EG: LDS_WRITE
; EG: LDS_WRITE
-; CM-LABEL: {{^}}store_local_v2i32:
+
; CM: LDS_WRITE
; CM: LDS_WRITE
-; SI-LABEL: {{^}}store_local_v2i32:
+
; SI: ds_write_b64
define void @store_local_v2i32(<2 x i32> addrspace(3)* %out, <2 x i32> %in) {
entry:
@@ -278,17 +276,17 @@ entry:
ret void
}
-; EG-LABEL: {{^}}store_local_v4i32:
+; FUNC-LABEL: {{^}}store_local_v4i32:
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
-; CM-LABEL: {{^}}store_local_v4i32:
+
; CM: LDS_WRITE
; CM: LDS_WRITE
; CM: LDS_WRITE
; CM: LDS_WRITE
-; SI-LABEL: {{^}}store_local_v4i32:
+
; SI: ds_write_b32
; SI: ds_write_b32
; SI: ds_write_b32
@@ -326,19 +324,19 @@ entry:
; Evergreen / Northern Islands don't support 64-bit stores yet, so there should
; be two 32-bit stores.
-; EG-LABEL: {{^}}vecload2:
+; FUNC-LABEL: {{^}}vecload2:
; EG: MEM_RAT_CACHELESS STORE_RAW
-; CM-LABEL: {{^}}vecload2:
+
; CM: MEM_RAT_CACHELESS STORE_DWORD
-; SI-LABEL: {{^}}vecload2:
+
; SI: buffer_store_dwordx2
define void @vecload2(i32 addrspace(1)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 {
entry:
- %0 = load i32 addrspace(2)* %mem, align 4
- %arrayidx1.i = getelementptr inbounds i32 addrspace(2)* %mem, i64 1
- %1 = load i32 addrspace(2)* %arrayidx1.i, align 4
+ %0 = load i32, i32 addrspace(2)* %mem, align 4
+ %arrayidx1.i = getelementptr inbounds i32, i32 addrspace(2)* %mem, i64 1
+ %1 = load i32, i32 addrspace(2)* %arrayidx1.i, align 4
store i32 %0, i32 addrspace(1)* %out, align 4
- %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %out, i64 1
+ %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
store i32 %1, i32 addrspace(1)* %arrayidx1, align 4
ret void
}
@@ -362,11 +360,11 @@ attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"=
define void @i128-const-store(i32 addrspace(1)* %out) {
entry:
store i32 1, i32 addrspace(1)* %out, align 4
- %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %out, i64 1
+ %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
store i32 1, i32 addrspace(1)* %arrayidx2, align 4
- %arrayidx4 = getelementptr inbounds i32 addrspace(1)* %out, i64 2
+ %arrayidx4 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 2
store i32 2, i32 addrspace(1)* %arrayidx4, align 4
- %arrayidx6 = getelementptr inbounds i32 addrspace(1)* %out, i64 3
+ %arrayidx6 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 3
store i32 2, i32 addrspace(1)* %arrayidx6, align 4
ret void
}
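Besides the load/getelementptr syntax update, the store.ll hunks fold the per-target EG-LABEL/CM-LABEL/SI-LABEL lines into a single FUNC-LABEL, a prefix each RUN line passes alongside its target prefix so one label check anchors every target. A sketch of the convention (the RUN lines are modeled on this suite, not copied from the patch):

; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

; FUNC-LABEL: {{^}}example:
; EG: MEM_RAT_CACHELESS STORE_RAW
; SI: buffer_store_dword
define void @example(i32 addrspace(1)* %out, i32 %in) {
  store i32 %in, i32 addrspace(1)* %out
  ret void
}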
diff --git a/test/CodeGen/R600/store.r600.ll b/test/CodeGen/R600/store.r600.ll
index 2197260..696fb03 100644
--- a/test/CodeGen/R600/store.r600.ll
+++ b/test/CodeGen/R600/store.r600.ll
@@ -7,7 +7,7 @@
; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1
define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %1 = load <4 x i32> addrspace(1) * %in
+ %1 = load <4 x i32>, <4 x i32> addrspace(1) * %in
store <4 x i32> %1, <4 x i32> addrspace(1)* %out
ret void
}
@@ -16,7 +16,7 @@ define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %
; EG: {{^}}store_v4f32:
; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1
define void @store_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
- %1 = load <4 x float> addrspace(1) * %in
+ %1 = load <4 x float>, <4 x float> addrspace(1) * %in
store <4 x float> %1, <4 x float> addrspace(1)* %out
ret void
}
diff --git a/test/CodeGen/R600/sub.ll b/test/CodeGen/R600/sub.ll
index be48e18..03303f5 100644
--- a/test/CodeGen/R600/sub.ll
+++ b/test/CodeGen/R600/sub.ll
@@ -9,9 +9,9 @@ declare i32 @llvm.r600.read.tidig.x() readnone
; SI: v_subrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %a = load i32 addrspace(1)* %in
- %b = load i32 addrspace(1)* %b_ptr
+ %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %a = load i32, i32 addrspace(1)* %in
+ %b = load i32, i32 addrspace(1)* %b_ptr
%result = sub i32 %a, %b
store i32 %result, i32 addrspace(1)* %out
ret void
@@ -26,9 +26,9 @@ define void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32> addrspace(1) * %in
- %b = load <2 x i32> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
+ %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
+ %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
%result = sub <2 x i32> %a, %b
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
ret void
@@ -46,9 +46,9 @@ define void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)
; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test_sub_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32> addrspace(1) * %in
- %b = load <4 x i32> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
+ %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
+ %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
%result = sub <4 x i32> %a, %b
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
@@ -80,10 +80,10 @@ define void @s_sub_i64(i64 addrspace(1)* noalias %out, i64 %a, i64 %b) nounwind
; EG-DAG: SUB_INT
define void @v_sub_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() readnone
- %a_ptr = getelementptr i64 addrspace(1)* %inA, i32 %tid
- %b_ptr = getelementptr i64 addrspace(1)* %inB, i32 %tid
- %a = load i64 addrspace(1)* %a_ptr
- %b = load i64 addrspace(1)* %b_ptr
+ %a_ptr = getelementptr i64, i64 addrspace(1)* %inA, i32 %tid
+ %b_ptr = getelementptr i64, i64 addrspace(1)* %inB, i32 %tid
+ %a = load i64, i64 addrspace(1)* %a_ptr
+ %b = load i64, i64 addrspace(1)* %b_ptr
%result = sub i64 %a, %b
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
@@ -96,10 +96,10 @@ define void @v_sub_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias
; SI: v_subb_u32_e32
define void @v_test_sub_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* noalias %inA, <2 x i64> addrspace(1)* noalias %inB) {
%tid = call i32 @llvm.r600.read.tidig.x() readnone
- %a_ptr = getelementptr <2 x i64> addrspace(1)* %inA, i32 %tid
- %b_ptr = getelementptr <2 x i64> addrspace(1)* %inB, i32 %tid
- %a = load <2 x i64> addrspace(1)* %a_ptr
- %b = load <2 x i64> addrspace(1)* %b_ptr
+ %a_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inA, i32 %tid
+ %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inB, i32 %tid
+ %a = load <2 x i64>, <2 x i64> addrspace(1)* %a_ptr
+ %b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr
%result = sub <2 x i64> %a, %b
store <2 x i64> %result, <2 x i64> addrspace(1)* %out
ret void
@@ -116,10 +116,10 @@ define void @v_test_sub_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(
; SI: v_subb_u32_e32
define void @v_test_sub_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* noalias %inA, <4 x i64> addrspace(1)* noalias %inB) {
%tid = call i32 @llvm.r600.read.tidig.x() readnone
- %a_ptr = getelementptr <4 x i64> addrspace(1)* %inA, i32 %tid
- %b_ptr = getelementptr <4 x i64> addrspace(1)* %inB, i32 %tid
- %a = load <4 x i64> addrspace(1)* %a_ptr
- %b = load <4 x i64> addrspace(1)* %b_ptr
+ %a_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %inA, i32 %tid
+ %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %inB, i32 %tid
+ %a = load <4 x i64>, <4 x i64> addrspace(1)* %a_ptr
+ %b = load <4 x i64>, <4 x i64> addrspace(1)* %b_ptr
%result = sub <4 x i64> %a, %b
store <4 x i64> %result, <4 x i64> addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/swizzle-export.ll b/test/CodeGen/R600/swizzle-export.ll
index 5eaca76..000ee2f 100644
--- a/test/CodeGen/R600/swizzle-export.ll
+++ b/test/CodeGen/R600/swizzle-export.ll
@@ -12,56 +12,56 @@ main_body:
%1 = extractelement <4 x float> %reg1, i32 1
%2 = extractelement <4 x float> %reg1, i32 2
%3 = extractelement <4 x float> %reg1, i32 3
- %4 = load <4 x float> addrspace(8)* null
+ %4 = load <4 x float>, <4 x float> addrspace(8)* null
%5 = extractelement <4 x float> %4, i32 1
- %6 = load <4 x float> addrspace(8)* null
+ %6 = load <4 x float>, <4 x float> addrspace(8)* null
%7 = extractelement <4 x float> %6, i32 2
- %8 = load <4 x float> addrspace(8)* null
+ %8 = load <4 x float>, <4 x float> addrspace(8)* null
%9 = extractelement <4 x float> %8, i32 0
%10 = fmul float 0.000000e+00, %9
- %11 = load <4 x float> addrspace(8)* null
+ %11 = load <4 x float>, <4 x float> addrspace(8)* null
%12 = extractelement <4 x float> %11, i32 0
%13 = fmul float %5, %12
- %14 = load <4 x float> addrspace(8)* null
+ %14 = load <4 x float>, <4 x float> addrspace(8)* null
%15 = extractelement <4 x float> %14, i32 0
%16 = fmul float 0.000000e+00, %15
- %17 = load <4 x float> addrspace(8)* null
+ %17 = load <4 x float>, <4 x float> addrspace(8)* null
%18 = extractelement <4 x float> %17, i32 0
%19 = fmul float 0.000000e+00, %18
- %20 = load <4 x float> addrspace(8)* null
+ %20 = load <4 x float>, <4 x float> addrspace(8)* null
%21 = extractelement <4 x float> %20, i32 0
%22 = fmul float %7, %21
- %23 = load <4 x float> addrspace(8)* null
+ %23 = load <4 x float>, <4 x float> addrspace(8)* null
%24 = extractelement <4 x float> %23, i32 0
%25 = fmul float 0.000000e+00, %24
- %26 = load <4 x float> addrspace(8)* null
+ %26 = load <4 x float>, <4 x float> addrspace(8)* null
%27 = extractelement <4 x float> %26, i32 0
%28 = fmul float 0.000000e+00, %27
- %29 = load <4 x float> addrspace(8)* null
+ %29 = load <4 x float>, <4 x float> addrspace(8)* null
%30 = extractelement <4 x float> %29, i32 0
%31 = fmul float 0.000000e+00, %30
- %32 = load <4 x float> addrspace(8)* null
+ %32 = load <4 x float>, <4 x float> addrspace(8)* null
%33 = extractelement <4 x float> %32, i32 0
%34 = fmul float 0.000000e+00, %33
- %35 = load <4 x float> addrspace(8)* null
+ %35 = load <4 x float>, <4 x float> addrspace(8)* null
%36 = extractelement <4 x float> %35, i32 0
%37 = fmul float 0.000000e+00, %36
- %38 = load <4 x float> addrspace(8)* null
+ %38 = load <4 x float>, <4 x float> addrspace(8)* null
%39 = extractelement <4 x float> %38, i32 0
%40 = fmul float 1.000000e+00, %39
- %41 = load <4 x float> addrspace(8)* null
+ %41 = load <4 x float>, <4 x float> addrspace(8)* null
%42 = extractelement <4 x float> %41, i32 0
%43 = fmul float 0.000000e+00, %42
- %44 = load <4 x float> addrspace(8)* null
+ %44 = load <4 x float>, <4 x float> addrspace(8)* null
%45 = extractelement <4 x float> %44, i32 0
%46 = fmul float 0.000000e+00, %45
- %47 = load <4 x float> addrspace(8)* null
+ %47 = load <4 x float>, <4 x float> addrspace(8)* null
%48 = extractelement <4 x float> %47, i32 0
%49 = fmul float 0.000000e+00, %48
- %50 = load <4 x float> addrspace(8)* null
+ %50 = load <4 x float>, <4 x float> addrspace(8)* null
%51 = extractelement <4 x float> %50, i32 0
%52 = fmul float 0.000000e+00, %51
- %53 = load <4 x float> addrspace(8)* null
+ %53 = load <4 x float>, <4 x float> addrspace(8)* null
%54 = extractelement <4 x float> %53, i32 0
%55 = fmul float 1.000000e+00, %54
%56 = insertelement <4 x float> undef, float %0, i32 0
@@ -102,12 +102,12 @@ main_body:
%1 = extractelement <4 x float> %reg1, i32 1
%2 = fadd float %0, 2.5
%3 = fmul float %1, 3.5
- %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%5 = extractelement <4 x float> %4, i32 0
%6 = call float @llvm.cos.f32(float %5)
- %7 = load <4 x float> addrspace(8)* null
+ %7 = load <4 x float>, <4 x float> addrspace(8)* null
%8 = extractelement <4 x float> %7, i32 0
- %9 = load <4 x float> addrspace(8)* null
+ %9 = load <4 x float>, <4 x float> addrspace(8)* null
%10 = extractelement <4 x float> %9, i32 1
%11 = insertelement <4 x float> undef, float %2, i32 0
%12 = insertelement <4 x float> %11, float %3, i32 1
diff --git a/test/CodeGen/R600/trunc-cmp-constant.ll b/test/CodeGen/R600/trunc-cmp-constant.ll
index a097ab0..dac7472 100644
--- a/test/CodeGen/R600/trunc-cmp-constant.ll
+++ b/test/CodeGen/R600/trunc-cmp-constant.ll
@@ -4,12 +4,12 @@
; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_eq_0:
; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
-; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[TMP]], 1{{$}}
-; SI: s_xor_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -1{{$}}
+; SI: v_cmp_eq_i32_e32 vcc, 1, [[TMP]]{{$}}
+; SI: s_xor_b64 s{{\[[0-9]+:[0-9]+\]}}, vcc, -1{{$}}
; SI: v_cndmask_b32_e64
; SI: buffer_store_byte
define void @sextload_i1_to_i32_trunc_cmp_eq_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1 addrspace(1)* %in
+ %load = load i1, i1 addrspace(1)* %in
%ext = sext i1 %load to i32
%cmp = icmp eq i32 %ext, 0
store i1 %cmp, i1 addrspace(1)* %out
@@ -20,12 +20,12 @@ define void @sextload_i1_to_i32_trunc_cmp_eq_0(i1 addrspace(1)* %out, i1 addrspa
; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_eq_0:
; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
-; SI: v_cmp_eq_i32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], [[TMP]], 1{{$}}
-; SI-NEXT: s_xor_b64 [[NEG:s\[[0-9]+:[0-9]+\]]], [[CMP0]], -1
+; SI: v_cmp_eq_i32_e32 vcc, 1, [[TMP]]{{$}}
+; SI-NEXT: s_xor_b64 [[NEG:s\[[0-9]+:[0-9]+\]]], vcc, -1
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[NEG]]
; SI-NEXT: buffer_store_byte [[RESULT]]
define void @zextload_i1_to_i32_trunc_cmp_eq_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1 addrspace(1)* %in
+ %load = load i1, i1 addrspace(1)* %in
%ext = zext i1 %load to i32
%cmp = icmp eq i32 %ext, 0
store i1 %cmp, i1 addrspace(1)* %out
@@ -36,7 +36,7 @@ define void @zextload_i1_to_i32_trunc_cmp_eq_0(i1 addrspace(1)* %out, i1 addrspa
; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
; SI: buffer_store_byte [[RESULT]]
define void @sextload_i1_to_i32_trunc_cmp_eq_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1 addrspace(1)* %in
+ %load = load i1, i1 addrspace(1)* %in
%ext = sext i1 %load to i32
%cmp = icmp eq i32 %ext, 1
store i1 %cmp, i1 addrspace(1)* %out
@@ -48,7 +48,7 @@ define void @sextload_i1_to_i32_trunc_cmp_eq_1(i1 addrspace(1)* %out, i1 addrspa
; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[LOAD]]
; SI-NEXT: buffer_store_byte [[RESULT]]
define void @zextload_i1_to_i32_trunc_cmp_eq_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1 addrspace(1)* %in
+ %load = load i1, i1 addrspace(1)* %in
%ext = zext i1 %load to i32
%cmp = icmp eq i32 %ext, 1
store i1 %cmp, i1 addrspace(1)* %out
@@ -60,7 +60,7 @@ define void @zextload_i1_to_i32_trunc_cmp_eq_1(i1 addrspace(1)* %out, i1 addrspa
; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[LOAD]]
; SI-NEXT: buffer_store_byte [[RESULT]]
define void @sextload_i1_to_i32_trunc_cmp_eq_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1 addrspace(1)* %in
+ %load = load i1, i1 addrspace(1)* %in
%ext = sext i1 %load to i32
%cmp = icmp eq i32 %ext, -1
store i1 %cmp, i1 addrspace(1)* %out
@@ -71,7 +71,7 @@ define void @sextload_i1_to_i32_trunc_cmp_eq_neg1(i1 addrspace(1)* %out, i1 addr
; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
; SI: buffer_store_byte [[RESULT]]
define void @zextload_i1_to_i32_trunc_cmp_eq_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1 addrspace(1)* %in
+ %load = load i1, i1 addrspace(1)* %in
%ext = zext i1 %load to i32
%cmp = icmp eq i32 %ext, -1
store i1 %cmp, i1 addrspace(1)* %out
@@ -84,7 +84,7 @@ define void @zextload_i1_to_i32_trunc_cmp_eq_neg1(i1 addrspace(1)* %out, i1 addr
; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
; SI-NEXT: buffer_store_byte [[RESULT]]
define void @sextload_i1_to_i32_trunc_cmp_ne_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1 addrspace(1)* %in
+ %load = load i1, i1 addrspace(1)* %in
%ext = sext i1 %load to i32
%cmp = icmp ne i32 %ext, 0
store i1 %cmp, i1 addrspace(1)* %out
@@ -96,7 +96,7 @@ define void @sextload_i1_to_i32_trunc_cmp_ne_0(i1 addrspace(1)* %out, i1 addrspa
; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
; SI-NEXT: buffer_store_byte [[RESULT]]
define void @zextload_i1_to_i32_trunc_cmp_ne_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1 addrspace(1)* %in
+ %load = load i1, i1 addrspace(1)* %in
%ext = zext i1 %load to i32
%cmp = icmp ne i32 %ext, 0
store i1 %cmp, i1 addrspace(1)* %out
@@ -107,7 +107,7 @@ define void @zextload_i1_to_i32_trunc_cmp_ne_0(i1 addrspace(1)* %out, i1 addrspa
; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
; SI: buffer_store_byte [[RESULT]]
define void @sextload_i1_to_i32_trunc_cmp_ne_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1 addrspace(1)* %in
+ %load = load i1, i1 addrspace(1)* %in
%ext = sext i1 %load to i32
%cmp = icmp ne i32 %ext, 1
store i1 %cmp, i1 addrspace(1)* %out
@@ -117,12 +117,12 @@ define void @sextload_i1_to_i32_trunc_cmp_ne_1(i1 addrspace(1)* %out, i1 addrspa
; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_ne_1:
; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
-; SI: v_cmp_eq_i32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], [[TMP]], 1{{$}}
-; SI-NEXT: s_xor_b64 [[NEG:s\[[0-9]+:[0-9]+\]]], [[CMP0]], -1
+; SI: v_cmp_eq_i32_e32 vcc, 1, [[TMP]]{{$}}
+; SI-NEXT: s_xor_b64 [[NEG:s\[[0-9]+:[0-9]+\]]], vcc, -1
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[NEG]]
; SI-NEXT: buffer_store_byte [[RESULT]]
define void @zextload_i1_to_i32_trunc_cmp_ne_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1 addrspace(1)* %in
+ %load = load i1, i1 addrspace(1)* %in
%ext = zext i1 %load to i32
%cmp = icmp ne i32 %ext, 1
store i1 %cmp, i1 addrspace(1)* %out
@@ -137,7 +137,7 @@ define void @zextload_i1_to_i32_trunc_cmp_ne_1(i1 addrspace(1)* %out, i1 addrspa
; XSI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP0]]
; XSI-NEXT: buffer_store_byte [[RESULT]]
define void @sextload_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1 addrspace(1)* %in
+ %load = load i1, i1 addrspace(1)* %in
%ext = sext i1 %load to i32
%cmp = icmp ne i32 %ext, -1
store i1 %cmp, i1 addrspace(1)* %out
@@ -148,7 +148,7 @@ define void @sextload_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i1 addr
; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
; SI: buffer_store_byte [[RESULT]]
define void @zextload_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1 addrspace(1)* %in
+ %load = load i1, i1 addrspace(1)* %in
%ext = zext i1 %load to i32
%cmp = icmp ne i32 %ext, -1
store i1 %cmp, i1 addrspace(1)* %out
@@ -157,11 +157,11 @@ define void @zextload_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i1 addr
; FUNC-LABEL: {{^}}masked_load_i1_to_i32_trunc_cmp_ne_neg1:
; SI: buffer_load_sbyte [[LOAD:v[0-9]+]]
-; SI: v_cmp_ne_i32_e64 {{s\[[0-9]+:[0-9]+\]}}, [[LOAD]], -1{{$}}
+; SI: v_cmp_ne_i32_e32 vcc, -1, [[LOAD]]{{$}}
; SI-NEXT: v_cndmask_b32_e64
; SI-NEXT: buffer_store_byte
define void @masked_load_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
- %load = load i8 addrspace(1)* %in
+ %load = load i8, i8 addrspace(1)* %in
%masked = and i8 %load, 255
%ext = sext i8 %masked to i32
%cmp = icmp ne i32 %ext, -1
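The check updates in this file track a compare-commuting change: with the immediate commuted into src0, the compare can use the shorter VOP2 (_e32) encoding, whose destination is implicitly vcc and whose src1 must be a VGPR, instead of the 64-bit VOP3 (_e64) form with an explicit SGPR-pair destination. Roughly (annotated asm sketch, not tool output):

; VOP3 form: explicit SGPR-pair destination, immediate as src1
;   v_cmp_eq_i32_e64 s[0:1], v0, 1
; VOP2 form: implicit vcc destination, inline constant commuted to src0
;   v_cmp_eq_i32_e32 vcc, 1, v0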
diff --git a/test/CodeGen/R600/trunc.ll b/test/CodeGen/R600/trunc.ll
index fa44264..bf690ca 100644
--- a/test/CodeGen/R600/trunc.ll
+++ b/test/CodeGen/R600/trunc.ll
@@ -53,7 +53,7 @@ define void @trunc_shl_i64(i64 addrspace(1)* %out2, i32 addrspace(1)* %out, i64
; SI: v_and_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI: v_cmp_eq_i32
define void @trunc_i32_to_i1(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) {
- %a = load i32 addrspace(1)* %ptr, align 4
+ %a = load i32, i32 addrspace(1)* %ptr, align 4
%trunc = trunc i32 %a to i1
%result = select i1 %trunc, i32 1, i32 0
store i32 %result, i32 addrspace(1)* %out, align 4
@@ -73,8 +73,8 @@ define void @sgpr_trunc_i32_to_i1(i32 addrspace(1)* %out, i32 %a) {
; SI-LABEL: {{^}}s_trunc_i64_to_i1:
; SI: s_load_dwordx2 s{{\[}}[[SLO:[0-9]+]]:{{[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI: v_and_b32_e64 [[MASKED:v[0-9]+]], 1, s[[SLO]]
-; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[MASKED]], 1
-; SI: v_cndmask_b32_e64 {{v[0-9]+}}, -12, 63, [[CMP]]
+; SI: v_cmp_eq_i32_e32 vcc, 1, [[MASKED]]
+; SI: v_cndmask_b32_e64 {{v[0-9]+}}, -12, 63, vcc
define void @s_trunc_i64_to_i1(i32 addrspace(1)* %out, i64 %x) {
%trunc = trunc i64 %x to i1
%sel = select i1 %trunc, i32 63, i32 -12
@@ -85,13 +85,13 @@ define void @s_trunc_i64_to_i1(i32 addrspace(1)* %out, i64 %x) {
; SI-LABEL: {{^}}v_trunc_i64_to_i1:
; SI: buffer_load_dwordx2 v{{\[}}[[VLO:[0-9]+]]:{{[0-9]+\]}}
; SI: v_and_b32_e32 [[MASKED:v[0-9]+]], 1, v[[VLO]]
-; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[MASKED]], 1
-; SI: v_cndmask_b32_e64 {{v[0-9]+}}, -12, 63, [[CMP]]
+; SI: v_cmp_eq_i32_e32 vcc, 1, [[MASKED]]
+; SI: v_cndmask_b32_e64 {{v[0-9]+}}, -12, 63, vcc
define void @v_trunc_i64_to_i1(i32 addrspace(1)* %out, i64 addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep = getelementptr i64 addrspace(1)* %in, i32 %tid
- %out.gep = getelementptr i32 addrspace(1)* %out, i32 %tid
- %x = load i64 addrspace(1)* %gep
+ %gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %x = load i64, i64 addrspace(1)* %gep
%trunc = trunc i64 %x to i1
%sel = select i1 %trunc, i32 63, i32 -12
diff --git a/test/CodeGen/R600/tti-unroll-prefs.ll b/test/CodeGen/R600/tti-unroll-prefs.ll
index 0009c42..76c32af 100644
--- a/test/CodeGen/R600/tti-unroll-prefs.ll
+++ b/test/CodeGen/R600/tti-unroll-prefs.ll
@@ -39,7 +39,7 @@ if.then4: ; preds = %if.then4.lr.ph, %if
%add2 = add nsw i32 %b.addr.014, 1
%1 = sext i32 %b.addr.014 to i64
%add.ptr.sum = add nsw i64 %1, %0
- %add.ptr5 = getelementptr inbounds i8 addrspace(1)* %dst, i64 %add.ptr.sum
+ %add.ptr5 = getelementptr inbounds i8, i8 addrspace(1)* %dst, i64 %add.ptr.sum
store i8 0, i8 addrspace(1)* %add.ptr5, align 1
%inc = add nsw i32 %i.015, 1
%cmp1 = icmp slt i32 %inc, 4
diff --git a/test/CodeGen/R600/uaddo.ll b/test/CodeGen/R600/uaddo.ll
index 57d7835..9f38365 100644
--- a/test/CodeGen/R600/uaddo.ll
+++ b/test/CodeGen/R600/uaddo.ll
@@ -33,8 +33,8 @@ define void @s_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32
; FUNC-LABEL: {{^}}v_uaddo_i32:
; SI: v_add_i32
define void @v_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
- %a = load i32 addrspace(1)* %aptr, align 4
- %b = load i32 addrspace(1)* %bptr, align 4
+ %a = load i32, i32 addrspace(1)* %aptr, align 4
+ %b = load i32, i32 addrspace(1)* %bptr, align 4
%uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) nounwind
%val = extractvalue { i32, i1 } %uadd, 0
%carry = extractvalue { i32, i1 } %uadd, 1
@@ -59,8 +59,8 @@ define void @s_uaddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64
; SI: v_add_i32
; SI: v_addc_u32
define void @v_uaddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
- %a = load i64 addrspace(1)* %aptr, align 4
- %b = load i64 addrspace(1)* %bptr, align 4
+ %a = load i64, i64 addrspace(1)* %aptr, align 4
+ %b = load i64, i64 addrspace(1)* %bptr, align 4
%uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) nounwind
%val = extractvalue { i64, i1 } %uadd, 0
%carry = extractvalue { i64, i1 } %uadd, 1
diff --git a/test/CodeGen/R600/udiv.ll b/test/CodeGen/R600/udiv.ll
index 0c2c65b..de22a22 100644
--- a/test/CodeGen/R600/udiv.ll
+++ b/test/CodeGen/R600/udiv.ll
@@ -7,9 +7,9 @@
;EG: CF_END
define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %a = load i32 addrspace(1) * %in
- %b = load i32 addrspace(1) * %b_ptr
+ %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %a = load i32, i32 addrspace(1) * %in
+ %b = load i32, i32 addrspace(1) * %b_ptr
%result = udiv i32 %a, %b
store i32 %result, i32 addrspace(1)* %out
ret void
@@ -25,9 +25,9 @@ define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
;SI: s_endpgm
define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32> addrspace(1) * %in
- %b = load <2 x i32> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
+ %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
+ %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
%result = udiv <2 x i32> %a, %b
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
ret void
@@ -39,9 +39,9 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
;SI: s_endpgm
define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32> addrspace(1) * %in
- %b = load <4 x i32> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
+ %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
+ %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
%result = udiv <4 x i32> %a, %b
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/udivrem24.ll b/test/CodeGen/R600/udivrem24.ll
index 4b98ac6..4de881b 100644
--- a/test/CodeGen/R600/udivrem24.ll
+++ b/test/CodeGen/R600/udivrem24.ll
@@ -13,9 +13,9 @@
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define void @udiv24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
- %den_ptr = getelementptr i8 addrspace(1)* %in, i8 1
- %num = load i8 addrspace(1) * %in
- %den = load i8 addrspace(1) * %den_ptr
+ %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
+ %num = load i8, i8 addrspace(1) * %in
+ %den = load i8, i8 addrspace(1) * %den_ptr
%result = udiv i8 %num, %den
store i8 %result, i8 addrspace(1)* %out
ret void
@@ -32,9 +32,9 @@ define void @udiv24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define void @udiv24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
- %den_ptr = getelementptr i16 addrspace(1)* %in, i16 1
- %num = load i16 addrspace(1) * %in, align 2
- %den = load i16 addrspace(1) * %den_ptr, align 2
+ %den_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
+ %num = load i16, i16 addrspace(1) * %in, align 2
+ %den = load i16, i16 addrspace(1) * %den_ptr, align 2
%result = udiv i16 %num, %den
store i16 %result, i16 addrspace(1)* %out, align 2
ret void
@@ -51,9 +51,9 @@ define void @udiv24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define void @udiv24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %num = load i32 addrspace(1) * %in, align 4
- %den = load i32 addrspace(1) * %den_ptr, align 4
+ %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %num = load i32, i32 addrspace(1) * %in, align 4
+ %den = load i32, i32 addrspace(1) * %den_ptr, align 4
%num.i24.0 = shl i32 %num, 8
%den.i24.0 = shl i32 %den, 8
%num.i24 = lshr i32 %num.i24.0, 8
@@ -71,9 +71,9 @@ define void @udiv24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @udiv25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %num = load i32 addrspace(1) * %in, align 4
- %den = load i32 addrspace(1) * %den_ptr, align 4
+ %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %num = load i32, i32 addrspace(1) * %in, align 4
+ %den = load i32, i32 addrspace(1) * %den_ptr, align 4
%num.i24.0 = shl i32 %num, 7
%den.i24.0 = shl i32 %den, 7
%num.i24 = lshr i32 %num.i24.0, 7
@@ -91,9 +91,9 @@ define void @udiv25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @test_no_udiv24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %num = load i32 addrspace(1) * %in, align 4
- %den = load i32 addrspace(1) * %den_ptr, align 4
+ %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %num = load i32, i32 addrspace(1) * %in, align 4
+ %den = load i32, i32 addrspace(1) * %den_ptr, align 4
%num.i24.0 = shl i32 %num, 8
%den.i24.0 = shl i32 %den, 7
%num.i24 = lshr i32 %num.i24.0, 8
@@ -111,9 +111,9 @@ define void @test_no_udiv24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in)
; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @test_no_udiv24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %num = load i32 addrspace(1) * %in, align 4
- %den = load i32 addrspace(1) * %den_ptr, align 4
+ %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %num = load i32, i32 addrspace(1) * %in, align 4
+ %den = load i32, i32 addrspace(1) * %den_ptr, align 4
%num.i24.0 = shl i32 %num, 7
%den.i24.0 = shl i32 %den, 8
%num.i24 = lshr i32 %num.i24.0, 7
@@ -134,9 +134,9 @@ define void @test_no_udiv24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in)
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define void @urem24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
- %den_ptr = getelementptr i8 addrspace(1)* %in, i8 1
- %num = load i8 addrspace(1) * %in
- %den = load i8 addrspace(1) * %den_ptr
+ %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
+ %num = load i8, i8 addrspace(1) * %in
+ %den = load i8, i8 addrspace(1) * %den_ptr
%result = urem i8 %num, %den
store i8 %result, i8 addrspace(1)* %out
ret void
@@ -153,9 +153,9 @@ define void @urem24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define void @urem24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
- %den_ptr = getelementptr i16 addrspace(1)* %in, i16 1
- %num = load i16 addrspace(1) * %in, align 2
- %den = load i16 addrspace(1) * %den_ptr, align 2
+ %den_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
+ %num = load i16, i16 addrspace(1) * %in, align 2
+ %den = load i16, i16 addrspace(1) * %den_ptr, align 2
%result = urem i16 %num, %den
store i16 %result, i16 addrspace(1)* %out, align 2
ret void
@@ -172,9 +172,9 @@ define void @urem24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define void @urem24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %num = load i32 addrspace(1) * %in, align 4
- %den = load i32 addrspace(1) * %den_ptr, align 4
+ %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %num = load i32, i32 addrspace(1) * %in, align 4
+ %den = load i32, i32 addrspace(1) * %den_ptr, align 4
%num.i24.0 = shl i32 %num, 8
%den.i24.0 = shl i32 %den, 8
%num.i24 = lshr i32 %num.i24.0, 8
@@ -192,9 +192,9 @@ define void @urem24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @urem25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %num = load i32 addrspace(1) * %in, align 4
- %den = load i32 addrspace(1) * %den_ptr, align 4
+ %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %num = load i32, i32 addrspace(1) * %in, align 4
+ %den = load i32, i32 addrspace(1) * %den_ptr, align 4
%num.i24.0 = shl i32 %num, 7
%den.i24.0 = shl i32 %den, 7
%num.i24 = lshr i32 %num.i24.0, 7
@@ -212,9 +212,9 @@ define void @urem25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @test_no_urem24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %num = load i32 addrspace(1) * %in, align 4
- %den = load i32 addrspace(1) * %den_ptr, align 4
+ %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %num = load i32, i32 addrspace(1) * %in, align 4
+ %den = load i32, i32 addrspace(1) * %den_ptr, align 4
%num.i24.0 = shl i32 %num, 8
%den.i24.0 = shl i32 %den, 7
%num.i24 = lshr i32 %num.i24.0, 8
@@ -232,9 +232,9 @@ define void @test_no_urem24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in)
; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @test_no_urem24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %num = load i32 addrspace(1) * %in, align 4
- %den = load i32 addrspace(1) * %den_ptr, align 4
+ %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %num = load i32, i32 addrspace(1) * %in, align 4
+ %den = load i32, i32 addrspace(1) * %den_ptr, align 4
%num.i24.0 = shl i32 %num, 7
%den.i24.0 = shl i32 %den, 8
%num.i24 = lshr i32 %num.i24.0, 7
diff --git a/test/CodeGen/R600/uint_to_fp.f64.ll b/test/CodeGen/R600/uint_to_fp.f64.ll
index f715243..dfec8eb 100644
--- a/test/CodeGen/R600/uint_to_fp.f64.ll
+++ b/test/CodeGen/R600/uint_to_fp.f64.ll
@@ -11,8 +11,8 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; SI: buffer_store_dwordx2 [[RESULT]]
define void @v_uint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep = getelementptr i64 addrspace(1)* %in, i32 %tid
- %val = load i64 addrspace(1)* %gep, align 8
+ %gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %val = load i64, i64 addrspace(1)* %gep, align 8
%result = uitofp i64 %val to double
store double %result, double addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/uint_to_fp.ll b/test/CodeGen/R600/uint_to_fp.ll
index 1c8a175..00fea80 100644
--- a/test/CodeGen/R600/uint_to_fp.ll
+++ b/test/CodeGen/R600/uint_to_fp.ll
@@ -38,7 +38,7 @@ define void @uint_to_fp_v2i32_to_v2f32(<2 x float> addrspace(1)* %out, <2 x i32>
; SI: v_cvt_f32_u32_e32
; SI: s_endpgm
define void @uint_to_fp_v4i32_to_v4f32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %value = load <4 x i32> addrspace(1) * %in
+ %value = load <4 x i32>, <4 x i32> addrspace(1) * %in
%result = uitofp <4 x i32> %value to <4 x float>
store <4 x float> %result, <4 x float> addrspace(1)* %out
ret void
@@ -50,7 +50,7 @@ define void @uint_to_fp_v4i32_to_v4f32(<4 x float> addrspace(1)* %out, <4 x i32>
; R600: MULADD_IEEE
; SI: v_cvt_f32_u32_e32
; SI: v_cvt_f32_u32_e32
-; SI: v_madmk_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, 0x4f800000
+; SI: v_madmk_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, 0x4f800000
; SI: s_endpgm
define void @uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 %in) {
entry:
diff --git a/test/CodeGen/R600/unaligned-load-store.ll b/test/CodeGen/R600/unaligned-load-store.ll
index 665dc37..efb1de2 100644
--- a/test/CodeGen/R600/unaligned-load-store.ll
+++ b/test/CodeGen/R600/unaligned-load-store.ll
@@ -8,7 +8,7 @@
; SI: ds_write_b8
; SI: s_endpgm
define void @unaligned_load_store_i16_local(i16 addrspace(3)* %p, i16 addrspace(3)* %r) nounwind {
- %v = load i16 addrspace(3)* %p, align 1
+ %v = load i16, i16 addrspace(3)* %p, align 1
store i16 %v, i16 addrspace(3)* %r, align 1
ret void
}
@@ -20,7 +20,7 @@ define void @unaligned_load_store_i16_local(i16 addrspace(3)* %p, i16 addrspace(
; SI: buffer_store_byte
; SI: s_endpgm
define void @unaligned_load_store_i16_global(i16 addrspace(1)* %p, i16 addrspace(1)* %r) nounwind {
- %v = load i16 addrspace(1)* %p, align 1
+ %v = load i16, i16 addrspace(1)* %p, align 1
store i16 %v, i16 addrspace(1)* %r, align 1
ret void
}
@@ -36,7 +36,7 @@ define void @unaligned_load_store_i16_global(i16 addrspace(1)* %p, i16 addrspace
; SI: ds_write_b8
; SI: s_endpgm
define void @unaligned_load_store_i32_local(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind {
- %v = load i32 addrspace(3)* %p, align 1
+ %v = load i32, i32 addrspace(3)* %p, align 1
store i32 %v, i32 addrspace(3)* %r, align 1
ret void
}
@@ -51,7 +51,7 @@ define void @unaligned_load_store_i32_local(i32 addrspace(3)* %p, i32 addrspace(
; SI: buffer_store_byte
; SI: buffer_store_byte
define void @unaligned_load_store_i32_global(i32 addrspace(1)* %p, i32 addrspace(1)* %r) nounwind {
- %v = load i32 addrspace(1)* %p, align 1
+ %v = load i32, i32 addrspace(1)* %p, align 1
store i32 %v, i32 addrspace(1)* %r, align 1
ret void
}
@@ -75,7 +75,7 @@ define void @unaligned_load_store_i32_global(i32 addrspace(1)* %p, i32 addrspace
; SI: ds_write_b8
; SI: s_endpgm
define void @unaligned_load_store_i64_local(i64 addrspace(3)* %p, i64 addrspace(3)* %r) {
- %v = load i64 addrspace(3)* %p, align 1
+ %v = load i64, i64 addrspace(3)* %p, align 1
store i64 %v, i64 addrspace(3)* %r, align 1
ret void
}
@@ -98,7 +98,7 @@ define void @unaligned_load_store_i64_local(i64 addrspace(3)* %p, i64 addrspace(
; SI: buffer_store_byte
; SI: buffer_store_byte
define void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace(1)* %r) {
- %v = load i64 addrspace(1)* %p, align 1
+ %v = load i64, i64 addrspace(1)* %p, align 1
store i64 %v, i64 addrspace(1)* %r, align 1
ret void
}
@@ -145,7 +145,7 @@ define void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace
; SI: ds_write_b8
; SI: s_endpgm
define void @unaligned_load_store_v4i32_local(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) nounwind {
- %v = load <4 x i32> addrspace(3)* %p, align 1
+ %v = load <4 x i32>, <4 x i32> addrspace(3)* %p, align 1
store <4 x i32> %v, <4 x i32> addrspace(3)* %r, align 1
ret void
}
@@ -169,7 +169,7 @@ define void @unaligned_load_store_v4i32_local(<4 x i32> addrspace(3)* %p, <4 x i
; FIXME-SI: buffer_load_ubyte
; FIXME-SI: buffer_load_ubyte
define void @unaligned_load_store_v4i32_global(<4 x i32> addrspace(1)* %p, <4 x i32> addrspace(1)* %r) nounwind {
- %v = load <4 x i32> addrspace(1)* %p, align 1
+ %v = load <4 x i32>, <4 x i32> addrspace(1)* %p, align 1
store <4 x i32> %v, <4 x i32> addrspace(1)* %r, align 1
ret void
}
@@ -178,7 +178,7 @@ define void @unaligned_load_store_v4i32_global(<4 x i32> addrspace(1)* %p, <4 x
; SI: ds_read2_b32
; SI: s_endpgm
define void @load_lds_i64_align_4(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
- %val = load i64 addrspace(3)* %in, align 4
+ %val = load i64, i64 addrspace(3)* %in, align 4
store i64 %val, i64 addrspace(1)* %out, align 8
ret void
}
@@ -187,8 +187,8 @@ define void @load_lds_i64_align_4(i64 addrspace(1)* nocapture %out, i64 addrspac
; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset0:8 offset1:9
; SI: s_endpgm
define void @load_lds_i64_align_4_with_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
- %ptr = getelementptr i64 addrspace(3)* %in, i32 4
- %val = load i64 addrspace(3)* %ptr, align 4
+ %ptr = getelementptr i64, i64 addrspace(3)* %in, i32 4
+ %val = load i64, i64 addrspace(3)* %ptr, align 4
store i64 %val, i64 addrspace(1)* %out, align 8
ret void
}
@@ -199,9 +199,9 @@ define void @load_lds_i64_align_4_with_offset(i64 addrspace(1)* nocapture %out,
; SI: s_endpgm
define void @load_lds_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
%ptr = bitcast i64 addrspace(3)* %in to i32 addrspace(3)*
- %ptr255 = getelementptr i32 addrspace(3)* %ptr, i32 255
+ %ptr255 = getelementptr i32, i32 addrspace(3)* %ptr, i32 255
%ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)*
- %val = load i64 addrspace(3)* %ptri64, align 4
+ %val = load i64, i64 addrspace(3)* %ptri64, align 4
store i64 %val, i64 addrspace(1)* %out, align 8
ret void
}
@@ -219,7 +219,7 @@ define void @load_lds_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture
; SI: s_endpgm
define void @load_lds_i64_align_1(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
- %val = load i64 addrspace(3)* %in, align 1
+ %val = load i64, i64 addrspace(3)* %in, align 1
store i64 %val, i64 addrspace(1)* %out, align 8
ret void
}
@@ -236,7 +236,7 @@ define void @store_lds_i64_align_4(i64 addrspace(3)* %out, i64 %val) #0 {
; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:8 offset1:9
; SI: s_endpgm
define void @store_lds_i64_align_4_with_offset(i64 addrspace(3)* %out) #0 {
- %ptr = getelementptr i64 addrspace(3)* %out, i32 4
+ %ptr = getelementptr i64, i64 addrspace(3)* %out, i32 4
store i64 0, i64 addrspace(3)* %ptr, align 4
ret void
}
@@ -247,7 +247,7 @@ define void @store_lds_i64_align_4_with_offset(i64 addrspace(3)* %out) #0 {
; SI: s_endpgm
define void @store_lds_i64_align_4_with_split_offset(i64 addrspace(3)* %out) #0 {
%ptr = bitcast i64 addrspace(3)* %out to i32 addrspace(3)*
- %ptr255 = getelementptr i32 addrspace(3)* %ptr, i32 255
+ %ptr255 = getelementptr i32, i32 addrspace(3)* %ptr, i32 255
%ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)*
store i64 0, i64 addrspace(3)* %out, align 4
ret void
diff --git a/test/CodeGen/R600/unhandled-loop-condition-assertion.ll b/test/CodeGen/R600/unhandled-loop-condition-assertion.ll
index c615f0b..036a7e9 100644
--- a/test/CodeGen/R600/unhandled-loop-condition-assertion.ll
+++ b/test/CodeGen/R600/unhandled-loop-condition-assertion.ll
@@ -20,20 +20,20 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body, %for.body.lr.ph
%main.addr.011 = phi i8 addrspace(1)* [ %main, %for.body.lr.ph ], [ %add.ptr6, %for.body ]
%0 = bitcast i8 addrspace(1)* %main.addr.011 to i32 addrspace(1)*
- %1 = load i32 addrspace(1)* %0, align 4
- %add.ptr = getelementptr inbounds i8 addrspace(1)* %main.addr.011, i32 %main_stride
+ %1 = load i32, i32 addrspace(1)* %0, align 4
+ %add.ptr = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %main_stride
%2 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)*
- %3 = load i32 addrspace(1)* %2, align 4
- %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %main.addr.011, i32 %add.ptr.sum
+ %3 = load i32, i32 addrspace(1)* %2, align 4
+ %add.ptr1 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr.sum
%4 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)*
- %5 = load i32 addrspace(1)* %4, align 4
- %add.ptr2 = getelementptr inbounds i8 addrspace(1)* %main.addr.011, i32 %add.ptr1.sum
+ %5 = load i32, i32 addrspace(1)* %4, align 4
+ %add.ptr2 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr1.sum
%6 = bitcast i8 addrspace(1)* %add.ptr2 to i32 addrspace(1)*
- %7 = load i32 addrspace(1)* %6, align 4
- %add.ptr3 = getelementptr inbounds i8 addrspace(1)* %main.addr.011, i32 %add.ptr4.sum
+ %7 = load i32, i32 addrspace(1)* %6, align 4
+ %add.ptr3 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr4.sum
%8 = bitcast i8 addrspace(1)* %add.ptr3 to i32 addrspace(1)*
- %9 = load i32 addrspace(1)* %8, align 4
- %add.ptr6 = getelementptr inbounds i8 addrspace(1)* %main.addr.011, i32 undef
+ %9 = load i32, i32 addrspace(1)* %8, align 4
+ %add.ptr6 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 undef
br i1 undef, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
@@ -56,20 +56,20 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body, %for.body.lr.ph
%main.addr.011 = phi i8 addrspace(1)* [ %main, %for.body.lr.ph ], [ %add.ptr6, %for.body ]
%0 = bitcast i8 addrspace(1)* %main.addr.011 to i32 addrspace(1)*
- %1 = load i32 addrspace(1)* %0, align 4
- %add.ptr = getelementptr inbounds i8 addrspace(1)* %main.addr.011, i32 %main_stride
+ %1 = load i32, i32 addrspace(1)* %0, align 4
+ %add.ptr = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %main_stride
%2 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)*
- %3 = load i32 addrspace(1)* %2, align 4
- %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %main.addr.011, i32 %add.ptr.sum
+ %3 = load i32, i32 addrspace(1)* %2, align 4
+ %add.ptr1 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr.sum
%4 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)*
- %5 = load i32 addrspace(1)* %4, align 4
- %add.ptr2 = getelementptr inbounds i8 addrspace(1)* %main.addr.011, i32 %add.ptr1.sum
+ %5 = load i32, i32 addrspace(1)* %4, align 4
+ %add.ptr2 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr1.sum
%6 = bitcast i8 addrspace(1)* %add.ptr2 to i32 addrspace(1)*
- %7 = load i32 addrspace(1)* %6, align 4
- %add.ptr3 = getelementptr inbounds i8 addrspace(1)* %main.addr.011, i32 %add.ptr4.sum
+ %7 = load i32, i32 addrspace(1)* %6, align 4
+ %add.ptr3 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr4.sum
%8 = bitcast i8 addrspace(1)* %add.ptr3 to i32 addrspace(1)*
- %9 = load i32 addrspace(1)* %8, align 4
- %add.ptr6 = getelementptr inbounds i8 addrspace(1)* %main.addr.011, i32 undef
+ %9 = load i32, i32 addrspace(1)* %8, align 4
+ %add.ptr6 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 undef
br i1 undef, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
@@ -92,20 +92,20 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body, %for.body.lr.ph
%main.addr.011 = phi i8 addrspace(1)* [ %main, %for.body.lr.ph ], [ %add.ptr6, %for.body ]
%0 = bitcast i8 addrspace(1)* %main.addr.011 to i32 addrspace(1)*
- %1 = load i32 addrspace(1)* %0, align 4
- %add.ptr = getelementptr inbounds i8 addrspace(1)* %main.addr.011, i32 %main_stride
+ %1 = load i32, i32 addrspace(1)* %0, align 4
+ %add.ptr = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %main_stride
%2 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)*
- %3 = load i32 addrspace(1)* %2, align 4
- %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %main.addr.011, i32 %add.ptr.sum
+ %3 = load i32, i32 addrspace(1)* %2, align 4
+ %add.ptr1 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr.sum
%4 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)*
- %5 = load i32 addrspace(1)* %4, align 4
- %add.ptr2 = getelementptr inbounds i8 addrspace(1)* %main.addr.011, i32 %add.ptr1.sum
+ %5 = load i32, i32 addrspace(1)* %4, align 4
+ %add.ptr2 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr1.sum
%6 = bitcast i8 addrspace(1)* %add.ptr2 to i32 addrspace(1)*
- %7 = load i32 addrspace(1)* %6, align 4
- %add.ptr3 = getelementptr inbounds i8 addrspace(1)* %main.addr.011, i32 %add.ptr4.sum
+ %7 = load i32, i32 addrspace(1)* %6, align 4
+ %add.ptr3 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr4.sum
%8 = bitcast i8 addrspace(1)* %add.ptr3 to i32 addrspace(1)*
- %9 = load i32 addrspace(1)* %8, align 4
- %add.ptr6 = getelementptr inbounds i8 addrspace(1)* %main.addr.011, i32 undef
+ %9 = load i32, i32 addrspace(1)* %8, align 4
+ %add.ptr6 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 undef
br i1 undef, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
diff --git a/test/CodeGen/R600/unroll.ll b/test/CodeGen/R600/unroll.ll
index e0035ea..ca8d822 100644
--- a/test/CodeGen/R600/unroll.ll
+++ b/test/CodeGen/R600/unroll.ll
@@ -20,7 +20,7 @@ loop.header:
br label %loop.body
loop.body:
- %ptr = getelementptr [32 x i32]* %0, i32 0, i32 %counter
+ %ptr = getelementptr [32 x i32], [32 x i32]* %0, i32 0, i32 %counter
store i32 %counter, i32* %ptr
br label %loop.inc
@@ -30,8 +30,8 @@ loop.inc:
br i1 %1, label %exit, label %loop.header
exit:
- %2 = getelementptr [32 x i32]* %0, i32 0, i32 5
- %3 = load i32* %2
+ %2 = getelementptr [32 x i32], [32 x i32]* %0, i32 0, i32 5
+ %3 = load i32, i32* %2
store i32 %3, i32 addrspace(1)* %out
ret void
}
diff --git a/test/CodeGen/R600/urem.ll b/test/CodeGen/R600/urem.ll
index aa2a3eb..62841ec 100644
--- a/test/CodeGen/R600/urem.ll
+++ b/test/CodeGen/R600/urem.ll
@@ -10,9 +10,9 @@
; SI: s_endpgm
; EG: CF_END
define void @test_urem_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %a = load i32 addrspace(1)* %in
- %b = load i32 addrspace(1)* %b_ptr
+ %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %a = load i32, i32 addrspace(1)* %in
+ %b = load i32, i32 addrspace(1)* %b_ptr
%result = urem i32 %a, %b
store i32 %result, i32 addrspace(1)* %out
ret void
@@ -27,7 +27,7 @@ define void @test_urem_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
; SI: buffer_store_dword
; SI: s_endpgm
define void @test_urem_i32_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %num = load i32 addrspace(1) * %in
+ %num = load i32, i32 addrspace(1) * %in
%result = urem i32 %num, 7
store i32 %result, i32 addrspace(1)* %out
ret void
@@ -37,9 +37,9 @@ define void @test_urem_i32_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
; SI: s_endpgm
; EG: CF_END
define void @test_urem_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32> addrspace(1)* %in
- %b = load <2 x i32> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
+ %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
+ %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr
%result = urem <2 x i32> %a, %b
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
ret void
@@ -49,9 +49,9 @@ define void @test_urem_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1
; SI: s_endpgm
; EG: CF_END
define void @test_urem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32> addrspace(1)* %in
- %b = load <4 x i32> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
+ %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
+ %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
%result = urem <4 x i32> %a, %b
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
@@ -61,9 +61,9 @@ define void @test_urem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1
; SI: s_endpgm
; EG: CF_END
define void @test_urem_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1
- %a = load i64 addrspace(1)* %in
- %b = load i64 addrspace(1)* %b_ptr
+ %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
+ %a = load i64, i64 addrspace(1)* %in
+ %b = load i64, i64 addrspace(1)* %b_ptr
%result = urem i64 %a, %b
store i64 %result, i64 addrspace(1)* %out
ret void
@@ -73,9 +73,9 @@ define void @test_urem_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
; SI: s_endpgm
; EG: CF_END
define void @test_urem_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1
- %a = load <2 x i64> addrspace(1)* %in
- %b = load <2 x i64> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1
+ %a = load <2 x i64>, <2 x i64> addrspace(1)* %in
+ %b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr
%result = urem <2 x i64> %a, %b
store <2 x i64> %result, <2 x i64> addrspace(1)* %out
ret void
@@ -85,9 +85,9 @@ define void @test_urem_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1
; SI: s_endpgm
; EG: CF_END
define void @test_urem_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1
- %a = load <4 x i64> addrspace(1)* %in
- %b = load <4 x i64> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1
+ %a = load <4 x i64>, <4 x i64> addrspace(1)* %in
+ %b = load <4 x i64>, <4 x i64> addrspace(1)* %b_ptr
%result = urem <4 x i64> %a, %b
store <4 x i64> %result, <4 x i64> addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/usubo.ll b/test/CodeGen/R600/usubo.ll
index be1e666..a753ca4 100644
--- a/test/CodeGen/R600/usubo.ll
+++ b/test/CodeGen/R600/usubo.ll
@@ -30,8 +30,8 @@ define void @s_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32
; FUNC-LABEL: {{^}}v_usubo_i32:
; SI: v_subrev_i32_e32
define void @v_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
- %a = load i32 addrspace(1)* %aptr, align 4
- %b = load i32 addrspace(1)* %bptr, align 4
+ %a = load i32, i32 addrspace(1)* %aptr, align 4
+ %b = load i32, i32 addrspace(1)* %bptr, align 4
%usub = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b) nounwind
%val = extractvalue { i32, i1 } %usub, 0
%carry = extractvalue { i32, i1 } %usub, 1
@@ -56,8 +56,8 @@ define void @s_usubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64
; SI: v_sub_i32
; SI: v_subb_u32
define void @v_usubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
- %a = load i64 addrspace(1)* %aptr, align 4
- %b = load i64 addrspace(1)* %bptr, align 4
+ %a = load i64, i64 addrspace(1)* %aptr, align 4
+ %b = load i64, i64 addrspace(1)* %bptr, align 4
%usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) nounwind
%val = extractvalue { i64, i1 } %usub, 0
%carry = extractvalue { i64, i1 } %usub, 1
diff --git a/test/CodeGen/R600/v_cndmask.ll b/test/CodeGen/R600/v_cndmask.ll
index 85936ec..c368c5a 100644
--- a/test/CodeGen/R600/v_cndmask.ll
+++ b/test/CodeGen/R600/v_cndmask.ll
@@ -10,8 +10,8 @@ declare i32 @llvm.r600.read.tidig.x() #1
; SI: s_endpgm
define void @v_cnd_nan_nosgpr(float addrspace(1)* %out, i32 %c, float addrspace(1)* %fptr) #0 {
%idx = call i32 @llvm.r600.read.tidig.x() #1
- %f.gep = getelementptr float addrspace(1)* %fptr, i32 %idx
- %f = load float addrspace(1)* %fptr
+ %f.gep = getelementptr float, float addrspace(1)* %fptr, i32 %idx
+ %f = load float, float addrspace(1)* %fptr
%setcc = icmp ne i32 %c, 0
%select = select i1 %setcc, float 0xFFFFFFFFE0000000, float %f
store float %select, float addrspace(1)* %out
diff --git a/test/CodeGen/R600/valu-i1.ll b/test/CodeGen/R600/valu-i1.ll
index 5a3c2ec..7d0ebd1 100644
--- a/test/CodeGen/R600/valu-i1.ll
+++ b/test/CodeGen/R600/valu-i1.ll
@@ -15,18 +15,18 @@ entry:
]
case0:
- %arrayidx1 = getelementptr i32 addrspace(1)* %dst, i32 %b
+ %arrayidx1 = getelementptr i32, i32 addrspace(1)* %dst, i32 %b
store i32 0, i32 addrspace(1)* %arrayidx1, align 4
br label %end
case1:
- %arrayidx5 = getelementptr i32 addrspace(1)* %dst, i32 %b
+ %arrayidx5 = getelementptr i32, i32 addrspace(1)* %dst, i32 %b
store i32 1, i32 addrspace(1)* %arrayidx5, align 4
br label %end
default:
%cmp8 = icmp eq i32 %a, 2
- %arrayidx10 = getelementptr i32 addrspace(1)* %dst, i32 %b
+ %arrayidx10 = getelementptr i32, i32 addrspace(1)* %dst, i32 %b
br i1 %cmp8, label %if, label %else
if:
@@ -42,8 +42,8 @@ end:
}
; SI-LABEL: @simple_test_v_if
-; SI: v_cmp_ne_i32_e64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0
-; SI: s_and_saveexec_b64 [[BR_SREG]], [[BR_SREG]]
+; SI: v_cmp_ne_i32_e32 vcc, 0, v{{[0-9]+}}
+; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc
; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]]
; SI: ; BB#1
@@ -59,7 +59,7 @@ define void @simple_test_v_if(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1
br i1 %is.0, label %store, label %exit
store:
- %gep = getelementptr i32 addrspace(1)* %dst, i32 %tid
+ %gep = getelementptr i32, i32 addrspace(1)* %dst, i32 %tid
store i32 999, i32 addrspace(1)* %gep
ret void
@@ -68,8 +68,8 @@ exit:
}
; SI-LABEL: @simple_test_v_loop
-; SI: v_cmp_ne_i32_e64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0
-; SI: s_and_saveexec_b64 [[BR_SREG]], [[BR_SREG]]
+; SI: v_cmp_ne_i32_e32 vcc, 0, v{{[0-9]+}}
+; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc
; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]]
; SI: s_cbranch_execz BB2_2
@@ -93,9 +93,9 @@ entry:
loop:
%i = phi i32 [%tid, %entry], [%i.inc, %loop]
- %gep.src = getelementptr i32 addrspace(1)* %src, i32 %i
- %gep.dst = getelementptr i32 addrspace(1)* %dst, i32 %i
- %load = load i32 addrspace(1)* %src
+ %gep.src = getelementptr i32, i32 addrspace(1)* %src, i32 %i
+ %gep.dst = getelementptr i32, i32 addrspace(1)* %dst, i32 %i
+ %load = load i32, i32 addrspace(1)* %src
store i32 %load, i32 addrspace(1)* %gep.dst
%i.inc = add nsw i32 %i, 1
%cmp = icmp eq i32 %limit, %i.inc
@@ -111,8 +111,8 @@ exit:
; Branch to exit if uniformly not taken
; SI: ; BB#0:
; SI: buffer_load_dword [[VBOUND:v[0-9]+]]
-; SI: v_cmp_gt_i32_e64 [[OUTER_CMP_SREG:s\[[0-9]+:[0-9]+\]]]
-; SI: s_and_saveexec_b64 [[OUTER_CMP_SREG]], [[OUTER_CMP_SREG]]
+; SI: v_cmp_lt_i32_e32 vcc
+; SI: s_and_saveexec_b64 [[OUTER_CMP_SREG:s\[[0-9]+:[0-9]+\]]], vcc
; SI: s_xor_b64 [[OUTER_CMP_SREG]], exec, [[OUTER_CMP_SREG]]
; SI: s_cbranch_execz BB3_2
@@ -125,8 +125,8 @@ exit:
; SI: BB3_3:
; SI: buffer_load_dword [[B:v[0-9]+]]
; SI: buffer_load_dword [[A:v[0-9]+]]
-; SI-DAG: v_cmp_ne_i32_e64 [[NEG1_CHECK_0:s\[[0-9]+:[0-9]+\]]], [[A]], -1
-; SI-DAG: v_cmp_ne_i32_e64 [[NEG1_CHECK_1:s\[[0-9]+:[0-9]+\]]], [[B]], -1
+; SI-DAG: v_cmp_ne_i32_e64 [[NEG1_CHECK_0:s\[[0-9]+:[0-9]+\]]], -1, [[A]]
+; SI-DAG: v_cmp_ne_i32_e32 [[NEG1_CHECK_1:vcc]], -1, [[B]]
; SI: s_and_b64 [[ORNEG1:s\[[0-9]+:[0-9]+\]]], [[NEG1_CHECK_1]], [[NEG1_CHECK_0]]
; SI: s_and_saveexec_b64 [[ORNEG1]], [[ORNEG1]]
; SI: s_xor_b64 [[ORNEG1]], exec, [[ORNEG1]]
@@ -154,8 +154,8 @@ define void @multi_vcond_loop(i32 addrspace(1)* noalias nocapture %arg, i32 addr
bb:
%tmp = tail call i32 @llvm.r600.read.tidig.x() #0
%tmp4 = sext i32 %tmp to i64
- %tmp5 = getelementptr inbounds i32 addrspace(1)* %arg3, i64 %tmp4
- %tmp6 = load i32 addrspace(1)* %tmp5, align 4
+ %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg3, i64 %tmp4
+ %tmp6 = load i32, i32 addrspace(1)* %tmp5, align 4
%tmp7 = icmp sgt i32 %tmp6, 0
%tmp8 = sext i32 %tmp6 to i64
br i1 %tmp7, label %bb10, label %bb26
@@ -163,10 +163,10 @@ bb:
bb10: ; preds = %bb, %bb20
%tmp11 = phi i64 [ %tmp23, %bb20 ], [ 0, %bb ]
%tmp12 = add nsw i64 %tmp11, %tmp4
- %tmp13 = getelementptr inbounds i32 addrspace(1)* %arg1, i64 %tmp12
- %tmp14 = load i32 addrspace(1)* %tmp13, align 4
- %tmp15 = getelementptr inbounds i32 addrspace(1)* %arg2, i64 %tmp12
- %tmp16 = load i32 addrspace(1)* %tmp15, align 4
+ %tmp13 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp12
+ %tmp14 = load i32, i32 addrspace(1)* %tmp13, align 4
+ %tmp15 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp12
+ %tmp16 = load i32, i32 addrspace(1)* %tmp15, align 4
%tmp17 = icmp ne i32 %tmp14, -1
%tmp18 = icmp ne i32 %tmp16, -1
%tmp19 = and i1 %tmp17, %tmp18
@@ -174,7 +174,7 @@ bb10: ; preds = %bb, %bb20
bb20: ; preds = %bb10
%tmp21 = add nsw i32 %tmp16, %tmp14
- %tmp22 = getelementptr inbounds i32 addrspace(1)* %arg, i64 %tmp12
+ %tmp22 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp12
store i32 %tmp21, i32 addrspace(1)* %tmp22, align 4
%tmp23 = add nuw nsw i64 %tmp11, 1
%tmp24 = icmp slt i64 %tmp23, %tmp8
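
The check-line rewrites in valu-i1.ll track a codegen change: with the constant commuted into the first source operand, these comparisons fit the 32-bit VOPC encoding, which implicitly writes its result mask to vcc, so the checks capture vcc rather than an arbitrary SGPR pair. An illustrative reduced test in the same style (the checks mirror the patterns above; the exact instruction selection is the backend's, not guaranteed by this sketch):

; SI: v_cmp_ne_i32_e32 vcc, 0, v{{[0-9]+}}
; SI: s_and_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, vcc
define void @divergent_branch(i32 addrspace(1)* %dst) {
entry:
  %tid = call i32 @llvm.r600.read.tidig.x()
  %is.0 = icmp ne i32 %tid, 0
  br i1 %is.0, label %store, label %exit
store:
  %gep = getelementptr i32, i32 addrspace(1)* %dst, i32 %tid
  store i32 1, i32 addrspace(1)* %gep
  br label %exit
exit:
  ret void
}

declare i32 @llvm.r600.read.tidig.x()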
diff --git a/test/CodeGen/R600/vector-alloca.ll b/test/CodeGen/R600/vector-alloca.ll
index 228868a..6f3b484 100644
--- a/test/CodeGen/R600/vector-alloca.ll
+++ b/test/CodeGen/R600/vector-alloca.ll
@@ -13,16 +13,16 @@
define void @vector_read(i32 addrspace(1)* %out, i32 %index) {
entry:
%0 = alloca [4 x i32]
- %x = getelementptr [4 x i32]* %0, i32 0, i32 0
- %y = getelementptr [4 x i32]* %0, i32 0, i32 1
- %z = getelementptr [4 x i32]* %0, i32 0, i32 2
- %w = getelementptr [4 x i32]* %0, i32 0, i32 3
+ %x = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 0
+ %y = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 1
+ %z = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 2
+ %w = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 3
store i32 0, i32* %x
store i32 1, i32* %y
store i32 2, i32* %z
store i32 3, i32* %w
- %1 = getelementptr [4 x i32]* %0, i32 0, i32 %index
- %2 = load i32* %1
+ %1 = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 %index
+ %2 = load i32, i32* %1
store i32 %2, i32 addrspace(1)* %out
ret void
}
@@ -37,18 +37,18 @@ entry:
define void @vector_write(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) {
entry:
%0 = alloca [4 x i32]
- %x = getelementptr [4 x i32]* %0, i32 0, i32 0
- %y = getelementptr [4 x i32]* %0, i32 0, i32 1
- %z = getelementptr [4 x i32]* %0, i32 0, i32 2
- %w = getelementptr [4 x i32]* %0, i32 0, i32 3
+ %x = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 0
+ %y = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 1
+ %z = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 2
+ %w = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 3
store i32 0, i32* %x
store i32 0, i32* %y
store i32 0, i32* %z
store i32 0, i32* %w
- %1 = getelementptr [4 x i32]* %0, i32 0, i32 %w_index
+ %1 = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 %w_index
store i32 1, i32* %1
- %2 = getelementptr [4 x i32]* %0, i32 0, i32 %r_index
- %3 = load i32* %2
+ %2 = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 %r_index
+ %3 = load i32, i32* %2
store i32 %3, i32 addrspace(1)* %out
ret void
}
@@ -60,18 +60,18 @@ entry:
define void @bitcast_gep(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) {
entry:
%0 = alloca [4 x i32]
- %x = getelementptr [4 x i32]* %0, i32 0, i32 0
- %y = getelementptr [4 x i32]* %0, i32 0, i32 1
- %z = getelementptr [4 x i32]* %0, i32 0, i32 2
- %w = getelementptr [4 x i32]* %0, i32 0, i32 3
+ %x = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 0
+ %y = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 1
+ %z = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 2
+ %w = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 3
store i32 0, i32* %x
store i32 0, i32* %y
store i32 0, i32* %z
store i32 0, i32* %w
- %1 = getelementptr [4 x i32]* %0, i32 0, i32 1
+ %1 = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 1
%2 = bitcast i32* %1 to [4 x i32]*
- %3 = getelementptr [4 x i32]* %2, i32 0, i32 0
- %4 = load i32* %3
+ %3 = getelementptr [4 x i32], [4 x i32]* %2, i32 0, i32 0
+ %4 = load i32, i32* %3
store i32 %4, i32 addrspace(1)* %out
ret void
}
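
Because these tests address into a stack array rather than through a raw pointer, every getelementptr carries two indices: a leading i32 0 that steps across the [4 x i32]* operand itself, then the element index within the array. A standalone sketch (name illustrative):

define i32 @alloca_gep_sketch(i32 %index) {
entry:
  %arr = alloca [4 x i32]
  ; i32 0 selects the array the pointer points at; the second index picks the element.
  %elt = getelementptr [4 x i32], [4 x i32]* %arr, i32 0, i32 %index
  store i32 42, i32* %elt
  %val = load i32, i32* %elt
  ret i32 %val
}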
diff --git a/test/CodeGen/R600/vertex-fetch-encoding.ll b/test/CodeGen/R600/vertex-fetch-encoding.ll
index e4d117f..fb6a17e 100644
--- a/test/CodeGen/R600/vertex-fetch-encoding.ll
+++ b/test/CodeGen/R600/vertex-fetch-encoding.ll
@@ -8,7 +8,7 @@
define void @vtx_fetch32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
- %0 = load i32 addrspace(1)* %in
+ %0 = load i32, i32 addrspace(1)* %in
store i32 %0, i32 addrspace(1)* %out
ret void
}
@@ -19,7 +19,7 @@ entry:
define void @vtx_fetch128(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
entry:
- %0 = load <4 x i32> addrspace(1)* %in
+ %0 = load <4 x i32>, <4 x i32> addrspace(1)* %in
store <4 x i32> %0, <4 x i32> addrspace(1)* %out
ret void
}
diff --git a/test/CodeGen/R600/vop-shrink.ll b/test/CodeGen/R600/vop-shrink.ll
index d5a46e3..9b2f229 100644
--- a/test/CodeGen/R600/vop-shrink.ll
+++ b/test/CodeGen/R600/vop-shrink.ll
@@ -15,7 +15,7 @@ entry:
br i1 %tmp, label %if, label %else
if: ; preds = %entry
- %tmp1 = getelementptr i32 addrspace(1)* %out, i32 1
+ %tmp1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
%tmp2 = extractelement <4 x i32> %sgpr, i32 1
store i32 %tmp2, i32 addrspace(1)* %out
br label %endif
diff --git a/test/CodeGen/R600/vselect.ll b/test/CodeGen/R600/vselect.ll
index a6152f7..a3014b0 100644
--- a/test/CodeGen/R600/vselect.ll
+++ b/test/CodeGen/R600/vselect.ll
@@ -12,8 +12,8 @@
define void @test_select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in0, <2 x i32> addrspace(1)* %in1) {
entry:
- %0 = load <2 x i32> addrspace(1)* %in0
- %1 = load <2 x i32> addrspace(1)* %in1
+ %0 = load <2 x i32>, <2 x i32> addrspace(1)* %in0
+ %1 = load <2 x i32>, <2 x i32> addrspace(1)* %in1
%cmp = icmp ne <2 x i32> %0, %1
%result = select <2 x i1> %cmp, <2 x i32> %0, <2 x i32> %1
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
@@ -30,8 +30,8 @@ entry:
define void @test_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in0, <2 x float> addrspace(1)* %in1) {
entry:
- %0 = load <2 x float> addrspace(1)* %in0
- %1 = load <2 x float> addrspace(1)* %in1
+ %0 = load <2 x float>, <2 x float> addrspace(1)* %in0
+ %1 = load <2 x float>, <2 x float> addrspace(1)* %in1
%cmp = fcmp une <2 x float> %0, %1
%result = select <2 x i1> %cmp, <2 x float> %0, <2 x float> %1
store <2 x float> %result, <2 x float> addrspace(1)* %out
@@ -52,8 +52,8 @@ entry:
define void @test_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in0, <4 x i32> addrspace(1)* %in1) {
entry:
- %0 = load <4 x i32> addrspace(1)* %in0
- %1 = load <4 x i32> addrspace(1)* %in1
+ %0 = load <4 x i32>, <4 x i32> addrspace(1)* %in0
+ %1 = load <4 x i32>, <4 x i32> addrspace(1)* %in1
%cmp = icmp ne <4 x i32> %0, %1
%result = select <4 x i1> %cmp, <4 x i32> %0, <4 x i32> %1
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
@@ -68,8 +68,8 @@ entry:
define void @test_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in0, <4 x float> addrspace(1)* %in1) {
entry:
- %0 = load <4 x float> addrspace(1)* %in0
- %1 = load <4 x float> addrspace(1)* %in1
+ %0 = load <4 x float>, <4 x float> addrspace(1)* %in0
+ %1 = load <4 x float>, <4 x float> addrspace(1)* %in1
%cmp = fcmp une <4 x float> %0, %1
%result = select <4 x i1> %cmp, <4 x float> %0, <4 x float> %1
store <4 x float> %result, <4 x float> addrspace(1)* %out
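
Each of these tests builds an elementwise select: the vector compare yields an <N x i1> mask, and select applies it lane by lane. A minimal sketch of the pattern (function name illustrative):

define <2 x i32> @vselect_sketch(<2 x i32> %a, <2 x i32> %b) {
  %mask = icmp ne <2 x i32> %a, %b                       ; one i1 per lane
  %sel = select <2 x i1> %mask, <2 x i32> %a, <2 x i32> %b
  ret <2 x i32> %sel
}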
diff --git a/test/CodeGen/R600/vtx-fetch-branch.ll b/test/CodeGen/R600/vtx-fetch-branch.ll
index bcbe34e..4584d6e 100644
--- a/test/CodeGen/R600/vtx-fetch-branch.ll
+++ b/test/CodeGen/R600/vtx-fetch-branch.ll
@@ -16,7 +16,7 @@ entry:
br i1 %0, label %endif, label %if
if:
- %1 = load i32 addrspace(1)* %in
+ %1 = load i32, i32 addrspace(1)* %in
br label %endif
endif:
diff --git a/test/CodeGen/R600/vtx-schedule.ll b/test/CodeGen/R600/vtx-schedule.ll
index 8254c99..912e258 100644
--- a/test/CodeGen/R600/vtx-schedule.ll
+++ b/test/CodeGen/R600/vtx-schedule.ll
@@ -11,8 +11,8 @@
; CHECK: VTX_READ_32 [[IN1:T[0-9]+\.X]], [[IN1]], 0
define void @test(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* addrspace(1)* nocapture %in0) {
entry:
- %0 = load i32 addrspace(1)* addrspace(1)* %in0
- %1 = load i32 addrspace(1)* %0
+ %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %in0
+ %1 = load i32, i32 addrspace(1)* %0
store i32 %1, i32 addrspace(1)* %out
ret void
}
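
vtx-schedule.ll shows where the explicit pointee type earns its keep: the first load's result is itself a pointer, and writing load i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %in0 states the loaded type directly instead of leaving the reader to peel one level off the operand type. A reduced sketch of the same double dereference (name illustrative):

define i32 @deref_sketch(i32 addrspace(1)* addrspace(1)* %pp) {
  ; First type: what is loaded (a global i32 pointer); second type: the operand itself.
  %p = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %pp
  %v = load i32, i32 addrspace(1)* %p
  ret i32 %v
}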
diff --git a/test/CodeGen/R600/wait.ll b/test/CodeGen/R600/wait.ll
index 43561aa..5cc7577 100644
--- a/test/CodeGen/R600/wait.ll
+++ b/test/CodeGen/R600/wait.ll
@@ -8,17 +8,17 @@
; CHECK: s_endpgm
define void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, <16 x i8> addrspace(2)* inreg %arg3, <16 x i8> addrspace(2)* inreg %arg4, i32 inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, float addrspace(2)* inreg %constptr) #0 {
main_body:
- %tmp = getelementptr <16 x i8> addrspace(2)* %arg3, i32 0
- %tmp10 = load <16 x i8> addrspace(2)* %tmp, !tbaa !0
+ %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg3, i32 0
+ %tmp10 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
%tmp11 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %tmp10, i32 0, i32 %arg6)
%tmp12 = extractelement <4 x float> %tmp11, i32 0
%tmp13 = extractelement <4 x float> %tmp11, i32 1
call void @llvm.AMDGPU.barrier.global() #1
%tmp14 = extractelement <4 x float> %tmp11, i32 2
; %tmp15 = extractelement <4 x float> %tmp11, i32 3
- %tmp15 = load float addrspace(2)* %constptr, align 4 ; Force waiting for expcnt and lgkmcnt
- %tmp16 = getelementptr <16 x i8> addrspace(2)* %arg3, i32 1
- %tmp17 = load <16 x i8> addrspace(2)* %tmp16, !tbaa !0
+ %tmp15 = load float, float addrspace(2)* %constptr, align 4 ; Force waiting for expcnt and lgkmcnt
+ %tmp16 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg3, i32 1
+ %tmp17 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp16, !tbaa !0
%tmp18 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %tmp17, i32 0, i32 %arg6)
%tmp19 = extractelement <4 x float> %tmp18, i32 0
%tmp20 = extractelement <4 x float> %tmp18, i32 1
diff --git a/test/CodeGen/R600/wrong-transalu-pos-fix.ll b/test/CodeGen/R600/wrong-transalu-pos-fix.ll
index 4e77c07..5ab4653 100644
--- a/test/CodeGen/R600/wrong-transalu-pos-fix.ll
+++ b/test/CodeGen/R600/wrong-transalu-pos-fix.ll
@@ -35,7 +35,7 @@ entry:
%z.i8.i = tail call i32 @llvm.r600.read.tidig.z() #1
%add.i = add i32 %z.i8.i, %mul33.i
%add13 = add i32 %add.i, %add
- %arrayidx = getelementptr inbounds i32 addrspace(1)* %out, i32 %add13
+ %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %add13
store i32 %mul3, i32 addrspace(1)* %arrayidx, align 4
ret void
}
diff --git a/test/CodeGen/R600/xor.ll b/test/CodeGen/R600/xor.ll
index 1526e28..089db59 100644
--- a/test/CodeGen/R600/xor.ll
+++ b/test/CodeGen/R600/xor.ll
@@ -11,8 +11,8 @@
; SI: v_xor_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @xor_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in0, <2 x i32> addrspace(1)* %in1) {
- %a = load <2 x i32> addrspace(1) * %in0
- %b = load <2 x i32> addrspace(1) * %in1
+ %a = load <2 x i32>, <2 x i32> addrspace(1) * %in0
+ %b = load <2 x i32>, <2 x i32> addrspace(1) * %in1
%result = xor <2 x i32> %a, %b
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
ret void
@@ -30,8 +30,8 @@ define void @xor_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in
; SI: v_xor_b32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
define void @xor_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in0, <4 x i32> addrspace(1)* %in1) {
- %a = load <4 x i32> addrspace(1) * %in0
- %b = load <4 x i32> addrspace(1) * %in1
+ %a = load <4 x i32>, <4 x i32> addrspace(1) * %in0
+ %b = load <4 x i32>, <4 x i32> addrspace(1) * %in1
%result = xor <4 x i32> %a, %b
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
@@ -40,15 +40,15 @@ define void @xor_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in
; FUNC-LABEL: {{^}}xor_i1:
; EG: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}}
-; SI-DAG: v_cmp_ge_f32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, 0
-; SI-DAG: v_cmp_ge_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, 1.0
+; SI-DAG: v_cmp_le_f32_e32 [[CMP0:vcc]], 0, {{v[0-9]+}}
+; SI-DAG: v_cmp_le_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], 1.0, {{v[0-9]+}}
; SI: s_xor_b64 [[XOR:s\[[0-9]+:[0-9]+\]]], [[CMP0]], [[CMP1]]
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, [[XOR]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @xor_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
- %a = load float addrspace(1) * %in0
- %b = load float addrspace(1) * %in1
+ %a = load float, float addrspace(1) * %in0
+ %b = load float, float addrspace(1) * %in1
%acmp = fcmp oge float %a, 0.000000e+00
%bcmp = fcmp oge float %b, 1.000000e+00
%xor = xor i1 %acmp, %bcmp
@@ -64,8 +64,8 @@ define void @xor_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float ad
; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[XOR]]
; SI: buffer_store_byte [[RESULT]]
define void @v_xor_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in0, i1 addrspace(1)* %in1) {
- %a = load i1 addrspace(1)* %in0
- %b = load i1 addrspace(1)* %in1
+ %a = load i1, i1 addrspace(1)* %in0
+ %b = load i1, i1 addrspace(1)* %in1
%xor = xor i1 %a, %b
store i1 %xor, i1 addrspace(1)* %out
ret void
@@ -74,8 +74,8 @@ define void @v_xor_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in0, i1 addrspace
; FUNC-LABEL: {{^}}vector_xor_i32:
; SI: v_xor_b32_e32
define void @vector_xor_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) {
- %a = load i32 addrspace(1)* %in0
- %b = load i32 addrspace(1)* %in1
+ %a = load i32, i32 addrspace(1)* %in0
+ %b = load i32, i32 addrspace(1)* %in1
%result = xor i32 %a, %b
store i32 %result, i32 addrspace(1)* %out
ret void
@@ -100,8 +100,8 @@ define void @scalar_not_i32(i32 addrspace(1)* %out, i32 %a) {
; FUNC-LABEL: {{^}}vector_not_i32:
; SI: v_not_b32
define void @vector_not_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) {
- %a = load i32 addrspace(1)* %in0
- %b = load i32 addrspace(1)* %in1
+ %a = load i32, i32 addrspace(1)* %in0
+ %b = load i32, i32 addrspace(1)* %in1
%result = xor i32 %a, -1
store i32 %result, i32 addrspace(1)* %out
ret void
@@ -112,8 +112,8 @@ define void @vector_not_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32
; SI: v_xor_b32_e32
; SI: s_endpgm
define void @vector_xor_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i64 addrspace(1)* %in1) {
- %a = load i64 addrspace(1)* %in0
- %b = load i64 addrspace(1)* %in1
+ %a = load i64, i64 addrspace(1)* %in0
+ %b = load i64, i64 addrspace(1)* %in1
%result = xor i64 %a, %b
store i64 %result, i64 addrspace(1)* %out
ret void
@@ -140,8 +140,8 @@ define void @scalar_not_i64(i64 addrspace(1)* %out, i64 %a) {
; SI: v_not_b32
; SI: v_not_b32
define void @vector_not_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i64 addrspace(1)* %in1) {
- %a = load i64 addrspace(1)* %in0
- %b = load i64 addrspace(1)* %in1
+ %a = load i64, i64 addrspace(1)* %in0
+ %b = load i64, i64 addrspace(1)* %in1
%result = xor i64 %a, -1
store i64 %result, i64 addrspace(1)* %out
ret void
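
LLVM IR has no dedicated NOT instruction; the vector_not tests spell it as xor with -1, which the SI checks above expect to lower to v_not_b32 (one per 32-bit half of an i64). A scalar sketch of the same idiom (name illustrative):

define void @not_sketch(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
  %a = load i64, i64 addrspace(1)* %in
  %not.a = xor i64 %a, -1    ; bitwise NOT as xor with all-ones
  store i64 %not.a, i64 addrspace(1)* %out
  ret void
}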
@@ -163,7 +163,7 @@ if:
br label %endif
else:
- %2 = load i64 addrspace(1)* %in
+ %2 = load i64, i64 addrspace(1)* %in
br label %endif
endif: