diff options
Diffstat (limited to 'test')
81 files changed, 1733 insertions, 812 deletions
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 2db58b9..9fc76a9 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -71,6 +71,8 @@ if(PYTHONINTERP_FOUND) MAKE_DIRECTORY(${CMAKE_CURRENT_BINARY_DIR}/Unit) # Configuration-time: See Unit/lit.site.cfg.in + set(LLVM_BUILD_MODE "%(build_mode)s") + set(LLVM_SOURCE_DIR ${LLVM_MAIN_SRC_DIR}) set(LLVM_BINARY_DIR ${LLVM_BINARY_DIR}) set(LLVM_TOOLS_DIR "${LLVM_TOOLS_BINARY_DIR}/%(build_config)s") @@ -79,17 +81,16 @@ if(PYTHONINTERP_FOUND) set(ENABLE_SHARED ${LLVM_SHARED_LIBS_ENABLED}) set(SHLIBPATH_VAR ${SHLIBPATH_VAR}) - # lit.site.cfg uses the config-time build mode - set(LLVM_BUILD_MODE "${LLVM_BUILD_MODE}") + if(LLVM_ENABLE_ASSERTIONS AND NOT MSVC_IDE) + set(ENABLE_ASSERTIONS "1") + else() + set(ENABLE_ASSERTIONS "0") + endif() configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg @ONLY) - - # Unit/lit.site.cfg substitutes the runtime build_mode - set(LLVM_BUILD_MODE "%(build_mode)s") - configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/Unit/lit.site.cfg.in ${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg diff --git a/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll b/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll new file mode 100644 index 0000000..1b5b8a9 --- /dev/null +++ b/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll @@ -0,0 +1,12 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 | FileCheck %s +; CHECK: .zerofill __DATA,__bss,__MergedGlobals,16,2 + +%struct.config = type { i16, i16, i16, i16 } + +@prev = external global [0 x i16] +@max_lazy_match = internal unnamed_addr global i32 0, align 4 +@read_buf = external global i32 (i8*, i32)* +@window = external global [0 x i8] +@lookahead = internal unnamed_addr global i32 0, align 4 +@eofile.b = internal unnamed_addr global i1 false +@ins_h = internal unnamed_addr global i32 0, align 4 diff --git a/test/CodeGen/ARM/arm-modifier.ll b/test/CodeGen/ARM/arm-modifier.ll index 0a7bb6c..396de37 100644 --- 
a/test/CodeGen/ARM/arm-modifier.ll +++ b/test/CodeGen/ARM/arm-modifier.ll @@ -46,9 +46,9 @@ ret void define void @f3() nounwind { entry: ; CHECK: f3 -; CHECK: stm r{{[0-9]+}}, {[[REG1:(r[0-9]+)]], r{{[0-9]+}}} -; CHECK: adds lr, [[REG1]] -; CHECK: ldm r{{[0-9]+}}, {r{{[0-9]+}}, r{{[0-9]+}}} +; CHECK: stm {{lr|r[0-9]+}}, {[[REG1:(r[0-9]+)]], r{{[0-9]+}}} +; CHECK: adds {{lr|r[0-9]+}}, [[REG1]] +; CHECK: ldm {{lr|r[0-9]+}}, {r{{[0-9]+}}, r{{[0-9]+}}} %tmp = load i64* @f3_var, align 4 %tmp1 = load i64* @f3_var2, align 4 %0 = call i64 asm sideeffect "stm ${0:m}, ${1:M}\0A\09adds $3, $1\0A\09", "=*m,=r,1,r"(i64** @f3_ptr, i64 %tmp, i64 %tmp1) nounwind diff --git a/test/CodeGen/ARM/carry.ll b/test/CodeGen/ARM/carry.ll index 9b90408..06b459e 100644 --- a/test/CodeGen/ARM/carry.ll +++ b/test/CodeGen/ARM/carry.ll @@ -24,7 +24,6 @@ entry: define i64 @f3(i32 %al, i32 %bl) { ; CHECK: f3: ; CHECK: adds r -; CHECK: adcs r ; CHECK: adc r entry: ; unsigned wide add diff --git a/test/CodeGen/ARM/inlineasm3.ll b/test/CodeGen/ARM/inlineasm3.ll index 58687b9..853585d 100644 --- a/test/CodeGen/ARM/inlineasm3.ll +++ b/test/CodeGen/ARM/inlineasm3.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc < %s -march=arm -mattr=+neon,+v6t2 | FileCheck %s ; Radar 7449043 %struct.int32x4_t = type { <4 x i32> } @@ -58,3 +58,43 @@ entry: call void asm sideeffect "flds s15, $0 \0A", "*^Uvm,~{s15}"(float* @k.2126) nounwind ret i32 0 } + +; Radar 9307836 & 9119939 + +define float @t6(float %y) nounwind { +entry: +; CHECK: t6 +; CHECK: flds s15, s0 + %0 = tail call float asm "flds s15, $0", "=x"() nounwind + ret float %0 +} + +; Radar 9307836 & 9119939 + +define double @t7(double %y) nounwind { +entry: +; CHECK: t7 +; CHECK: flds s15, d0 + %0 = tail call double asm "flds s15, $0", "=x"() nounwind + ret double %0 +} + +; Radar 9307836 & 9119939 + +define float @t8(float %y) nounwind { +entry: +; CHECK: t8 +; CHECK: flds s15, s0 + %0 = tail call float asm "flds s15, $0", 
"=t"() nounwind + ret float %0 +} + +; Radar 9307836 & 9119939 + +define i32 @t9(i32 %r0) nounwind { +entry: +; CHECK: t9 +; CHECK: movw r0, #27182 + %0 = tail call i32 asm "movw $0, $1", "=r,j"(i32 27182) nounwind + ret i32 %0 +} diff --git a/test/CodeGen/ARM/vcvt_combine.ll b/test/CodeGen/ARM/vcvt_combine.ll new file mode 100644 index 0000000..3009e50 --- /dev/null +++ b/test/CodeGen/ARM/vcvt_combine.ll @@ -0,0 +1,99 @@ +; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s + +@in = global float 0x400921FA00000000, align 4 + +; Test signed conversion. +; CHECK: t0 +; CHECK-NOT: vmul +define void @t0() nounwind { +entry: + %tmp = load float* @in, align 4, !tbaa !0 + %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0 + %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1 + %mul.i = fmul <2 x float> %vecinit2.i, <float 8.000000e+00, float 8.000000e+00> + %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32> + tail call void @foo_int32x2_t(<2 x i32> %vcvt.i) nounwind + ret void +} + +declare void @foo_int32x2_t(<2 x i32>) + +; Test unsigned conversion. +; CHECK: t1 +; CHECK-NOT: vmul +define void @t1() nounwind { +entry: + %tmp = load float* @in, align 4, !tbaa !0 + %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0 + %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1 + %mul.i = fmul <2 x float> %vecinit2.i, <float 8.000000e+00, float 8.000000e+00> + %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32> + tail call void @foo_uint32x2_t(<2 x i32> %vcvt.i) nounwind + ret void +} + +declare void @foo_uint32x2_t(<2 x i32>) + +; Test which should not fold due to non-power of 2. 
+; CHECK: t2 +; CHECK: vmul +define void @t2() nounwind { +entry: + %tmp = load float* @in, align 4, !tbaa !0 + %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0 + %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1 + %mul.i = fmul <2 x float> %vecinit2.i, <float 0x401B333340000000, float 0x401B333340000000> + %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32> + tail call void @foo_int32x2_t(<2 x i32> %vcvt.i) nounwind + ret void +} + +; Test which should not fold due to power of 2 out of range. +; CHECK: t3 +; CHECK: vmul +define void @t3() nounwind { +entry: + %tmp = load float* @in, align 4, !tbaa !0 + %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0 + %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1 + %mul.i = fmul <2 x float> %vecinit2.i, <float 0x4200000000000000, float 0x4200000000000000> + %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32> + tail call void @foo_int32x2_t(<2 x i32> %vcvt.i) nounwind + ret void +} + +; Test which case where const is max power of 2 (i.e., 2^32). +; CHECK: t4 +; CHECK-NOT: vmul +define void @t4() nounwind { +entry: + %tmp = load float* @in, align 4, !tbaa !0 + %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0 + %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1 + %mul.i = fmul <2 x float> %vecinit2.i, <float 0x41F0000000000000, float 0x41F0000000000000> + %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32> + tail call void @foo_int32x2_t(<2 x i32> %vcvt.i) nounwind + ret void +} + +; Test quadword. 
+; CHECK: t5 +; CHECK-NOT: vmul +define void @t5() nounwind { +entry: + %tmp = load float* @in, align 4, !tbaa !0 + %vecinit.i = insertelement <4 x float> undef, float %tmp, i32 0 + %vecinit2.i = insertelement <4 x float> %vecinit.i, float %tmp, i32 1 + %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %tmp, i32 2 + %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %tmp, i32 3 + %mul.i = fmul <4 x float> %vecinit6.i, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00> + %vcvt.i = fptosi <4 x float> %mul.i to <4 x i32> + tail call void @foo_int32x4_t(<4 x i32> %vcvt.i) nounwind + ret void +} + +declare void @foo_int32x4_t(<4 x i32>) + +!0 = metadata !{metadata !"float", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/test/CodeGen/ARM/vdiv_combine.ll b/test/CodeGen/ARM/vdiv_combine.ll new file mode 100644 index 0000000..1387393 --- /dev/null +++ b/test/CodeGen/ARM/vdiv_combine.ll @@ -0,0 +1,102 @@ +; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s + +@in = global float 0x400921FA00000000, align 4 +@iin = global i32 -1023, align 4 +@uin = global i32 1023, align 4 + +declare void @foo_int32x4_t(<4 x i32>) + +; Test signed conversion. +; CHECK: t1 +; CHECK-NOT: vdiv +define void @t1() nounwind { +entry: + %tmp = load i32* @iin, align 4, !tbaa !3 + %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0 + %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1 + %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float> + %div.i = fdiv <2 x float> %vcvt.i, <float 8.000000e+00, float 8.000000e+00> + tail call void @foo_float32x2_t(<2 x float> %div.i) nounwind + ret void +} + +declare void @foo_float32x2_t(<2 x float>) + +; Test unsigned conversion. 
+; CHECK: t2 +; CHECK-NOT: vdiv +define void @t2() nounwind { +entry: + %tmp = load i32* @uin, align 4, !tbaa !3 + %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0 + %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1 + %vcvt.i = uitofp <2 x i32> %vecinit2.i to <2 x float> + %div.i = fdiv <2 x float> %vcvt.i, <float 8.000000e+00, float 8.000000e+00> + tail call void @foo_float32x2_t(<2 x float> %div.i) nounwind + ret void +} + +; Test which should not fold due to non-power of 2. +; CHECK: t3 +; CHECK: vdiv +define void @t3() nounwind { +entry: + %tmp = load i32* @iin, align 4, !tbaa !3 + %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0 + %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1 + %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float> + %div.i = fdiv <2 x float> %vcvt.i, <float 0x401B333340000000, float 0x401B333340000000> + tail call void @foo_float32x2_t(<2 x float> %div.i) nounwind + ret void +} + +; Test which should not fold due to power of 2 out of range. +; CHECK: t4 +; CHECK: vdiv +define void @t4() nounwind { +entry: + %tmp = load i32* @iin, align 4, !tbaa !3 + %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0 + %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1 + %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float> + %div.i = fdiv <2 x float> %vcvt.i, <float 0x4200000000000000, float 0x4200000000000000> + tail call void @foo_float32x2_t(<2 x float> %div.i) nounwind + ret void +} + +; Test case where const is max power of 2 (i.e., 2^32). 
+; CHECK: t5 +; CHECK-NOT: vdiv +define void @t5() nounwind { +entry: + %tmp = load i32* @iin, align 4, !tbaa !3 + %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0 + %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1 + %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float> + %div.i = fdiv <2 x float> %vcvt.i, <float 0x41F0000000000000, float 0x41F0000000000000> + tail call void @foo_float32x2_t(<2 x float> %div.i) nounwind + ret void +} + +; Test quadword. +; CHECK: t6 +; CHECK-NOT: vdiv +define void @t6() nounwind { +entry: + %tmp = load i32* @iin, align 4, !tbaa !3 + %vecinit.i = insertelement <4 x i32> undef, i32 %tmp, i32 0 + %vecinit2.i = insertelement <4 x i32> %vecinit.i, i32 %tmp, i32 1 + %vecinit4.i = insertelement <4 x i32> %vecinit2.i, i32 %tmp, i32 2 + %vecinit6.i = insertelement <4 x i32> %vecinit4.i, i32 %tmp, i32 3 + %vcvt.i = sitofp <4 x i32> %vecinit6.i to <4 x float> + %div.i = fdiv <4 x float> %vcvt.i, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00> + tail call void @foo_float32x4_t(<4 x float> %div.i) nounwind + ret void +} + +declare void @foo_float32x4_t(<4 x float>) + +!0 = metadata !{metadata !"float", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null} +!3 = metadata !{metadata !"int", metadata !1} diff --git a/test/CodeGen/Generic/legalize-dbg-value.ll b/test/CodeGen/Generic/legalize-dbg-value.ll deleted file mode 100644 index b71aa8a..0000000 --- a/test/CodeGen/Generic/legalize-dbg-value.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: llc < %s -o /dev/null - -; llvm.dbg.value instructions can have types which are not legal for the -; target. CodeGen should handle this. 
- -define i128 @__mulvti3(i128 %a, i128 %b) nounwind { -entry: - tail call void @llvm.dbg.value(metadata !0, i64 0, metadata !1), !dbg !11 - unreachable -} - -declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone - -!0 = metadata !{i128 170141183460469231731687303715884105727} -!1 = metadata !{i32 524544, metadata !2, metadata !"MAX", metadata !4, i32 29, metadata !8} ; [ DW_TAG_auto_variable ] -!2 = metadata !{i32 524299, metadata !3, i32 26, i32 0} ; [ DW_TAG_lexical_block ] -!3 = metadata !{i32 524334, i32 0, metadata !4, metadata !"__mulvti3", metadata !"__mulvti3", metadata !"__mulvti3", metadata !4, i32 26, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ] -!4 = metadata !{i32 524329, metadata !"mulvti3.c", metadata !"/Volumes/Sandbox/llvm/swb/Libcompiler_rt-6.roots/Libcompiler_rt-6/lib", metadata !5} ; [ DW_TAG_file_type ] -!5 = metadata !{i32 524305, i32 0, i32 1, metadata !"mulvti3.c", metadata !"/Volumes/Sandbox/llvm/swb/Libcompiler_rt-6.roots/Libcompiler_rt-6/lib", metadata !"4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build 2328)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!6 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ] -!7 = metadata !{metadata !8, metadata !8, metadata !8} -!8 = metadata !{i32 524310, metadata !4, metadata !"ti_int", metadata !9, i32 78, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ] -!9 = metadata !{i32 524329, metadata !"int_lib.h", metadata !"/Volumes/Sandbox/llvm/swb/Libcompiler_rt-6.roots/Libcompiler_rt-6/lib", metadata !5} ; [ DW_TAG_file_type ] -!10 = metadata !{i32 524324, metadata !4, metadata !"", metadata !4, i32 0, i64 128, i64 128, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!11 = metadata !{i32 29, i32 0, metadata !2, null} diff --git a/test/CodeGen/Mips/2008-07-15-SmallSection.ll b/test/CodeGen/Mips/2008-07-15-SmallSection.ll index 91efd68..4795e47 100644 --- a/test/CodeGen/Mips/2008-07-15-SmallSection.ll +++ b/test/CodeGen/Mips/2008-07-15-SmallSection.ll @@ -7,7 +7,7 @@ ; RUN: not grep {sbss} %t1 ; RUN: not grep {gp_rel} %t1 ; RUN: grep {\%hi} %t1 | count 2 -; RUN: grep {\%lo} %t1 | count 2 +; RUN: grep {\%lo} %t1 | count 3 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" target triple = "mipsallegrexel-unknown-psp-elf" diff --git a/test/CodeGen/Mips/alloca.ll b/test/CodeGen/Mips/alloca.ll index ff503ec..fb4f56c 100644 --- a/test/CodeGen/Mips/alloca.ll +++ b/test/CodeGen/Mips/alloca.ll @@ -8,11 +8,9 @@ entry: ; CHECK: subu $[[T2:[0-9]+]], $sp, $[[SZ]] ; CHECK: addu $sp, $zero, $[[T2]] ; CHECK: addiu $[[T3:[0-9]+]], $sp, [[OFF]] -; CHECK: lw $25, %call16(foo)($gp) -; CHECK: addu $4, $zero, $[[T1]] -; CHECK: jalr $25 -; CHECK: lw $25, %call16(foo)($gp) -; CHECK: addu $4, $zero, $[[T3]] +; CHECK: lw $[[T4:[0-9]+]], %call16(foo)($gp) +; CHECK: addu $25, $zero, $[[T4]] +; CHECK: addu $4, $zero, 
$[[T1]] ; CHECK: jalr $25 %tmp1 = alloca i8, i32 %size, align 4 %add.ptr = getelementptr inbounds i8* %tmp1, i32 5 diff --git a/test/CodeGen/Mips/i64arg.ll b/test/CodeGen/Mips/i64arg.ll index 9a30453..560f2e9 100644 --- a/test/CodeGen/Mips/i64arg.ll +++ b/test/CodeGen/Mips/i64arg.ll @@ -10,8 +10,8 @@ entry: ; CHECK: jalr tail call void @ff1(i32 %i, i64 1085102592623924856) nounwind ; CHECK: lw $25, %call16(ff2) -; CHECK: lw $[[R2:[0-9]+]], 80($sp) -; CHECK: lw $[[R3:[0-9]+]], 84($sp) +; CHECK: lw $[[R2:[0-9]+]], 88($sp) +; CHECK: lw $[[R3:[0-9]+]], 92($sp) ; CHECK: addu $4, $zero, $[[R2]] ; CHECK: addu $5, $zero, $[[R3]] ; CHECK: jalr $25 diff --git a/test/CodeGen/Mips/internalfunc.ll b/test/CodeGen/Mips/internalfunc.ll index 50d0993..c2a4e5c 100644 --- a/test/CodeGen/Mips/internalfunc.ll +++ b/test/CodeGen/Mips/internalfunc.ll @@ -15,7 +15,7 @@ entry: define void @caller(i32 %a0, i32 %a1) nounwind { entry: ; CHECK: lw $[[R1:[0-9]+]], %got(caller.sf1)($gp) -; CHECK: addiu ${{[0-9]+}}, $[[R1]], %lo(caller.sf1) +; CHECK: lw $25, %lo(caller.sf1)($[[R1]]) %tobool = icmp eq i32 %a1, 0 br i1 %tobool, label %if.end, label %if.then @@ -26,9 +26,9 @@ if.then: ; preds = %entry if.end: ; preds = %entry, %if.then ; CHECK: lw $[[R2:[0-9]+]], %got(sf2)($gp) -; CHECK: lw $[[R3:[0-9]+]], %got(caller.sf1)($gp) ; CHECK: addiu ${{[0-9]+}}, $[[R2]], %lo(sf2) -; CHECK: addiu ${{[0-9]+}}, $[[R3]], %lo(caller.sf1) +; CHECK: lw $[[R3:[0-9]+]], %got(caller.sf1)($gp) +; CHECK: sw ${{[0-9]+}}, %lo(caller.sf1)($[[R3]]) %tobool3 = icmp ne i32 %a0, 0 %tmp4 = load void (...)** @gf1, align 4 %cond = select i1 %tobool3, void (...)* %tmp4, void (...)* bitcast (void ()* @sf2 to void (...)*) diff --git a/test/CodeGen/Mips/largeimmprinting.ll b/test/CodeGen/Mips/largeimmprinting.ll index fd7ae9e..fcc20f7 100644 --- a/test/CodeGen/Mips/largeimmprinting.ll +++ b/test/CodeGen/Mips/largeimmprinting.ll @@ -8,7 +8,7 @@ define void @f() nounwind { entry: ; CHECK: lui $at, 65534 ; CHECK: addu $at, $sp, $at -; 
CHECK: addiu $sp, $at, -16 +; CHECK: addiu $sp, $at, -24 ; CHECK: .cprestore 65536 %agg.tmp = alloca %struct.S1, align 1 diff --git a/test/CodeGen/Mips/o32_cc_byval.ll b/test/CodeGen/Mips/o32_cc_byval.ll index b78c393..f5e1a87 100644 --- a/test/CodeGen/Mips/o32_cc_byval.ll +++ b/test/CodeGen/Mips/o32_cc_byval.ll @@ -24,7 +24,7 @@ entry: ; CHECK: sw $[[R4]], 28($sp) ; CHECK: sw $[[R5]], 32($sp) ; CHECK: sw $[[R6]], 36($sp) -; CHECK: lw $6, 0($[[R0]]) +; CHECK: lw $6, %lo(f1.s1)($[[R1]]) ; CHECK: lw $7, 4($[[R0]]) %agg.tmp10 = alloca %struct.S3, align 4 call void @callee1(float 2.000000e+01, %struct.S1* byval bitcast (%0* @f1.s1 to %struct.S1*)) nounwind diff --git a/test/CodeGen/PTX/add.ll b/test/CodeGen/PTX/add.ll index c16be49..293aebe 100644 --- a/test/CodeGen/PTX/add.ll +++ b/test/CodeGen/PTX/add.ll @@ -1,70 +1,70 @@ ; RUN: llc < %s -march=ptx32 | FileCheck %s define ptx_device i16 @t1_u16(i16 %x, i16 %y) { -; CHECK: add.u16 rh0, rh1, rh2; +; CHECK: add.u16 rh{{[0-9]+}}, rh{{[0-9]+}}, rh{{[0-9]+}}; ; CHECK-NEXT: ret; %z = add i16 %x, %y ret i16 %z } define ptx_device i32 @t1_u32(i32 %x, i32 %y) { -; CHECK: add.u32 r0, r1, r2; +; CHECK: add.u32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}; ; CHECK-NEXT: ret; %z = add i32 %x, %y ret i32 %z } define ptx_device i64 @t1_u64(i64 %x, i64 %y) { -; CHECK: add.u64 rd0, rd1, rd2; +; CHECK: add.u64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}; ; CHECK-NEXT: ret; %z = add i64 %x, %y ret i64 %z } define ptx_device float @t1_f32(float %x, float %y) { -; CHECK: add.rn.f32 r0, r1, r2 +; CHECK: add.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}} ; CHECK-NEXT: ret; %z = fadd float %x, %y ret float %z } define ptx_device double @t1_f64(double %x, double %y) { -; CHECK: add.rn.f64 rd0, rd1, rd2 +; CHECK: add.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}} ; CHECK-NEXT: ret; %z = fadd double %x, %y ret double %z } define ptx_device i16 @t2_u16(i16 %x) { -; CHECK: add.u16 rh0, rh1, 1; +; CHECK: add.u16 rh{{[0-9]+}}, rh{{[0-9]+}}, 1; ; CHECK-NEXT: 
ret; %z = add i16 %x, 1 ret i16 %z } define ptx_device i32 @t2_u32(i32 %x) { -; CHECK: add.u32 r0, r1, 1; +; CHECK: add.u32 r{{[0-9]+}}, r{{[0-9]+}}, 1; ; CHECK-NEXT: ret; %z = add i32 %x, 1 ret i32 %z } define ptx_device i64 @t2_u64(i64 %x) { -; CHECK: add.u64 rd0, rd1, 1; +; CHECK: add.u64 rd{{[0-9]+}}, rd{{[0-9]+}}, 1; ; CHECK-NEXT: ret; %z = add i64 %x, 1 ret i64 %z } define ptx_device float @t2_f32(float %x) { -; CHECK: add.rn.f32 r0, r1, 0F3F800000; +; CHECK: add.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, 0F3F800000; ; CHECK-NEXT: ret; %z = fadd float %x, 1.0 ret float %z } define ptx_device double @t2_f64(double %x) { -; CHECK: add.rn.f64 rd0, rd1, 0D3FF0000000000000; +; CHECK: add.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, 0D3FF0000000000000; ; CHECK-NEXT: ret; %z = fadd double %x, 1.0 ret double %z diff --git a/test/CodeGen/PTX/aggregates.ll b/test/CodeGen/PTX/aggregates.ll new file mode 100644 index 0000000..23f28a7 --- /dev/null +++ b/test/CodeGen/PTX/aggregates.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -march=ptx32 -mattr=sm20 | FileCheck %s + +%complex = type { float, float } + +define ptx_device %complex @complex_add(%complex %a, %complex %b) { +entry: +; CHECK: ld.param.f32 r[[R0:[0-9]+]], [__param_1]; +; CHECK-NEXT: ld.param.f32 r[[R2:[0-9]+]], [__param_3]; +; CHECK-NEXT: ld.param.f32 r[[R1:[0-9]+]], [__param_2]; +; CHECK-NEXT: ld.param.f32 r[[R3:[0-9]+]], [__param_4]; +; CHECK-NEXT: add.rn.f32 r[[R0]], r[[R0]], r[[R2]]; +; CHECK-NEXT: add.rn.f32 r[[R1]], r[[R1]], r[[R3]]; +; CHECK-NEXT: ret; + %a.real = extractvalue %complex %a, 0 + %a.imag = extractvalue %complex %a, 1 + %b.real = extractvalue %complex %b, 0 + %b.imag = extractvalue %complex %b, 1 + %ret.real = fadd float %a.real, %b.real + %ret.imag = fadd float %a.imag, %b.imag + %ret.0 = insertvalue %complex undef, float %ret.real, 0 + %ret.1 = insertvalue %complex %ret.0, float %ret.imag, 1 + ret %complex %ret.1 +} diff --git a/test/CodeGen/PTX/bitwise.ll b/test/CodeGen/PTX/bitwise.ll index dbc77e5..3859280 100644 
--- a/test/CodeGen/PTX/bitwise.ll +++ b/test/CodeGen/PTX/bitwise.ll @@ -3,21 +3,21 @@ ; preds define ptx_device i32 @t1_and_preds(i1 %x, i1 %y) { -; CHECK: and.pred p0, p1, p2 +; CHECK: and.pred p{{[0-9]+}}, p{{[0-9]+}}, p{{[0-9]+}} %c = and i1 %x, %y %d = zext i1 %c to i32 ret i32 %d } define ptx_device i32 @t1_or_preds(i1 %x, i1 %y) { -; CHECK: or.pred p0, p1, p2 +; CHECK: or.pred p{{[0-9]+}}, p{{[0-9]+}}, p{{[0-9]+}} %a = or i1 %x, %y %b = zext i1 %a to i32 ret i32 %b } define ptx_device i32 @t1_xor_preds(i1 %x, i1 %y) { -; CHECK: xor.pred p0, p1, p2 +; CHECK: xor.pred p{{[0-9]+}}, p{{[0-9]+}}, p{{[0-9]+}} %a = xor i1 %x, %y %b = zext i1 %a to i32 ret i32 %b diff --git a/test/CodeGen/PTX/bra.ll b/test/CodeGen/PTX/bra.ll index 49383eb..7cc9444 100644 --- a/test/CodeGen/PTX/bra.ll +++ b/test/CodeGen/PTX/bra.ll @@ -10,15 +10,15 @@ loop: define ptx_device i32 @test_bra_cond_direct(i32 %x, i32 %y) { entry: -; CHECK: setp.le.u32 p0, r1, r2 +; CHECK: setp.le.u32 p0, r[[R0:[0-9]+]], r[[R1:[0-9]+]] %p = icmp ugt i32 %x, %y ; CHECK-NEXT: @p0 bra ; CHECK-NOT: bra br i1 %p, label %clause.if, label %clause.else clause.if: -; CHECK: mov.u32 r0, r1 +; CHECK: mov.u32 r{{[0-9]+}}, r[[R0]] ret i32 %x clause.else: -; CHECK: mov.u32 r0, r2 +; CHECK: mov.u32 r{{[0-9]+}}, r[[R1]] ret i32 %y } diff --git a/test/CodeGen/PTX/cvt.ll b/test/CodeGen/PTX/cvt.ll index f723369..853abaf 100644 --- a/test/CodeGen/PTX/cvt.ll +++ b/test/CodeGen/PTX/cvt.ll @@ -4,9 +4,9 @@ ; (note: we convert back to i32 to return) define ptx_device i32 @cvt_pred_i16(i16 %x, i1 %y) { -; CHECK: setp.gt.b16 p0, rh1, 0 -; CHECK-NEXT: and.pred p0, p0, p1; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.gt.u16 p[[P0:[0-9]+]], rh{{[0-9]+}}, 0 +; CHECK-NEXT: and.pred p0, p[[P0:[0-9]+]], p{{[0-9]+}}; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0:[0-9]+]]; ; CHECK-NEXT: ret; %a = trunc i16 %x to i1 %b = and i1 %a, %y @@ -15,9 +15,9 @@ define ptx_device i32 @cvt_pred_i16(i16 %x, i1 %y) { } define ptx_device i32 
@cvt_pred_i32(i32 %x, i1 %y) { -; CHECK: setp.gt.b32 p0, r1, 0 -; CHECK-NEXT: and.pred p0, p0, p1; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.gt.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 0 +; CHECK-NEXT: and.pred p0, p[[P0:[0-9]+]], p{{[0-9]+}}; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0:[0-9]+]]; ; CHECK-NEXT: ret; %a = trunc i32 %x to i1 %b = and i1 %a, %y @@ -26,9 +26,9 @@ define ptx_device i32 @cvt_pred_i32(i32 %x, i1 %y) { } define ptx_device i32 @cvt_pred_i64(i64 %x, i1 %y) { -; CHECK: setp.gt.b64 p0, rd1, 0 -; CHECK-NEXT: and.pred p0, p0, p1; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.gt.u64 p[[P0:[0-9]+]], rd{{[0-9]+}}, 0 +; CHECK-NEXT: and.pred p0, p[[P0:[0-9]+]], p{{[0-9]+}}; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0:[0-9]+]]; ; CHECK-NEXT: ret; %a = trunc i64 %x to i1 %b = and i1 %a, %y @@ -37,9 +37,9 @@ define ptx_device i32 @cvt_pred_i64(i64 %x, i1 %y) { } define ptx_device i32 @cvt_pred_f32(float %x, i1 %y) { -; CHECK: setp.gt.b32 p0, r1, 0 -; CHECK-NEXT: and.pred p0, p0, p1; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.gt.f32 p[[P0:[0-9]+]], r{{[0-9]+}}, 0 +; CHECK-NEXT: and.pred p0, p[[P0:[0-9]+]], p{{[0-9]+}}; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0:[0-9]+]]; ; CHECK-NEXT: ret; %a = fptoui float %x to i1 %b = and i1 %a, %y @@ -48,9 +48,9 @@ define ptx_device i32 @cvt_pred_f32(float %x, i1 %y) { } define ptx_device i32 @cvt_pred_f64(double %x, i1 %y) { -; CHECK: setp.gt.b64 p0, rd1, 0 -; CHECK-NEXT: and.pred p0, p0, p1; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.gt.f64 p[[P0:[0-9]+]], rd{{[0-9]+}}, 0 +; CHECK-NEXT: and.pred p0, p[[P0:[0-9]+]], p{{[0-9]+}}; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0:[0-9]+]]; ; CHECK-NEXT: ret; %a = fptoui double %x to i1 %b = and i1 %a, %y @@ -61,35 +61,35 @@ define ptx_device i32 @cvt_pred_f64(double %x, i1 %y) { ; i16 define ptx_device i16 @cvt_i16_preds(i1 %x) { -; CHECK: selp.u16 rh0, 1, 0, p1; +; CHECK: selp.u16 rh{{[0-9]+}}, 1, 0, p{{[0-9]+}}; ; 
CHECK-NEXT: ret; %a = zext i1 %x to i16 ret i16 %a } define ptx_device i16 @cvt_i16_i32(i32 %x) { -; CHECK: cvt.u16.u32 rh0, r1; +; CHECK: cvt.u16.u32 rh{{[0-9]+}}, r{{[0-9]+}}; ; CHECK-NEXT: ret; %a = trunc i32 %x to i16 ret i16 %a } define ptx_device i16 @cvt_i16_i64(i64 %x) { -; CHECK: cvt.u16.u64 rh0, rd1; +; CHECK: cvt.u16.u64 rh{{[0-9]+}}, rd{{[0-9]+}}; ; CHECK-NEXT: ret; %a = trunc i64 %x to i16 ret i16 %a } define ptx_device i16 @cvt_i16_f32(float %x) { -; CHECK: cvt.rzi.u16.f32 rh0, r1; +; CHECK: cvt.rzi.u16.f32 rh{{[0-9]+}}, r{{[0-9]+}}; ; CHECK-NEXT: ret; %a = fptoui float %x to i16 ret i16 %a } define ptx_device i16 @cvt_i16_f64(double %x) { -; CHECK: cvt.rzi.u16.f64 rh0, rd1; +; CHECK: cvt.rzi.u16.f64 rh{{[0-9]+}}, rd{{[0-9]+}}; ; CHECK-NEXT: ret; %a = fptoui double %x to i16 ret i16 %a @@ -98,35 +98,35 @@ define ptx_device i16 @cvt_i16_f64(double %x) { ; i32 define ptx_device i32 @cvt_i32_preds(i1 %x) { -; CHECK: selp.u32 r0, 1, 0, p1; +; CHECK: selp.u32 r{{[0-9]+}}, 1, 0, p{{[0-9]+}}; ; CHECK-NEXT: ret; %a = zext i1 %x to i32 ret i32 %a } define ptx_device i32 @cvt_i32_i16(i16 %x) { -; CHECK: cvt.u32.u16 r0, rh1; +; CHECK: cvt.u32.u16 r{{[0-9]+}}, rh{{[0-9]+}}; ; CHECK-NEXT: ret; %a = zext i16 %x to i32 ret i32 %a } define ptx_device i32 @cvt_i32_i64(i64 %x) { -; CHECK: cvt.u32.u64 r0, rd1; +; CHECK: cvt.u32.u64 r{{[0-9]+}}, rd{{[0-9]+}}; ; CHECK-NEXT: ret; %a = trunc i64 %x to i32 ret i32 %a } define ptx_device i32 @cvt_i32_f32(float %x) { -; CHECK: cvt.rzi.u32.f32 r0, r1; +; CHECK: cvt.rzi.u32.f32 r{{[0-9]+}}, r{{[0-9]+}}; ; CHECK-NEXT: ret; %a = fptoui float %x to i32 ret i32 %a } define ptx_device i32 @cvt_i32_f64(double %x) { -; CHECK: cvt.rzi.u32.f64 r0, rd1; +; CHECK: cvt.rzi.u32.f64 r{{[0-9]+}}, rd{{[0-9]+}}; ; CHECK-NEXT: ret; %a = fptoui double %x to i32 ret i32 %a @@ -135,35 +135,35 @@ define ptx_device i32 @cvt_i32_f64(double %x) { ; i64 define ptx_device i64 @cvt_i64_preds(i1 %x) { -; CHECK: selp.u64 rd0, 1, 0, p1; +; CHECK: selp.u64 
rd{{[0-9]+}}, 1, 0, p{{[0-9]+}}; ; CHECK-NEXT: ret; %a = zext i1 %x to i64 ret i64 %a } define ptx_device i64 @cvt_i64_i16(i16 %x) { -; CHECK: cvt.u64.u16 rd0, rh1; +; CHECK: cvt.u64.u16 rd{{[0-9]+}}, rh{{[0-9]+}}; ; CHECK-NEXT: ret; %a = zext i16 %x to i64 ret i64 %a } define ptx_device i64 @cvt_i64_i32(i32 %x) { -; CHECK: cvt.u64.u32 rd0, r1; +; CHECK: cvt.u64.u32 rd{{[0-9]+}}, r{{[0-9]+}}; ; CHECK-NEXT: ret; %a = zext i32 %x to i64 ret i64 %a } define ptx_device i64 @cvt_i64_f32(float %x) { -; CHECK: cvt.rzi.u64.f32 rd0, r1; +; CHECK: cvt.rzi.u64.f32 rd{{[0-9]+}}, r{{[0-9]+}}; ; CHECK-NEXT: ret; %a = fptoui float %x to i64 ret i64 %a } define ptx_device i64 @cvt_i64_f64(double %x) { -; CHECK: cvt.rzi.u64.f64 rd0, rd1; +; CHECK: cvt.rzi.u64.f64 rd{{[0-9]+}}, rd{{[0-9]+}}; ; CHECK: ret; %a = fptoui double %x to i64 ret i64 %a @@ -172,35 +172,35 @@ define ptx_device i64 @cvt_i64_f64(double %x) { ; f32 define ptx_device float @cvt_f32_preds(i1 %x) { -; CHECK: selp.f32 r0, 0F3F800000, 0F00000000, p1; +; CHECK: selp.f32 r{{[0-9]+}}, 0F3F800000, 0F00000000, p{{[0-9]+}}; ; CHECK-NEXT: ret; %a = uitofp i1 %x to float ret float %a } define ptx_device float @cvt_f32_i16(i16 %x) { -; CHECK: cvt.rn.f32.u16 r0, rh1; +; CHECK: cvt.rn.f32.u16 r{{[0-9]+}}, rh{{[0-9]+}}; ; CHECK-NEXT: ret; %a = uitofp i16 %x to float ret float %a } define ptx_device float @cvt_f32_i32(i32 %x) { -; CHECK: cvt.rn.f32.u32 r0, r1; +; CHECK: cvt.rn.f32.u32 r{{[0-9]+}}, r{{[0-9]+}}; ; CHECK-NEXT: ret; %a = uitofp i32 %x to float ret float %a } define ptx_device float @cvt_f32_i64(i64 %x) { -; CHECK: cvt.rn.f32.u64 r0, rd1; +; CHECK: cvt.rn.f32.u64 r{{[0-9]+}}, rd{{[0-9]+}}; ; CHECK-NEXT: ret; %a = uitofp i64 %x to float ret float %a } define ptx_device float @cvt_f32_f64(double %x) { -; CHECK: cvt.rn.f32.f64 r0, rd1; +; CHECK: cvt.rn.f32.f64 r{{[0-9]+}}, rd{{[0-9]+}}; ; CHECK-NEXT: ret; %a = fptrunc double %x to float ret float %a @@ -209,35 +209,35 @@ define ptx_device float @cvt_f32_f64(double %x) { 
; f64 define ptx_device double @cvt_f64_preds(i1 %x) { -; CHECK: selp.f64 rd0, 0D3F80000000000000, 0D0000000000000000, p1; +; CHECK: selp.f64 rd{{[0-9]+}}, 0D3F80000000000000, 0D0000000000000000, p{{[0-9]+}}; ; CHECK-NEXT: ret; %a = uitofp i1 %x to double ret double %a } define ptx_device double @cvt_f64_i16(i16 %x) { -; CHECK: cvt.rn.f64.u16 rd0, rh1; +; CHECK: cvt.rn.f64.u16 rd{{[0-9]+}}, rh{{[0-9]+}}; ; CHECK-NEXT: ret; %a = uitofp i16 %x to double ret double %a } define ptx_device double @cvt_f64_i32(i32 %x) { -; CHECK: cvt.rn.f64.u32 rd0, r1; +; CHECK: cvt.rn.f64.u32 rd{{[0-9]+}}, r{{[0-9]+}}; ; CHECK-NEXT: ret; %a = uitofp i32 %x to double ret double %a } define ptx_device double @cvt_f64_i64(i64 %x) { -; CHECK: cvt.rn.f64.u64 rd0, rd1; +; CHECK: cvt.rn.f64.u64 rd{{[0-9]+}}, rd{{[0-9]+}}; ; CHECK-NEXT: ret; %a = uitofp i64 %x to double ret double %a } define ptx_device double @cvt_f64_f32(float %x) { -; CHECK: cvt.f64.f32 rd0, r1; +; CHECK: cvt.f64.f32 rd{{[0-9]+}}, r{{[0-9]+}}; ; CHECK-NEXT: ret; %a = fpext float %x to double ret double %a diff --git a/test/CodeGen/PTX/fdiv-sm10.ll b/test/CodeGen/PTX/fdiv-sm10.ll index eb32222..049d891 100644 --- a/test/CodeGen/PTX/fdiv-sm10.ll +++ b/test/CodeGen/PTX/fdiv-sm10.ll @@ -1,14 +1,14 @@ ; RUN: llc < %s -march=ptx32 -mattr=+sm10 | FileCheck %s define ptx_device float @t1_f32(float %x, float %y) { -; CHECK: div.f32 r0, r1, r2; +; CHECK: div.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}; ; CHECK-NEXT: ret; %a = fdiv float %x, %y ret float %a } define ptx_device double @t1_f64(double %x, double %y) { -; CHECK: div.f64 rd0, rd1, rd2; +; CHECK: div.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}; ; CHECK-NEXT: ret; %a = fdiv double %x, %y ret double %a diff --git a/test/CodeGen/PTX/fdiv-sm13.ll b/test/CodeGen/PTX/fdiv-sm13.ll index ad24f35..2d95339 100644 --- a/test/CodeGen/PTX/fdiv-sm13.ll +++ b/test/CodeGen/PTX/fdiv-sm13.ll @@ -1,14 +1,14 @@ ; RUN: llc < %s -march=ptx32 -mattr=+sm13 | FileCheck %s define ptx_device float 
@t1_f32(float %x, float %y) { -; CHECK: div.rn.f32 r0, r1, r2; +; CHECK: div.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}; ; CHECK-NEXT: ret; %a = fdiv float %x, %y ret float %a } define ptx_device double @t1_f64(double %x, double %y) { -; CHECK: div.rn.f64 rd0, rd1, rd2; +; CHECK: div.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}; ; CHECK-NEXT: ret; %a = fdiv double %x, %y ret double %a diff --git a/test/CodeGen/PTX/fneg.ll b/test/CodeGen/PTX/fneg.ll index 185c37c..66ca74a 100644 --- a/test/CodeGen/PTX/fneg.ll +++ b/test/CodeGen/PTX/fneg.ll @@ -1,14 +1,14 @@ ; RUN: llc < %s -march=ptx32 | FileCheck %s define ptx_device float @t1_f32(float %x) { -; CHECK: neg.f32 r0, r1; +; CHECK: neg.f32 r{{[0-9]+}}, r{{[0-9]+}}; ; CHECK-NEXT: ret; %y = fsub float -0.000000e+00, %x ret float %y } define ptx_device double @t1_f64(double %x) { -; CHECK: neg.f64 rd0, rd1; +; CHECK: neg.f64 rd{{[0-9]+}}, rd{{[0-9]+}}; ; CHECK-NEXT: ret; %y = fsub double -0.000000e+00, %x ret double %y diff --git a/test/CodeGen/PTX/intrinsic.ll b/test/CodeGen/PTX/intrinsic.ll index cea4182..af987d6 100644 --- a/test/CodeGen/PTX/intrinsic.ll +++ b/test/CodeGen/PTX/intrinsic.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20 | FileCheck %s +; RUN: llc < %s -march=ptx32 -mattr=+ptx20 | FileCheck %s define ptx_device i32 @test_tid_x() { ; CHECK: mov.u32 r0, %tid.x; diff --git a/test/CodeGen/PTX/ld.ll b/test/CodeGen/PTX/ld.ll index 9b75998..d184d12 100644 --- a/test/CodeGen/PTX/ld.ll +++ b/test/CodeGen/PTX/ld.ll @@ -63,7 +63,7 @@ define ptx_device i16 @t1_u16(i16* %p) { entry: -;CHECK: ld.global.u16 rh0, [r1]; +;CHECK: ld.global.u16 rh{{[0-9]+}}, [r{{[0-9]+}}]; ;CHECK-NEXT: ret; %x = load i16* %p ret i16 %x @@ -71,7 +71,7 @@ entry: define ptx_device i32 @t1_u32(i32* %p) { entry: -;CHECK: ld.global.u32 r0, [r1]; +;CHECK: ld.global.u32 r{{[0-9]+}}, [r{{[0-9]+}}]; ;CHECK-NEXT: ret; %x = load i32* %p ret i32 %x @@ -79,7 +79,7 @@ entry: define ptx_device i64 @t1_u64(i64* %p) { entry: -;CHECK: 
ld.global.u64 rd0, [r1]; +;CHECK: ld.global.u64 rd{{[0-9]+}}, [r{{[0-9]+}}]; ;CHECK-NEXT: ret; %x = load i64* %p ret i64 %x @@ -87,7 +87,7 @@ entry: define ptx_device float @t1_f32(float* %p) { entry: -;CHECK: ld.global.f32 r0, [r1]; +;CHECK: ld.global.f32 r{{[0-9]+}}, [r{{[0-9]+}}]; ;CHECK-NEXT: ret; %x = load float* %p ret float %x @@ -95,7 +95,7 @@ entry: define ptx_device double @t1_f64(double* %p) { entry: -;CHECK: ld.global.f64 rd0, [r1]; +;CHECK: ld.global.f64 rd{{[0-9]+}}, [r{{[0-9]+}}]; ;CHECK-NEXT: ret; %x = load double* %p ret double %x @@ -103,7 +103,7 @@ entry: define ptx_device i16 @t2_u16(i16* %p) { entry: -;CHECK: ld.global.u16 rh0, [r1+2]; +;CHECK: ld.global.u16 rh{{[0-9]+}}, [r{{[0-9]+}}+2]; ;CHECK-NEXT: ret; %i = getelementptr i16* %p, i32 1 %x = load i16* %i @@ -112,7 +112,7 @@ entry: define ptx_device i32 @t2_u32(i32* %p) { entry: -;CHECK: ld.global.u32 r0, [r1+4]; +;CHECK: ld.global.u32 r{{[0-9]+}}, [r{{[0-9]+}}+4]; ;CHECK-NEXT: ret; %i = getelementptr i32* %p, i32 1 %x = load i32* %i @@ -121,7 +121,7 @@ entry: define ptx_device i64 @t2_u64(i64* %p) { entry: -;CHECK: ld.global.u64 rd0, [r1+8]; +;CHECK: ld.global.u64 rd{{[0-9]+}}, [r{{[0-9]+}}+8]; ;CHECK-NEXT: ret; %i = getelementptr i64* %p, i32 1 %x = load i64* %i @@ -130,7 +130,7 @@ entry: define ptx_device float @t2_f32(float* %p) { entry: -;CHECK: ld.global.f32 r0, [r1+4]; +;CHECK: ld.global.f32 r{{[0-9]+}}, [r{{[0-9]+}}+4]; ;CHECK-NEXT: ret; %i = getelementptr float* %p, i32 1 %x = load float* %i @@ -139,7 +139,7 @@ entry: define ptx_device double @t2_f64(double* %p) { entry: -;CHECK: ld.global.f64 rd0, [r1+8]; +;CHECK: ld.global.f64 rd{{[0-9]+}}, [r{{[0-9]+}}+8]; ;CHECK-NEXT: ret; %i = getelementptr double* %p, i32 1 %x = load double* %i @@ -148,9 +148,9 @@ entry: define ptx_device i16 @t3_u16(i16* %p, i32 %q) { entry: -;CHECK: shl.b32 r0, r2, 1; -;CHECK-NEXT: add.u32 r0, r1, r0; -;CHECK-NEXT: ld.global.u16 rh0, [r0]; +;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 1; +;CHECK-NEXT: add.u32 
r[[R0]], r{{[0-9]+}}, r[[R0]]; +;CHECK-NEXT: ld.global.u16 rh{{[0-9]+}}, [r[[R0]]]; %i = getelementptr i16* %p, i32 %q %x = load i16* %i ret i16 %x @@ -158,9 +158,9 @@ entry: define ptx_device i32 @t3_u32(i32* %p, i32 %q) { entry: -;CHECK: shl.b32 r0, r2, 2; -;CHECK-NEXT: add.u32 r0, r1, r0; -;CHECK-NEXT: ld.global.u32 r0, [r0]; +;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 2; +;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]]; +;CHECK-NEXT: ld.global.u32 r{{[0-9]+}}, [r[[R0]]]; %i = getelementptr i32* %p, i32 %q %x = load i32* %i ret i32 %x @@ -168,9 +168,9 @@ entry: define ptx_device i64 @t3_u64(i64* %p, i32 %q) { entry: -;CHECK: shl.b32 r0, r2, 3; -;CHECK-NEXT: add.u32 r0, r1, r0; -;CHECK-NEXT: ld.global.u64 rd0, [r0]; +;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 3; +;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]]; +;CHECK-NEXT: ld.global.u64 rd{{[0-9]+}}, [r[[R0]]]; %i = getelementptr i64* %p, i32 %q %x = load i64* %i ret i64 %x @@ -178,9 +178,9 @@ entry: define ptx_device float @t3_f32(float* %p, i32 %q) { entry: -;CHECK: shl.b32 r0, r2, 2; -;CHECK-NEXT: add.u32 r0, r1, r0; -;CHECK-NEXT: ld.global.f32 r0, [r0]; +;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 2; +;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]]; +;CHECK-NEXT: ld.global.f32 r{{[0-9]+}}, [r[[R0]]]; %i = getelementptr float* %p, i32 %q %x = load float* %i ret float %x @@ -188,9 +188,9 @@ entry: define ptx_device double @t3_f64(double* %p, i32 %q) { entry: -;CHECK: shl.b32 r0, r2, 3; -;CHECK-NEXT: add.u32 r0, r1, r0; -;CHECK-NEXT: ld.global.f64 rd0, [r0]; +;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 3; +;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]]; +;CHECK-NEXT: ld.global.f64 rd{{[0-9]+}}, [r[[R0]]]; %i = getelementptr double* %p, i32 %q %x = load double* %i ret double %x @@ -198,8 +198,8 @@ entry: define ptx_device i16 @t4_global_u16() { entry: -;CHECK: mov.u32 r0, array_i16; -;CHECK-NEXT: ld.global.u16 rh0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16; +;CHECK-NEXT: ld.global.u16 
rh{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 0 %x = load i16* %i @@ -208,8 +208,8 @@ entry: define ptx_device i32 @t4_global_u32() { entry: -;CHECK: mov.u32 r0, array_i32; -;CHECK-NEXT: ld.global.u32 r0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_i32; +;CHECK-NEXT: ld.global.u32 r{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0 %x = load i32* %i @@ -218,8 +218,8 @@ entry: define ptx_device i64 @t4_global_u64() { entry: -;CHECK: mov.u32 r0, array_i64; -;CHECK-NEXT: ld.global.u64 rd0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_i64; +;CHECK-NEXT: ld.global.u64 rd{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0 %x = load i64* %i @@ -228,8 +228,8 @@ entry: define ptx_device float @t4_global_f32() { entry: -;CHECK: mov.u32 r0, array_float; -;CHECK-NEXT: ld.global.f32 r0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_float; +;CHECK-NEXT: ld.global.f32 r{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x float]* @array_float, i32 0, i32 0 %x = load float* %i @@ -238,8 +238,8 @@ entry: define ptx_device double @t4_global_f64() { entry: -;CHECK: mov.u32 r0, array_double; -;CHECK-NEXT: ld.global.f64 rd0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_double; +;CHECK-NEXT: ld.global.f64 rd{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x double]* @array_double, i32 0, i32 0 %x = load double* %i @@ -248,8 +248,8 @@ entry: define ptx_device i16 @t4_const_u16() { entry: -;CHECK: mov.u32 r0, array_constant_i16; -;CHECK-NEXT: ld.const.u16 rh0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_i16; +;CHECK-NEXT: ld.const.u16 rh{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x i16] addrspace(1)* @array_constant_i16, i32 0, i32 0 %x = load i16 addrspace(1)* %i @@ -258,8 +258,8 @@ entry: define ptx_device i32 @t4_const_u32() { entry: -;CHECK: mov.u32 r0, array_constant_i32; -;CHECK-NEXT: 
ld.const.u32 r0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_i32; +;CHECK-NEXT: ld.const.u32 r{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x i32] addrspace(1)* @array_constant_i32, i32 0, i32 0 %x = load i32 addrspace(1)* %i @@ -268,8 +268,8 @@ entry: define ptx_device i64 @t4_const_u64() { entry: -;CHECK: mov.u32 r0, array_constant_i64; -;CHECK-NEXT: ld.const.u64 rd0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_i64; +;CHECK-NEXT: ld.const.u64 rd{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x i64] addrspace(1)* @array_constant_i64, i32 0, i32 0 %x = load i64 addrspace(1)* %i @@ -278,8 +278,8 @@ entry: define ptx_device float @t4_const_f32() { entry: -;CHECK: mov.u32 r0, array_constant_float; -;CHECK-NEXT: ld.const.f32 r0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_float; +;CHECK-NEXT: ld.const.f32 r{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x float] addrspace(1)* @array_constant_float, i32 0, i32 0 %x = load float addrspace(1)* %i @@ -288,8 +288,8 @@ entry: define ptx_device double @t4_const_f64() { entry: -;CHECK: mov.u32 r0, array_constant_double; -;CHECK-NEXT: ld.const.f64 rd0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_double; +;CHECK-NEXT: ld.const.f64 rd{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x double] addrspace(1)* @array_constant_double, i32 0, i32 0 %x = load double addrspace(1)* %i @@ -298,8 +298,8 @@ entry: define ptx_device i16 @t4_local_u16() { entry: -;CHECK: mov.u32 r0, array_local_i16; -;CHECK-NEXT: ld.local.u16 rh0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i16; +;CHECK-NEXT: ld.local.u16 rh{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x i16] addrspace(2)* @array_local_i16, i32 0, i32 0 %x = load i16 addrspace(2)* %i @@ -308,8 +308,8 @@ entry: define ptx_device i32 @t4_local_u32() { entry: -;CHECK: mov.u32 r0, array_local_i32; -;CHECK-NEXT: ld.local.u32 r0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], 
array_local_i32; +;CHECK-NEXT: ld.local.u32 r{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x i32] addrspace(2)* @array_local_i32, i32 0, i32 0 %x = load i32 addrspace(2)* %i @@ -318,8 +318,8 @@ entry: define ptx_device i64 @t4_local_u64() { entry: -;CHECK: mov.u32 r0, array_local_i64; -;CHECK-NEXT: ld.local.u64 rd0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i64; +;CHECK-NEXT: ld.local.u64 rd{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x i64] addrspace(2)* @array_local_i64, i32 0, i32 0 %x = load i64 addrspace(2)* %i @@ -328,8 +328,8 @@ entry: define ptx_device float @t4_local_f32() { entry: -;CHECK: mov.u32 r0, array_local_float; -;CHECK-NEXT: ld.local.f32 r0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_float; +;CHECK-NEXT: ld.local.f32 r{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x float] addrspace(2)* @array_local_float, i32 0, i32 0 %x = load float addrspace(2)* %i @@ -338,8 +338,8 @@ entry: define ptx_device double @t4_local_f64() { entry: -;CHECK: mov.u32 r0, array_local_double; -;CHECK-NEXT: ld.local.f64 rd0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_double; +;CHECK-NEXT: ld.local.f64 rd{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x double] addrspace(2)* @array_local_double, i32 0, i32 0 %x = load double addrspace(2)* %i @@ -348,8 +348,8 @@ entry: define ptx_device i16 @t4_shared_u16() { entry: -;CHECK: mov.u32 r0, array_shared_i16; -;CHECK-NEXT: ld.shared.u16 rh0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i16; +;CHECK-NEXT: ld.shared.u16 rh{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0 %x = load i16 addrspace(4)* %i @@ -358,8 +358,8 @@ entry: define ptx_device i32 @t4_shared_u32() { entry: -;CHECK: mov.u32 r0, array_shared_i32; -;CHECK-NEXT: ld.shared.u32 r0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i32; +;CHECK-NEXT: ld.shared.u32 r{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: 
ret; %i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0 %x = load i32 addrspace(4)* %i @@ -368,8 +368,8 @@ entry: define ptx_device i64 @t4_shared_u64() { entry: -;CHECK: mov.u32 r0, array_shared_i64; -;CHECK-NEXT: ld.shared.u64 rd0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i64; +;CHECK-NEXT: ld.shared.u64 rd{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0 %x = load i64 addrspace(4)* %i @@ -378,8 +378,8 @@ entry: define ptx_device float @t4_shared_f32() { entry: -;CHECK: mov.u32 r0, array_shared_float; -;CHECK-NEXT: ld.shared.f32 r0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_float; +;CHECK-NEXT: ld.shared.f32 r{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0 %x = load float addrspace(4)* %i @@ -388,8 +388,8 @@ entry: define ptx_device double @t4_shared_f64() { entry: -;CHECK: mov.u32 r0, array_shared_double; -;CHECK-NEXT: ld.shared.f64 rd0, [r0]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_double; +;CHECK-NEXT: ld.shared.f64 rd{{[0-9]+}}, [r[[R0]]]; ;CHECK-NEXT: ret; %i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0 %x = load double addrspace(4)* %i @@ -398,8 +398,8 @@ entry: define ptx_device i16 @t5_u16() { entry: -;CHECK: mov.u32 r0, array_i16; -;CHECK-NEXT: ld.global.u16 rh0, [r0+2]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16; +;CHECK-NEXT: ld.global.u16 rh{{[0-9]+}}, [r[[R0]]+2]; ;CHECK-NEXT: ret; %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1 %x = load i16* %i @@ -408,8 +408,8 @@ entry: define ptx_device i32 @t5_u32() { entry: -;CHECK: mov.u32 r0, array_i32; -;CHECK-NEXT: ld.global.u32 r0, [r0+4]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_i32; +;CHECK-NEXT: ld.global.u32 r{{[0-9]+}}, [r[[R0]]+4]; ;CHECK-NEXT: ret; %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1 %x = load i32* %i @@ -418,8 +418,8 @@ entry: define ptx_device i64 
@t5_u64() { entry: -;CHECK: mov.u32 r0, array_i64; -;CHECK-NEXT: ld.global.u64 rd0, [r0+8]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_i64; +;CHECK-NEXT: ld.global.u64 rd{{[0-9]+}}, [r[[R0]]+8]; ;CHECK-NEXT: ret; %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 1 %x = load i64* %i @@ -428,8 +428,8 @@ entry: define ptx_device float @t5_f32() { entry: -;CHECK: mov.u32 r0, array_float; -;CHECK-NEXT: ld.global.f32 r0, [r0+4]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_float; +;CHECK-NEXT: ld.global.f32 r{{[0-9]+}}, [r[[R0]]+4]; ;CHECK-NEXT: ret; %i = getelementptr [10 x float]* @array_float, i32 0, i32 1 %x = load float* %i @@ -438,8 +438,8 @@ entry: define ptx_device double @t5_f64() { entry: -;CHECK: mov.u32 r0, array_double; -;CHECK-NEXT: ld.global.f64 rd0, [r0+8]; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_double; +;CHECK-NEXT: ld.global.f64 rd{{[0-9]+}}, [r[[R0]]+8]; ;CHECK-NEXT: ret; %i = getelementptr [10 x double]* @array_double, i32 0, i32 1 %x = load double* %i diff --git a/test/CodeGen/PTX/llvm-intrinsic.ll b/test/CodeGen/PTX/llvm-intrinsic.ll index a317645..4611c54 100644 --- a/test/CodeGen/PTX/llvm-intrinsic.ll +++ b/test/CodeGen/PTX/llvm-intrinsic.ll @@ -1,8 +1,8 @@ -; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20 | FileCheck %s +; RUN: llc < %s -march=ptx32 -mattr=+ptx20 | FileCheck %s define ptx_device float @test_sqrt_f32(float %x) { entry: -; CHECK: sqrt.rn.f32 r0, r1; +; CHECK: sqrt.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}; ; CHECK-NEXT: ret; %y = call float @llvm.sqrt.f32(float %x) ret float %y @@ -10,7 +10,7 @@ entry: define ptx_device double @test_sqrt_f64(double %x) { entry: -; CHECK: sqrt.rn.f64 rd0, rd1; +; CHECK: sqrt.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}; ; CHECK-NEXT: ret; %y = call double @llvm.sqrt.f64(double %x) ret double %y @@ -18,7 +18,7 @@ entry: define ptx_device float @test_sin_f32(float %x) { entry: -; CHECK: sin.approx.f32 r0, r1; +; CHECK: sin.approx.f32 r{{[0-9]+}}, r{{[0-9]+}}; ; CHECK-NEXT: ret; %y = call float @llvm.sin.f32(float %x) ret float 
%y @@ -26,7 +26,7 @@ entry: define ptx_device double @test_sin_f64(double %x) { entry: -; CHECK: sin.approx.f64 rd0, rd1; +; CHECK: sin.approx.f64 rd{{[0-9]+}}, rd{{[0-9]+}}; ; CHECK-NEXT: ret; %y = call double @llvm.sin.f64(double %x) ret double %y @@ -34,7 +34,7 @@ entry: define ptx_device float @test_cos_f32(float %x) { entry: -; CHECK: cos.approx.f32 r0, r1; +; CHECK: cos.approx.f32 r{{[0-9]+}}, r{{[0-9]+}}; ; CHECK-NEXT: ret; %y = call float @llvm.cos.f32(float %x) ret float %y @@ -42,7 +42,7 @@ entry: define ptx_device double @test_cos_f64(double %x) { entry: -; CHECK: cos.approx.f64 rd0, rd1; +; CHECK: cos.approx.f64 rd{{[0-9]+}}, rd{{[0-9]+}}; ; CHECK-NEXT: ret; %y = call double @llvm.cos.f64(double %x) ret double %y diff --git a/test/CodeGen/PTX/mad.ll b/test/CodeGen/PTX/mad.ll index 56d3811..0e4d3f9 100644 --- a/test/CodeGen/PTX/mad.ll +++ b/test/CodeGen/PTX/mad.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=ptx32 -mattr=+sm13 | FileCheck %s define ptx_device float @t1_f32(float %x, float %y, float %z) { -; CHECK: mad.rn.f32 r0, r1, r2, r3; +; CHECK: mad.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}; ; CHECK-NEXT: ret; %a = fmul float %x, %y %b = fadd float %a, %z @@ -9,7 +9,7 @@ define ptx_device float @t1_f32(float %x, float %y, float %z) { } define ptx_device double @t1_f64(double %x, double %y, double %z) { -; CHECK: mad.rn.f64 rd0, rd1, rd2, rd3; +; CHECK: mad.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}; ; CHECK-NEXT: ret; %a = fmul double %x, %y %b = fadd double %a, %z diff --git a/test/CodeGen/PTX/mov.ll b/test/CodeGen/PTX/mov.ll index 05ce4c0..cce6a5b 100644 --- a/test/CodeGen/PTX/mov.ll +++ b/test/CodeGen/PTX/mov.ll @@ -1,61 +1,61 @@ ; RUN: llc < %s -march=ptx32 | FileCheck %s define ptx_device i16 @t1_u16() { -; CHECK: mov.u16 rh0, 0; +; CHECK: mov.u16 rh{{[0-9]+}}, 0; ; CHECK: ret; ret i16 0 } define ptx_device i32 @t1_u32() { -; CHECK: mov.u32 r0, 0; +; CHECK: mov.u32 r{{[0-9]+}}, 0; ; CHECK: ret; ret i32 0 } define 
ptx_device i64 @t1_u64() { -; CHECK: mov.u64 rd0, 0; +; CHECK: mov.u64 rd{{[0-9]+}}, 0; ; CHECK: ret; ret i64 0 } define ptx_device float @t1_f32() { -; CHECK: mov.f32 r0, 0F00000000; +; CHECK: mov.f32 r{{[0-9]+}}, 0F00000000; ; CHECK: ret; ret float 0.0 } define ptx_device double @t1_f64() { -; CHECK: mov.f64 rd0, 0D0000000000000000; +; CHECK: mov.f64 rd{{[0-9]+}}, 0D0000000000000000; ; CHECK: ret; ret double 0.0 } define ptx_device i16 @t2_u16(i16 %x) { -; CHECK: mov.u16 rh0, rh1; +; CHECK: mov.u16 rh{{[0-9]+}}, rh{{[0-9]+}}; ; CHECK: ret; ret i16 %x } define ptx_device i32 @t2_u32(i32 %x) { -; CHECK: mov.u32 r0, r1; +; CHECK: mov.u32 r{{[0-9]+}}, r{{[0-9]+}}; ; CHECK: ret; ret i32 %x } define ptx_device i64 @t2_u64(i64 %x) { -; CHECK: mov.u64 rd0, rd1; +; CHECK: mov.u64 rd{{[0-9]+}}, rd{{[0-9]+}}; ; CHECK: ret; ret i64 %x } define ptx_device float @t3_f32(float %x) { -; CHECK: mov.u32 r0, r1; +; CHECK: mov.u32 r{{[0-9]+}}, r{{[0-9]+}}; ; CHECK-NEXT: ret; ret float %x } define ptx_device double @t3_f64(double %x) { -; CHECK: mov.u64 rd0, rd1; +; CHECK: mov.u64 rd{{[0-9]+}}, rd{{[0-9]+}}; ; CHECK-NEXT: ret; ret double %x } diff --git a/test/CodeGen/PTX/mul.ll b/test/CodeGen/PTX/mul.ll index 2093556..491cc74 100644 --- a/test/CodeGen/PTX/mul.ll +++ b/test/CodeGen/PTX/mul.ll @@ -11,28 +11,28 @@ ;} define ptx_device float @t1_f32(float %x, float %y) { -; CHECK: mul.rn.f32 r0, r1, r2 +; CHECK: mul.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}} ; CHECK-NEXT: ret; %z = fmul float %x, %y ret float %z } define ptx_device double @t1_f64(double %x, double %y) { -; CHECK: mul.rn.f64 rd0, rd1, rd2 +; CHECK: mul.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}} ; CHECK-NEXT: ret; %z = fmul double %x, %y ret double %z } define ptx_device float @t2_f32(float %x) { -; CHECK: mul.rn.f32 r0, r1, 0F40A00000; +; CHECK: mul.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, 0F40A00000; ; CHECK-NEXT: ret; %z = fmul float %x, 5.0 ret float %z } define ptx_device double @t2_f64(double %x) { -; CHECK: 
mul.rn.f64 rd0, rd1, 0D4014000000000000; +; CHECK: mul.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, 0D4014000000000000; ; CHECK-NEXT: ret; %z = fmul double %x, 5.0 ret double %z diff --git a/test/CodeGen/PTX/parameter-order.ll b/test/CodeGen/PTX/parameter-order.ll index 5486472..b16556e 100644 --- a/test/CodeGen/PTX/parameter-order.ll +++ b/test/CodeGen/PTX/parameter-order.ll @@ -1,8 +1,8 @@ ; RUN: llc < %s -march=ptx32 | FileCheck %s -; CHECK: .func (.reg .b32 r0) test_parameter_order (.reg .b32 r1, .reg .b32 r2, .reg .b32 r3, .reg .b32 r4) +; CHECK: .func (.reg .b32 r{{[0-9]+}}) test_parameter_order (.reg .b32 r{{[0-9]+}}, .reg .b32 r{{[0-9]+}}, .reg .b32 r{{[0-9]+}}, .reg .b32 r{{[0-9]+}}) define ptx_device i32 @test_parameter_order(float %a, i32 %b, i32 %c, float %d) { -; CHECK: sub.u32 r0, r2, r3 +; CHECK: sub.u32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}} %result = sub i32 %b, %c ret i32 %result } diff --git a/test/CodeGen/PTX/selp.ll b/test/CodeGen/PTX/selp.ll index 19cfa53..e705fbe 100644 --- a/test/CodeGen/PTX/selp.ll +++ b/test/CodeGen/PTX/selp.ll @@ -1,25 +1,25 @@ ; RUN: llc < %s -march=ptx32 | FileCheck %s define ptx_device i32 @test_selp_i32(i1 %x, i32 %y, i32 %z) { -; CHECK: selp.u32 r0, r1, r2, p1; +; CHECK: selp.u32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, p{{[0-9]+}}; %a = select i1 %x, i32 %y, i32 %z ret i32 %a } define ptx_device i64 @test_selp_i64(i1 %x, i64 %y, i64 %z) { -; CHECK: selp.u64 rd0, rd1, rd2, p1; +; CHECK: selp.u64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}, p{{[0-9]+}}; %a = select i1 %x, i64 %y, i64 %z ret i64 %a } define ptx_device float @test_selp_f32(i1 %x, float %y, float %z) { -; CHECK: selp.f32 r0, r1, r2, p1; +; CHECK: selp.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, p{{[0-9]+}}; %a = select i1 %x, float %y, float %z ret float %a } define ptx_device double @test_selp_f64(i1 %x, double %y, double %z) { -; CHECK: selp.f64 rd0, rd1, rd2, p1; +; CHECK: selp.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}, p{{[0-9]+}}; %a = select i1 %x, double %y, 
double %z ret double %a } diff --git a/test/CodeGen/PTX/setp.ll b/test/CodeGen/PTX/setp.ll index 3e01a75..e0044d6 100644 --- a/test/CodeGen/PTX/setp.ll +++ b/test/CodeGen/PTX/setp.ll @@ -1,8 +1,8 @@ ; RUN: llc < %s -march=ptx32 | FileCheck %s define ptx_device i32 @test_setp_eq_u32_rr(i32 %x, i32 %y) { -; CHECK: setp.eq.u32 p0, r1, r2; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.eq.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp eq i32 %x, %y %z = zext i1 %p to i32 @@ -10,8 +10,8 @@ define ptx_device i32 @test_setp_eq_u32_rr(i32 %x, i32 %y) { } define ptx_device i32 @test_setp_ne_u32_rr(i32 %x, i32 %y) { -; CHECK: setp.ne.u32 p0, r1, r2; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.ne.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp ne i32 %x, %y %z = zext i1 %p to i32 @@ -19,8 +19,8 @@ define ptx_device i32 @test_setp_ne_u32_rr(i32 %x, i32 %y) { } define ptx_device i32 @test_setp_lt_u32_rr(i32 %x, i32 %y) { -; CHECK: setp.lt.u32 p0, r1, r2; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.lt.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp ult i32 %x, %y %z = zext i1 %p to i32 @@ -28,8 +28,8 @@ define ptx_device i32 @test_setp_lt_u32_rr(i32 %x, i32 %y) { } define ptx_device i32 @test_setp_le_u32_rr(i32 %x, i32 %y) { -; CHECK: setp.le.u32 p0, r1, r2; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.le.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp ule i32 %x, %y %z = zext i1 %p to i32 @@ -37,8 +37,8 @@ define ptx_device i32 @test_setp_le_u32_rr(i32 %x, i32 %y) { } define ptx_device i32 @test_setp_gt_u32_rr(i32 %x, i32 %y) { -; CHECK: setp.gt.u32 p0, r1, r2; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.gt.u32 p[[P0:[0-9]+]], 
r{{[0-9]+}}, r{{[0-9]+}}; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp ugt i32 %x, %y %z = zext i1 %p to i32 @@ -46,8 +46,8 @@ define ptx_device i32 @test_setp_gt_u32_rr(i32 %x, i32 %y) { } define ptx_device i32 @test_setp_ge_u32_rr(i32 %x, i32 %y) { -; CHECK: setp.ge.u32 p0, r1, r2; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.ge.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp uge i32 %x, %y %z = zext i1 %p to i32 @@ -55,8 +55,8 @@ define ptx_device i32 @test_setp_ge_u32_rr(i32 %x, i32 %y) { } define ptx_device i32 @test_setp_lt_s32_rr(i32 %x, i32 %y) { -; CHECK: setp.lt.s32 p0, r1, r2; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.lt.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp slt i32 %x, %y %z = zext i1 %p to i32 @@ -64,8 +64,8 @@ define ptx_device i32 @test_setp_lt_s32_rr(i32 %x, i32 %y) { } define ptx_device i32 @test_setp_le_s32_rr(i32 %x, i32 %y) { -; CHECK: setp.le.s32 p0, r1, r2; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.le.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp sle i32 %x, %y %z = zext i1 %p to i32 @@ -73,8 +73,8 @@ define ptx_device i32 @test_setp_le_s32_rr(i32 %x, i32 %y) { } define ptx_device i32 @test_setp_gt_s32_rr(i32 %x, i32 %y) { -; CHECK: setp.gt.s32 p0, r1, r2; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.gt.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp sgt i32 %x, %y %z = zext i1 %p to i32 @@ -82,8 +82,8 @@ define ptx_device i32 @test_setp_gt_s32_rr(i32 %x, i32 %y) { } define ptx_device i32 @test_setp_ge_s32_rr(i32 %x, i32 %y) { -; CHECK: setp.ge.s32 p0, r1, r2; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.ge.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, 
r{{[0-9]+}}; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp sge i32 %x, %y %z = zext i1 %p to i32 @@ -91,8 +91,8 @@ define ptx_device i32 @test_setp_ge_s32_rr(i32 %x, i32 %y) { } define ptx_device i32 @test_setp_eq_u32_ri(i32 %x) { -; CHECK: setp.eq.u32 p0, r1, 1; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.eq.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 1; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp eq i32 %x, 1 %z = zext i1 %p to i32 @@ -100,8 +100,8 @@ define ptx_device i32 @test_setp_eq_u32_ri(i32 %x) { } define ptx_device i32 @test_setp_ne_u32_ri(i32 %x) { -; CHECK: setp.ne.u32 p0, r1, 1; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.ne.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 1; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp ne i32 %x, 1 %z = zext i1 %p to i32 @@ -109,8 +109,8 @@ define ptx_device i32 @test_setp_ne_u32_ri(i32 %x) { } define ptx_device i32 @test_setp_lt_u32_ri(i32 %x) { -; CHECK: setp.eq.u32 p0, r1, 0; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.eq.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 0; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp ult i32 %x, 1 %z = zext i1 %p to i32 @@ -118,8 +118,8 @@ define ptx_device i32 @test_setp_lt_u32_ri(i32 %x) { } define ptx_device i32 @test_setp_le_u32_ri(i32 %x) { -; CHECK: setp.lt.u32 p0, r1, 2; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.lt.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 2; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp ule i32 %x, 1 %z = zext i1 %p to i32 @@ -127,8 +127,8 @@ define ptx_device i32 @test_setp_le_u32_ri(i32 %x) { } define ptx_device i32 @test_setp_gt_u32_ri(i32 %x) { -; CHECK: setp.gt.u32 p0, r1, 1; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.gt.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 1; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp ugt i32 %x, 1 %z = zext i1 %p to i32 @@ 
-136,8 +136,8 @@ define ptx_device i32 @test_setp_gt_u32_ri(i32 %x) { } define ptx_device i32 @test_setp_ge_u32_ri(i32 %x) { -; CHECK: setp.ne.u32 p0, r1, 0; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.ne.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 0; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp uge i32 %x, 1 %z = zext i1 %p to i32 @@ -145,8 +145,8 @@ define ptx_device i32 @test_setp_ge_u32_ri(i32 %x) { } define ptx_device i32 @test_setp_lt_s32_ri(i32 %x) { -; CHECK: setp.lt.s32 p0, r1, 1; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.lt.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, 1; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp slt i32 %x, 1 %z = zext i1 %p to i32 @@ -154,8 +154,8 @@ define ptx_device i32 @test_setp_lt_s32_ri(i32 %x) { } define ptx_device i32 @test_setp_le_s32_ri(i32 %x) { -; CHECK: setp.lt.s32 p0, r1, 2; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.lt.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, 2; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp sle i32 %x, 1 %z = zext i1 %p to i32 @@ -163,8 +163,8 @@ define ptx_device i32 @test_setp_le_s32_ri(i32 %x) { } define ptx_device i32 @test_setp_gt_s32_ri(i32 %x) { -; CHECK: setp.gt.s32 p0, r1, 1; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.gt.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, 1; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp sgt i32 %x, 1 %z = zext i1 %p to i32 @@ -172,8 +172,8 @@ define ptx_device i32 @test_setp_gt_s32_ri(i32 %x) { } define ptx_device i32 @test_setp_ge_s32_ri(i32 %x) { -; CHECK: setp.gt.s32 p0, r1, 0; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.gt.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, 0; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %p = icmp sge i32 %x, 1 %z = zext i1 %p to i32 @@ -181,9 +181,9 @@ define ptx_device i32 @test_setp_ge_s32_ri(i32 %x) { } define ptx_device i32 @test_setp_4_op_format_1(i32 %x, i32 %y, i32 %u, 
i32 %v) { -; CHECK: setp.gt.u32 p0, r3, r4; -; CHECK-NEXT: setp.eq.and.u32 p0, r1, r2, p0; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.gt.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}; +; CHECK-NEXT: setp.eq.and.u32 p[[P0]], r{{[0-9]+}}, r{{[0-9]+}}, p[[P0]]; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %c = icmp eq i32 %x, %y %d = icmp ugt i32 %u, %v @@ -193,9 +193,9 @@ define ptx_device i32 @test_setp_4_op_format_1(i32 %x, i32 %y, i32 %u, i32 %v) { } define ptx_device i32 @test_setp_4_op_format_2(i32 %x, i32 %y, i32 %w) { -; CHECK: setp.gt.b32 p0, r3, 0; -; CHECK-NEXT: setp.eq.and.u32 p0, r1, r2, !p0; -; CHECK-NEXT: selp.u32 r0, 1, 0, p0; +; CHECK: setp.gt.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 0; +; CHECK-NEXT: setp.eq.and.u32 p[[P0]], r{{[0-9]+}}, r{{[0-9]+}}, !p[[P0]]; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]]; ; CHECK-NEXT: ret; %c = trunc i32 %w to i1 %d = icmp eq i32 %x, %y diff --git a/test/CodeGen/PTX/shl.ll b/test/CodeGen/PTX/shl.ll index 6e72c92..b3818e1 100644 --- a/test/CodeGen/PTX/shl.ll +++ b/test/CodeGen/PTX/shl.ll @@ -1,21 +1,21 @@ ; RUN: llc < %s -march=ptx32 | FileCheck %s define ptx_device i32 @t1(i32 %x, i32 %y) { -; CHECK: shl.b32 r0, r1, r2 +; CHECK: shl.b32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}} %z = shl i32 %x, %y ; CHECK: ret; ret i32 %z } define ptx_device i32 @t2(i32 %x) { -; CHECK: shl.b32 r0, r1, 3 +; CHECK: shl.b32 r{{[0-9]+}}, r{{[0-9]+}}, 3 %z = shl i32 %x, 3 ; CHECK: ret; ret i32 %z } define ptx_device i32 @t3(i32 %x) { -; CHECK: shl.b32 r0, 3, r1 +; CHECK: shl.b32 r{{[0-9]+}}, 3, r{{[0-9]+}} %z = shl i32 3, %x ; CHECK: ret; ret i32 %z diff --git a/test/CodeGen/PTX/shr.ll b/test/CodeGen/PTX/shr.ll index 8693e0e..cb57546 100644 --- a/test/CodeGen/PTX/shr.ll +++ b/test/CodeGen/PTX/shr.ll @@ -1,42 +1,42 @@ ; RUN: llc < %s -march=ptx32 | FileCheck %s define ptx_device i32 @t1(i32 %x, i32 %y) { -; CHECK: shr.u32 r0, r1, r2 +; CHECK: shr.u32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}} %z = lshr i32 %x, %y ; 
CHECK: ret; ret i32 %z } define ptx_device i32 @t2(i32 %x) { -; CHECK: shr.u32 r0, r1, 3 +; CHECK: shr.u32 r{{[0-9]+}}, r{{[0-9]+}}, 3 %z = lshr i32 %x, 3 ; CHECK: ret; ret i32 %z } define ptx_device i32 @t3(i32 %x) { -; CHECK: shr.u32 r0, 3, r1 +; CHECK: shr.u32 r{{[0-9]+}}, 3, r{{[0-9]+}} %z = lshr i32 3, %x ; CHECK: ret; ret i32 %z } define ptx_device i32 @t4(i32 %x, i32 %y) { -; CHECK: shr.s32 r0, r1, r2 +; CHECK: shr.s32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}} %z = ashr i32 %x, %y ; CHECK: ret; ret i32 %z } define ptx_device i32 @t5(i32 %x) { -; CHECK: shr.s32 r0, r1, 3 +; CHECK: shr.s32 r{{[0-9]+}}, r{{[0-9]+}}, 3 %z = ashr i32 %x, 3 ; CHECK: ret; ret i32 %z } define ptx_device i32 @t6(i32 %x) { -; CHECK: shr.s32 r0, -3, r1 +; CHECK: shr.s32 r{{[0-9]+}}, -3, r{{[0-9]+}} %z = ashr i32 -3, %x ; CHECK: ret; ret i32 %z diff --git a/test/CodeGen/PTX/st.ll b/test/CodeGen/PTX/st.ll index 612967a..b08528e 100644 --- a/test/CodeGen/PTX/st.ll +++ b/test/CodeGen/PTX/st.ll @@ -63,7 +63,7 @@ define ptx_device void @t1_u16(i16* %p, i16 %x) { entry: -;CHECK: st.global.u16 [r1], rh1; +;CHECK: st.global.u16 [r{{[0-9]+}}], rh{{[0-9]+}}; ;CHECK-NEXT: ret; store i16 %x, i16* %p ret void @@ -71,7 +71,7 @@ entry: define ptx_device void @t1_u32(i32* %p, i32 %x) { entry: -;CHECK: st.global.u32 [r1], r2; +;CHECK: st.global.u32 [r{{[0-9]+}}], r{{[0-9]+}}; ;CHECK-NEXT: ret; store i32 %x, i32* %p ret void @@ -79,7 +79,7 @@ entry: define ptx_device void @t1_u64(i64* %p, i64 %x) { entry: -;CHECK: st.global.u64 [r1], rd1; +;CHECK: st.global.u64 [r{{[0-9]+}}], rd{{[0-9]+}}; ;CHECK-NEXT: ret; store i64 %x, i64* %p ret void @@ -87,7 +87,7 @@ entry: define ptx_device void @t1_f32(float* %p, float %x) { entry: -;CHECK: st.global.f32 [r1], r2; +;CHECK: st.global.f32 [r{{[0-9]+}}], r{{[0-9]+}}; ;CHECK-NEXT: ret; store float %x, float* %p ret void @@ -95,7 +95,7 @@ entry: define ptx_device void @t1_f64(double* %p, double %x) { entry: -;CHECK: st.global.f64 [r1], rd1; +;CHECK: st.global.f64 
[r{{[0-9]+}}], rd{{[0-9]+}}; ;CHECK-NEXT: ret; store double %x, double* %p ret void @@ -103,7 +103,7 @@ entry: define ptx_device void @t2_u16(i16* %p, i16 %x) { entry: -;CHECK: st.global.u16 [r1+2], rh1; +;CHECK: st.global.u16 [r{{[0-9]+}}+2], rh{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr i16* %p, i32 1 store i16 %x, i16* %i @@ -112,7 +112,7 @@ entry: define ptx_device void @t2_u32(i32* %p, i32 %x) { entry: -;CHECK: st.global.u32 [r1+4], r2; +;CHECK: st.global.u32 [r{{[0-9]+}}+4], r{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr i32* %p, i32 1 store i32 %x, i32* %i @@ -121,7 +121,7 @@ entry: define ptx_device void @t2_u64(i64* %p, i64 %x) { entry: -;CHECK: st.global.u64 [r1+8], rd1; +;CHECK: st.global.u64 [r{{[0-9]+}}+8], rd{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr i64* %p, i32 1 store i64 %x, i64* %i @@ -130,7 +130,7 @@ entry: define ptx_device void @t2_f32(float* %p, float %x) { entry: -;CHECK: st.global.f32 [r1+4], r2; +;CHECK: st.global.f32 [r{{[0-9]+}}+4], r{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr float* %p, i32 1 store float %x, float* %i @@ -139,7 +139,7 @@ entry: define ptx_device void @t2_f64(double* %p, double %x) { entry: -;CHECK: st.global.f64 [r1+8], rd1; +;CHECK: st.global.f64 [r{{[0-9]+}}+8], rd{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr double* %p, i32 1 store double %x, double* %i @@ -148,9 +148,9 @@ entry: define ptx_device void @t3_u16(i16* %p, i32 %q, i16 %x) { entry: -;CHECK: shl.b32 r0, r2, 1; -;CHECK-NEXT: add.u32 r0, r1, r0; -;CHECK-NEXT: st.global.u16 [r0], rh1; +;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 1; +;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]]; +;CHECK-NEXT: st.global.u16 [r[[R0]]], rh{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr i16* %p, i32 %q store i16 %x, i16* %i @@ -159,9 +159,9 @@ entry: define ptx_device void @t3_u32(i32* %p, i32 %q, i32 %x) { entry: -;CHECK: shl.b32 r0, r2, 2; -;CHECK-NEXT: add.u32 r0, r1, r0; -;CHECK-NEXT: st.global.u32 [r0], r3; +;CHECK: shl.b32 r[[R0:[0-9]+]], 
r{{[0-9]+}}, 2; +;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]]; +;CHECK-NEXT: st.global.u32 [r[[R0]]], r{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr i32* %p, i32 %q store i32 %x, i32* %i @@ -170,9 +170,9 @@ entry: define ptx_device void @t3_u64(i64* %p, i32 %q, i64 %x) { entry: -;CHECK: shl.b32 r0, r2, 3; -;CHECK-NEXT: add.u32 r0, r1, r0; -;CHECK-NEXT: st.global.u64 [r0], rd1; +;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 3; +;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]]; +;CHECK-NEXT: st.global.u64 [r[[R0]]], rd{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr i64* %p, i32 %q store i64 %x, i64* %i @@ -181,9 +181,9 @@ entry: define ptx_device void @t3_f32(float* %p, i32 %q, float %x) { entry: -;CHECK: shl.b32 r0, r2, 2; -;CHECK-NEXT: add.u32 r0, r1, r0; -;CHECK-NEXT: st.global.f32 [r0], r3; +;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 2; +;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]]; +;CHECK-NEXT: st.global.f32 [r[[R0]]], r{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr float* %p, i32 %q store float %x, float* %i @@ -192,9 +192,9 @@ entry: define ptx_device void @t3_f64(double* %p, i32 %q, double %x) { entry: -;CHECK: shl.b32 r0, r2, 3; -;CHECK-NEXT: add.u32 r0, r1, r0; -;CHECK-NEXT: st.global.f64 [r0], rd1; +;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 3; +;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]]; +;CHECK-NEXT: st.global.f64 [r[[R0]]], rd{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr double* %p, i32 %q store double %x, double* %i @@ -203,8 +203,8 @@ entry: define ptx_device void @t4_global_u16(i16 %x) { entry: -;CHECK: mov.u32 r0, array_i16; -;CHECK-NEXT: st.global.u16 [r0], rh1; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16; +;CHECK-NEXT: st.global.u16 [r[[R0]]], rh{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x i16]* @array_i16, i16 0, i16 0 store i16 %x, i16* %i @@ -213,8 +213,8 @@ entry: define ptx_device void @t4_global_u32(i32 %x) { entry: -;CHECK: mov.u32 r0, array_i32; -;CHECK-NEXT: st.global.u32 [r0], r1; +;CHECK: 
mov.u32 r[[R0:[0-9]+]], array_i32; +;CHECK-NEXT: st.global.u32 [r[[R0]]], r{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0 store i32 %x, i32* %i @@ -223,8 +223,8 @@ entry: define ptx_device void @t4_global_u64(i64 %x) { entry: -;CHECK: mov.u32 r0, array_i64; -;CHECK-NEXT: st.global.u64 [r0], rd1; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_i64; +;CHECK-NEXT: st.global.u64 [r[[R0]]], rd{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0 store i64 %x, i64* %i @@ -233,8 +233,8 @@ entry: define ptx_device void @t4_global_f32(float %x) { entry: -;CHECK: mov.u32 r0, array_float; -;CHECK-NEXT: st.global.f32 [r0], r1; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_float; +;CHECK-NEXT: st.global.f32 [r[[R0]]], r{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x float]* @array_float, i32 0, i32 0 store float %x, float* %i @@ -243,8 +243,8 @@ entry: define ptx_device void @t4_global_f64(double %x) { entry: -;CHECK: mov.u32 r0, array_double; -;CHECK-NEXT: st.global.f64 [r0], rd1; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_double; +;CHECK-NEXT: st.global.f64 [r[[R0]]], rd{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x double]* @array_double, i32 0, i32 0 store double %x, double* %i @@ -253,8 +253,8 @@ entry: define ptx_device void @t4_local_u16(i16 %x) { entry: -;CHECK: mov.u32 r0, array_local_i16; -;CHECK-NEXT: st.local.u16 [r0], rh1; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i16; +;CHECK-NEXT: st.local.u16 [r[[R0]]], rh{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x i16] addrspace(2)* @array_local_i16, i32 0, i32 0 store i16 %x, i16 addrspace(2)* %i @@ -263,8 +263,8 @@ entry: define ptx_device void @t4_local_u32(i32 %x) { entry: -;CHECK: mov.u32 r0, array_local_i32; -;CHECK-NEXT: st.local.u32 [r0], r1; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i32; +;CHECK-NEXT: st.local.u32 [r[[R0]]], r{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x i32] addrspace(2)* @array_local_i32, i32 0, i32 0 store 
i32 %x, i32 addrspace(2)* %i @@ -273,8 +273,8 @@ entry: define ptx_device void @t4_local_u64(i64 %x) { entry: -;CHECK: mov.u32 r0, array_local_i64; -;CHECK-NEXT: st.local.u64 [r0], rd1; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i64; +;CHECK-NEXT: st.local.u64 [r[[R0]]], rd{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x i64] addrspace(2)* @array_local_i64, i32 0, i32 0 store i64 %x, i64 addrspace(2)* %i @@ -283,8 +283,8 @@ entry: define ptx_device void @t4_local_f32(float %x) { entry: -;CHECK: mov.u32 r0, array_local_float; -;CHECK-NEXT: st.local.f32 [r0], r1; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_float; +;CHECK-NEXT: st.local.f32 [r[[R0]]], r{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x float] addrspace(2)* @array_local_float, i32 0, i32 0 store float %x, float addrspace(2)* %i @@ -293,8 +293,8 @@ entry: define ptx_device void @t4_local_f64(double %x) { entry: -;CHECK: mov.u32 r0, array_local_double; -;CHECK-NEXT: st.local.f64 [r0], rd1; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_double; +;CHECK-NEXT: st.local.f64 [r[[R0]]], rd{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x double] addrspace(2)* @array_local_double, i32 0, i32 0 store double %x, double addrspace(2)* %i @@ -303,8 +303,8 @@ entry: define ptx_device void @t4_shared_u16(i16 %x) { entry: -;CHECK: mov.u32 r0, array_shared_i16; -;CHECK-NEXT: st.shared.u16 [r0], rh1; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i16; +;CHECK-NEXT: st.shared.u16 [r[[R0]]], rh{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0 store i16 %x, i16 addrspace(4)* %i @@ -313,8 +313,8 @@ entry: define ptx_device void @t4_shared_u32(i32 %x) { entry: -;CHECK: mov.u32 r0, array_shared_i32; -;CHECK-NEXT: st.shared.u32 [r0], r1; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i32; +;CHECK-NEXT: st.shared.u32 [r[[R0]]], r{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0 store i32 %x, i32 
addrspace(4)* %i @@ -323,8 +323,8 @@ entry: define ptx_device void @t4_shared_u64(i64 %x) { entry: -;CHECK: mov.u32 r0, array_shared_i64; -;CHECK-NEXT: st.shared.u64 [r0], rd1; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i64; +;CHECK-NEXT: st.shared.u64 [r[[R0]]], rd{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0 store i64 %x, i64 addrspace(4)* %i @@ -333,8 +333,8 @@ entry: define ptx_device void @t4_shared_f32(float %x) { entry: -;CHECK: mov.u32 r0, array_shared_float; -;CHECK-NEXT: st.shared.f32 [r0], r1; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_float; +;CHECK-NEXT: st.shared.f32 [r[[R0]]], r{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0 store float %x, float addrspace(4)* %i @@ -343,8 +343,8 @@ entry: define ptx_device void @t4_shared_f64(double %x) { entry: -;CHECK: mov.u32 r0, array_shared_double; -;CHECK-NEXT: st.shared.f64 [r0], rd1; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_double; +;CHECK-NEXT: st.shared.f64 [r[[R0]]], rd{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0 store double %x, double addrspace(4)* %i @@ -353,8 +353,8 @@ entry: define ptx_device void @t5_u16(i16 %x) { entry: -;CHECK: mov.u32 r0, array_i16; -;CHECK-NEXT: st.global.u16 [r0+2], rh1; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16; +;CHECK-NEXT: st.global.u16 [r[[R0]]+2], rh{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1 store i16 %x, i16* %i @@ -363,8 +363,8 @@ entry: define ptx_device void @t5_u32(i32 %x) { entry: -;CHECK: mov.u32 r0, array_i32; -;CHECK-NEXT: st.global.u32 [r0+4], r1; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_i32; +;CHECK-NEXT: st.global.u32 [r[[R0]]+4], r{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1 store i32 %x, i32* %i @@ -373,8 +373,8 @@ entry: define ptx_device void @t5_u64(i64 %x) { entry: -;CHECK: 
mov.u32 r0, array_i64; -;CHECK-NEXT: st.global.u64 [r0+8], rd1; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_i64; +;CHECK-NEXT: st.global.u64 [r[[R0]]+8], rd{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 1 store i64 %x, i64* %i @@ -383,8 +383,8 @@ entry: define ptx_device void @t5_f32(float %x) { entry: -;CHECK: mov.u32 r0, array_float; -;CHECK-NEXT: st.global.f32 [r0+4], r1; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_float; +;CHECK-NEXT: st.global.f32 [r[[R0]]+4], r{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x float]* @array_float, i32 0, i32 1 store float %x, float* %i @@ -393,8 +393,8 @@ entry: define ptx_device void @t5_f64(double %x) { entry: -;CHECK: mov.u32 r0, array_double; -;CHECK-NEXT: st.global.f64 [r0+8], rd1; +;CHECK: mov.u32 r[[R0:[0-9]+]], array_double; +;CHECK-NEXT: st.global.f64 [r[[R0]]+8], rd{{[0-9]+}}; ;CHECK-NEXT: ret; %i = getelementptr [10 x double]* @array_double, i32 0, i32 1 store double %x, double* %i diff --git a/test/CodeGen/PTX/sub.ll b/test/CodeGen/PTX/sub.ll index 4d55280..acef396 100644 --- a/test/CodeGen/PTX/sub.ll +++ b/test/CodeGen/PTX/sub.ll @@ -1,70 +1,70 @@ ; RUN: llc < %s -march=ptx32 | FileCheck %s define ptx_device i16 @t1_u16(i16 %x, i16 %y) { -; CHECK: sub.u16 rh0, rh1, rh2; +; CHECK: sub.u16 rh{{[0-9]+}}, rh{{[0-9]+}}, rh{{[0-9]+}}; ; CHECK-NEXT: ret; %z = sub i16 %x, %y ret i16 %z } define ptx_device i32 @t1_u32(i32 %x, i32 %y) { -; CHECK: sub.u32 r0, r1, r2; +; CHECK: sub.u32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}; ; CHECK-NEXT: ret; %z = sub i32 %x, %y ret i32 %z } define ptx_device i64 @t1_u64(i64 %x, i64 %y) { -; CHECK: sub.u64 rd0, rd1, rd2; +; CHECK: sub.u64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}; ; CHECK-NEXT: ret; %z = sub i64 %x, %y ret i64 %z } define ptx_device float @t1_f32(float %x, float %y) { -; CHECK: sub.rn.f32 r0, r1, r2 +; CHECK: sub.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}} ; CHECK-NEXT: ret; %z = fsub float %x, %y ret float %z } define ptx_device double 
@t1_f64(double %x, double %y) { -; CHECK: sub.rn.f64 rd0, rd1, rd2 +; CHECK: sub.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}} ; CHECK-NEXT: ret; %z = fsub double %x, %y ret double %z } define ptx_device i16 @t2_u16(i16 %x) { -; CHECK: add.u16 rh0, rh1, -1; +; CHECK: add.u16 rh{{[0-9]+}}, rh{{[0-9]+}}, -1; ; CHECK-NEXT: ret; %z = sub i16 %x, 1 ret i16 %z } define ptx_device i32 @t2_u32(i32 %x) { -; CHECK: add.u32 r0, r1, -1; +; CHECK: add.u32 r{{[0-9]+}}, r{{[0-9]+}}, -1; ; CHECK-NEXT: ret; %z = sub i32 %x, 1 ret i32 %z } define ptx_device i64 @t2_u64(i64 %x) { -; CHECK: add.u64 rd0, rd1, -1; +; CHECK: add.u64 rd{{[0-9]+}}, rd{{[0-9]+}}, -1; ; CHECK-NEXT: ret; %z = sub i64 %x, 1 ret i64 %z } define ptx_device float @t2_f32(float %x) { -; CHECK: add.rn.f32 r0, r1, 0FBF800000; +; CHECK: add.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, 0FBF800000; ; CHECK-NEXT: ret; %z = fsub float %x, 1.0 ret float %z } define ptx_device double @t2_f64(double %x) { -; CHECK: add.rn.f64 rd0, rd1, 0DBFF0000000000000; +; CHECK: add.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, 0DBFF0000000000000; ; CHECK-NEXT: ret; %z = fsub double %x, 1.0 ret double %z diff --git a/test/CodeGen/PowerPC/ppc32-vaarg.ll b/test/CodeGen/PowerPC/ppc32-vaarg.ll new file mode 100644 index 0000000..6042991 --- /dev/null +++ b/test/CodeGen/PowerPC/ppc32-vaarg.ll @@ -0,0 +1,167 @@ +; RUN: llc -O0 < %s | FileCheck %s +;ModuleID = 'test.c' +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32" +target triple = "powerpc-unknown-freebsd9.0" + +%struct.__va_list_tag = type { i8, i8, i16, i8*, i8* } + +@var1 = common global i64 0, align 8 +@var2 = common global double 0.0, align 8 +@var3 = common global i32 0, align 4 + +define void @ppcvaargtest(%struct.__va_list_tag* %ap) nounwind { + entry: + %x = va_arg %struct.__va_list_tag* %ap, i64; Get from r5,r6 +; CHECK: lbz 4, 0(3) +; CHECK-NEXT: lwz 5, 4(3) +; CHECK-NEXT: rlwinm 6, 4, 0, 31, 31 +; CHECK-NEXT: cmplwi 0, 6, 0 +; 
CHECK-NEXT: addi 6, 4, 1 +; CHECK-NEXT: stw 3, -4(1) +; CHECK-NEXT: stw 6, -8(1) +; CHECK-NEXT: stw 4, -12(1) +; CHECK-NEXT: stw 5, -16(1) +; CHECK-NEXT: bne 0, .LBB0_2 +; CHECK-NEXT: # BB#1: # %entry +; CHECK-NEXT: lwz 3, -12(1) +; CHECK-NEXT: stw 3, -8(1) +; CHECK-NEXT: .LBB0_2: # %entry +; CHECK-NEXT: lwz 3, -8(1) +; CHECK-NEXT: lwz 4, -4(1) +; CHECK-NEXT: lwz 5, 8(4) +; CHECK-NEXT: slwi 6, 3, 2 +; CHECK-NEXT: addi 7, 3, 2 +; CHECK-NEXT: cmpwi 0, 3, 8 +; CHECK-NEXT: lwz 3, -16(1) +; CHECK-NEXT: addi 8, 3, 4 +; CHECK-NEXT: add 5, 5, 6 +; CHECK-NEXT: mfcr 0 # cr0 +; CHECK-NEXT: stw 0, -20(1) +; CHECK-NEXT: stw 5, -24(1) +; CHECK-NEXT: stw 3, -28(1) +; CHECK-NEXT: stw 7, -32(1) +; CHECK-NEXT: stw 8, -36(1) +; CHECK-NEXT: blt 0, .LBB0_4 +; CHECK-NEXT: # BB#3: # %entry +; CHECK-NEXT: lwz 3, -36(1) +; CHECK-NEXT: stw 3, -28(1) +; CHECK-NEXT: .LBB0_4: # %entry +; CHECK-NEXT: lwz 3, -28(1) +; CHECK-NEXT: lwz 4, -32(1) +; CHECK-NEXT: lwz 5, -4(1) +; CHECK-NEXT: stb 4, 0(5) +; CHECK-NEXT: lwz 4, -24(1) +; CHECK-NEXT: lwz 0, -20(1) +; CHECK-NEXT: mtcrf 128, 0 +; CHECK-NEXT: stw 3, -40(1) +; CHECK-NEXT: stw 4, -44(1) +; CHECK-NEXT: blt 0, .LBB0_6 +; CHECK-NEXT: # BB#5: # %entry +; CHECK-NEXT: lwz 3, -16(1) +; CHECK-NEXT: stw 3, -44(1) +; CHECK-NEXT: .LBB0_6: # %entry +; CHECK-NEXT: lwz 3, -44(1) +; CHECK-NEXT: lwz 4, -40(1) +; CHECK-NEXT: lwz 5, -4(1) +; CHECK-NEXT: stw 4, 4(5) + store i64 %x, i64* @var1, align 8 +; CHECK-NEXT: lis 4, var1@ha +; CHECK-NEXT: lwz 6, 4(3) +; CHECK-NEXT: lwz 3, 0(3) +; CHECK-NEXT: la 7, var1@l(4) +; CHECK-NEXT: stw 3, var1@l(4) +; CHECK-NEXT: stw 6, 4(7) + %y = va_arg %struct.__va_list_tag* %ap, double; From f1 +; CHECK-NEXT: lbz 3, 1(5) +; CHECK-NEXT: lwz 4, 4(5) +; CHECK-NEXT: lwz 6, 8(5) +; CHECK-NEXT: slwi 7, 3, 3 +; CHECK-NEXT: add 6, 6, 7 +; CHECK-NEXT: addi 7, 3, 1 +; CHECK-NEXT: cmpwi 0, 3, 8 +; CHECK-NEXT: addi 3, 4, 8 +; CHECK-NEXT: addi 6, 6, 32 +; CHECK-NEXT: mr 8, 4 +; CHECK-NEXT: mfcr 0 # cr0 +; CHECK-NEXT: stw 0, -48(1) +; 
CHECK-NEXT: stw 4, -52(1) +; CHECK-NEXT: stw 6, -56(1) +; CHECK-NEXT: stw 7, -60(1) +; CHECK-NEXT: stw 3, -64(1) +; CHECK-NEXT: stw 8, -68(1) +; CHECK-NEXT: blt 0, .LBB0_8 +; CHECK-NEXT: # BB#7: # %entry +; CHECK-NEXT: lwz 3, -64(1) +; CHECK-NEXT: stw 3, -68(1) +; CHECK-NEXT: .LBB0_8: # %entry +; CHECK-NEXT: lwz 3, -68(1) +; CHECK-NEXT: lwz 4, -60(1) +; CHECK-NEXT: lwz 5, -4(1) +; CHECK-NEXT: stb 4, 1(5) +; CHECK-NEXT: lwz 4, -56(1) +; CHECK-NEXT: lwz 0, -48(1) +; CHECK-NEXT: mtcrf 128, 0 +; CHECK-NEXT: stw 4, -72(1) +; CHECK-NEXT: stw 3, -76(1) +; CHECK-NEXT: blt 0, .LBB0_10 +; CHECK-NEXT: # BB#9: # %entry +; CHECK-NEXT: lwz 3, -52(1) +; CHECK-NEXT: stw 3, -72(1) +; CHECK-NEXT: .LBB0_10: # %entry +; CHECK-NEXT: lwz 3, -72(1) +; CHECK-NEXT: lwz 4, -76(1) +; CHECK-NEXT: lwz 5, -4(1) +; CHECK-NEXT: stw 4, 4(5) +; CHECK-NEXT: lfd 0, 0(3) + store double %y, double* @var2, align 8 +; CHECK-NEXT: lis 3, var2@ha +; CHECK-NEXT: stfd 0, var2@l(3) + %z = va_arg %struct.__va_list_tag* %ap, i32; From r7 +; CHECK-NEXT: lbz 3, 0(5) +; CHECK-NEXT: lwz 4, 4(5) +; CHECK-NEXT: lwz 6, 8(5) +; CHECK-NEXT: slwi 7, 3, 2 +; CHECK-NEXT: addi 8, 3, 1 +; CHECK-NEXT: cmpwi 0, 3, 8 +; CHECK-NEXT: addi 3, 4, 4 +; CHECK-NEXT: add 6, 6, 7 +; CHECK-NEXT: mr 7, 4 +; CHECK-NEXT: stw 6, -80(1) +; CHECK-NEXT: stw 8, -84(1) +; CHECK-NEXT: stw 3, -88(1) +; CHECK-NEXT: stw 4, -92(1) +; CHECK-NEXT: stw 7, -96(1) +; CHECK-NEXT: mfcr 0 # cr0 +; CHECK-NEXT: stw 0, -100(1) +; CHECK-NEXT: blt 0, .LBB0_12 +; CHECK-NEXT: # BB#11: # %entry +; CHECK-NEXT: lwz 3, -88(1) +; CHECK-NEXT: stw 3, -96(1) +; CHECK-NEXT: .LBB0_12: # %entry +; CHECK-NEXT: lwz 3, -96(1) +; CHECK-NEXT: lwz 4, -84(1) +; CHECK-NEXT: lwz 5, -4(1) +; CHECK-NEXT: stb 4, 0(5) +; CHECK-NEXT: lwz 4, -80(1) +; CHECK-NEXT: lwz 0, -100(1) +; CHECK-NEXT: mtcrf 128, 0 +; CHECK-NEXT: stw 4, -104(1) +; CHECK-NEXT: stw 3, -108(1) +; CHECK-NEXT: blt 0, .LBB0_14 +; CHECK-NEXT: # BB#13: # %entry +; CHECK-NEXT: lwz 3, -92(1) +; CHECK-NEXT: stw 3, -104(1) +; 
CHECK-NEXT: .LBB0_14: # %entry +; CHECK-NEXT: lwz 3, -104(1) +; CHECK-NEXT: lwz 4, -108(1) +; CHECK-NEXT: lwz 5, -4(1) +; CHECK-NEXT: stw 4, 4(5) +; CHECK-NEXT: lwz 3, 0(3) + store i32 %z, i32* @var3, align 4 +; CHECK-NEXT: lis 4, var3@ha +; CHECK-NEXT: stw 3, var3@l(4) + ret void +; CHECK-NEXT: stw 5, -112(1) +; CHECK-NEXT: blr +} + diff --git a/test/CodeGen/Thumb/inlineasm-thumb.ll b/test/CodeGen/Thumb/inlineasm-thumb.ll new file mode 100644 index 0000000..f2683c8 --- /dev/null +++ b/test/CodeGen/Thumb/inlineasm-thumb.ll @@ -0,0 +1,7 @@ +; RUN: llc < %s -march=thumb | FileCheck %s +define i32 @t1(i32 %x, i32 %y) nounwind { +entry: + ; CHECK: mov r0, r12 + %0 = tail call i32 asm "mov $0, $1", "=l,h"(i32 %y) nounwind + ret i32 %0 +} diff --git a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll index 9aee910..18c2e0b 100644 --- a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll +++ b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll @@ -13,7 +13,7 @@ define weak arm_aapcs_vfpcc i32 @_ZNKSs7compareERKSs(%"struct.std::basic_string< ; CHECK: _ZNKSs7compareERKSs: ; CHECK: it eq ; CHECK-NEXT: subeq{{(.w)?}} r0, r{{[0-9]+}}, r{{[0-9]+}} -; CHECK-NEXT: ldmia.w sp!, +; CHECK-NEXT: pop.w entry: %0 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this) ; <i32> [#uses=3] %1 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) ; <i32> [#uses=3] diff --git a/test/CodeGen/Thumb2/2011-06-07-TwoAddrEarlyClobber.ll b/test/CodeGen/Thumb2/2011-06-07-TwoAddrEarlyClobber.ll index 9e6d78e..b1ce3bb 100644 --- a/test/CodeGen/Thumb2/2011-06-07-TwoAddrEarlyClobber.ll +++ b/test/CodeGen/Thumb2/2011-06-07-TwoAddrEarlyClobber.ll @@ -1,13 +1,11 @@ -; RUN: llc < %s | FileCheck %s -target datalayout = 
"e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" -target triple = "thumbv7-apple-darwin10" +; RUN: llc -mtriple=thumbv7-apple-darwin10 < %s | FileCheck %s %struct.op = type { %struct.op*, %struct.op*, %struct.op* ()*, i32, i16, i16, i8, i8 } ; CHECK: Perl_ck_sort -; CHECK: ldr -; CHECK: mov [[REGISTER:(r[0-9]+)|(lr)]] -; CHECK: str {{(r[0-9])|(lr)}}, {{\[}}[[REGISTER]]{{\]}}, #24 +; CHECK: ldreq +; CHECK: moveq [[REGISTER:(r[0-9]+)|(lr)]] +; CHECK: streq {{(r[0-9])|(lr)}}, {{\[}}[[REGISTER]]{{\]}}, #24 define void @Perl_ck_sort() nounwind optsize { entry: diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt1.ll b/test/CodeGen/Thumb2/thumb2-ifcvt1.ll index d842d4d..1533040 100644 --- a/test/CodeGen/Thumb2/thumb2-ifcvt1.ll +++ b/test/CodeGen/Thumb2/thumb2-ifcvt1.ll @@ -70,8 +70,9 @@ entry: define void @t3(i32 %a, i32 %b) nounwind { entry: ; CHECK: t3: -; CHECK: it lt -; CHECK: poplt {r7, pc} +; CHECK: itt ge +; CHECK: movge r0, r1 +; CHECK: blge _foo %tmp1 = icmp sgt i32 %a, 10 ; <i1> [#uses=1] br i1 %tmp1, label %cond_true, label %UnifiedReturnBlock diff --git a/test/CodeGen/Thumb2/thumb2-sbc.ll b/test/CodeGen/Thumb2/thumb2-sbc.ll index 53f45ea..492e5f0 100644 --- a/test/CodeGen/Thumb2/thumb2-sbc.ll +++ b/test/CodeGen/Thumb2/thumb2-sbc.ll @@ -56,7 +56,6 @@ define i64 @f6(i64 %a) { ; ; CHECK: livecarry: ; CHECK: adds -; CHECK: adcs ; CHECK: adc define i64 @livecarry(i64 %carry, i32 %digit) nounwind { %ch = lshr i64 %carry, 32 diff --git a/test/CodeGen/X86/adde-carry.ll b/test/CodeGen/X86/adde-carry.ll index 98c4f99..e86adf4 100644 --- a/test/CodeGen/X86/adde-carry.ll +++ b/test/CodeGen/X86/adde-carry.ll @@ -1,5 +1,4 @@ ; RUN: llc -march=x86-64 < %s | FileCheck %s -check-prefix=CHECK-64 -; RUN: llc -march=x86 < %s | FileCheck %s -check-prefix=CHECK-32 define void @a(i64* nocapture %s, i64* nocapture %t, i64 %a, i64 %b, i64 %c) nounwind { entry: @@ -16,11 +15,6 @@ entry: store i64 %8, i64* %t, align 8 ret void 
-; CHECK-32: addl -; CHECK-32: adcl -; CHECK-32: adcl $0 -; CHECK-32: adcl $0 - ; CHECK-64: addq ; CHECK-64: adcq $0 } diff --git a/test/CodeGen/X86/atomic-or.ll b/test/CodeGen/X86/atomic-or.ll new file mode 100644 index 0000000..cd62290 --- /dev/null +++ b/test/CodeGen/X86/atomic-or.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s + +; rdar://9692967 + +define void @do_the_sync(i64* %p, i32 %b) nounwind { +entry: + %p.addr = alloca i64*, align 8 + store i64* %p, i64** %p.addr, align 8 + %tmp = load i64** %p.addr, align 8 + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true) +; CHECK: lock +; CHECK-NEXT: orq $2147483648 + %0 = call i64 @llvm.atomic.load.or.i64.p0i64(i64* %tmp, i64 2147483648) + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true) + ret void +} +declare i64 @llvm.atomic.load.or.i64.p0i64(i64* nocapture, i64) nounwind +declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind diff --git a/test/CodeGen/X86/dbg-i128-const.ll b/test/CodeGen/X86/dbg-i128-const.ll new file mode 100644 index 0000000..fb83fca --- /dev/null +++ b/test/CodeGen/X86/dbg-i128-const.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s | FileCheck %s + +; CHECK: DW_AT_const_value +; CHECK-NEXT: 42 + +define i128 @__foo(i128 %a, i128 %b) nounwind { +entry: + tail call void @llvm.dbg.value(metadata !0, i64 0, metadata !1), !dbg !11 + %add = add i128 %a, %b, !dbg !11 + ret i128 %add, !dbg !11 +} + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!0 = metadata !{i128 42 } +!1 = metadata !{i32 524544, metadata !2, metadata !"MAX", metadata !4, i32 29, metadata !8} ; [ DW_TAG_auto_variable ] +!2 = metadata !{i32 524299, metadata !3, i32 26, i32 0} ; [ DW_TAG_lexical_block ] +!3 = metadata !{i32 524334, i32 0, metadata !4, metadata !"__foo", metadata !"__foo", metadata !"__foo", metadata !4, i32 26, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ] +!4 = metadata !{i32 
524329, metadata !"foo.c", metadata !"/tmp", metadata !5} ; [ DW_TAG_file_type ] +!5 = metadata !{i32 524305, i32 0, i32 1, metadata !"foo.c", metadata !"/tmp", metadata !"clang", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!6 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{metadata !8, metadata !8, metadata !8} +!8 = metadata !{i32 524310, metadata !4, metadata !"ti_int", metadata !9, i32 78, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ] +!9 = metadata !{i32 524329, metadata !"myint.h", metadata !"/tmp", metadata !5} ; [ DW_TAG_file_type ] +!10 = metadata !{i32 524324, metadata !4, metadata !"", metadata !4, i32 0, i64 128, i64 128, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!11 = metadata !{i32 29, i32 0, metadata !2, null} diff --git a/test/CodeGen/X86/fp-stack-O0.ll b/test/CodeGen/X86/fp-stack-O0.ll new file mode 100644 index 0000000..b9cb5d7 --- /dev/null +++ b/test/CodeGen/X86/fp-stack-O0.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -O0 | FileCheck %s +target triple = "x86_64-apple-macosx" + +declare x86_fp80 @x1(i32) nounwind +declare i32 @x2(x86_fp80, x86_fp80) nounwind + +; Keep track of the return value. +; CHECK: test1 +; CHECK: x1 +; Pass arguments on the stack. +; CHECK-NEXT: movq %rsp, [[RCX:%r..]] +; Copy constant-pool value. +; CHECK-NEXT: fldt LCPI +; CHECK-NEXT: fstpt 16([[RCX]]) +; Copy x1 return value. 
+; CHECK-NEXT: fstpt ([[RCX]]) +; CHECK-NEXT: x2 +define i32 @test1() nounwind uwtable ssp { +entry: + %call = call x86_fp80 (...)* bitcast (x86_fp80 (i32)* @x1 to x86_fp80 (...)*)(i32 -1) + %call1 = call i32 @x2(x86_fp80 %call, x86_fp80 0xK401EFFFFFFFF00000000) + ret i32 %call1 +} + diff --git a/test/CodeGen/X86/fp-stack-ret.ll b/test/CodeGen/X86/fp-stack-ret.ll index c83a0cb..1307f70 100644 --- a/test/CodeGen/X86/fp-stack-ret.ll +++ b/test/CodeGen/X86/fp-stack-ret.ll @@ -1,25 +1,40 @@ -; RUN: llc < %s -mtriple=i686-apple-darwin8 -mcpu=yonah -march=x86 > %t -; RUN: grep fldl %t | count 1 -; RUN: not grep xmm %t -; RUN: grep {sub.*esp} %t | count 1 +; RUN: llc < %s -mtriple=i686-apple-darwin8 -mcpu=yonah -march=x86 | FileCheck %s ; These testcases shouldn't require loading into an XMM register then storing ; to memory, then reloading into an FPStack reg. +; CHECK: test1 +; CHECK: fldl +; CHECK-NEXT: ret define double @test1(double *%P) { %A = load double* %P ret double %A } -; fastcc should return a value +; fastcc should return a value +; CHECK: test2 +; CHECK-NOT: xmm +; CHECK: ret define fastcc double @test2(<2 x double> %A) { %B = extractelement <2 x double> %A, i32 0 ret double %B } +; CHECK: test3 +; CHECK: sub{{.*}}%esp +; CHECK-NOT: xmm define fastcc double @test3(<4 x float> %A) { %B = bitcast <4 x float> %A to <2 x double> %C = call fastcc double @test2(<2 x double> %B) ret double %C } - + +; Clear the stack when not using a return value. 
+; CHECK: test4 +; CHECK: call +; CHECK: fstp +; CHECK: ret +define void @test4(double *%P) { + %A = call double @test1(double *%P) + ret void +} diff --git a/test/CodeGen/X86/inline-asm-fpstack.ll b/test/CodeGen/X86/inline-asm-fpstack.ll index 6348fca..8e48bbe 100644 --- a/test/CodeGen/X86/inline-asm-fpstack.ll +++ b/test/CodeGen/X86/inline-asm-fpstack.ll @@ -26,7 +26,7 @@ define double @test2() { ; CHECK-NOT: fstp ; CHECK: ret define void @test3(x86_fp80 %X) { - call void asm sideeffect "frob ", "{st(0)},~{dirflag},~{fpsr},~{flags}"( x86_fp80 %X) + call void asm sideeffect "frob ", "{st(0)},~{st},~{dirflag},~{fpsr},~{flags}"( x86_fp80 %X) ret void } @@ -37,7 +37,7 @@ define void @test3(x86_fp80 %X) { ; CHECK-NOT: fstp ; CHECK: ret define void @test4(double %X) { - call void asm sideeffect "frob ", "{st(0)},~{dirflag},~{fpsr},~{flags}"( double %X) + call void asm sideeffect "frob ", "{st(0)},~{st},~{dirflag},~{fpsr},~{flags}"( double %X) ret void } @@ -49,7 +49,7 @@ define void @test4(double %X) { ; CHECK: ret define void @test5(double %X) { %Y = fadd double %X, 123.0 - call void asm sideeffect "frob ", "{st(0)},~{dirflag},~{fpsr},~{flags}"( double %Y) + call void asm sideeffect "frob ", "{st(0)},~{st},~{dirflag},~{fpsr},~{flags}"( double %Y) ret void } @@ -86,3 +86,246 @@ entry: ret void } +; PR4185 +; Passing a non-killed value to asm in {st}. +; Make sure it is duped before. +; asm kills st(0), so we shouldn't pop anything +; CHECK: testPR4185 +; CHECK: fld %st(0) +; CHECK: fistpl +; CHECK-NOT: fstp +; CHECK: fistpl +; CHECK-NOT: fstp +; CHECK: ret +; A valid alternative would be to remat the constant pool load before each +; inline asm. +define void @testPR4185() { +return: + call void asm sideeffect "fistpl $0", "{st},~{st}"(double 1.000000e+06) + call void asm sideeffect "fistpl $0", "{st},~{st}"(double 1.000000e+06) + ret void +} + +; Passing a non-killed value through asm in {st}. +; Make sure it is not duped before. 
+; Second asm kills st(0), so we shouldn't pop anything +; CHECK: testPR4185b +; CHECK-NOT: fld %st(0) +; CHECK: fistl +; CHECK-NOT: fstp +; CHECK: fistpl +; CHECK-NOT: fstp +; CHECK: ret +; A valid alternative would be to remat the constant pool load before each +; inline asm. +define void @testPR4185b() { +return: + call void asm sideeffect "fistl $0", "{st}"(double 1.000000e+06) + call void asm sideeffect "fistpl $0", "{st},~{st}"(double 1.000000e+06) + ret void +} + +; PR4459 +; The return value from ceil must be duped before being consumed by asm. +; CHECK: testPR4459 +; CHECK: ceil +; CHECK: fld %st(0) +; CHECK-NOT: fxch +; CHECK: fistpl +; CHECK-NOT: fxch +; CHECK: fstpt +; CHECK: test +define void @testPR4459(x86_fp80 %a) { +entry: + %0 = call x86_fp80 @ceil(x86_fp80 %a) + call void asm sideeffect "fistpl $0", "{st},~{st}"( x86_fp80 %0) + call void @test3(x86_fp80 %0 ) + ret void +} +declare x86_fp80 @ceil(x86_fp80) + +; PR4484 +; test1 leaves a value on the stack that is needed after the asm. +; CHECK: testPR4484 +; CHECK: test1 +; CHECK-NOT: fstp +; Load %a from stack after ceil +; CHECK: fldt +; CHECK-NOT: fxch +; CHECK: fistpl +; CHECK-NOT: fstp +; Set up call to test. 
+; CHECK: fstpt +; CHECK: test +define void @testPR4484(x86_fp80 %a) { +entry: + %0 = call x86_fp80 @test1() + call void asm sideeffect "fistpl $0", "{st},~{st}"(x86_fp80 %a) + call void @test3(x86_fp80 %0) + ret void +} + +; PR4485 +; CHECK: testPR4485 +define void @testPR4485(x86_fp80* %a) { +entry: + %0 = load x86_fp80* %a, align 16 + %1 = fmul x86_fp80 %0, 0xK4006B400000000000000 + %2 = fmul x86_fp80 %1, 0xK4012F424000000000000 + tail call void asm sideeffect "fistpl $0", "{st},~{st}"(x86_fp80 %2) + %3 = load x86_fp80* %a, align 16 + %4 = fmul x86_fp80 %3, 0xK4006B400000000000000 + %5 = fmul x86_fp80 %4, 0xK4012F424000000000000 + tail call void asm sideeffect "fistpl $0", "{st},~{st}"(x86_fp80 %5) + ret void +} + +; An input argument in a fixed position is implicitly popped by the asm only if +; the input argument is tied to an output register, or it is in the clobber list. +; The clobber list case is tested above. +; +; This doesn't implicitly pop the stack: +; +; void fist1(long double x, int *p) { +; asm volatile ("fistl %1" : : "t"(x), "m"(*p)); +; } +; +; CHECK: fist1 +; CHECK: fldt +; CHECK: fistl (%e +; CHECK: fstp +; CHECK: ret +define void @fist1(x86_fp80 %x, i32* %p) nounwind ssp { +entry: + tail call void asm sideeffect "fistl $1", "{st},*m,~{memory},~{dirflag},~{fpsr},~{flags}"(x86_fp80 %x, i32* %p) nounwind + ret void +} + +; Here, the input operand is tied to an output which means that it is +; implicitly popped (and then the output is implicitly pushed). 
+; +; long double fist2(long double x, int *p) { +; long double y; +; asm ("fistl %1" : "=&t"(y) : "0"(x), "m"(*p) : "memory"); +; return y; +; } +; +; CHECK: fist2 +; CHECK: fldt +; CHECK: fistl (%e +; CHECK-NOT: fstp +; CHECK: ret +define x86_fp80 @fist2(x86_fp80 %x, i32* %p) nounwind ssp { +entry: + %0 = tail call x86_fp80 asm "fistl $2", "=&{st},0,*m,~{memory},~{dirflag},~{fpsr},~{flags}"(x86_fp80 %x, i32* %p) nounwind + ret x86_fp80 %0 +} + +; An 'f' constraint is never implicitly popped: +; +; void fucomp1(long double x, long double y) { +; asm volatile ("fucomp %1" : : "t"(x), "f"(y) : "st"); +; } +; CHECK: fucomp1 +; CHECK: fldt +; CHECK: fldt +; CHECK: fucomp %st +; CHECK: fstp +; CHECK-NOT: fstp +; CHECK: ret +define void @fucomp1(x86_fp80 %x, x86_fp80 %y) nounwind ssp { +entry: + tail call void asm sideeffect "fucomp $1", "{st},f,~{st},~{dirflag},~{fpsr},~{flags}"(x86_fp80 %x, x86_fp80 %y) nounwind + ret void +} + +; The 'u' constraint is only popped implicitly when clobbered: +; +; void fucomp2(long double x, long double y) { +; asm volatile ("fucomp %1" : : "t"(x), "u"(y) : "st"); +; } +; +; void fucomp3(long double x, long double y) { +; asm volatile ("fucompp %1" : : "t"(x), "u"(y) : "st", "st(1)"); +; } +; +; CHECK: fucomp2 +; CHECK: fldt +; CHECK: fldt +; CHECK: fucomp %st(1) +; CHECK: fstp +; CHECK-NOT: fstp +; CHECK: ret +; +; CHECK: fucomp3 +; CHECK: fldt +; CHECK: fldt +; CHECK: fucompp %st(1) +; CHECK-NOT: fstp +; CHECK: ret +define void @fucomp2(x86_fp80 %x, x86_fp80 %y) nounwind ssp { +entry: + tail call void asm sideeffect "fucomp $1", "{st},{st(1)},~{st},~{dirflag},~{fpsr},~{flags}"(x86_fp80 %x, x86_fp80 %y) nounwind + ret void +} +define void @fucomp3(x86_fp80 %x, x86_fp80 %y) nounwind ssp { +entry: + tail call void asm sideeffect "fucompp $1", "{st},{st(1)},~{st},~{st(1)},~{dirflag},~{fpsr},~{flags}"(x86_fp80 %x, x86_fp80 %y) nounwind + ret void +} + +; One input, two outputs, one dead output. 
+%complex = type { float, float } +; CHECK: sincos1 +; CHECK: flds +; CHECK-NOT: fxch +; CHECK: sincos +; CHECK-NOT: fstp +; CHECK: fstp %st(1) +; CHECK-NOT: fstp +; CHECK: ret +define float @sincos1(float %x) nounwind ssp { +entry: + %0 = tail call %complex asm "sincos", "={st},={st(1)},0,~{dirflag},~{fpsr},~{flags}"(float %x) nounwind + %asmresult = extractvalue %complex %0, 0 + ret float %asmresult +} + +; Same thing, swapped output operands. +; CHECK: sincos2 +; CHECK: flds +; CHECK-NOT: fxch +; CHECK: sincos +; CHECK-NOT: fstp +; CHECK: fstp %st(1) +; CHECK-NOT: fstp +; CHECK: ret +define float @sincos2(float %x) nounwind ssp { +entry: + %0 = tail call %complex asm "sincos", "={st(1)},={st},1,~{dirflag},~{fpsr},~{flags}"(float %x) nounwind + %asmresult = extractvalue %complex %0, 1 + ret float %asmresult +} + +; Clobber st(0) after it was live-out/dead from the previous asm. +; CHECK: sincos3 +; Load x, make a copy for the second asm. +; CHECK: flds +; CHECK: fld %st(0) +; CHECK: sincos +; Discard dead result in st(0), bring x to the top. +; CHECK: fstp %st(0) +; CHECK: fxch +; x is now in st(0) for the second asm +; CHECK: sincos +; Discard both results. +; CHECK: fstp +; CHECK: fstp +; CHECK: ret +define float @sincos3(float %x) nounwind ssp { +entry: + %0 = tail call %complex asm sideeffect "sincos", "={st(1)},={st},1,~{dirflag},~{fpsr},~{flags}"(float %x) nounwind + %1 = tail call %complex asm sideeffect "sincos", "={st(1)},={st},1,~{dirflag},~{fpsr},~{flags}"(float %x) nounwind + %asmresult = extractvalue %complex %0, 0 + ret float %asmresult +} diff --git a/test/CodeGen/X86/inline-asm-fpstack2.ll b/test/CodeGen/X86/inline-asm-fpstack2.ll deleted file mode 100644 index 78037e0..0000000 --- a/test/CodeGen/X86/inline-asm-fpstack2.ll +++ /dev/null @@ -1,21 +0,0 @@ -; RUN: llc < %s -march=x86 | FileCheck %s -; PR4185 - -; Passing a non-killed value to asm in {st}. -; Make sure it is duped before. 
-; asm kills st(0), so we shouldn't pop anything -; CHECK: fld %st(0) -; CHECK: fistpl -; CHECK-NOT: fstp -; CHECK: fistpl -; CHECK-NOT: fstp -; CHECK: ret -define void @test() { -return: - call void asm sideeffect "fistpl $0", "{st}"(double 1.000000e+06) - call void asm sideeffect "fistpl $0", "{st}"(double 1.000000e+06) - ret void -} - -; A valid alternative would be to remat the constant pool load before each -; inline asm. diff --git a/test/CodeGen/X86/inline-asm-fpstack3.ll b/test/CodeGen/X86/inline-asm-fpstack3.ll deleted file mode 100644 index a609681..0000000 --- a/test/CodeGen/X86/inline-asm-fpstack3.ll +++ /dev/null @@ -1,20 +0,0 @@ -; RUN: llc < %s -march=x86 | FileCheck %s -; PR4459 - -; The return value from ceil must be duped before being consumed by asm. -; CHECK: ceil -; CHECK: fld %st(0) -; CHECK-NOT: fxch -; CHECK: fistpl -; CHECK-NOT: fxch -; CHECK: fstpt -; CHECK: test -define void @test2(x86_fp80 %a) { -entry: - %0 = call x86_fp80 @ceil(x86_fp80 %a) - call void asm sideeffect "fistpl $0", "{st}"( x86_fp80 %0) - call void @test(x86_fp80 %0 ) - ret void -} -declare x86_fp80 @ceil(x86_fp80) -declare void @test(x86_fp80) diff --git a/test/CodeGen/X86/inline-asm-fpstack4.ll b/test/CodeGen/X86/inline-asm-fpstack4.ll deleted file mode 100644 index ec572b4..0000000 --- a/test/CodeGen/X86/inline-asm-fpstack4.ll +++ /dev/null @@ -1,24 +0,0 @@ -; RUN: llc < %s -march=x86 | FileCheck %s -; PR4484 - -; ceil leaves a value on the stack that is needed after the asm. -; CHECK: ceil -; CHECK-NOT: fstp -; Load %a from stack after ceil -; CHECK: fldt -; CHECK-NOT: fxch -; CHECK: fistpl -; CHECK-NOT: fstp -; Set up call to test. 
-; CHECK: fstpt -; CHECK: test -define void @test2(x86_fp80 %a) { -entry: - %0 = call x86_fp80 @ceil() - call void asm sideeffect "fistpl $0", "{st},~{st}"(x86_fp80 %a) - call void @test(x86_fp80 %0) - ret void -} - -declare x86_fp80 @ceil() -declare void @test(x86_fp80) diff --git a/test/CodeGen/X86/inline-asm-fpstack5.ll b/test/CodeGen/X86/inline-asm-fpstack5.ll deleted file mode 100644 index 8b219cf..0000000 --- a/test/CodeGen/X86/inline-asm-fpstack5.ll +++ /dev/null @@ -1,15 +0,0 @@ -; RUN: llc < %s -march=x86 -; PR4485 - -define void @test(x86_fp80* %a) { -entry: - %0 = load x86_fp80* %a, align 16 - %1 = fmul x86_fp80 %0, 0xK4006B400000000000000 - %2 = fmul x86_fp80 %1, 0xK4012F424000000000000 - tail call void asm sideeffect "fistpl $0", "{st},~{st}"(x86_fp80 %2) - %3 = load x86_fp80* %a, align 16 - %4 = fmul x86_fp80 %3, 0xK4006B400000000000000 - %5 = fmul x86_fp80 %4, 0xK4012F424000000000000 - tail call void asm sideeffect "fistpl $0", "{st},~{st}"(x86_fp80 %5) - ret void -} diff --git a/test/CodeGen/X86/longlong-deadload.ll b/test/CodeGen/X86/longlong-deadload.ll index 9a4c8f2..db91961e0 100644 --- a/test/CodeGen/X86/longlong-deadload.ll +++ b/test/CodeGen/X86/longlong-deadload.ll @@ -1,8 +1,11 @@ -; RUN: llc < %s -march=x86 | not grep '4{(%...)} +; RUN: llc < %s -march=x86 | FileCheck %s ; This should not load or store the top part of *P. 
define void @test(i64* %P) nounwind { -entry: +; CHECK: test: +; CHECK: movl 4(%esp), %[[REGISTER:.*]] +; CHECK-NOT: 4(%[[REGISTER]]) +; CHECK: ret %tmp1 = load i64* %P, align 8 ; <i64> [#uses=1] %tmp2 = xor i64 %tmp1, 1 ; <i64> [#uses=1] store i64 %tmp2, i64* %P, align 8 diff --git a/test/CodeGen/X86/pr1505b.ll b/test/CodeGen/X86/pr1505b.ll index 6a08dae..945ec4c 100644 --- a/test/CodeGen/X86/pr1505b.ll +++ b/test/CodeGen/X86/pr1505b.ll @@ -1,5 +1,4 @@ -; RUN: llc < %s -mcpu=i486 | grep fstpl | count 5 -; RUN: llc < %s -mcpu=i486 | grep fstps | count 2 +; RUN: llc < %s -mcpu=i486 | FileCheck %s ; PR1505 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" @@ -30,19 +29,41 @@ declare void @_ZNSt8ios_base4InitC1Ev(%"struct.std::ctype_base"*) declare i32 @__cxa_atexit(void (i8*)*, i8*, i8*) +; CHECK: main define i32 @main() { entry: +; CHECK: flds %tmp6 = volatile load float* @a ; <float> [#uses=1] +; CHECK: fstps (%esp) +; CHECK: tanf %tmp9 = tail call float @tanf( float %tmp6 ) ; <float> [#uses=1] +; Spill returned value: +; CHECK: fstp + +; CHECK: fldl %tmp12 = volatile load double* @b ; <double> [#uses=1] +; CHECK: fstpl (%esp) +; CHECK: tan %tmp13 = tail call double @tan( double %tmp12 ) ; <double> [#uses=1] +; Spill returned value: +; CHECK: fstp %tmp1314 = fptrunc double %tmp13 to float ; <float> [#uses=1] %tmp16 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc( %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4cout, i8* getelementptr ([12 x i8]* @.str, i32 0, i32 0) ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1] %tmp1920 = fpext float %tmp9 to double ; <double> [#uses=1] +; reload: +; CHECK: fld +; CHECK: fstpl +; CHECK: ZNSolsEd %tmp22 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZNSolsEd( 
%"struct.std::basic_ostream<char,std::char_traits<char> >"* %tmp16, double %tmp1920 ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1] %tmp30 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_( %"struct.std::basic_ostream<char,std::char_traits<char> >"* %tmp22 ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=0] +; reload: +; CHECK: fld +; CHECK: fstps +; CHECK: ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc %tmp34 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc( %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4cout, i8* getelementptr ([13 x i8]* @.str1, i32 0, i32 0) ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1] %tmp3940 = fpext float %tmp1314 to double ; <double> [#uses=1] +; CHECK: fstpl +; CHECK: ZNSolsEd %tmp42 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZNSolsEd( %"struct.std::basic_ostream<char,std::char_traits<char> >"* %tmp34, double %tmp3940 ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1] %tmp51 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_( %"struct.std::basic_ostream<char,std::char_traits<char> >"* %tmp42 ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=0] ret i32 0 diff --git a/test/CodeGen/X86/pre-split1.ll b/test/CodeGen/X86/pre-split1.ll deleted file mode 100644 index b55bf57..0000000 --- a/test/CodeGen/X86/pre-split1.ll +++ /dev/null @@ -1,24 +0,0 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan -stats |& \ -; RUN: grep {pre-alloc-split} | grep {Number of intervals split} | grep 1 -; XFAIL: * - -define void @test(double* %P, i32 %cond) nounwind { -entry: - %0 = load double* %P, align 8 ; 
<double> [#uses=1] - %1 = fadd double %0, 4.000000e+00 ; <double> [#uses=2] - %2 = icmp eq i32 %cond, 0 ; <i1> [#uses=1] - br i1 %2, label %bb1, label %bb - -bb: ; preds = %entry - %3 = fadd double %1, 4.000000e+00 ; <double> [#uses=1] - br label %bb1 - -bb1: ; preds = %bb, %entry - %A.0 = phi double [ %3, %bb ], [ %1, %entry ] ; <double> [#uses=1] - %4 = fmul double %A.0, 4.000000e+00 ; <double> [#uses=1] - %5 = tail call i32 (...)* @bar() nounwind ; <i32> [#uses=0] - store double %4, double* %P, align 8 - ret void -} - -declare i32 @bar(...) diff --git a/test/CodeGen/X86/pre-split10.ll b/test/CodeGen/X86/pre-split10.ll deleted file mode 100644 index 83c6450..0000000 --- a/test/CodeGen/X86/pre-split10.ll +++ /dev/null @@ -1,51 +0,0 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan - -define i32 @main(i32 %argc, i8** %argv) nounwind { -entry: - br label %bb14.i - -bb14.i: ; preds = %bb14.i, %entry - %i8.0.reg2mem.0.i = phi i32 [ 0, %entry ], [ %0, %bb14.i ] ; <i32> [#uses=1] - %0 = add i32 %i8.0.reg2mem.0.i, 1 ; <i32> [#uses=2] - %1 = fadd double 0.000000e+00, 0.000000e+00 ; <double> [#uses=1] - %2 = fadd double 0.000000e+00, 0.000000e+00 ; <double> [#uses=1] - %3 = fadd double 0.000000e+00, 0.000000e+00 ; <double> [#uses=1] - %exitcond75.i = icmp eq i32 %0, 32 ; <i1> [#uses=1] - br i1 %exitcond75.i, label %bb24.i, label %bb14.i - -bb24.i: ; preds = %bb14.i - %4 = fdiv double 0.000000e+00, 0.000000e+00 ; <double> [#uses=1] - %5 = fdiv double %1, 0.000000e+00 ; <double> [#uses=1] - %6 = fdiv double %2, 0.000000e+00 ; <double> [#uses=1] - %7 = fdiv double %3, 0.000000e+00 ; <double> [#uses=1] - br label %bb31.i - -bb31.i: ; preds = %bb31.i, %bb24.i - %tmp.0.reg2mem.0.i = phi i32 [ 0, %bb24.i ], [ %indvar.next64.i, %bb31.i ] ; <i32> [#uses=1] - %indvar.next64.i = add i32 %tmp.0.reg2mem.0.i, 1 ; <i32> [#uses=2] - %exitcond65.i = icmp eq i32 %indvar.next64.i, 64 ; <i1> [#uses=1] - br i1 %exitcond65.i, label %bb33.i, label %bb31.i - 
-bb33.i: ; preds = %bb31.i - br label %bb35.preheader.i - -bb5.i.i: ; preds = %bb35.preheader.i - %8 = call double @floor(double 0.000000e+00) nounwind readnone ; <double> [#uses=0] - br label %bb7.i.i - -bb7.i.i: ; preds = %bb35.preheader.i, %bb5.i.i - br label %bb35.preheader.i - -bb35.preheader.i: ; preds = %bb7.i.i, %bb33.i - %9 = fsub double 0.000000e+00, %4 ; <double> [#uses=1] - store double %9, double* null, align 8 - %10 = fsub double 0.000000e+00, %5 ; <double> [#uses=1] - store double %10, double* null, align 8 - %11 = fsub double 0.000000e+00, %6 ; <double> [#uses=1] - store double %11, double* null, align 8 - %12 = fsub double 0.000000e+00, %7 ; <double> [#uses=1] - store double %12, double* null, align 8 - br i1 false, label %bb7.i.i, label %bb5.i.i -} - -declare double @floor(double) nounwind readnone diff --git a/test/CodeGen/X86/pre-split11.ll b/test/CodeGen/X86/pre-split11.ll deleted file mode 100644 index 3d549f9..0000000 --- a/test/CodeGen/X86/pre-split11.ll +++ /dev/null @@ -1,34 +0,0 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 -pre-alloc-split -regalloc=linearscan | FileCheck %s - -@.str = private constant [28 x i8] c"\0A\0ADOUBLE D = %f\0A\00", align 1 ; <[28 x i8]*> [#uses=1] -@.str1 = private constant [37 x i8] c"double to long l1 = %ld\09\09(0x%lx)\0A\00", align 8 ; <[37 x i8]*> [#uses=1] -@.str2 = private constant [35 x i8] c"double to uint ui1 = %u\09\09(0x%x)\0A\00", align 8 ; <[35 x i8]*> [#uses=1] -@.str3 = private constant [37 x i8] c"double to ulong ul1 = %lu\09\09(0x%lx)\0A\00", align 8 ; <[37 x i8]*> [#uses=1] - -define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp { -; CHECK: movsd %xmm0, (%rsp) -entry: - %0 = icmp sgt i32 %argc, 4 ; <i1> [#uses=1] - br i1 %0, label %bb, label %bb2 - -bb: ; preds = %entry - %1 = getelementptr inbounds i8** %argv, i64 4 ; <i8**> [#uses=1] - %2 = load i8** %1, align 8 ; <i8*> [#uses=1] - %3 = tail call double @atof(i8* %2) nounwind ; <double> [#uses=1] - br label %bb2 - 
-bb2: ; preds = %bb, %entry - %storemerge = phi double [ %3, %bb ], [ 2.000000e+00, %entry ] ; <double> [#uses=4] - %4 = fptoui double %storemerge to i32 ; <i32> [#uses=2] - %5 = fptoui double %storemerge to i64 ; <i64> [#uses=2] - %6 = fptosi double %storemerge to i64 ; <i64> [#uses=2] - %7 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([28 x i8]* @.str, i64 0, i64 0), double %storemerge) nounwind ; <i32> [#uses=0] - %8 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([37 x i8]* @.str1, i64 0, i64 0), i64 %6, i64 %6) nounwind ; <i32> [#uses=0] - %9 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([35 x i8]* @.str2, i64 0, i64 0), i32 %4, i32 %4) nounwind ; <i32> [#uses=0] - %10 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([37 x i8]* @.str3, i64 0, i64 0), i64 %5, i64 %5) nounwind ; <i32> [#uses=0] - ret i32 0 -} - -declare double @atof(i8* nocapture) nounwind readonly - -declare i32 @printf(i8* nocapture, ...) nounwind diff --git a/test/CodeGen/X86/pre-split4.ll b/test/CodeGen/X86/pre-split4.ll deleted file mode 100644 index 37d1ac6..0000000 --- a/test/CodeGen/X86/pre-split4.ll +++ /dev/null @@ -1,26 +0,0 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan -stats |& \ -; RUN: grep {pre-alloc-split} | grep {Number of intervals split} | grep 2 - -define i32 @main(i32 %argc, i8** %argv) nounwind { -entry: - br label %bb - -bb: ; preds = %bb, %entry - %k.0.reg2mem.0 = phi double [ 1.000000e+00, %entry ], [ %6, %bb ] ; <double> [#uses=2] - %Flint.0.reg2mem.0 = phi double [ 0.000000e+00, %entry ], [ %5, %bb ] ; <double> [#uses=1] - %twoThrd.0.reg2mem.0 = phi double [ 0.000000e+00, %entry ], [ %1, %bb ] ; <double> [#uses=1] - %0 = tail call double @llvm.pow.f64(double 0x3FE5555555555555, double 0.000000e+00) ; <double> [#uses=1] - %1 = fadd double %0, %twoThrd.0.reg2mem.0 ; <double> [#uses=1] - %2 = tail call double @sin(double %k.0.reg2mem.0) nounwind readonly ; <double> 
[#uses=1] - %3 = fmul double 0.000000e+00, %2 ; <double> [#uses=1] - %4 = fdiv double 1.000000e+00, %3 ; <double> [#uses=1] - store double %Flint.0.reg2mem.0, double* null - store double %twoThrd.0.reg2mem.0, double* null - %5 = fadd double %4, %Flint.0.reg2mem.0 ; <double> [#uses=1] - %6 = fadd double %k.0.reg2mem.0, 1.000000e+00 ; <double> [#uses=1] - br label %bb -} - -declare double @llvm.pow.f64(double, double) nounwind readonly - -declare double @sin(double) nounwind readonly diff --git a/test/CodeGen/X86/pre-split5.ll b/test/CodeGen/X86/pre-split5.ll deleted file mode 100644 index 9f41f24..0000000 --- a/test/CodeGen/X86/pre-split5.ll +++ /dev/null @@ -1,56 +0,0 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan - -target triple = "i386-apple-darwin9.5" - %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } - %struct.__sFILEX = type opaque - %struct.__sbuf = type { i8*, i32 } -@"\01LC1" = external constant [48 x i8] ; <[48 x i8]*> [#uses=1] - -define i32 @main() nounwind { -entry: - br label %bb5.us - -bb5.us: ; preds = %bb8.split, %bb5.us, %entry - %i.0.reg2mem.0.ph = phi i32 [ 0, %entry ], [ %indvar.next53, %bb8.split ], [ %i.0.reg2mem.0.ph, %bb5.us ] ; <i32> [#uses=2] - %j.0.reg2mem.0.us = phi i32 [ %indvar.next47, %bb5.us ], [ 0, %bb8.split ], [ 0, %entry ] ; <i32> [#uses=1] - %indvar.next47 = add i32 %j.0.reg2mem.0.us, 1 ; <i32> [#uses=2] - %exitcond48 = icmp eq i32 %indvar.next47, 256 ; <i1> [#uses=1] - br i1 %exitcond48, label %bb8.split, label %bb5.us - -bb8.split: ; preds = %bb5.us - %indvar.next53 = add i32 %i.0.reg2mem.0.ph, 1 ; <i32> [#uses=2] - %exitcond54 = icmp eq i32 %indvar.next53, 256 ; <i1> [#uses=1] - br i1 %exitcond54, label %bb11, label %bb5.us - -bb11: ; preds = %bb11, %bb8.split - %i.1.reg2mem.0 = phi i32 [ %indvar.next44, 
%bb11 ], [ 0, %bb8.split ] ; <i32> [#uses=1] - %indvar.next44 = add i32 %i.1.reg2mem.0, 1 ; <i32> [#uses=2] - %exitcond45 = icmp eq i32 %indvar.next44, 63 ; <i1> [#uses=1] - br i1 %exitcond45, label %bb14, label %bb11 - -bb14: ; preds = %bb14, %bb11 - %indvar = phi i32 [ %indvar.next40, %bb14 ], [ 0, %bb11 ] ; <i32> [#uses=1] - %indvar.next40 = add i32 %indvar, 1 ; <i32> [#uses=2] - %exitcond41 = icmp eq i32 %indvar.next40, 32768 ; <i1> [#uses=1] - br i1 %exitcond41, label %bb28, label %bb14 - -bb28: ; preds = %bb14 - %0 = fdiv double 2.550000e+02, 0.000000e+00 ; <double> [#uses=1] - br label %bb30 - -bb30: ; preds = %bb36, %bb28 - %m.1.reg2mem.0 = phi i32 [ %m.0, %bb36 ], [ 0, %bb28 ] ; <i32> [#uses=1] - %1 = fmul double 0.000000e+00, %0 ; <double> [#uses=1] - %2 = fptosi double %1 to i32 ; <i32> [#uses=1] - br i1 false, label %bb36, label %bb35 - -bb35: ; preds = %bb30 - %3 = tail call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* null, i8* getelementptr ([48 x i8]* @"\01LC1", i32 0, i32 0), i32 0, i32 0, i32 0, i32 %2) nounwind ; <i32> [#uses=0] - br label %bb36 - -bb36: ; preds = %bb35, %bb30 - %m.0 = phi i32 [ 0, %bb35 ], [ %m.1.reg2mem.0, %bb30 ] ; <i32> [#uses=1] - br label %bb30 -} - -declare i32 @fprintf(%struct.FILE*, i8*, ...) 
nounwind diff --git a/test/CodeGen/X86/pre-split6.ll b/test/CodeGen/X86/pre-split6.ll deleted file mode 100644 index d8f274d..0000000 --- a/test/CodeGen/X86/pre-split6.ll +++ /dev/null @@ -1,36 +0,0 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -pre-alloc-split -regalloc=linearscan | grep {divsd 24} | count 1 - -@current_surfaces.b = external global i1 ; <i1*> [#uses=1] - -declare double @sin(double) nounwind readonly - -declare double @asin(double) nounwind readonly - -define fastcc void @trace_line(i32 %line) nounwind { -entry: - %.b3 = load i1* @current_surfaces.b ; <i1> [#uses=1] - br i1 %.b3, label %bb.nph, label %return - -bb.nph: ; preds = %entry - %0 = load double* null, align 8 ; <double> [#uses=1] - %1 = load double* null, align 8 ; <double> [#uses=2] - %2 = fcmp une double %0, 0.000000e+00 ; <i1> [#uses=1] - br i1 %2, label %bb9.i, label %bb13.i - -bb9.i: ; preds = %bb.nph - %3 = tail call double @asin(double 0.000000e+00) nounwind readonly ; <double> [#uses=0] - %4 = fdiv double 1.000000e+00, %1 ; <double> [#uses=1] - %5 = fmul double %4, 0.000000e+00 ; <double> [#uses=1] - %6 = tail call double @asin(double %5) nounwind readonly ; <double> [#uses=0] - unreachable - -bb13.i: ; preds = %bb.nph - %7 = fdiv double 1.000000e+00, %1 ; <double> [#uses=1] - %8 = tail call double @sin(double 0.000000e+00) nounwind readonly ; <double> [#uses=1] - %9 = fmul double %7, %8 ; <double> [#uses=1] - %10 = tail call double @asin(double %9) nounwind readonly ; <double> [#uses=0] - unreachable - -return: ; preds = %entry - ret void -} diff --git a/test/CodeGen/X86/pre-split7.ll b/test/CodeGen/X86/pre-split7.ll deleted file mode 100644 index 8c93faa..0000000 --- a/test/CodeGen/X86/pre-split7.ll +++ /dev/null @@ -1,34 +0,0 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan - -@object_distance = external global double, align 8 ; <double*> [#uses=1] -@axis_slope_angle = external global double, align 8 ; <double*> [#uses=1] 
-@current_surfaces.b = external global i1 ; <i1*> [#uses=1] - -declare double @sin(double) nounwind readonly - -declare double @asin(double) nounwind readonly - -declare double @tan(double) nounwind readonly - -define fastcc void @trace_line(i32 %line) nounwind { -entry: - %.b3 = load i1* @current_surfaces.b ; <i1> [#uses=1] - br i1 %.b3, label %bb, label %return - -bb: ; preds = %bb, %entry - %0 = tail call double @asin(double 0.000000e+00) nounwind readonly ; <double> [#uses=1] - %1 = fadd double 0.000000e+00, %0 ; <double> [#uses=2] - %2 = tail call double @asin(double 0.000000e+00) nounwind readonly ; <double> [#uses=1] - %3 = fsub double %1, %2 ; <double> [#uses=2] - store double %3, double* @axis_slope_angle, align 8 - %4 = fdiv double %1, 2.000000e+00 ; <double> [#uses=1] - %5 = tail call double @sin(double %4) nounwind readonly ; <double> [#uses=1] - %6 = fmul double 0.000000e+00, %5 ; <double> [#uses=1] - %7 = tail call double @tan(double %3) nounwind readonly ; <double> [#uses=0] - %8 = fadd double 0.000000e+00, %6 ; <double> [#uses=1] - store double %8, double* @object_distance, align 8 - br label %bb - -return: ; preds = %entry - ret void -} diff --git a/test/CodeGen/X86/pre-split8.ll b/test/CodeGen/X86/pre-split8.ll deleted file mode 100644 index 7e6ad6e..0000000 --- a/test/CodeGen/X86/pre-split8.ll +++ /dev/null @@ -1,35 +0,0 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan -stats |& \ -; RUN: grep {pre-alloc-split} | grep {Number of intervals split} | grep 1 - -@current_surfaces.b = external global i1 ; <i1*> [#uses=1] - -declare double @asin(double) nounwind readonly - -declare double @tan(double) nounwind readonly - -define fastcc void @trace_line(i32 %line) nounwind { -entry: - %.b3 = load i1* @current_surfaces.b ; <i1> [#uses=1] - br i1 %.b3, label %bb, label %return - -bb: ; preds = %bb9.i, %entry - %.rle4 = phi double [ %7, %bb9.i ], [ 0.000000e+00, %entry ] ; <double> [#uses=1] - %0 = load double* null, align 8 
; <double> [#uses=3] - %1 = fcmp une double %0, 0.000000e+00 ; <i1> [#uses=1] - br i1 %1, label %bb9.i, label %bb13.i - -bb9.i: ; preds = %bb - %2 = fsub double %.rle4, %0 ; <double> [#uses=0] - %3 = tail call double @asin(double %.rle4) nounwind readonly ; <double> [#uses=0] - %4 = fmul double 0.000000e+00, %0 ; <double> [#uses=1] - %5 = tail call double @tan(double 0.000000e+00) nounwind readonly ; <double> [#uses=0] - %6 = fmul double %4, 0.000000e+00 ; <double> [#uses=1] - %7 = fadd double %6, 0.000000e+00 ; <double> [#uses=1] - br i1 false, label %return, label %bb - -bb13.i: ; preds = %bb - unreachable - -return: ; preds = %bb9.i, %entry - ret void -} diff --git a/test/CodeGen/X86/pre-split9.ll b/test/CodeGen/X86/pre-split9.ll deleted file mode 100644 index 951e6fb..0000000 --- a/test/CodeGen/X86/pre-split9.ll +++ /dev/null @@ -1,38 +0,0 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan -stats |& \ -; RUN: grep {pre-alloc-split} | grep {Number of intervals split} | grep 1 - -@current_surfaces.b = external global i1 ; <i1*> [#uses=1] - -declare double @sin(double) nounwind readonly - -declare double @asin(double) nounwind readonly - -declare double @tan(double) nounwind readonly - -define fastcc void @trace_line(i32 %line) nounwind { -entry: - %.b3 = load i1* @current_surfaces.b ; <i1> [#uses=1] - br i1 %.b3, label %bb, label %return - -bb: ; preds = %bb9.i, %entry - %.rle4 = phi double [ %8, %bb9.i ], [ 0.000000e+00, %entry ] ; <double> [#uses=1] - %0 = load double* null, align 8 ; <double> [#uses=3] - %1 = fcmp une double %0, 0.000000e+00 ; <i1> [#uses=1] - br i1 %1, label %bb9.i, label %bb13.i - -bb9.i: ; preds = %bb - %2 = fsub double %.rle4, %0 ; <double> [#uses=0] - %3 = tail call double @asin(double %.rle4) nounwind readonly ; <double> [#uses=0] - %4 = tail call double @sin(double 0.000000e+00) nounwind readonly ; <double> [#uses=1] - %5 = fmul double %4, %0 ; <double> [#uses=1] - %6 = tail call double @tan(double 
0.000000e+00) nounwind readonly ; <double> [#uses=0] - %7 = fmul double %5, 0.000000e+00 ; <double> [#uses=1] - %8 = fadd double %7, 0.000000e+00 ; <double> [#uses=1] - br i1 false, label %return, label %bb - -bb13.i: ; preds = %bb - unreachable - -return: ; preds = %bb9.i, %entry - ret void -} diff --git a/test/CodeGen/X86/sibcall-byval.ll b/test/CodeGen/X86/sibcall-byval.ll new file mode 100644 index 0000000..c335f30 --- /dev/null +++ b/test/CodeGen/X86/sibcall-byval.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s -check-prefix=32 +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s -check-prefix=64 + +%struct.p = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } + +define i32 @f(%struct.p* byval align 4 %q) nounwind ssp { +entry: +; 32: _f: +; 32: jmp L_g$stub + +; 64: _f: +; 64: jmp _g + %call = tail call i32 @g(%struct.p* byval align 4 %q) nounwind + ret i32 %call +} + +declare i32 @g(%struct.p* byval align 4) + +define i32 @h(%struct.p* byval align 4 %q, i32 %r) nounwind ssp { +entry: +; 32: _h: +; 32: jmp L_i$stub + +; 64: _h: +; 64: jmp _i + + %call = tail call i32 @i(%struct.p* byval align 4 %q, i32 %r) nounwind + ret i32 %call +} + +declare i32 @i(%struct.p* byval align 4, i32) diff --git a/test/FrontendC/ARM/inline-asm-multichar.c b/test/FrontendC/ARM/inline-asm-multichar.c index 7e2eeef..bd88390 100644 --- a/test/FrontendC/ARM/inline-asm-multichar.c +++ b/test/FrontendC/ARM/inline-asm-multichar.c @@ -1,11 +1,11 @@ -// RUN: %llvmgcc -S -march=armv7a %s +// RUN: %llvmgcc -S -march=armv7a %s | FileCheck %s // XFAIL: * // XTARGET: arm int t1() { static float k = 1.0f; -CHECK: call void asm sideeffect "flds s15, $0 \0A", "*^Uv,~{s15}" + // CHECK: "flds s15, $0 \0A", "*^Uv,~{s15}" __asm__ volatile ("flds s15, %[k] \n" :: [k] "Uv,m" (k) : "s15"); return 0; } diff --git a/test/MC/ARM/arm-arithmetic-aliases.s b/test/MC/ARM/arm-arithmetic-aliases.s new file mode 100644 index 0000000..9895cfc --- /dev/null 
+++ b/test/MC/ARM/arm-arithmetic-aliases.s @@ -0,0 +1,126 @@ +@ RUN: llvm-mc -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s + +foo: +@ CHECK: foo + +sub r2, r2, #6 +sub r2, #6 +sub r2, r2, r3 +sub r2, r3 + +@ CHECK: sub r2, r2, #6 @ encoding: [0x06,0x20,0x42,0xe2] +@ CHECK: sub r2, r2, #6 @ encoding: [0x06,0x20,0x42,0xe2] +@ CHECK: sub r2, r2, r3 @ encoding: [0x03,0x20,0x42,0xe0] +@ CHECK: sub r2, r2, r3 @ encoding: [0x03,0x20,0x42,0xe0] + +add r2, r2, #6 +add r2, #6 +add r2, r2, r3 +add r2, r3 + +@ CHECK: add r2, r2, #6 @ encoding: [0x06,0x20,0x82,0xe2] +@ CHECK: add r2, r2, #6 @ encoding: [0x06,0x20,0x82,0xe2] +@ CHECK: add r2, r2, r3 @ encoding: [0x03,0x20,0x82,0xe0] +@ CHECK: add r2, r2, r3 @ encoding: [0x03,0x20,0x82,0xe0] + +and r2, r2, #6 +and r2, #6 +and r2, r2, r3 +and r2, r3 + +@ CHECK: and r2, r2, #6 @ encoding: [0x06,0x20,0x02,0xe2] +@ CHECK: and r2, r2, #6 @ encoding: [0x06,0x20,0x02,0xe2] +@ CHECK: and r2, r2, r3 @ encoding: [0x03,0x20,0x02,0xe0] +@ CHECK: and r2, r2, r3 @ encoding: [0x03,0x20,0x02,0xe0] + +orr r2, r2, #6 +orr r2, #6 +orr r2, r2, r3 +orr r2, r3 + +@ CHECK: orr r2, r2, #6 @ encoding: [0x06,0x20,0x82,0xe3] +@ CHECK: orr r2, r2, #6 @ encoding: [0x06,0x20,0x82,0xe3] +@ CHECK: orr r2, r2, r3 @ encoding: [0x03,0x20,0x82,0xe1] +@ CHECK: orr r2, r2, r3 @ encoding: [0x03,0x20,0x82,0xe1] + +eor r2, r2, #6 +eor r2, #6 +eor r2, r2, r3 +eor r2, r3 + +@ CHECK: eor r2, r2, #6 @ encoding: [0x06,0x20,0x22,0xe2] +@ CHECK: eor r2, r2, #6 @ encoding: [0x06,0x20,0x22,0xe2] +@ CHECK: eor r2, r2, r3 @ encoding: [0x03,0x20,0x22,0xe0] +@ CHECK: eor r2, r2, r3 @ encoding: [0x03,0x20,0x22,0xe0] + +bic r2, r2, #6 +bic r2, #6 +bic r2, r2, r3 +bic r2, r3 + +@ CHECK: bic r2, r2, #6 @ encoding: [0x06,0x20,0xc2,0xe3] +@ CHECK: bic r2, r2, #6 @ encoding: [0x06,0x20,0xc2,0xe3] +@ CHECK: bic r2, r2, r3 @ encoding: [0x03,0x20,0xc2,0xe1] +@ CHECK: bic r2, r2, r3 @ encoding: [0x03,0x20,0xc2,0xe1] + + +@ Also check that we handle the predicate and cc_out 
operands. +subseq r2, r2, #6 +subseq r2, #6 +subseq r2, r2, r3 +subseq r2, r3 + +@ CHECK: subseq r2, r2, #6 @ encoding: [0x06,0x20,0x52,0x02] +@ CHECK: subseq r2, r2, #6 @ encoding: [0x06,0x20,0x52,0x02] +@ CHECK: subseq r2, r2, r3 @ encoding: [0x03,0x20,0x52,0x00] +@ CHECK: subseq r2, r2, r3 @ encoding: [0x03,0x20,0x52,0x00] + +addseq r2, r2, #6 +addseq r2, #6 +addseq r2, r2, r3 +addseq r2, r3 + +@ CHECK: addseq r2, r2, #6 @ encoding: [0x06,0x20,0x92,0x02] +@ CHECK: addseq r2, r2, #6 @ encoding: [0x06,0x20,0x92,0x02] +@ CHECK: addseq r2, r2, r3 @ encoding: [0x03,0x20,0x92,0x00] +@ CHECK: addseq r2, r2, r3 @ encoding: [0x03,0x20,0x92,0x00] + +andseq r2, r2, #6 +andseq r2, #6 +andseq r2, r2, r3 +andseq r2, r3 + +@ CHECK: andseq r2, r2, #6 @ encoding: [0x06,0x20,0x12,0x02] +@ CHECK: andseq r2, r2, #6 @ encoding: [0x06,0x20,0x12,0x02] +@ CHECK: andseq r2, r2, r3 @ encoding: [0x03,0x20,0x12,0x00] +@ CHECK: andseq r2, r2, r3 @ encoding: [0x03,0x20,0x12,0x00] + +orrseq r2, r2, #6 +orrseq r2, #6 +orrseq r2, r2, r3 +orrseq r2, r3 + +@ CHECK: orrseq r2, r2, #6 @ encoding: [0x06,0x20,0x92,0x03] +@ CHECK: orrseq r2, r2, #6 @ encoding: [0x06,0x20,0x92,0x03] +@ CHECK: orrseq r2, r2, r3 @ encoding: [0x03,0x20,0x92,0x01] +@ CHECK: orrseq r2, r2, r3 @ encoding: [0x03,0x20,0x92,0x01] + +eorseq r2, r2, #6 +eorseq r2, #6 +eorseq r2, r2, r3 +eorseq r2, r3 + +@ CHECK: eorseq r2, r2, #6 @ encoding: [0x06,0x20,0x32,0x02] +@ CHECK: eorseq r2, r2, #6 @ encoding: [0x06,0x20,0x32,0x02] +@ CHECK: eorseq r2, r2, r3 @ encoding: [0x03,0x20,0x32,0x00] +@ CHECK: eorseq r2, r2, r3 @ encoding: [0x03,0x20,0x32,0x00] + +bicseq r2, r2, #6 +bicseq r2, #6 +bicseq r2, r2, r3 +bicseq r2, r3 + +@ CHECK: bicseq r2, r2, #6 @ encoding: [0x06,0x20,0xd2,0x03] +@ CHECK: bicseq r2, r2, #6 @ encoding: [0x06,0x20,0xd2,0x03] +@ CHECK: bicseq r2, r2, r3 @ encoding: [0x03,0x20,0xd2,0x01] +@ CHECK: bicseq r2, r2, r3 @ encoding: [0x03,0x20,0xd2,0x01] diff --git a/test/MC/ARM/arm_instructions.s 
b/test/MC/ARM/arm_instructions.s index f789441..66fc87f3 100644 --- a/test/MC/ARM/arm_instructions.s +++ b/test/MC/ARM/arm_instructions.s @@ -21,22 +21,30 @@ vqdmull.s32 q8, d17, d16 @ CHECK: ldmia r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x92,0xe8] +@ CHECK: ldmia r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x92,0xe8] @ CHECK: ldmib r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x92,0xe9] @ CHECK: ldmda r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x12,0xe8] @ CHECK: ldmdb r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x12,0xe9] +@ CHECK: ldmia r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x92,0xe8] + ldm r2, {r1,r3-r6,sp} ldmia r2, {r1,r3-r6,sp} ldmib r2, {r1,r3-r6,sp} ldmda r2, {r1,r3-r6,sp} ldmdb r2, {r1,r3-r6,sp} + ldmfd r2, {r1,r3-r6,sp} @ CHECK: stmia r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x82,0xe8] +@ CHECK: stmia r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x82,0xe8] @ CHECK: stmib r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x82,0xe9] @ CHECK: stmda r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x02,0xe8] @ CHECK: stmdb r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x02,0xe9] +@ CHECK: stmdb r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x02,0xe9] + stm r2, {r1,r3-r6,sp} stmia r2, {r1,r3-r6,sp} stmib r2, {r1,r3-r6,sp} stmda r2, {r1,r3-r6,sp} stmdb r2, {r1,r3-r6,sp} + stmfd r2, {r1,r3-r6,sp} @ CHECK: ldmia r2!, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0xb2,0xe8] @ CHECK: ldmib r2!, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0xb2,0xe9] diff --git a/test/MC/ARM/thumb2-movt-fixup.s b/test/MC/ARM/thumb2-movt-fixup.s new file mode 100644 index 0000000..ddd95b5 --- /dev/null +++ b/test/MC/ARM/thumb2-movt-fixup.s @@ -0,0 +1,17 @@ +@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumbv7-apple-darwin10 -filetype=obj -o - < %s | macho-dump | FileCheck %s + +_fred: + movt r3, :upper16:(_wilma-(LPC0_0+4)) +LPC0_0: + +_wilma: + .long 0 + +@ CHECK: ('_relocations', [ +@ 
CHECK: # Relocation 0 +@ CHECK: (('word-0', 0xb9000000), +@ CHECK: ('word-1', 0x4)), +@ CHECK: # Relocation 1 +@ CHECK: (('word-0', 0xb100fffc), +@ CHECK: ('word-1', 0x4)), + diff --git a/test/MC/ARM/thumb2.s b/test/MC/ARM/thumb2.s index 4e9d4e1..41dda84 100644 --- a/test/MC/ARM/thumb2.s +++ b/test/MC/ARM/thumb2.s @@ -49,6 +49,22 @@ @ CHECK: mov.w r0, #66846720 @ encoding: [0x7f,0x70,0x4f,0xf0] mov.w r0, #66846720 +@ Aliases w/ the vanilla 'mov' mnemonic, and explicit alternative selection. + mov r2, #0xbf000000 + mov r1, #0x100 + mov r3, #32 + mov.w r3, #32 + movw r3, #32 + +@ CHECK: mov.w r2, #3204448256 @ encoding: [0x4f,0xf0,0x3f,0x42] +@ CHECK: mov.w r1, #256 @ encoding: [0x4f,0xf4,0x80,0x71] +@ CHECK: mov r3, #32 @ encoding: [0x20,0x23] +@ CHECK: mov.w r3, #32 @ encoding: [0x4f,0xf0,0x20,0x03] +@ CHECK: movw r3, #32 @ encoding: [0x40,0xf2,0x20,0x03] + + + + @ CHECK: rrx r0, r0 @ encoding: [0x30,0x00,0x4f,0xea] rrx r0, r0 @@ -302,3 +318,23 @@ ldrexd r0, r1, [r0] @ CHECK: ssat16 r0, #7, r0 @ encoding: [0x20,0xf3,0x06,0x00] ssat16 r0, #7, r0 + + and r1, #0xff + and r1, r1, #0xff + orr r1, 0x100 + orr r1, r1, 0x100 + eor r1, 0x100 + eor r1, r1, 0x100 + bic r1, 0x100 + bic r1, r1, 0x100 + +@ CHECK: and r1, r1, #255 @ encoding: [0x01,0xf0,0xff,0x01] +@ CHECK: and r1, r1, #255 @ encoding: [0x01,0xf0,0xff,0x01] +@ CHECK: orr r1, r1, #256 @ encoding: [0x41,0xf4,0x80,0x71] +@ CHECK: orr r1, r1, #256 @ encoding: [0x41,0xf4,0x80,0x71] +@ CHECK: eor r1, r1, #256 @ encoding: [0x81,0xf4,0x80,0x71] +@ CHECK: eor r1, r1, #256 @ encoding: [0x81,0xf4,0x80,0x71] +@ CHECK: bic r1, r1, #256 @ encoding: [0x21,0xf4,0x80,0x71] +@ CHECK: bic r1, r1, #256 @ encoding: [0x21,0xf4,0x80,0x71] + + diff --git a/test/MC/ARM/vpush-vpop.s b/test/MC/ARM/vpush-vpop.s new file mode 100644 index 0000000..1212c83 --- /dev/null +++ b/test/MC/ARM/vpush-vpop.s @@ -0,0 +1,19 @@ +@ RUN: llvm-mc -triple armv7-unknown-unknown -show-encoding < %s | FileCheck --check-prefix=CHECK-ARM %s +@ RUN: llvm-mc 
-triple thumbv7-unknown-unknown -show-encoding < %s | FileCheck --check-prefix=CHECK-THUMB %s + +foo: +@ CHECK: foo + vpush {d8, d9, d10, d11, d12} + vpush {s8, s9, s10, s11, s12} + vpop {d8, d9, d10, d11, d12} + vpop {s8, s9, s10, s11, s12} + +@ CHECK-THUMB: vpush {d8, d9, d10, d11, d12} @ encoding: [0x2d,0xed,0x0a,0x8b] +@ CHECK-THUMB: vpush {s8, s9, s10, s11, s12} @ encoding: [0x2d,0xed,0x05,0x4a] +@ CHECK-THUMB: vpop {d8, d9, d10, d11, d12} @ encoding: [0xbd,0xec,0x0a,0x8b] +@ CHECK-THUMB: vpop {s8, s9, s10, s11, s12} @ encoding: [0xbd,0xec,0x05,0x4a] + +@ CHECK-ARM: vpush {d8, d9, d10, d11, d12} @ encoding: [0x0a,0x8b,0x2d,0xed] +@ CHECK-ARM: vpush {s8, s9, s10, s11, s12} @ encoding: [0x05,0x4a,0x2d,0xed] +@ CHECK-ARM: vpop {d8, d9, d10, d11, d12} @ encoding: [0x0a,0x8b,0xbd,0xec] +@ CHECK-ARM: vpop {s8, s9, s10, s11, s12} @ encoding: [0x05,0x4a,0xbd,0xec] diff --git a/test/MC/AsmParser/exprs-invalid.s b/test/MC/AsmParser/exprs-invalid.s index dc27d80..88b2a0a 100644 --- a/test/MC/AsmParser/exprs-invalid.s +++ b/test/MC/AsmParser/exprs-invalid.s @@ -6,3 +6,9 @@ // CHECK-ERRORS: error: invalid hexadecimal number .long 80+0xzz + +// CHECK-ERRORS: error: literal value out of range for directive +.byte 256 + +// CHECK-ERRORS: error: literal value out of range for directive +.long 4e71cf69 // double floating point constant due to missing "0x" diff --git a/test/MC/X86/padlock.s b/test/MC/X86/padlock.s index 874786f..5c523e7 100644 --- a/test/MC/X86/padlock.s +++ b/test/MC/X86/padlock.s @@ -4,6 +4,10 @@ // CHECK: xstore // CHECK: encoding: [0x0f,0xa7,0xc0] + xstorerng +// CHECK: xstore +// CHECK: encoding: [0x0f,0xa7,0xc0] + rep xcryptecb // CHECK: rep // CHECK: encoding: [0xf3] diff --git a/test/MC/X86/x86-32-coverage.s b/test/MC/X86/x86-32-coverage.s index d2dd78d..bdc54a6 100644 --- a/test/MC/X86/x86-32-coverage.s +++ b/test/MC/X86/x86-32-coverage.s @@ -503,7 +503,7 @@ // CHECK: ud2 ud2 -// CHECK: movnti %ecx, 3735928559(%ebx,%ecx,8) +// CHECK: movntil %ecx, 
3735928559(%ebx,%ecx,8) movnti %ecx,0xdeadbeef(%ebx,%ecx,8) // CHECK: clflush 3735928559(%ebx,%ecx,8) @@ -4505,23 +4505,23 @@ // CHECK: encoding: [0xdf,0xea] fucomip %st(2),%st -// CHECK: movnti %ecx, 3735928559(%ebx,%ecx,8) +// CHECK: movntil %ecx, 3735928559(%ebx,%ecx,8) // CHECK: encoding: [0x0f,0xc3,0x8c,0xcb,0xef,0xbe,0xad,0xde] movnti %ecx,0xdeadbeef(%ebx,%ecx,8) -// CHECK: movnti %ecx, 69 +// CHECK: movntil %ecx, 69 // CHECK: encoding: [0x0f,0xc3,0x0d,0x45,0x00,0x00,0x00] movnti %ecx,0x45 -// CHECK: movnti %ecx, 32493 +// CHECK: movntil %ecx, 32493 // CHECK: encoding: [0x0f,0xc3,0x0d,0xed,0x7e,0x00,0x00] movnti %ecx,0x7eed -// CHECK: movnti %ecx, 3133065982 +// CHECK: movntil %ecx, 3133065982 // CHECK: encoding: [0x0f,0xc3,0x0d,0xfe,0xca,0xbe,0xba] movnti %ecx,0xbabecafe -// CHECK: movnti %ecx, 305419896 +// CHECK: movntil %ecx, 305419896 // CHECK: encoding: [0x0f,0xc3,0x0d,0x78,0x56,0x34,0x12] movnti %ecx,0x12345678 @@ -14177,19 +14177,19 @@ // CHECK: fucompi %st(2) fucomip %st(2),%st -// CHECK: movnti %ecx, 3735928559(%ebx,%ecx,8) +// CHECK: movntil %ecx, 3735928559(%ebx,%ecx,8) movnti %ecx,0xdeadbeef(%ebx,%ecx,8) -// CHECK: movnti %ecx, 69 - movnti %ecx,0x45 +// CHECK: movntil %ecx, 69 + movntil %ecx,0x45 -// CHECK: movnti %ecx, 32493 +// CHECK: movntil %ecx, 32493 movnti %ecx,0x7eed -// CHECK: movnti %ecx, 3133065982 +// CHECK: movntil %ecx, 3133065982 movnti %ecx,0xbabecafe -// CHECK: movnti %ecx, 305419896 +// CHECK: movntil %ecx, 305419896 movnti %ecx,0x12345678 // CHECK: clflush 3735928559(%ebx,%ecx,8) diff --git a/test/MC/X86/x86-64.s b/test/MC/X86/x86-64.s index 472748f..5074a1d 100644 --- a/test/MC/X86/x86-64.s +++ b/test/MC/X86/x86-64.s @@ -1136,3 +1136,15 @@ xsetbv // CHECK: xsetbv # encoding: [0x0f,0x01,0xd1] // CHECK: movd %rdi, %xmm0 // CHECK: encoding: [0x66,0x48,0x0f,0x6e,0xc7] movd %rdi,%xmm0 + +// CHECK: movntil %eax, (%rdi) +// CHECK: encoding: [0x0f,0xc3,0x07] +// CHECK: movntil +movntil %eax, (%rdi) +movnti %eax, (%rdi) + +// CHECK: 
movntiq %rax, (%rdi) +// CHECK: encoding: [0x48,0x0f,0xc3,0x07] +// CHECK: movntiq +movntiq %rax, (%rdi) +movnti %rax, (%rdi) diff --git a/test/Makefile b/test/Makefile index 4e34e72..c0bc36c 100644 --- a/test/Makefile +++ b/test/Makefile @@ -185,7 +185,7 @@ lit.site.cfg: site.exp @$(ECHOPATH) s=@LLVMGCCDIR@=$(LLVMGCCDIR)=g >> lit.tmp @$(ECHOPATH) s=@PYTHON_EXECUTABLE@=python=g >> lit.tmp @$(ECHOPATH) s=@ENABLE_SHARED@=$(ENABLE_SHARED)=g >> lit.tmp - @$(ECHOPATH) s=@LLVM_ENABLE_ASSERTIONS@=$(ENABLE_ASSERTIONS)=g >> lit.tmp + @$(ECHOPATH) s=@ENABLE_ASSERTIONS@=$(ENABLE_ASSERTIONS)=g >> lit.tmp @sed -f lit.tmp $(PROJ_SRC_DIR)/lit.site.cfg.in > $@ @-rm -f lit.tmp diff --git a/test/Transforms/IndVarSimplify/ada-loops.ll b/test/Transforms/IndVarSimplify/ada-loops.ll index 4a07d99..9e635fd 100644 --- a/test/Transforms/IndVarSimplify/ada-loops.ll +++ b/test/Transforms/IndVarSimplify/ada-loops.ll @@ -1,14 +1,18 @@ -; RUN: opt < %s -indvars -S > %t -; RUN: grep phi %t | count 4 -; RUN: grep {= phi i32} %t | count 4 -; RUN: not grep {sext i} %t -; RUN: not grep {zext i} %t -; RUN: not grep {trunc i} %t -; RUN: not grep {add i8} %t +; RUN: opt < %s -indvars -S | FileCheck %s +; RUN: opt < %s -indvars -disable-iv-rewrite -S | FileCheck %s +; ; PR1301 ; Do a bunch of analysis and prove that the loops can use an i32 trip ; count without casting. +; +; Note that all four functions should actually be converted to +; memset. However, this test case validates indvars behavior. We +; don't check that phis are "folded together" because that is a job +; for loop strength reduction. But indvars must remove sext, zext, +; trunc, and add i8. 
+; +; CHECK-NOT: {{sext|zext|trunc|add i8}} ; ModuleID = 'ada.bc' target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-n:8:16:32" diff --git a/test/Transforms/IndVarSimplify/no-iv-rewrite.ll b/test/Transforms/IndVarSimplify/no-iv-rewrite.ll index c35feef..f38b54e 100644 --- a/test/Transforms/IndVarSimplify/no-iv-rewrite.ll +++ b/test/Transforms/IndVarSimplify/no-iv-rewrite.ll @@ -23,6 +23,7 @@ ph: ; sext should be eliminated while preserving gep inboundsness. ; CHECK-NOT: sext ; CHECK: getelementptr inbounds +; CHECK: exit: loop: %i.02 = phi i32 [ 0, %ph ], [ %iinc, %loop ] %s.01 = phi i32 [ 0, %ph ], [ %sinc, %loop ] @@ -63,6 +64,7 @@ ph: ; CHECK: getelementptr inbounds ; %vall sext should obviously not be eliminated ; CHECK: sext +; CHECK: exit: loop: %i.02 = phi i32 [ 0, %ph ], [ %iinc, %loop ] %s.01 = phi i64 [ 0, %ph ], [ %sinc, %loop ] @@ -106,6 +108,7 @@ ph: ; Preserve gep inboundsness, and don't factor it. 
; CHECK: getelementptr inbounds i32* %ptriv, i32 1 ; CHECK-NOT: add +; CHECK: exit: loop: %ptriv = phi i32* [ %first, %ph ], [ %ptrpost, %loop ] %ofs = sext i32 %idx to i64 @@ -121,3 +124,125 @@ exit: return: ret void } + +%struct = type { i32 } + +define void @bitcastiv(i32 %start, i32 %limit, i32 %step, %struct* %base) +nounwind +{ +entry: + br label %loop + +; CHECK: loop: +; +; Preserve casts +; CHECK: phi i32 +; CHECK: bitcast +; CHECK: getelementptr +; CHECK: exit: +loop: + %iv = phi i32 [%start, %entry], [%next, %loop] + %p = phi %struct* [%base, %entry], [%pinc, %loop] + %adr = getelementptr %struct* %p, i32 0, i32 0 + store i32 3, i32* %adr + %pp = bitcast %struct* %p to i32* + store i32 4, i32* %pp + %pinc = getelementptr %struct* %p, i32 1 + %next = add i32 %iv, 1 + %cond = icmp ne i32 %next, %limit + br i1 %cond, label %loop, label %exit + +exit: + ret void +} + +define void @maxvisitor(i32 %limit, i32* %base) nounwind { +entry: + br label %loop + +; Test inserting a truncate at a phi use. +; +; CHECK: loop: +; CHECK: phi i64 +; CHECK: trunc +; CHECK: exit: +loop: + %idx = phi i32 [ 0, %entry ], [ %idx.next, %loop.inc ] + %max = phi i32 [ 0, %entry ], [ %max.next, %loop.inc ] + %idxprom = sext i32 %idx to i64 + %adr = getelementptr inbounds i32* %base, i64 %idxprom + %val = load i32* %adr + %cmp19 = icmp sgt i32 %val, %max + br i1 %cmp19, label %if.then, label %if.else + +if.then: + br label %loop.inc + +if.else: + br label %loop.inc + +loop.inc: + %max.next = phi i32 [ %idx, %if.then ], [ %max, %if.else ] + %idx.next = add nsw i32 %idx, 1 + %cmp = icmp slt i32 %idx.next, %limit + br i1 %cmp, label %loop, label %exit + +exit: + ret void +} + +define void @identityphi(i32 %limit) nounwind { +entry: + br label %loop + +; Test an edge case of removing an identity phi that directly feeds +; back to the loop iv. 
+; +; CHECK: loop: +; CHECK: phi i32 +; CHECK-NOT: phi +; CHECK: exit: +loop: + %iv = phi i32 [ 0, %entry], [ %iv.next, %control ] + br i1 undef, label %if.then, label %control + +if.then: + br label %control + +control: + %iv.next = phi i32 [ %iv, %loop ], [ undef, %if.then ] + %cmp = icmp slt i32 %iv.next, %limit + br i1 %cmp, label %loop, label %exit + +exit: + ret void +} + +define i64 @cloneOr(i32 %limit, i64* %base) nounwind { +entry: + ; ensure that the loop can't overflow + %halfLim = ashr i32 %limit, 2 + br label %loop + +; Test cloning an or, which is not an OverflowBinaryOperator. +; +; CHECK: loop: +; CHECK: phi i64 +; CHECK-NOT: sext +; CHECK: or i64 +; CHECK: exit: +loop: + %iv = phi i32 [ 0, %entry], [ %iv.next, %loop ] + %t1 = sext i32 %iv to i64 + %adr = getelementptr i64* %base, i64 %t1 + %val = load i64* %adr + %t2 = or i32 %iv, 1 + %t3 = sext i32 %t2 to i64 + %iv.next = add i32 %iv, 2 + %cmp = icmp slt i32 %iv.next, %halfLim + br i1 %cmp, label %loop, label %exit + +exit: + %result = and i64 %val, %t3 + ret i64 %result +} diff --git a/test/Transforms/InstCombine/select-crash.ll b/test/Transforms/InstCombine/select-crash.ll index 8ee3369..18af152 100644 --- a/test/Transforms/InstCombine/select-crash.ll +++ b/test/Transforms/InstCombine/select-crash.ll @@ -18,3 +18,15 @@ entry: %add94 = fadd double undef, %mul91 ret double %add94 } + +; PR10180: same crash, but with vectors +define <4 x float> @foo(i1 %b, <4 x float> %x, <4 x float> %y, <4 x float> %z) { +; CHECK: @foo +; CHECK: fsub <4 x float> +; CHECK: select +; CHECK: fadd <4 x float> + %a = fadd <4 x float> %x, %y + %sub = fsub <4 x float> %x, %z + %sel = select i1 %b, <4 x float> %a, <4 x float> %sub + ret <4 x float> %sel +} diff --git a/test/Transforms/InstSimplify/undef.ll b/test/Transforms/InstSimplify/undef.ll new file mode 100644 index 0000000..8134cc8 --- /dev/null +++ b/test/Transforms/InstSimplify/undef.ll @@ -0,0 +1,127 @@ +; RUN: opt -instsimplify -S < %s | FileCheck %s + +; 
@test0 +; CHECK: ret i64 undef +define i64 @test0() { + %r = mul i64 undef, undef + ret i64 %r +} + +; @test1 +; CHECK: ret i64 undef +define i64 @test1() { + %r = mul i64 3, undef + ret i64 %r +} + +; @test2 +; CHECK: ret i64 undef +define i64 @test2() { + %r = mul i64 undef, 3 + ret i64 %r +} + +; @test3 +; CHECK: ret i64 0 +define i64 @test3() { + %r = mul i64 undef, 6 + ret i64 %r +} + +; @test4 +; CHECK: ret i64 0 +define i64 @test4() { + %r = mul i64 6, undef + ret i64 %r +} + +; @test5 +; CHECK: ret i64 undef +define i64 @test5() { + %r = and i64 undef, undef + ret i64 %r +} + +; @test6 +; CHECK: ret i64 undef +define i64 @test6() { + %r = or i64 undef, undef + ret i64 %r +} + +; @test7 +; CHECK: ret i64 undef +define i64 @test7() { + %r = udiv i64 undef, 1 + ret i64 %r +} + +; @test8 +; CHECK: ret i64 undef +define i64 @test8() { + %r = sdiv i64 undef, 1 + ret i64 %r +} + +; @test9 +; CHECK: ret i64 0 +define i64 @test9() { + %r = urem i64 undef, 1 + ret i64 %r +} + +; @test10 +; CHECK: ret i64 0 +define i64 @test10() { + %r = srem i64 undef, 1 + ret i64 %r +} + +; @test11 +; CHECK: ret i64 undef +define i64 @test11() { + %r = shl i64 undef, undef + ret i64 %r +} + +; @test12 +; CHECK: ret i64 undef +define i64 @test12() { + %r = ashr i64 undef, undef + ret i64 %r +} + +; @test13 +; CHECK: ret i64 undef +define i64 @test13() { + %r = lshr i64 undef, undef + ret i64 %r +} + +; @test14 +; CHECK: ret i1 undef +define i1 @test14() { + %r = icmp slt i64 undef, undef + ret i1 %r +} + +; @test15 +; CHECK: ret i1 undef +define i1 @test15() { + %r = icmp ult i64 undef, undef + ret i1 %r +} + +; @test16 +; CHECK: ret i64 undef +define i64 @test16(i64 %a) { + %r = select i1 undef, i64 %a, i64 undef + ret i64 %r +} + +; @test17 +; CHECK: ret i64 undef +define i64 @test17(i64 %a) { + %r = select i1 undef, i64 undef, i64 %a + ret i64 %r +} diff --git a/test/Transforms/ScalarRepl/memcpy-from-global.ll b/test/Transforms/ScalarRepl/memcpy-from-global.ll index 
5b25864..59475ad 100644 --- a/test/Transforms/ScalarRepl/memcpy-from-global.ll +++ b/test/Transforms/ScalarRepl/memcpy-from-global.ll @@ -93,4 +93,18 @@ define void @test4() { ret void } +declare void @llvm.lifetime.start(i64, i8*) +define void @test5() { + %A = alloca %T + %a = bitcast %T* %A to i8* + call void @llvm.lifetime.start(i64 -1, i8* %a) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i32 4, i1 false) + call void @baz(i8* byval %a) +; CHECK: @test5 +; CHECK-NEXT: %a = bitcast %T* @G to i8* +; CHECK-NEXT: call void @baz(i8* byval %a) + ret void +} + + declare void @baz(i8* byval) diff --git a/test/Transforms/SimplifyCFG/lifetime.ll b/test/Transforms/SimplifyCFG/lifetime.ll new file mode 100644 index 0000000..b794221 --- /dev/null +++ b/test/Transforms/SimplifyCFG/lifetime.ll @@ -0,0 +1,29 @@ +; RUN: opt < %s -simplifycfg -S | FileCheck %s + +; Test that a lifetime intrinsic doesn't prevent us from simplifying this. + +; CHECK: foo +; CHECK: entry: +; CHECK-NOT: bb0: +; CHECK-NOT: bb1: +; CHECK: ret +define void @foo(i1 %x) { +entry: + %a = alloca i8 + call void @llvm.lifetime.start(i64 -1, i8* %a) nounwind + br i1 %x, label %bb0, label %bb1 + +bb0: + call void @llvm.lifetime.end(i64 -1, i8* %a) nounwind + br label %bb1 + +bb1: + call void @f() + ret void +} + +declare void @f() + +declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind + +declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind diff --git a/test/lit.site.cfg.in b/test/lit.site.cfg.in index 6c33831..fe152ef 100644 --- a/test/lit.site.cfg.in +++ b/test/lit.site.cfg.in @@ -7,7 +7,7 @@ config.llvmgcc_dir = "@LLVMGCCDIR@" config.lit_tools_dir = "@LLVM_LIT_TOOLS_DIR@" config.python_executable = "@PYTHON_EXECUTABLE@" config.enable_shared = @ENABLE_SHARED@ -config.enable_assertions = @LLVM_ENABLE_ASSERTIONS@ +config.enable_assertions = @ENABLE_ASSERTIONS@ # Support substitution of the tools_dir with user parameters. 
This is # used when we can't determine the tool dir at configuration time. |
