aboutsummaryrefslogtreecommitdiffstats
path: root/test/CodeGen/R600
diff options
context:
space:
mode:
authorTom Stellard <thomas.stellard@amd.com>2013-09-28 02:50:50 +0000
committerTom Stellard <thomas.stellard@amd.com>2013-09-28 02:50:50 +0000
commit9c598cfebcc3387676995873e65ae4fed96b3edc (patch)
tree1da7067848100b72dee7a60974c1734b2e468769 /test/CodeGen/R600
parentbbafe422d6f9036b03992ee5eacb5d09644c3267 (diff)
downloadexternal_llvm-9c598cfebcc3387676995873e65ae4fed96b3edc.zip
external_llvm-9c598cfebcc3387676995873e65ae4fed96b3edc.tar.gz
external_llvm-9c598cfebcc3387676995873e65ae4fed96b3edc.tar.bz2
R600: Fix handling of NAN in comparison instructions
We were completely ignoring the unorder/ordered attributes of condition codes and also incorrectly lowering seto and setuo. Reviewed-by: Vincent Lejeune<vljn at ovi.com> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191603 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/R600')
-rw-r--r--test/CodeGen/R600/fmax.ll2
-rw-r--r--test/CodeGen/R600/kcache-fold.ll16
-rw-r--r--test/CodeGen/R600/pv.ll2
-rw-r--r--test/CodeGen/R600/selectcc-opt.ll4
-rw-r--r--test/CodeGen/R600/set-dx10.ll60
-rw-r--r--test/CodeGen/R600/unsupported-cc.ll60
-rw-r--r--test/CodeGen/R600/vselect.ll4
7 files changed, 87 insertions, 61 deletions
diff --git a/test/CodeGen/R600/fmax.ll b/test/CodeGen/R600/fmax.ll
index 8b704e5..be25c9c 100644
--- a/test/CodeGen/R600/fmax.ll
+++ b/test/CodeGen/R600/fmax.ll
@@ -5,7 +5,7 @@
define void @test() {
%r0 = call float @llvm.R600.load.input(i32 0)
%r1 = call float @llvm.R600.load.input(i32 1)
- %r2 = fcmp uge float %r0, %r1
+ %r2 = fcmp oge float %r0, %r1
%r3 = select i1 %r2, float %r0, float %r1
call void @llvm.AMDGPU.store.output(float %r3, i32 0)
ret void
diff --git a/test/CodeGen/R600/kcache-fold.ll b/test/CodeGen/R600/kcache-fold.ll
index 8bdb050..0baa3cd 100644
--- a/test/CodeGen/R600/kcache-fold.ll
+++ b/test/CodeGen/R600/kcache-fold.ll
@@ -10,7 +10,7 @@ main_body:
%3 = extractelement <4 x float> %2, i32 0
%4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%5 = extractelement <4 x float> %4, i32 0
- %6 = fcmp ult float %1, 0.000000e+00
+ %6 = fcmp ogt float %1, 0.000000e+00
%7 = select i1 %6, float %3, float %5
%8 = load <4 x float> addrspace(8)* null
%9 = extractelement <4 x float> %8, i32 1
@@ -18,7 +18,7 @@ main_body:
%11 = extractelement <4 x float> %10, i32 1
%12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%13 = extractelement <4 x float> %12, i32 1
- %14 = fcmp ult float %9, 0.000000e+00
+ %14 = fcmp ogt float %9, 0.000000e+00
%15 = select i1 %14, float %11, float %13
%16 = load <4 x float> addrspace(8)* null
%17 = extractelement <4 x float> %16, i32 2
@@ -26,7 +26,7 @@ main_body:
%19 = extractelement <4 x float> %18, i32 2
%20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%21 = extractelement <4 x float> %20, i32 2
- %22 = fcmp ult float %17, 0.000000e+00
+ %22 = fcmp ogt float %17, 0.000000e+00
%23 = select i1 %22, float %19, float %21
%24 = load <4 x float> addrspace(8)* null
%25 = extractelement <4 x float> %24, i32 3
@@ -34,7 +34,7 @@ main_body:
%27 = extractelement <4 x float> %26, i32 3
%28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%29 = extractelement <4 x float> %28, i32 3
- %30 = fcmp ult float %25, 0.000000e+00
+ %30 = fcmp ogt float %25, 0.000000e+00
%31 = select i1 %30, float %27, float %29
%32 = call float @llvm.AMDIL.clamp.(float %7, float 0.000000e+00, float 1.000000e+00)
%33 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00)
@@ -58,7 +58,7 @@ main_body:
%3 = extractelement <4 x float> %2, i32 0
%4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%5 = extractelement <4 x float> %4, i32 1
- %6 = fcmp ult float %1, 0.000000e+00
+ %6 = fcmp ogt float %1, 0.000000e+00
%7 = select i1 %6, float %3, float %5
%8 = load <4 x float> addrspace(8)* null
%9 = extractelement <4 x float> %8, i32 1
@@ -66,7 +66,7 @@ main_body:
%11 = extractelement <4 x float> %10, i32 0
%12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%13 = extractelement <4 x float> %12, i32 1
- %14 = fcmp ult float %9, 0.000000e+00
+ %14 = fcmp ogt float %9, 0.000000e+00
%15 = select i1 %14, float %11, float %13
%16 = load <4 x float> addrspace(8)* null
%17 = extractelement <4 x float> %16, i32 2
@@ -74,7 +74,7 @@ main_body:
%19 = extractelement <4 x float> %18, i32 3
%20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%21 = extractelement <4 x float> %20, i32 2
- %22 = fcmp ult float %17, 0.000000e+00
+ %22 = fcmp ogt float %17, 0.000000e+00
%23 = select i1 %22, float %19, float %21
%24 = load <4 x float> addrspace(8)* null
%25 = extractelement <4 x float> %24, i32 3
@@ -82,7 +82,7 @@ main_body:
%27 = extractelement <4 x float> %26, i32 3
%28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%29 = extractelement <4 x float> %28, i32 2
- %30 = fcmp ult float %25, 0.000000e+00
+ %30 = fcmp ogt float %25, 0.000000e+00
%31 = select i1 %30, float %27, float %29
%32 = call float @llvm.AMDIL.clamp.(float %7, float 0.000000e+00, float 1.000000e+00)
%33 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00)
diff --git a/test/CodeGen/R600/pv.ll b/test/CodeGen/R600/pv.ll
index 6e0b744..6d9396c 100644
--- a/test/CodeGen/R600/pv.ll
+++ b/test/CodeGen/R600/pv.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=r600 | FileCheck %s
;CHECK: DOT4 * T{{[0-9]\.W}} (MASKED)
-;CHECK: CNDGE T{{[0-9].[XYZW]}}, PV.X
+;CHECK: MAX T{{[0-9].[XYZW]}}, 0.0, PV.X
define void @main() #0 {
main_body:
diff --git a/test/CodeGen/R600/selectcc-opt.ll b/test/CodeGen/R600/selectcc-opt.ll
index 7e2d559..834c030 100644
--- a/test/CodeGen/R600/selectcc-opt.ll
+++ b/test/CodeGen/R600/selectcc-opt.ll
@@ -6,7 +6,7 @@
define void @test_a(i32 addrspace(1)* %out, float %in) {
entry:
- %0 = fcmp ult float %in, 0.000000e+00
+ %0 = fcmp olt float %in, 0.000000e+00
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
%2 = fsub float -0.000000e+00, %1
%3 = fptosi float %2 to i32
@@ -34,7 +34,7 @@ ENDIF:
; CHECK-NEXT: ALU clause starting
define void @test_b(i32 addrspace(1)* %out, float %in) {
entry:
- %0 = fcmp ult float %in, 0.0
+ %0 = fcmp olt float %in, 0.0
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
%2 = fsub float -0.000000e+00, %1
%3 = fptosi float %2 to i32
diff --git a/test/CodeGen/R600/set-dx10.ll b/test/CodeGen/R600/set-dx10.ll
index bdc2ff4..5c7d499 100644
--- a/test/CodeGen/R600/set-dx10.ll
+++ b/test/CodeGen/R600/set-dx10.ll
@@ -30,13 +30,13 @@ entry:
ret void
}
-; CHECK: @fcmp_ueq_select_fptosi
+; CHECK: @fcmp_oeq_select_fptosi
; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
; CHECK-NEXT: LSHR
; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_ueq_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define void @fcmp_oeq_select_fptosi(i32 addrspace(1)* %out, float %in) {
entry:
- %0 = fcmp ueq float %in, 5.0
+ %0 = fcmp oeq float %in, 5.0
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
%2 = fsub float -0.000000e+00, %1
%3 = fptosi float %2 to i32
@@ -44,25 +44,25 @@ entry:
ret void
}
-; CHECK: @fcmp_ueq_select_i32
+; CHECK: @fcmp_oeq_select_i32
; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
; CHECK-NEXT: LSHR
; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_ueq_select_i32(i32 addrspace(1)* %out, float %in) {
+define void @fcmp_oeq_select_i32(i32 addrspace(1)* %out, float %in) {
entry:
- %0 = fcmp ueq float %in, 5.0
+ %0 = fcmp oeq float %in, 5.0
%1 = select i1 %0, i32 -1, i32 0
store i32 %1, i32 addrspace(1)* %out
ret void
}
-; CHECK: @fcmp_ugt_select_fptosi
+; CHECK: @fcmp_ogt_select_fptosi
; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
; CHECK-NEXT: LSHR
; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_ugt_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define void @fcmp_ogt_select_fptosi(i32 addrspace(1)* %out, float %in) {
entry:
- %0 = fcmp ugt float %in, 5.0
+ %0 = fcmp ogt float %in, 5.0
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
%2 = fsub float -0.000000e+00, %1
%3 = fptosi float %2 to i32
@@ -70,25 +70,25 @@ entry:
ret void
}
-; CHECK: @fcmp_ugt_select_i32
+; CHECK: @fcmp_ogt_select_i32
; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
; CHECK-NEXT: LSHR
; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_ugt_select_i32(i32 addrspace(1)* %out, float %in) {
+define void @fcmp_ogt_select_i32(i32 addrspace(1)* %out, float %in) {
entry:
- %0 = fcmp ugt float %in, 5.0
+ %0 = fcmp ogt float %in, 5.0
%1 = select i1 %0, i32 -1, i32 0
store i32 %1, i32 addrspace(1)* %out
ret void
}
-; CHECK: @fcmp_uge_select_fptosi
+; CHECK: @fcmp_oge_select_fptosi
; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
; CHECK-NEXT: LSHR
; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_uge_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define void @fcmp_oge_select_fptosi(i32 addrspace(1)* %out, float %in) {
entry:
- %0 = fcmp uge float %in, 5.0
+ %0 = fcmp oge float %in, 5.0
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
%2 = fsub float -0.000000e+00, %1
%3 = fptosi float %2 to i32
@@ -96,25 +96,25 @@ entry:
ret void
}
-; CHECK: @fcmp_uge_select_i32
+; CHECK: @fcmp_oge_select_i32
; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
; CHECK-NEXT: LSHR
; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_uge_select_i32(i32 addrspace(1)* %out, float %in) {
+define void @fcmp_oge_select_i32(i32 addrspace(1)* %out, float %in) {
entry:
- %0 = fcmp uge float %in, 5.0
+ %0 = fcmp oge float %in, 5.0
%1 = select i1 %0, i32 -1, i32 0
store i32 %1, i32 addrspace(1)* %out
ret void
}
-; CHECK: @fcmp_ule_select_fptosi
+; CHECK: @fcmp_ole_select_fptosi
; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
; CHECK-NEXT: LSHR
; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_ule_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define void @fcmp_ole_select_fptosi(i32 addrspace(1)* %out, float %in) {
entry:
- %0 = fcmp ule float %in, 5.0
+ %0 = fcmp ole float %in, 5.0
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
%2 = fsub float -0.000000e+00, %1
%3 = fptosi float %2 to i32
@@ -122,25 +122,25 @@ entry:
ret void
}
-; CHECK: @fcmp_ule_select_i32
+; CHECK: @fcmp_ole_select_i32
; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
; CHECK-NEXT: LSHR
; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_ule_select_i32(i32 addrspace(1)* %out, float %in) {
+define void @fcmp_ole_select_i32(i32 addrspace(1)* %out, float %in) {
entry:
- %0 = fcmp ule float %in, 5.0
+ %0 = fcmp ole float %in, 5.0
%1 = select i1 %0, i32 -1, i32 0
store i32 %1, i32 addrspace(1)* %out
ret void
}
-; CHECK: @fcmp_ult_select_fptosi
+; CHECK: @fcmp_olt_select_fptosi
; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
; CHECK-NEXT: LSHR
; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_ult_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define void @fcmp_olt_select_fptosi(i32 addrspace(1)* %out, float %in) {
entry:
- %0 = fcmp ult float %in, 5.0
+ %0 = fcmp olt float %in, 5.0
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
%2 = fsub float -0.000000e+00, %1
%3 = fptosi float %2 to i32
@@ -148,13 +148,13 @@ entry:
ret void
}
-; CHECK: @fcmp_ult_select_i32
+; CHECK: @fcmp_olt_select_i32
; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
; CHECK-NEXT: LSHR
; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_ult_select_i32(i32 addrspace(1)* %out, float %in) {
+define void @fcmp_olt_select_i32(i32 addrspace(1)* %out, float %in) {
entry:
- %0 = fcmp ult float %in, 5.0
+ %0 = fcmp olt float %in, 5.0
%1 = select i1 %0, i32 -1, i32 0
store i32 %1, i32 addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/unsupported-cc.ll b/test/CodeGen/R600/unsupported-cc.ll
index d3aa060..f986a02 100644
--- a/test/CodeGen/R600/unsupported-cc.ll
+++ b/test/CodeGen/R600/unsupported-cc.ll
@@ -2,7 +2,7 @@
; These tests are for condition codes that are not supported by the hardware
-; CHECK: @slt
+; CHECK-LABEL: @slt
; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
; CHECK-NEXT: LSHR
; CHECK-NEXT: 5(7.006492e-45)
@@ -14,7 +14,7 @@ entry:
ret void
}
-; CHECK: @ult_i32
+; CHECK-LABEL: @ult_i32
; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
; CHECK-NEXT: LSHR
; CHECK-NEXT: 5(7.006492e-45)
@@ -26,10 +26,11 @@ entry:
ret void
}
-; CHECK: @ult_float
-; CHECK: SETGT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
-; CHECK-NEXT: LSHR
+; CHECK-LABEL: @ult_float
+; CHECK: SETGE * T{{[0-9]}}.[[CHAN:[XYZW]]], KC0[2].Z, literal.x
; CHECK-NEXT: 1084227584(5.000000e+00)
+; CHECK-NEXT: SETE T{{[0-9]\.[XYZW]}}, PV.[[CHAN]], 0.0
+; CHECK-NEXT: LSHR *
define void @ult_float(float addrspace(1)* %out, float %in) {
entry:
%0 = fcmp ult float %in, 5.0
@@ -38,10 +39,22 @@ entry:
ret void
}
-; CHECK: @olt
-; CHECK: SETGT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
-; CHECK-NEXT: LSHR
-;CHECK-NEXT: 1084227584(5.000000e+00)
+; CHECK-LABEL: @ult_float_native
+; CHECK: SETGE T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
+; CHECK-NEXT: LSHR *
+; CHECK-NEXT: 1084227584(5.000000e+00)
+define void @ult_float_native(float addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp ult float %in, 5.0
+ %1 = select i1 %0, float 0.0, float 1.0
+ store float %1, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: @olt
+; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
+; CHECK-NEXT: LSHR *
+; CHECK-NEXT: 1084227584(5.000000e+00)
define void @olt(float addrspace(1)* %out, float %in) {
entry:
%0 = fcmp olt float %in, 5.0
@@ -50,7 +63,7 @@ entry:
ret void
}
-; CHECK: @sle
+; CHECK-LABEL: @sle
; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
; CHECK-NEXT: LSHR
; CHECK-NEXT: 6(8.407791e-45)
@@ -62,7 +75,7 @@ entry:
ret void
}
-; CHECK: @ule_i32
+; CHECK-LABEL: @ule_i32
; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
; CHECK-NEXT: LSHR
; CHECK-NEXT: 6(8.407791e-45)
@@ -74,10 +87,11 @@ entry:
ret void
}
-; CHECK: @ule_float
-; CHECK: SETGE {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
-; CHECK-NEXT: LSHR
+; CHECK-LABEL: @ule_float
+; CHECK: SETGT * T{{[0-9]}}.[[CHAN:[XYZW]]], KC0[2].Z, literal.x
; CHECK-NEXT: 1084227584(5.000000e+00)
+; CHECK-NEXT: SETE T{{[0-9]\.[XYZW]}}, PV.[[CHAN]], 0.0
+; CHECK-NEXT: LSHR *
define void @ule_float(float addrspace(1)* %out, float %in) {
entry:
%0 = fcmp ule float %in, 5.0
@@ -86,9 +100,21 @@ entry:
ret void
}
-; CHECK: @ole
-; CHECK: SETGE {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
-; CHECK-NEXT: LSHR
+; CHECK-LABEL: @ule_float_native
+; CHECK: SETGT T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
+; CHECK-NEXT: LSHR *
+; CHECK-NEXT: 1084227584(5.000000e+00)
+define void @ule_float_native(float addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp ule float %in, 5.0
+ %1 = select i1 %0, float 0.0, float 1.0
+ store float %1, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: @ole
+; CHECK: SETGE T{{[0-9]\.[XYZW]}}, literal.x, KC0[2].Z
+; CHECK-NEXT: LSHR *
; CHECK-NEXT:1084227584(5.000000e+00)
define void @ole(float addrspace(1)* %out, float %in) {
entry:
diff --git a/test/CodeGen/R600/vselect.ll b/test/CodeGen/R600/vselect.ll
index 8e9c5b5..ee17e0f 100644
--- a/test/CodeGen/R600/vselect.ll
+++ b/test/CodeGen/R600/vselect.ll
@@ -31,7 +31,7 @@ define void @test_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrs
entry:
%0 = load <2 x float> addrspace(1)* %in0
%1 = load <2 x float> addrspace(1)* %in1
- %cmp = fcmp one <2 x float> %0, %1
+ %cmp = fcmp une <2 x float> %0, %1
%result = select <2 x i1> %cmp, <2 x float> %0, <2 x float> %1
store <2 x float> %result, <2 x float> addrspace(1)* %out
ret void
@@ -69,7 +69,7 @@ define void @test_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrs
entry:
%0 = load <4 x float> addrspace(1)* %in0
%1 = load <4 x float> addrspace(1)* %in1
- %cmp = fcmp one <4 x float> %0, %1
+ %cmp = fcmp une <4 x float> %0, %1
%result = select <4 x i1> %cmp, <4 x float> %0, <4 x float> %1
store <4 x float> %result, <4 x float> addrspace(1)* %out
ret void