aboutsummaryrefslogtreecommitdiffstats
path: root/test/CodeGen/R600
diff options
context:
space:
mode:
authorStephen Hines <srhines@google.com>2013-03-20 04:33:54 +0000
committerAndroid Git Automerger <android-git-automerger@android.com>2013-03-20 04:33:54 +0000
commit3dd51ae3a043f2edf9dd2bc7c906c3f602967e5a (patch)
tree90c0395880593bf195fb818c2af1139cb7e846df /test/CodeGen/R600
parent84ba0bec3eb1a5f63c13a01e6d510ecd85fa6ab7 (diff)
parent2d4629c5d7dcc6582fa7b85a517744f1a3654eba (diff)
downloadexternal_llvm-3dd51ae3a043f2edf9dd2bc7c906c3f602967e5a.zip
external_llvm-3dd51ae3a043f2edf9dd2bc7c906c3f602967e5a.tar.gz
external_llvm-3dd51ae3a043f2edf9dd2bc7c906c3f602967e5a.tar.bz2
am 2d4629c5: Merge branch \'upstream\' into merge_2013_03_18
* commit '2d4629c5d7dcc6582fa7b85a517744f1a3654eba': (424 commits) Change NULL to 0. Register the flush function for each compile unit. Remove trailing spaces. Fix PPC unaligned 64-bit loads and stores ARM cost model: Make some vector integer to float casts cheaper ARM cost model: Correct cost for some cheap float to integer conversions Extend global merge pass to optionally consider global constant variables. Also add some checks to not merge globals used within landing pad instructions or marked as "used". Change test cases to handle unaligned references. Remove unnecessary leading comment characters in lit-only file Add SchedRW annotations to most of X86InstrSSE.td. Annotate X86 arithmetic instructions with SchedRW lists. Check whether a pointer is non-null (isKnownNonNull) in isKnownNonZero. TableGen fix for the new machine model. Include '.test' suffix in target specific lit configs that need it Make the fields in the diagram match the descriptive text above them. Update Fix 80-col. violations in PPCCTRLoops Fix large count and negative constant count handling in PPCCTRLoops Cleanup initial-value constants in PPCCTRLoops Fix integer comparison in DIEInteger::BestForm. ...
Diffstat (limited to 'test/CodeGen/R600')
-rw-r--r--test/CodeGen/R600/fcmp-cnde-int-args.ll6
-rw-r--r--test/CodeGen/R600/fcmp.ll29
-rw-r--r--test/CodeGen/R600/fdiv.v4f32.ll8
-rw-r--r--test/CodeGen/R600/icmp-select-sete-reverse-args.ll2
-rw-r--r--test/CodeGen/R600/kcache-fold.ll76
-rw-r--r--test/CodeGen/R600/legalizedag-bug-expand-setcc.ll26
-rw-r--r--test/CodeGen/R600/literals.ll2
-rw-r--r--test/CodeGen/R600/llvm.SI.fs.interp.constant.ll14
-rw-r--r--test/CodeGen/R600/llvm.SI.sample.ll30
-rw-r--r--test/CodeGen/R600/schedule-fs-loop-nested-if.ll83
-rw-r--r--test/CodeGen/R600/schedule-fs-loop-nested.ll88
-rw-r--r--test/CodeGen/R600/schedule-fs-loop.ll55
-rw-r--r--test/CodeGen/R600/schedule-if-2.ll94
-rw-r--r--test/CodeGen/R600/schedule-if.ll46
-rw-r--r--test/CodeGen/R600/schedule-vs-if-nested-loop.ll134
-rw-r--r--test/CodeGen/R600/selectcc-opt.ll64
16 files changed, 708 insertions, 49 deletions
diff --git a/test/CodeGen/R600/fcmp-cnde-int-args.ll b/test/CodeGen/R600/fcmp-cnde-int-args.ll
index 5c981ef..55aba0d 100644
--- a/test/CodeGen/R600/fcmp-cnde-int-args.ll
+++ b/test/CodeGen/R600/fcmp-cnde-int-args.ll
@@ -1,10 +1,10 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
; This test checks a bug in R600TargetLowering::LowerSELECT_CC where the
-; chance to optimize the fcmp + select instructions to CNDE was missed
+; chance to optimize the fcmp + select instructions to SET* was missed
; due to the fact that the operands to fcmp and select had different types
-;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], literal.x, 0.0, -1}}
+; CHECK: SET{{[A-Z]+}}_DX10
define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) {
entry:
diff --git a/test/CodeGen/R600/fcmp.ll b/test/CodeGen/R600/fcmp.ll
index 89f5e9e..37f621d 100644
--- a/test/CodeGen/R600/fcmp.ll
+++ b/test/CodeGen/R600/fcmp.ll
@@ -1,8 +1,9 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-;CHECK: SETE_DX10 T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: @fcmp_sext
+; CHECK: SETE_DX10 T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) {
+define void @fcmp_sext(i32 addrspace(1)* %out, float addrspace(1)* %in) {
entry:
%0 = load float addrspace(1)* %in
%arrayidx1 = getelementptr inbounds float addrspace(1)* %in, i32 1
@@ -12,3 +13,25 @@ entry:
store i32 %sext, i32 addrspace(1)* %out
ret void
}
+
+; This test checks that a setcc node with f32 operands is lowered to a
+; SET*_DX10 instruction. Previously we were lowering this to:
+; SET* + FP_TO_SINT
+
+; CHECK: @fcmp_br
+; CHECK: SET{{[N]*}}E_DX10 T{{[0-9]+\.[XYZW], [a-zA-Z0-9, .]+}}(5.0
+
+define void @fcmp_br(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp oeq float %in, 5.0
+ br i1 %0, label %IF, label %ENDIF
+
+IF:
+ %1 = getelementptr i32 addrspace(1)* %out, i32 1
+ store i32 0, i32 addrspace(1)* %1
+ br label %ENDIF
+
+ENDIF:
+ store i32 0, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/fdiv.v4f32.ll b/test/CodeGen/R600/fdiv.v4f32.ll
index b013fd6..79e677f 100644
--- a/test/CodeGen/R600/fdiv.v4f32.ll
+++ b/test/CodeGen/R600/fdiv.v4f32.ll
@@ -1,13 +1,13 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
diff --git a/test/CodeGen/R600/icmp-select-sete-reverse-args.ll b/test/CodeGen/R600/icmp-select-sete-reverse-args.ll
index aad44d9..71705a6 100644
--- a/test/CodeGen/R600/icmp-select-sete-reverse-args.ll
+++ b/test/CodeGen/R600/icmp-select-sete-reverse-args.ll
@@ -4,7 +4,7 @@
;to a SETNE_INT. There should only be one SETNE_INT instruction.
;CHECK: SETNE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK_NOT: SETNE_INT
+;CHECK-NOT: SETNE_INT
define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
diff --git a/test/CodeGen/R600/kcache-fold.ll b/test/CodeGen/R600/kcache-fold.ll
index 382f78c..e8e2bf5 100644
--- a/test/CodeGen/R600/kcache-fold.ll
+++ b/test/CodeGen/R600/kcache-fold.ll
@@ -1,38 +1,38 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+; CHECK: @main1
; CHECK: MOV T{{[0-9]+\.[XYZW], CBuf0\[[0-9]+\]\.[XYZW]}}
-
-define void @main() {
+define void @main1() {
main_body:
- %0 = load <4 x float> addrspace(9)* null
+ %0 = load <4 x float> addrspace(8)* null
%1 = extractelement <4 x float> %0, i32 0
- %2 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+ %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%3 = extractelement <4 x float> %2, i32 0
- %4 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+ %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%5 = extractelement <4 x float> %4, i32 0
%6 = fcmp ult float %1, 0.000000e+00
%7 = select i1 %6, float %3, float %5
- %8 = load <4 x float> addrspace(9)* null
+ %8 = load <4 x float> addrspace(8)* null
%9 = extractelement <4 x float> %8, i32 1
- %10 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+ %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%11 = extractelement <4 x float> %10, i32 1
- %12 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+ %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%13 = extractelement <4 x float> %12, i32 1
%14 = fcmp ult float %9, 0.000000e+00
%15 = select i1 %14, float %11, float %13
- %16 = load <4 x float> addrspace(9)* null
+ %16 = load <4 x float> addrspace(8)* null
%17 = extractelement <4 x float> %16, i32 2
- %18 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+ %18 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%19 = extractelement <4 x float> %18, i32 2
- %20 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+ %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%21 = extractelement <4 x float> %20, i32 2
%22 = fcmp ult float %17, 0.000000e+00
%23 = select i1 %22, float %19, float %21
- %24 = load <4 x float> addrspace(9)* null
+ %24 = load <4 x float> addrspace(8)* null
%25 = extractelement <4 x float> %24, i32 3
- %26 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+ %26 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%27 = extractelement <4 x float> %26, i32 3
- %28 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+ %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%29 = extractelement <4 x float> %28, i32 3
%30 = fcmp ult float %25, 0.000000e+00
%31 = select i1 %30, float %27, float %29
@@ -48,5 +48,53 @@ main_body:
ret void
}
+; CHECK: @main2
+; CHECK-NOT: MOV
+define void @main2() {
+main_body:
+ %0 = load <4 x float> addrspace(8)* null
+ %1 = extractelement <4 x float> %0, i32 0
+ %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %3 = extractelement <4 x float> %2, i32 0
+ %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %5 = extractelement <4 x float> %4, i32 1
+ %6 = fcmp ult float %1, 0.000000e+00
+ %7 = select i1 %6, float %3, float %5
+ %8 = load <4 x float> addrspace(8)* null
+ %9 = extractelement <4 x float> %8, i32 1
+ %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %11 = extractelement <4 x float> %10, i32 0
+ %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %13 = extractelement <4 x float> %12, i32 1
+ %14 = fcmp ult float %9, 0.000000e+00
+ %15 = select i1 %14, float %11, float %13
+ %16 = load <4 x float> addrspace(8)* null
+ %17 = extractelement <4 x float> %16, i32 2
+ %18 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %19 = extractelement <4 x float> %18, i32 3
+ %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %21 = extractelement <4 x float> %20, i32 2
+ %22 = fcmp ult float %17, 0.000000e+00
+ %23 = select i1 %22, float %19, float %21
+ %24 = load <4 x float> addrspace(8)* null
+ %25 = extractelement <4 x float> %24, i32 3
+ %26 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %27 = extractelement <4 x float> %26, i32 3
+ %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %29 = extractelement <4 x float> %28, i32 2
+ %30 = fcmp ult float %25, 0.000000e+00
+ %31 = select i1 %30, float %27, float %29
+ %32 = call float @llvm.AMDIL.clamp.(float %7, float 0.000000e+00, float 1.000000e+00)
+ %33 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00)
+ %34 = call float @llvm.AMDIL.clamp.(float %23, float 0.000000e+00, float 1.000000e+00)
+ %35 = call float @llvm.AMDIL.clamp.(float %31, float 0.000000e+00, float 1.000000e+00)
+ %36 = insertelement <4 x float> undef, float %32, i32 0
+ %37 = insertelement <4 x float> %36, float %33, i32 1
+ %38 = insertelement <4 x float> %37, float %34, i32 2
+ %39 = insertelement <4 x float> %38, float %35, i32 3
+ call void @llvm.R600.store.swizzle(<4 x float> %39, i32 0, i32 0)
+ ret void
+}
+
declare float @llvm.AMDIL.clamp.(float, float, float) readnone
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
diff --git a/test/CodeGen/R600/legalizedag-bug-expand-setcc.ll b/test/CodeGen/R600/legalizedag-bug-expand-setcc.ll
new file mode 100644
index 0000000..1aae7f9
--- /dev/null
+++ b/test/CodeGen/R600/legalizedag-bug-expand-setcc.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; This tests a bug where LegalizeDAG was not checking the target's
+; BooleanContents value and always using one for true, when expanding
+; setcc to select_cc.
+;
+; This bug caused the icmp IR instruction to be expanded to two machine
+; instructions, when only one is needed.
+;
+
+; CHECK: @setcc_expand
+; CHECK: SET
+; CHECK-NOT: CND
+define void @setcc_expand(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = icmp eq i32 %in, 5
+ br i1 %0, label %IF, label %ENDIF
+IF:
+ %1 = getelementptr i32 addrspace(1)* %out, i32 1
+ store i32 0, i32 addrspace(1)* %1
+ br label %ENDIF
+
+ENDIF:
+ store i32 0, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/literals.ll b/test/CodeGen/R600/literals.ll
index be62342..e69f64e 100644
--- a/test/CodeGen/R600/literals.ll
+++ b/test/CodeGen/R600/literals.ll
@@ -6,7 +6,7 @@
; or
; ADD_INT literal.x REG, 5
-; CHECK; @i32_literal
+; CHECK: @i32_literal
; CHECK: ADD_INT {{[A-Z0-9,. ]*}}literal.x,{{[A-Z0-9,. ]*}} 5
define void @i32_literal(i32 addrspace(1)* %out, i32 %in) {
entry:
diff --git a/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll b/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll
index 0c19f14..a8f604a 100644
--- a/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll
+++ b/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll
@@ -3,20 +3,18 @@
;CHECK: S_MOV_B32
;CHECK-NEXT: V_INTERP_MOV_F32
-define void @main() {
+define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) "ShaderType"="0" {
main_body:
- call void @llvm.AMDGPU.shader.type(i32 0)
- %0 = load i32 addrspace(8)* inttoptr (i32 6 to i32 addrspace(8)*)
- %1 = call float @llvm.SI.fs.interp.constant(i32 0, i32 0, i32 %0)
- %2 = call i32 @llvm.SI.packf16(float %1, float %1)
- %3 = bitcast i32 %2 to float
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
+ %4 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3)
+ %5 = call i32 @llvm.SI.packf16(float %4, float %4)
+ %6 = bitcast i32 %5 to float
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %6, float %6, float %6, float %6)
ret void
}
declare void @llvm.AMDGPU.shader.type(i32)
-declare float @llvm.SI.fs.interp.constant(i32, i32, i32) readonly
+declare float @llvm.SI.fs.constant(i32, i32, i32) readonly
declare i32 @llvm.SI.packf16(float, float) readnone
diff --git a/test/CodeGen/R600/llvm.SI.sample.ll b/test/CodeGen/R600/llvm.SI.sample.ll
index 34d1935..d397f3b 100644
--- a/test/CodeGen/R600/llvm.SI.sample.ll
+++ b/test/CodeGen/R600/llvm.SI.sample.ll
@@ -1,35 +1,35 @@
;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
;CHECK: IMAGE_SAMPLE
-;CHECK-NEXT: S_WAITCNT 1792
+;CHECK-NEXT: S_WAITCNT 1904
;CHECK-NEXT: IMAGE_SAMPLE
-;CHECK-NEXT: S_WAITCNT 1792
+;CHECK-NEXT: S_WAITCNT 1904
;CHECK-NEXT: IMAGE_SAMPLE
-;CHECK-NEXT: S_WAITCNT 1792
+;CHECK-NEXT: S_WAITCNT 1904
;CHECK-NEXT: IMAGE_SAMPLE
-;CHECK-NEXT: S_WAITCNT 1792
+;CHECK-NEXT: S_WAITCNT 1904
;CHECK-NEXT: IMAGE_SAMPLE
-;CHECK-NEXT: S_WAITCNT 1792
+;CHECK-NEXT: S_WAITCNT 1904
;CHECK-NEXT: IMAGE_SAMPLE_C
-;CHECK-NEXT: S_WAITCNT 1792
+;CHECK-NEXT: S_WAITCNT 1904
;CHECK-NEXT: IMAGE_SAMPLE_C
-;CHECK-NEXT: S_WAITCNT 1792
+;CHECK-NEXT: S_WAITCNT 1904
;CHECK-NEXT: IMAGE_SAMPLE_C
-;CHECK-NEXT: S_WAITCNT 1792
+;CHECK-NEXT: S_WAITCNT 1904
;CHECK-NEXT: IMAGE_SAMPLE
-;CHECK-NEXT: S_WAITCNT 1792
+;CHECK-NEXT: S_WAITCNT 1904
;CHECK-NEXT: IMAGE_SAMPLE
-;CHECK-NEXT: S_WAITCNT 1792
+;CHECK-NEXT: S_WAITCNT 1904
;CHECK-NEXT: IMAGE_SAMPLE_C
-;CHECK-NEXT: S_WAITCNT 1792
+;CHECK-NEXT: S_WAITCNT 1904
;CHECK-NEXT: IMAGE_SAMPLE_C
-;CHECK-NEXT: S_WAITCNT 1792
+;CHECK-NEXT: S_WAITCNT 1904
;CHECK-NEXT: IMAGE_SAMPLE_C
-;CHECK-NEXT: S_WAITCNT 1792
+;CHECK-NEXT: S_WAITCNT 1904
;CHECK-NEXT: IMAGE_SAMPLE
-;CHECK-NEXT: S_WAITCNT 1792
+;CHECK-NEXT: S_WAITCNT 1904
;CHECK-NEXT: IMAGE_SAMPLE
-;CHECK-NEXT: S_WAITCNT 1792
+;CHECK-NEXT: S_WAITCNT 1904
;CHECK-NEXT: IMAGE_SAMPLE
define void @test() {
diff --git a/test/CodeGen/R600/schedule-fs-loop-nested-if.ll b/test/CodeGen/R600/schedule-fs-loop-nested-if.ll
new file mode 100644
index 0000000..ba9620c
--- /dev/null
+++ b/test/CodeGen/R600/schedule-fs-loop-nested-if.ll
@@ -0,0 +1,83 @@
+;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
+;REQUIRES: asserts
+
+define void @main() {
+main_body:
+ %0 = call float @llvm.R600.interp.input(i32 0, i32 0)
+ %1 = call float @llvm.R600.interp.input(i32 1, i32 0)
+ %2 = call float @llvm.R600.interp.input(i32 2, i32 0)
+ %3 = call float @llvm.R600.interp.input(i32 3, i32 0)
+ %4 = fcmp ult float %1, 0.000000e+00
+ %5 = select i1 %4, float 1.000000e+00, float 0.000000e+00
+ %6 = fsub float -0.000000e+00, %5
+ %7 = fptosi float %6 to i32
+ %8 = bitcast i32 %7 to float
+ %9 = fcmp ult float %0, 5.700000e+01
+ %10 = select i1 %9, float 1.000000e+00, float 0.000000e+00
+ %11 = fsub float -0.000000e+00, %10
+ %12 = fptosi float %11 to i32
+ %13 = bitcast i32 %12 to float
+ %14 = bitcast float %8 to i32
+ %15 = bitcast float %13 to i32
+ %16 = and i32 %14, %15
+ %17 = bitcast i32 %16 to float
+ %18 = bitcast float %17 to i32
+ %19 = icmp ne i32 %18, 0
+ %20 = fcmp ult float %0, 0.000000e+00
+ %21 = select i1 %20, float 1.000000e+00, float 0.000000e+00
+ %22 = fsub float -0.000000e+00, %21
+ %23 = fptosi float %22 to i32
+ %24 = bitcast i32 %23 to float
+ %25 = bitcast float %24 to i32
+ %26 = icmp ne i32 %25, 0
+ br i1 %19, label %IF, label %ELSE
+
+IF: ; preds = %main_body
+ %. = select i1 %26, float 0.000000e+00, float 1.000000e+00
+ %.18 = select i1 %26, float 1.000000e+00, float 0.000000e+00
+ br label %ENDIF
+
+ELSE: ; preds = %main_body
+ br i1 %26, label %ENDIF, label %ELSE17
+
+ENDIF: ; preds = %ELSE17, %ELSE, %IF
+ %temp1.0 = phi float [ %., %IF ], [ %48, %ELSE17 ], [ 0.000000e+00, %ELSE ]
+ %temp2.0 = phi float [ 0.000000e+00, %IF ], [ %49, %ELSE17 ], [ 1.000000e+00, %ELSE ]
+ %temp.0 = phi float [ %.18, %IF ], [ %47, %ELSE17 ], [ 0.000000e+00, %ELSE ]
+ %27 = call float @llvm.AMDIL.clamp.(float %temp.0, float 0.000000e+00, float 1.000000e+00)
+ %28 = call float @llvm.AMDIL.clamp.(float %temp1.0, float 0.000000e+00, float 1.000000e+00)
+ %29 = call float @llvm.AMDIL.clamp.(float %temp2.0, float 0.000000e+00, float 1.000000e+00)
+ %30 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
+ %31 = insertelement <4 x float> undef, float %27, i32 0
+ %32 = insertelement <4 x float> %31, float %28, i32 1
+ %33 = insertelement <4 x float> %32, float %29, i32 2
+ %34 = insertelement <4 x float> %33, float %30, i32 3
+ call void @llvm.R600.store.swizzle(<4 x float> %34, i32 0, i32 0)
+ ret void
+
+ELSE17: ; preds = %ELSE
+ %35 = fadd float 0.000000e+00, 0x3FC99999A0000000
+ %36 = fadd float 0.000000e+00, 0x3FC99999A0000000
+ %37 = fadd float 0.000000e+00, 0x3FC99999A0000000
+ %38 = fadd float %35, 0x3FC99999A0000000
+ %39 = fadd float %36, 0x3FC99999A0000000
+ %40 = fadd float %37, 0x3FC99999A0000000
+ %41 = fadd float %38, 0x3FC99999A0000000
+ %42 = fadd float %39, 0x3FC99999A0000000
+ %43 = fadd float %40, 0x3FC99999A0000000
+ %44 = fadd float %41, 0x3FC99999A0000000
+ %45 = fadd float %42, 0x3FC99999A0000000
+ %46 = fadd float %43, 0x3FC99999A0000000
+ %47 = fadd float %44, 0x3FC99999A0000000
+ %48 = fadd float %45, 0x3FC99999A0000000
+ %49 = fadd float %46, 0x3FC99999A0000000
+ br label %ENDIF
+}
+
+declare float @llvm.R600.interp.input(i32, i32) #0
+
+declare float @llvm.AMDIL.clamp.(float, float, float) #0
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { readnone }
diff --git a/test/CodeGen/R600/schedule-fs-loop-nested.ll b/test/CodeGen/R600/schedule-fs-loop-nested.ll
new file mode 100644
index 0000000..5e875c4
--- /dev/null
+++ b/test/CodeGen/R600/schedule-fs-loop-nested.ll
@@ -0,0 +1,88 @@
+;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
+;REQUIRES: asserts
+
+define void @main() {
+main_body:
+ %0 = load <4 x float> addrspace(9)* null
+ %1 = extractelement <4 x float> %0, i32 3
+ %2 = fptosi float %1 to i32
+ %3 = bitcast i32 %2 to float
+ %4 = bitcast float %3 to i32
+ %5 = sdiv i32 %4, 4
+ %6 = bitcast i32 %5 to float
+ %7 = bitcast float %6 to i32
+ %8 = mul i32 %7, 4
+ %9 = bitcast i32 %8 to float
+ %10 = bitcast float %9 to i32
+ %11 = sub i32 0, %10
+ %12 = bitcast i32 %11 to float
+ %13 = bitcast float %3 to i32
+ %14 = bitcast float %12 to i32
+ %15 = add i32 %13, %14
+ %16 = bitcast i32 %15 to float
+ %17 = load <4 x float> addrspace(9)* null
+ %18 = extractelement <4 x float> %17, i32 0
+ %19 = load <4 x float> addrspace(9)* null
+ %20 = extractelement <4 x float> %19, i32 1
+ %21 = load <4 x float> addrspace(9)* null
+ %22 = extractelement <4 x float> %21, i32 2
+ br label %LOOP
+
+LOOP: ; preds = %IF31, %main_body
+ %temp12.0 = phi float [ 0.000000e+00, %main_body ], [ %47, %IF31 ]
+ %temp6.0 = phi float [ %22, %main_body ], [ %temp6.1, %IF31 ]
+ %temp5.0 = phi float [ %20, %main_body ], [ %temp5.1, %IF31 ]
+ %temp4.0 = phi float [ %18, %main_body ], [ %temp4.1, %IF31 ]
+ %23 = bitcast float %temp12.0 to i32
+ %24 = bitcast float %6 to i32
+ %25 = icmp sge i32 %23, %24
+ %26 = sext i1 %25 to i32
+ %27 = bitcast i32 %26 to float
+ %28 = bitcast float %27 to i32
+ %29 = icmp ne i32 %28, 0
+ br i1 %29, label %IF, label %LOOP29
+
+IF: ; preds = %LOOP
+ %30 = call float @llvm.AMDIL.clamp.(float %temp4.0, float 0.000000e+00, float 1.000000e+00)
+ %31 = call float @llvm.AMDIL.clamp.(float %temp5.0, float 0.000000e+00, float 1.000000e+00)
+ %32 = call float @llvm.AMDIL.clamp.(float %temp6.0, float 0.000000e+00, float 1.000000e+00)
+ %33 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
+ %34 = insertelement <4 x float> undef, float %30, i32 0
+ %35 = insertelement <4 x float> %34, float %31, i32 1
+ %36 = insertelement <4 x float> %35, float %32, i32 2
+ %37 = insertelement <4 x float> %36, float %33, i32 3
+ call void @llvm.R600.store.swizzle(<4 x float> %37, i32 0, i32 0)
+ ret void
+
+LOOP29: ; preds = %LOOP, %ENDIF30
+ %temp6.1 = phi float [ %temp4.1, %ENDIF30 ], [ %temp6.0, %LOOP ]
+ %temp5.1 = phi float [ %temp6.1, %ENDIF30 ], [ %temp5.0, %LOOP ]
+ %temp4.1 = phi float [ %temp5.1, %ENDIF30 ], [ %temp4.0, %LOOP ]
+ %temp20.0 = phi float [ %50, %ENDIF30 ], [ 0.000000e+00, %LOOP ]
+ %38 = bitcast float %temp20.0 to i32
+ %39 = bitcast float %16 to i32
+ %40 = icmp sge i32 %38, %39
+ %41 = sext i1 %40 to i32
+ %42 = bitcast i32 %41 to float
+ %43 = bitcast float %42 to i32
+ %44 = icmp ne i32 %43, 0
+ br i1 %44, label %IF31, label %ENDIF30
+
+IF31: ; preds = %LOOP29
+ %45 = bitcast float %temp12.0 to i32
+ %46 = add i32 %45, 1
+ %47 = bitcast i32 %46 to float
+ br label %LOOP
+
+ENDIF30: ; preds = %LOOP29
+ %48 = bitcast float %temp20.0 to i32
+ %49 = add i32 %48, 1
+ %50 = bitcast i32 %49 to float
+ br label %LOOP29
+}
+
+declare float @llvm.AMDIL.clamp.(float, float, float) #0
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { readnone }
diff --git a/test/CodeGen/R600/schedule-fs-loop.ll b/test/CodeGen/R600/schedule-fs-loop.ll
new file mode 100644
index 0000000..d142cac
--- /dev/null
+++ b/test/CodeGen/R600/schedule-fs-loop.ll
@@ -0,0 +1,55 @@
+;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
+;REQUIRES: asserts
+
+define void @main() {
+main_body:
+ %0 = load <4 x float> addrspace(9)* null
+ %1 = extractelement <4 x float> %0, i32 3
+ %2 = fptosi float %1 to i32
+ %3 = bitcast i32 %2 to float
+ %4 = load <4 x float> addrspace(9)* null
+ %5 = extractelement <4 x float> %4, i32 0
+ %6 = load <4 x float> addrspace(9)* null
+ %7 = extractelement <4 x float> %6, i32 1
+ %8 = load <4 x float> addrspace(9)* null
+ %9 = extractelement <4 x float> %8, i32 2
+ br label %LOOP
+
+LOOP: ; preds = %ENDIF, %main_body
+ %temp4.0 = phi float [ %5, %main_body ], [ %temp5.0, %ENDIF ]
+ %temp5.0 = phi float [ %7, %main_body ], [ %temp6.0, %ENDIF ]
+ %temp6.0 = phi float [ %9, %main_body ], [ %temp4.0, %ENDIF ]
+ %temp8.0 = phi float [ 0.000000e+00, %main_body ], [ %27, %ENDIF ]
+ %10 = bitcast float %temp8.0 to i32
+ %11 = bitcast float %3 to i32
+ %12 = icmp sge i32 %10, %11
+ %13 = sext i1 %12 to i32
+ %14 = bitcast i32 %13 to float
+ %15 = bitcast float %14 to i32
+ %16 = icmp ne i32 %15, 0
+ br i1 %16, label %IF, label %ENDIF
+
+IF: ; preds = %LOOP
+ %17 = call float @llvm.AMDIL.clamp.(float %temp4.0, float 0.000000e+00, float 1.000000e+00)
+ %18 = call float @llvm.AMDIL.clamp.(float %temp5.0, float 0.000000e+00, float 1.000000e+00)
+ %19 = call float @llvm.AMDIL.clamp.(float %temp6.0, float 0.000000e+00, float 1.000000e+00)
+ %20 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
+ %21 = insertelement <4 x float> undef, float %17, i32 0
+ %22 = insertelement <4 x float> %21, float %18, i32 1
+ %23 = insertelement <4 x float> %22, float %19, i32 2
+ %24 = insertelement <4 x float> %23, float %20, i32 3
+ call void @llvm.R600.store.swizzle(<4 x float> %24, i32 0, i32 0)
+ ret void
+
+ENDIF: ; preds = %LOOP
+ %25 = bitcast float %temp8.0 to i32
+ %26 = add i32 %25, 1
+ %27 = bitcast i32 %26 to float
+ br label %LOOP
+}
+
+declare float @llvm.AMDIL.clamp.(float, float, float) #0
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { readnone }
diff --git a/test/CodeGen/R600/schedule-if-2.ll b/test/CodeGen/R600/schedule-if-2.ll
new file mode 100644
index 0000000..6afd677
--- /dev/null
+++ b/test/CodeGen/R600/schedule-if-2.ll
@@ -0,0 +1,94 @@
+;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
+;REQUIRES: asserts
+
+define void @main() {
+main_body:
+ %0 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %1 = extractelement <4 x float> %0, i32 0
+ %2 = fadd float 1.000000e+03, %1
+ %3 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %4 = extractelement <4 x float> %3, i32 0
+ %5 = bitcast float %4 to i32
+ %6 = icmp eq i32 %5, 0
+ %7 = sext i1 %6 to i32
+ %8 = bitcast i32 %7 to float
+ %9 = bitcast float %8 to i32
+ %10 = icmp ne i32 %9, 0
+ br i1 %10, label %IF, label %ELSE
+
+IF: ; preds = %main_body
+ %11 = call float @fabs(float %2)
+ %12 = fcmp ueq float %11, 0x7FF0000000000000
+ %13 = select i1 %12, float 1.000000e+00, float 0.000000e+00
+ %14 = fsub float -0.000000e+00, %13
+ %15 = fptosi float %14 to i32
+ %16 = bitcast i32 %15 to float
+ %17 = bitcast float %16 to i32
+ %18 = icmp ne i32 %17, 0
+ %. = select i1 %18, float 0x36A0000000000000, float 0.000000e+00
+ %19 = fcmp une float %2, %2
+ %20 = select i1 %19, float 1.000000e+00, float 0.000000e+00
+ %21 = fsub float -0.000000e+00, %20
+ %22 = fptosi float %21 to i32
+ %23 = bitcast i32 %22 to float
+ %24 = bitcast float %23 to i32
+ %25 = icmp ne i32 %24, 0
+ %temp8.0 = select i1 %25, float 0x36A0000000000000, float 0.000000e+00
+ %26 = bitcast float %. to i32
+ %27 = sitofp i32 %26 to float
+ %28 = bitcast float %temp8.0 to i32
+ %29 = sitofp i32 %28 to float
+ %30 = fcmp ugt float %2, 0.000000e+00
+ %31 = select i1 %30, float 1.000000e+00, float %2
+ %32 = fcmp uge float %31, 0.000000e+00
+ %33 = select i1 %32, float %31, float -1.000000e+00
+ %34 = fadd float %33, 1.000000e+00
+ %35 = fmul float %34, 5.000000e-01
+ br label %ENDIF
+
+ELSE: ; preds = %main_body
+ %36 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %37 = extractelement <4 x float> %36, i32 0
+ %38 = bitcast float %37 to i32
+ %39 = icmp eq i32 %38, 1
+ %40 = sext i1 %39 to i32
+ %41 = bitcast i32 %40 to float
+ %42 = bitcast float %41 to i32
+ %43 = icmp ne i32 %42, 0
+ br i1 %43, label %IF23, label %ENDIF
+
+ENDIF: ; preds = %IF23, %ELSE, %IF
+ %temp4.0 = phi float [ %2, %IF ], [ %56, %IF23 ], [ 0.000000e+00, %ELSE ]
+ %temp5.0 = phi float [ %27, %IF ], [ %60, %IF23 ], [ 0.000000e+00, %ELSE ]
+ %temp6.0 = phi float [ %29, %IF ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF23 ]
+ %temp7.0 = phi float [ %35, %IF ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF23 ]
+ %44 = insertelement <4 x float> undef, float %temp4.0, i32 0
+ %45 = insertelement <4 x float> %44, float %temp5.0, i32 1
+ %46 = insertelement <4 x float> %45, float %temp6.0, i32 2
+ %47 = insertelement <4 x float> %46, float %temp7.0, i32 3
+ call void @llvm.R600.store.swizzle(<4 x float> %47, i32 0, i32 0)
+ ret void
+
+IF23: ; preds = %ELSE
+ %48 = fcmp ult float 0.000000e+00, %2
+ %49 = select i1 %48, float 1.000000e+00, float 0.000000e+00
+ %50 = fsub float -0.000000e+00, %49
+ %51 = fptosi float %50 to i32
+ %52 = bitcast i32 %51 to float
+ %53 = bitcast float %52 to i32
+ %54 = icmp ne i32 %53, 0
+ %.28 = select i1 %54, float 0x36A0000000000000, float 0.000000e+00
+ %55 = bitcast float %.28 to i32
+ %56 = sitofp i32 %55 to float
+ %57 = load <4 x float> addrspace(8)* null
+ %58 = extractelement <4 x float> %57, i32 0
+ %59 = fsub float -0.000000e+00, %58
+ %60 = fadd float %2, %59
+ br label %ENDIF
+}
+
+declare float @fabs(float) #0
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { readonly }
diff --git a/test/CodeGen/R600/schedule-if.ll b/test/CodeGen/R600/schedule-if.ll
new file mode 100644
index 0000000..347d92f
--- /dev/null
+++ b/test/CodeGen/R600/schedule-if.ll
@@ -0,0 +1,46 @@
+;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
+;REQUIRES: asserts
+
+define void @main() {
+main_body:
+ %0 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %1 = extractelement <4 x float> %0, i32 0
+ %2 = bitcast float %1 to i32
+ %3 = icmp eq i32 %2, 0
+ %4 = sext i1 %3 to i32
+ %5 = bitcast i32 %4 to float
+ %6 = bitcast float %5 to i32
+ %7 = icmp ne i32 %6, 0
+ br i1 %7, label %ENDIF, label %ELSE
+
+ELSE: ; preds = %main_body
+ %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %9 = extractelement <4 x float> %8, i32 0
+ %10 = bitcast float %9 to i32
+ %11 = icmp eq i32 %10, 1
+ %12 = sext i1 %11 to i32
+ %13 = bitcast i32 %12 to float
+ %14 = bitcast float %13 to i32
+ %15 = icmp ne i32 %14, 0
+ br i1 %15, label %IF13, label %ENDIF
+
+ENDIF: ; preds = %IF13, %ELSE, %main_body
+ %temp.0 = phi float [ 1.000000e+03, %main_body ], [ 1.000000e+00, %IF13 ], [ 0.000000e+00, %ELSE ]
+ %temp1.0 = phi float [ 0.000000e+00, %main_body ], [ %23, %IF13 ], [ 0.000000e+00, %ELSE ]
+ %temp3.0 = phi float [ 1.000000e+00, %main_body ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF13 ]
+ %16 = insertelement <4 x float> undef, float %temp.0, i32 0
+ %17 = insertelement <4 x float> %16, float %temp1.0, i32 1
+ %18 = insertelement <4 x float> %17, float 0.000000e+00, i32 2
+ %19 = insertelement <4 x float> %18, float %temp3.0, i32 3
+ call void @llvm.R600.store.swizzle(<4 x float> %19, i32 0, i32 0)
+ ret void
+
+IF13: ; preds = %ELSE
+ %20 = load <4 x float> addrspace(8)* null
+ %21 = extractelement <4 x float> %20, i32 0
+ %22 = fsub float -0.000000e+00, %21
+ %23 = fadd float 1.000000e+03, %22
+ br label %ENDIF
+}
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
diff --git a/test/CodeGen/R600/schedule-vs-if-nested-loop.ll b/test/CodeGen/R600/schedule-vs-if-nested-loop.ll
new file mode 100644
index 0000000..44b7c2f
--- /dev/null
+++ b/test/CodeGen/R600/schedule-vs-if-nested-loop.ll
@@ -0,0 +1,134 @@
+;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
+;REQUIRES: asserts
+
+define void @main() {
+main_body:
+ %0 = call float @llvm.R600.load.input(i32 4)
+ %1 = call float @llvm.R600.load.input(i32 5)
+ %2 = call float @llvm.R600.load.input(i32 6)
+ %3 = call float @llvm.R600.load.input(i32 7)
+ %4 = fcmp ult float %0, 0.000000e+00
+ %5 = select i1 %4, float 1.000000e+00, float 0.000000e+00
+ %6 = fsub float -0.000000e+00, %5
+ %7 = fptosi float %6 to i32
+ %8 = bitcast i32 %7 to float
+ %9 = bitcast float %8 to i32
+ %10 = icmp ne i32 %9, 0
+ br i1 %10, label %LOOP, label %ENDIF
+
+ENDIF: ; preds = %ENDIF16, %LOOP, %main_body
+ %temp.0 = phi float [ 0.000000e+00, %main_body ], [ %temp.1, %LOOP ], [ %temp.1, %ENDIF16 ]
+ %temp1.0 = phi float [ 1.000000e+00, %main_body ], [ %temp1.1, %LOOP ], [ %temp1.1, %ENDIF16 ]
+ %temp2.0 = phi float [ 0.000000e+00, %main_body ], [ %temp2.1, %LOOP ], [ %temp2.1, %ENDIF16 ]
+ %temp3.0 = phi float [ 0.000000e+00, %main_body ], [ %temp3.1, %LOOP ], [ %temp3.1, %ENDIF16 ]
+ %11 = load <4 x float> addrspace(9)* null
+ %12 = extractelement <4 x float> %11, i32 0
+ %13 = fmul float %12, %0
+ %14 = load <4 x float> addrspace(9)* null
+ %15 = extractelement <4 x float> %14, i32 1
+ %16 = fmul float %15, %0
+ %17 = load <4 x float> addrspace(9)* null
+ %18 = extractelement <4 x float> %17, i32 2
+ %19 = fmul float %18, %0
+ %20 = load <4 x float> addrspace(9)* null
+ %21 = extractelement <4 x float> %20, i32 3
+ %22 = fmul float %21, %0
+ %23 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+ %24 = extractelement <4 x float> %23, i32 0
+ %25 = fmul float %24, %1
+ %26 = fadd float %25, %13
+ %27 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+ %28 = extractelement <4 x float> %27, i32 1
+ %29 = fmul float %28, %1
+ %30 = fadd float %29, %16
+ %31 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+ %32 = extractelement <4 x float> %31, i32 2
+ %33 = fmul float %32, %1
+ %34 = fadd float %33, %19
+ %35 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+ %36 = extractelement <4 x float> %35, i32 3
+ %37 = fmul float %36, %1
+ %38 = fadd float %37, %22
+ %39 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+ %40 = extractelement <4 x float> %39, i32 0
+ %41 = fmul float %40, %2
+ %42 = fadd float %41, %26
+ %43 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+ %44 = extractelement <4 x float> %43, i32 1
+ %45 = fmul float %44, %2
+ %46 = fadd float %45, %30
+ %47 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+ %48 = extractelement <4 x float> %47, i32 2
+ %49 = fmul float %48, %2
+ %50 = fadd float %49, %34
+ %51 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+ %52 = extractelement <4 x float> %51, i32 3
+ %53 = fmul float %52, %2
+ %54 = fadd float %53, %38
+ %55 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+ %56 = extractelement <4 x float> %55, i32 0
+ %57 = fmul float %56, %3
+ %58 = fadd float %57, %42
+ %59 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+ %60 = extractelement <4 x float> %59, i32 1
+ %61 = fmul float %60, %3
+ %62 = fadd float %61, %46
+ %63 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+ %64 = extractelement <4 x float> %63, i32 2
+ %65 = fmul float %64, %3
+ %66 = fadd float %65, %50
+ %67 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+ %68 = extractelement <4 x float> %67, i32 3
+ %69 = fmul float %68, %3
+ %70 = fadd float %69, %54
+ %71 = insertelement <4 x float> undef, float %58, i32 0
+ %72 = insertelement <4 x float> %71, float %62, i32 1
+ %73 = insertelement <4 x float> %72, float %66, i32 2
+ %74 = insertelement <4 x float> %73, float %70, i32 3
+ call void @llvm.R600.store.swizzle(<4 x float> %74, i32 60, i32 1)
+ %75 = insertelement <4 x float> undef, float %temp.0, i32 0
+ %76 = insertelement <4 x float> %75, float %temp1.0, i32 1
+ %77 = insertelement <4 x float> %76, float %temp2.0, i32 2
+ %78 = insertelement <4 x float> %77, float %temp3.0, i32 3
+ call void @llvm.R600.store.swizzle(<4 x float> %78, i32 0, i32 2)
+ ret void
+
+LOOP: ; preds = %main_body, %ENDIF19
+ %temp.1 = phi float [ %93, %ENDIF19 ], [ 0.000000e+00, %main_body ]
+ %temp1.1 = phi float [ %94, %ENDIF19 ], [ 1.000000e+00, %main_body ]
+ %temp2.1 = phi float [ %95, %ENDIF19 ], [ 0.000000e+00, %main_body ]
+ %temp3.1 = phi float [ %96, %ENDIF19 ], [ 0.000000e+00, %main_body ]
+ %temp4.0 = phi float [ %97, %ENDIF19 ], [ -2.000000e+00, %main_body ]
+ %79 = fcmp uge float %temp4.0, %0
+ %80 = select i1 %79, float 1.000000e+00, float 0.000000e+00
+ %81 = fsub float -0.000000e+00, %80
+ %82 = fptosi float %81 to i32
+ %83 = bitcast i32 %82 to float
+ %84 = bitcast float %83 to i32
+ %85 = icmp ne i32 %84, 0
+ br i1 %85, label %ENDIF, label %ENDIF16
+
+ENDIF16: ; preds = %LOOP
+ %86 = fcmp une float %2, %temp4.0
+ %87 = select i1 %86, float 1.000000e+00, float 0.000000e+00
+ %88 = fsub float -0.000000e+00, %87
+ %89 = fptosi float %88 to i32
+ %90 = bitcast i32 %89 to float
+ %91 = bitcast float %90 to i32
+ %92 = icmp ne i32 %91, 0
+ br i1 %92, label %ENDIF, label %ENDIF19
+
+ENDIF19: ; preds = %ENDIF16
+ %93 = fadd float %temp.1, 1.000000e+00
+ %94 = fadd float %temp1.1, 0.000000e+00
+ %95 = fadd float %temp2.1, 0.000000e+00
+ %96 = fadd float %temp3.1, 0.000000e+00
+ %97 = fadd float %temp4.0, 1.000000e+00
+ br label %LOOP
+}
+
+declare float @llvm.R600.load.input(i32) #0
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { readnone }
diff --git a/test/CodeGen/R600/selectcc-opt.ll b/test/CodeGen/R600/selectcc-opt.ll
new file mode 100644
index 0000000..02d9353
--- /dev/null
+++ b/test/CodeGen/R600/selectcc-opt.ll
@@ -0,0 +1,64 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: @test_a
+; CHECK-NOT: CND
+; CHECK: SET{{[NEQGTL]+}}_DX10
+
+define void @test_a(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp ult float %in, 0.000000e+00
+ %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
+ %2 = fsub float -0.000000e+00, %1
+ %3 = fptosi float %2 to i32
+ %4 = bitcast i32 %3 to float
+ %5 = bitcast float %4 to i32
+ %6 = icmp ne i32 %5, 0
+ br i1 %6, label %IF, label %ENDIF
+
+IF:
+ %7 = getelementptr i32 addrspace(1)* %out, i32 1
+ store i32 0, i32 addrspace(1)* %7
+ br label %ENDIF
+
+ENDIF:
+ store i32 0, i32 addrspace(1)* %out
+ ret void
+}
+
+; Same as test_a, but the branch labels are swapped to produce the inverse cc
+; for the icmp instruction
+
+; CHECK: @test_b
+; CHECK: SET{{[GTEQN]+}}_DX10
+; CHECK-NEXT: PRED_
+define void @test_b(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp ult float %in, 0.0
+ %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
+ %2 = fsub float -0.000000e+00, %1
+ %3 = fptosi float %2 to i32
+ %4 = bitcast i32 %3 to float
+ %5 = bitcast float %4 to i32
+ %6 = icmp ne i32 %5, 0
+ br i1 %6, label %ENDIF, label %IF
+
+IF:
+ %7 = getelementptr i32 addrspace(1)* %out, i32 1
+ store i32 0, i32 addrspace(1)* %7
+ br label %ENDIF
+
+ENDIF:
+ store i32 0, i32 addrspace(1)* %out
+ ret void
+}
+
+; Test a CND*_INT instruction with float true/false values
+; CHECK: @test_c
+; CHECK: CND{{[GTE]+}}_INT
+define void @test_c(float addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = icmp sgt i32 %in, 0
+ %1 = select i1 %0, float 2.0, float 3.0
+ store float %1, float addrspace(1)* %out
+ ret void
+}