about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- lib/Target/R600/R600ControlFlowFinalizer.cpp 10
-rw-r--r-- test/CodeGen/R600/tex-clause-antidep.ll 24
2 files changed, 28 insertions, 6 deletions
diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp
index 6e21df8..ab29d60 100644
--- a/lib/Target/R600/R600ControlFlowFinalizer.cpp
+++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -110,7 +110,7 @@ private:
}
bool isCompatibleWithClause(const MachineInstr *MI,
- std::set<unsigned> &DstRegs, std::set<unsigned> &SrcRegs) const {
+ std::set<unsigned> &DstRegs) const {
unsigned DstMI, SrcMI;
for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
E = MI->operands_end(); I != E; ++I) {
@@ -136,9 +136,7 @@ private:
&AMDGPU::R600_Reg128RegClass);
}
}
- if ((DstRegs.find(SrcMI) == DstRegs.end()) &&
- (SrcRegs.find(DstMI) == SrcRegs.end())) {
- SrcRegs.insert(SrcMI);
+ if ((DstRegs.find(SrcMI) == DstRegs.end())) {
DstRegs.insert(DstMI);
return true;
} else
@@ -152,7 +150,7 @@ private:
std::vector<MachineInstr *> ClauseContent;
unsigned AluInstCount = 0;
bool IsTex = TII->usesTextureCache(ClauseHead);
- std::set<unsigned> DstRegs, SrcRegs;
+ std::set<unsigned> DstRegs;
for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
if (IsTrivialInst(I))
continue;
@@ -161,7 +159,7 @@ private:
if ((IsTex && !TII->usesTextureCache(I)) ||
(!IsTex && !TII->usesVertexCache(I)))
break;
- if (!isCompatibleWithClause(I, DstRegs, SrcRegs))
+ if (!isCompatibleWithClause(I, DstRegs))
break;
AluInstCount ++;
ClauseContent.push_back(I);
diff --git a/test/CodeGen/R600/tex-clause-antidep.ll b/test/CodeGen/R600/tex-clause-antidep.ll
new file mode 100644
index 0000000..5979609
--- /dev/null
+++ b/test/CodeGen/R600/tex-clause-antidep.ll
@@ -0,0 +1,24 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: TEX
+;CHECK-NEXT: ALU
+
+define void @test() {
+ %1 = call float @llvm.R600.load.input(i32 0)
+ %2 = call float @llvm.R600.load.input(i32 1)
+ %3 = call float @llvm.R600.load.input(i32 2)
+ %4 = call float @llvm.R600.load.input(i32 3)
+ %5 = insertelement <4 x float> undef, float %1, i32 0
+ %6 = insertelement <4 x float> %5, float %2, i32 1
+ %7 = insertelement <4 x float> %6, float %3, i32 2
+ %8 = insertelement <4 x float> %7, float %4, i32 3
+ %9 = call <4 x float> @llvm.R600.tex(<4 x float> %8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+ %10 = call <4 x float> @llvm.R600.tex(<4 x float> %8, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+ %11 = fadd <4 x float> %9, %10
+ call void @llvm.R600.store.swizzle(<4 x float> %11, i32 0, i32 0)
+ ret void
+}
+
+declare float @llvm.R600.load.input(i32) readnone
+declare <4 x float> @llvm.R600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) readnone
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)