diff options
author | Vincent Lejeune <vljn@ovi.com> | 2013-06-05 20:27:35 +0000 |
---|---|---|
committer | Vincent Lejeune <vljn@ovi.com> | 2013-06-05 20:27:35 +0000 |
commit | 512119770e9c32eb0b9e6196ce51917fb2e30d9f (patch) | |
tree | a573723a7bf391be08c8752e3983de46215c7e3b | |
parent | 6ed30e0f0c3876df8b77c44fd3196b40903fb47d (diff) | |
download | external_llvm-512119770e9c32eb0b9e6196ce51917fb2e30d9f.zip external_llvm-512119770e9c32eb0b9e6196ce51917fb2e30d9f.tar.gz external_llvm-512119770e9c32eb0b9e6196ce51917fb2e30d9f.tar.bz2 |
R600: Schedule copy from phys register at beginning of block
It allows regalloc pass to remove them by trivially assigning associated reg
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@183336 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/R600/R600MachineScheduler.cpp | 32 | ||||
-rw-r--r-- | lib/Target/R600/R600MachineScheduler.h | 1 | ||||
-rw-r--r-- | test/CodeGen/R600/fabs.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/R600/fadd.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/R600/floor.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/R600/fmad.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/R600/fmax.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/R600/fmin.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/R600/fmul.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/R600/fsub.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/R600/llvm.AMDGPU.mul.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/R600/llvm.pow.ll | 2 |
12 files changed, 42 insertions, 11 deletions
diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp index 9469e0f..8524fe3 100644 --- a/lib/Target/R600/R600MachineScheduler.cpp +++ b/lib/Target/R600/R600MachineScheduler.cpp @@ -71,6 +71,10 @@ SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) { (!AllowSwitchFromAlu && CurInstKind == IDAlu))) { // try to pick ALU SU = pickAlu(); + if (!SU && !PhysicalRegCopy.empty()) { + SU = PhysicalRegCopy.front(); + PhysicalRegCopy.erase(PhysicalRegCopy.begin()); + } if (SU) { if (CurEmitted >= InstKindLimit[IDAlu]) CurEmitted = 0; @@ -118,7 +122,22 @@ SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) { return SU; } +bool IsUnScheduled(const SUnit *SU) { + return SU->isScheduled; +} + +static +void Filter(std::vector<SUnit *> &List) { + List.erase(std::remove_if(List.begin(), List.end(), IsUnScheduled), List.end()); +} + void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) { + if (IsTopNode) { + for (unsigned i = 0; i < AluLast; i++) { + Filter(Available[i]); + Filter(Pending[i]); + } + } if (NextInstKind != CurInstKind) { DEBUG(dbgs() << "Instruction Type Switch\n"); @@ -157,13 +176,24 @@ void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) { } } +static bool +isPhysicalRegCopy(MachineInstr *MI) { + if (MI->getOpcode() != AMDGPU::COPY) + return false; + + return !TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg()); +} + void R600SchedStrategy::releaseTopNode(SUnit *SU) { DEBUG(dbgs() << "Top Releasing ";SU->dump(DAG);); - } void R600SchedStrategy::releaseBottomNode(SUnit *SU) { DEBUG(dbgs() << "Bottom Releasing ";SU->dump(DAG);); + if (isPhysicalRegCopy(SU->getInstr())) { + PhysicalRegCopy.push_back(SU); + return; + } int IK = getInstKind(SU); diff --git a/lib/Target/R600/R600MachineScheduler.h b/lib/Target/R600/R600MachineScheduler.h index 4dedf70..c5024d2 100644 --- a/lib/Target/R600/R600MachineScheduler.h +++ b/lib/Target/R600/R600MachineScheduler.h @@ -54,6 +54,7 @@ class R600SchedStrategy : public MachineSchedStrategy { std::vector<SUnit *> AvailableAlus[AluLast]; std::vector<SUnit *> UnscheduledARDefs; std::vector<SUnit *> UnscheduledARUses; + std::vector<SUnit *> PhysicalRegCopy; InstKind CurInstKind; int CurEmitted; diff --git a/test/CodeGen/R600/fabs.ll b/test/CodeGen/R600/fabs.ll index b876c0e..85f2882 100644 --- a/test/CodeGen/R600/fabs.ll +++ b/test/CodeGen/R600/fabs.ll @@ -1,6 +1,6 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -;CHECK: MOV * T{{[0-9]+\.[XYZW], \|PV\.[XYZW]\|}} +;CHECK: MOV * T{{[0-9]+\.[XYZW], \|T[0-9]+\.[XYZW]\|}} define void @test() { %r0 = call float @llvm.R600.load.input(i32 0) diff --git a/test/CodeGen/R600/fadd.ll b/test/CodeGen/R600/fadd.ll index bec5043..9a67232 100644 --- a/test/CodeGen/R600/fadd.ll +++ b/test/CodeGen/R600/fadd.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s ; CHECK: @fadd_f32 -; CHECK: ADD * T{{[0-9]+\.[XYZW], PV\.[XYZW], PV\.[XYZW]}} +; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @fadd_f32() { %r0 = call float @llvm.R600.load.input(i32 0) diff --git a/test/CodeGen/R600/floor.ll b/test/CodeGen/R600/floor.ll index 7b8aa0a..877d69a 100644 --- a/test/CodeGen/R600/floor.ll +++ b/test/CodeGen/R600/floor.ll @@ -1,6 +1,6 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -;CHECK: FLOOR * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +;CHECK: FLOOR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @test() { %r0 = call float @llvm.R600.load.input(i32 0) diff --git a/test/CodeGen/R600/fmad.ll b/test/CodeGen/R600/fmad.ll index eb82b44..75e65d8 100644 --- a/test/CodeGen/R600/fmad.ll +++ b/test/CodeGen/R600/fmad.ll @@ -1,6 +1,6 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -;CHECK: MULADD_IEEE * {{T[0-9]+\.[XYZW], PV\.[XYZW], PV.[XYZW], PV\.[XYZW]}} +;CHECK: MULADD_IEEE * {{T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @test() { %r0 = call float @llvm.R600.load.input(i32 0) diff --git a/test/CodeGen/R600/fmax.ll b/test/CodeGen/R600/fmax.ll index 9357287..8b704e5 100644 --- a/test/CodeGen/R600/fmax.ll +++ b/test/CodeGen/R600/fmax.ll @@ -1,6 +1,6 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -;CHECK: MAX * T{{[0-9]+\.[XYZW], PV\.[XYZW], PV\.[XYZW]}} +;CHECK: MAX * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @test() { %r0 = call float @llvm.R600.load.input(i32 0) diff --git a/test/CodeGen/R600/fmin.ll b/test/CodeGen/R600/fmin.ll index e38269c..5e34b7c 100644 --- a/test/CodeGen/R600/fmin.ll +++ b/test/CodeGen/R600/fmin.ll @@ -1,6 +1,6 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -;CHECK: MIN * T{{[0-9]+\.[XYZW], PV\.[XYZW], PV\.[XYZW]}} +;CHECK: MIN * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @test() { %r0 = call float @llvm.R600.load.input(i32 0) diff --git a/test/CodeGen/R600/fmul.ll b/test/CodeGen/R600/fmul.ll index fee5ead..a40e818 100644 --- a/test/CodeGen/R600/fmul.ll +++ b/test/CodeGen/R600/fmul.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s ; CHECK: @fmul_f32 -; CHECK: MUL_IEEE * {{T[0-9]+\.[XYZW], PV\.[XYZW], PV\.[XYZW]}} +; CHECK: MUL_IEEE * {{T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @fmul_f32() { %r0 = call float @llvm.R600.load.input(i32 0) diff --git a/test/CodeGen/R600/fsub.ll b/test/CodeGen/R600/fsub.ll index 2c88cbc..f784cde 100644 --- a/test/CodeGen/R600/fsub.ll +++ b/test/CodeGen/R600/fsub.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s ; CHECK: @fsub_f32 -; CHECK: ADD * T{{[0-9]+\.[XYZW], PV\.[XYZW], -PV\.[XYZW]}} +; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}} define void @fsub_f32() { %r0 = call float @llvm.R600.load.input(i32 0) diff --git a/test/CodeGen/R600/llvm.AMDGPU.mul.ll b/test/CodeGen/R600/llvm.AMDGPU.mul.ll index be14e6f..cc0732b 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.mul.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.mul.ll @@ -1,6 +1,6 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], PV\.[XYZW], PV\.[XYZW]}} +;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @test() { %r0 = call float @llvm.R600.load.input(i32 0) diff --git a/test/CodeGen/R600/llvm.pow.ll b/test/CodeGen/R600/llvm.pow.ll index 532983f..1422083 100644 --- a/test/CodeGen/R600/llvm.pow.ll +++ b/test/CodeGen/R600/llvm.pow.ll @@ -1,7 +1,7 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s ;CHECK: LOG_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @test() { |