aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKalle Raiskila <kalle.raiskila@nokia.com>2011-01-11 09:07:54 +0000
committerKalle Raiskila <kalle.raiskila@nokia.com>2011-01-11 09:07:54 +0000
commit76020ed6f33e3b3943b25c8b5e111afece086f5c (patch)
treea82e35de61771d5249f1ae9eef88b8981b69e319
parent87376839a651ed1221dd46762701f13ad6ece0eb (diff)
downloadexternal_llvm-76020ed6f33e3b3943b25c8b5e111afece086f5c.zip
external_llvm-76020ed6f33e3b3943b25c8b5e111afece086f5c.tar.gz
external_llvm-76020ed6f33e3b3943b25c8b5e111afece086f5c.tar.bz2
Add a "nop filler" pass to SPU.
Filling no-ops is done just before emitting of assembly, when the instruction stream is final. No-ops are inserted to align the instructions so the dual-issue of the pipeline is utilized. This speeds up generated code with a minimum of 1% on a select set of algorithms. This pass may be redundant if the instruction scheduler and all subsequent passes that modify the instruction stream (prolog+epilog inserter, register scavenger, are there others?) are made aware of the instruction alignments. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@123226 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/CellSPU/CMakeLists.txt1
-rw-r--r--lib/Target/CellSPU/SPU.h1
-rw-r--r--lib/Target/CellSPU/SPUInstrInfo.td2
-rw-r--r--lib/Target/CellSPU/SPUNopFiller.cpp153
-rw-r--r--lib/Target/CellSPU/SPUTargetMachine.cpp9
-rw-r--r--lib/Target/CellSPU/SPUTargetMachine.h1
6 files changed, 166 insertions, 1 deletions
diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt
index f218311..633bdf6 100644
--- a/lib/Target/CellSPU/CMakeLists.txt
+++ b/lib/Target/CellSPU/CMakeLists.txt
@@ -23,4 +23,5 @@ add_llvm_target(CellSPUCodeGen
SPUSubtarget.cpp
SPUTargetMachine.cpp
SPUSelectionDAGInfo.cpp
+ SPUNopFiller.cpp
)
diff --git a/lib/Target/CellSPU/SPU.h b/lib/Target/CellSPU/SPU.h
index 1f21511..72f8430 100644
--- a/lib/Target/CellSPU/SPU.h
+++ b/lib/Target/CellSPU/SPU.h
@@ -23,6 +23,7 @@ namespace llvm {
class formatted_raw_ostream;
FunctionPass *createSPUISelDag(SPUTargetMachine &TM);
+ FunctionPass *createSPUNopFillerPass(SPUTargetMachine &tm);
extern Target TheCellSPUTarget;
}
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
index 4095951..4f59e06 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -4216,7 +4216,7 @@ def : Pat<(fabs (v4f32 VECREG:$rA)),
// in the odd pipeline)
//===----------------------------------------------------------------------===//
-def ENOP : SPUInstr<(outs), (ins), "enop", ExecNOP> {
+def ENOP : SPUInstr<(outs), (ins), "nop", ExecNOP> {
let Pattern = [];
let Inst{0-10} = 0b10000000010;
diff --git a/lib/Target/CellSPU/SPUNopFiller.cpp b/lib/Target/CellSPU/SPUNopFiller.cpp
new file mode 100644
index 0000000..54a5925
--- /dev/null
+++ b/lib/Target/CellSPU/SPUNopFiller.cpp
@@ -0,0 +1,153 @@
+//===-- SPUNopFiller.cpp - Add nops/lnops to align the pipelines---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The final pass just before assembly printing. This pass is the last
+// checkpoint where nops and lnops are added to the instruction stream to
+// satisfy the dual issue requirements. The actual dual issue scheduling is
+// done (TODO: nowhere, currently)
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "SPUTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+  // Machine-function pass that visits every basic block and inserts nop/lnop
+  // instructions so that instructions line up with the even/odd issue slots.
+  struct SPUNopFiller : public MachineFunctionPass {
+    // Pipeline classification of a single instruction.
+    enum SPUOpPlace {
+      none   = 0, // no more instructions in this function / BB
+      pseudo = 1, // this does not get executed
+      even   = 2, // executed in the even pipeline
+      odd    = 3  // executed in the odd pipeline
+    };
+
+    TargetMachine &TM;
+    const TargetInstrInfo *TII;
+    const InstrItineraryData *IID;
+    bool isEvenPlace; // the instruction slot (mem address) at hand is even/odd
+    static char ID;
+
+    SPUNopFiller(TargetMachine &tm)
+      : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()),
+        IID(tm.getInstrItineraryData()) {
+      DEBUG( dbgs() << "********** SPU Nop filler **********\n" ; );
+    }
+
+    virtual const char *getPassName() const { return "SPU nop/lnop Filler"; }
+
+    SPUOpPlace getOpPlacement( MachineInstr &instr );
+    void runOnMachineBasicBlock(MachineBasicBlock &MBB);
+
+    // Processes the blocks of F in order; unconditionally reports a change.
+    bool runOnMachineFunction(MachineFunction &F) {
+      isEvenPlace = true; // all functions get an .align 3 directive at start
+      MachineFunction::iterator Block = F.begin();
+      for (MachineFunction::iterator End = F.end(); Block != End; ++Block)
+        runOnMachineBasicBlock(*Block);
+      return true; // never-ever do any more modifications, just print it!
+    }
+  };
+  char SPUNopFiller::ID = 0;
+}
+
+// Fill a BasicBlock to alignment.
+// In the assembly we align the functions to 'even' addresses, but
+// basic blocks have an implicit alignment. We hereby define
+// basic blocks to have the same, even, alignment.
+void SPUNopFiller::
+runOnMachineBasicBlock(MachineBasicBlock &MBB)
+{
+  // Inserts a nop/lnop before any instruction that, together with its
+  // successor, would sit in the wrong slots; then pads the block so it ends
+  // on an even slot. isEvenPlace is the slot of the instruction being visited.
+  assert( isEvenPlace && "basic block start from odd address");
+  for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
+  {
+    SPUOpPlace this_optype = getOpPlacement( *I );
+    // Placement of the next non-pseudo instruction: 'none' if I is last,
+    // 'pseudo' if only pseudos follow.
+    SPUOpPlace next_optype = none;
+    MachineBasicBlock::iterator J = I;
+    for (++J; J != MBB.end(); ++J) {
+      next_optype = getOpPlacement( *J );
+      if (next_optype != pseudo )
+        break;
+    }
+
+    // pad: odd(wrong), even(wrong), ...
+    // to:  nop(corr), odd(corr), even(corr)...
+    if( isEvenPlace && this_optype == odd && next_optype == even ) {
+      DEBUG( dbgs() <<"Adding NOP before: "; );
+      DEBUG( I->dump(); );
+      BuildMI(MBB, I, I->getDebugLoc(), TII->get(SPU::ENOP));
+      isEvenPlace=false;
+    }
+
+    // pad: even(wrong), odd(wrong), ...
+    // to:  lnop(corr), even(corr), odd(corr)...
+    else if ( !isEvenPlace && this_optype == even && next_optype == odd){
+      DEBUG( dbgs() <<"Adding LNOP before: "; );
+      DEBUG( I->dump(); );
+      BuildMI(MBB, I, I->getDebugLoc(), TII->get(SPU::LNOP));
+      isEvenPlace=true;
+    }
+
+    // now go to next mem slot
+    if( this_optype != pseudo )
+      isEvenPlace = !isEvenPlace;
+  }
+
+  // pad basic block end, so that the next block starts on an even slot
+  if( !isEvenPlace ){
+    MachineBasicBlock::iterator Last = MBB.end(); // block is non-empty here:
+    --Last;                       // an odd slot implies a visited instruction
+    if (getOpPlacement( *Last ) == odd) {
+      DEBUG( dbgs() <<"Padding basic block with NOP\n"; );
+      BuildMI(MBB, Last, Last->getDebugLoc(), TII->get(SPU::ENOP));
+    }
+    else {
+      // Append at end(); end() must not be dereferenced, so reuse Last's loc.
+      DEBUG( dbgs() <<"Padding basic block with LNOP\n"; );
+      BuildMI(MBB, MBB.end(), Last->getDebugLoc(), TII->get(SPU::LNOP));
+    }
+    isEvenPlace=true;
+  }
+}
+
+FunctionPass *llvm::createSPUNopFillerPass(SPUTargetMachine &tm) {
+ return new SPUNopFiller(tm); // fresh pass instance for 'tm', returned as a raw pointer
+}
+
+// Classify 'instr' as even-pipeline, odd-pipeline or pseudo, based on the
+// functional units of the first stage of its scheduling itinerary.
+SPUNopFiller::SPUOpPlace
+SPUNopFiller::getOpPlacement( MachineInstr &instr ) {
+  const unsigned SchedClass = instr.getDesc().getSchedClass();
+  const unsigned Units = IID->beginStage(SchedClass)->getUnits();
+
+  switch( Units ) {
+  case 0: return pseudo;
+  case 1: return odd;
+  case 2: return even;
+  default: break;
+  }
+
+  // Any other unit value is unexpected; without asserts it counts as pseudo.
+  assert( false && "got unknown FuncUnit\n");
+  return pseudo;
+}
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
index 3423c69..3ed7361 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ b/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -59,3 +59,12 @@ bool SPUTargetMachine::addInstSelector(PassManagerBase &PM,
PM.add(createSPUISelDag(*this));
return false;
}
+
+// Adds the passes to run just before printing the assembly; OptLevel unused.
+bool SPUTargetMachine::addPreEmitPass(PassManagerBase &PM,
+                                      CodeGenOpt::Level OptLevel) {
+  // nop/lnop filling aligns the instructions for dual issue
+  FunctionPass *NopFiller = createSPUNopFillerPass(*this);
+  PM.add(NopFiller);
+  return true;
+}
diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h
index 6e46757..75abd5e 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.h
+++ b/lib/Target/CellSPU/SPUTargetMachine.h
@@ -82,6 +82,7 @@ public:
// Pass Pipeline Configuration
virtual bool addInstSelector(PassManagerBase &PM,
CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &, CodeGenOpt::Level);
};
} // end namespace llvm