diff options
-rw-r--r-- | lib/Target/PTX/PTXInstrInfo.td | 71 | ||||
-rw-r--r-- | lib/Target/PTX/PTXMFInfoExtract.cpp | 26 | ||||
-rw-r--r-- | lib/Target/PTX/PTXTargetMachine.cpp | 6 | ||||
-rw-r--r-- | lib/Target/PTX/PTXTargetMachine.h | 2 | ||||
-rw-r--r-- | test/CodeGen/PTX/st.ll | 78 |
5 files changed, 179 insertions, 4 deletions
diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td index 65386c8..13b1d77 100644 --- a/lib/Target/PTX/PTXInstrInfo.td +++ b/lib/Target/PTX/PTXInstrInfo.td @@ -66,6 +66,56 @@ def load_shared : PatFrag<(ops node:$ptr), (load node:$ptr), [{ return false; }]>; +def store_global + : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{ + const Value *Src; + const PointerType *PT; + if ((Src = cast<StoreSDNode>(N)->getSrcValue()) && + (PT = dyn_cast<PointerType>(Src->getType()))) + return PT->getAddressSpace() == PTX::GLOBAL; + return false; +}]>; + +def store_constant + : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{ + const Value *Src; + const PointerType *PT; + if ((Src = cast<StoreSDNode>(N)->getSrcValue()) && + (PT = dyn_cast<PointerType>(Src->getType()))) + return PT->getAddressSpace() == PTX::CONSTANT; + return false; +}]>; + +def store_local + : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{ + const Value *Src; + const PointerType *PT; + if ((Src = cast<StoreSDNode>(N)->getSrcValue()) && + (PT = dyn_cast<PointerType>(Src->getType()))) + return PT->getAddressSpace() == PTX::LOCAL; + return false; +}]>; + +def store_parameter + : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{ + const Value *Src; + const PointerType *PT; + if ((Src = cast<StoreSDNode>(N)->getSrcValue()) && + (PT = dyn_cast<PointerType>(Src->getType()))) + return PT->getAddressSpace() == PTX::PARAMETER; + return false; +}]>; + +def store_shared + : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{ + const Value *Src; + const PointerType *PT; + if ((Src = cast<StoreSDNode>(N)->getSrcValue()) && + (PT = dyn_cast<PointerType>(Src->getType()))) + return PT->getAddressSpace() == PTX::SHARED; + return false; +}]>; + // Addressing modes. 
def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>; def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [], []>; @@ -145,6 +195,21 @@ multiclass PTX_LD<string opstr, RegisterClass RC, PatFrag pat_load> { [(set RC:$d, (pat_load ADDRii:$a))]>; } +multiclass PTX_ST<string opstr, RegisterClass RC, PatFrag pat_store> { + def rr : InstPTX<(outs), + (ins RC:$d, MEMrr:$a), + !strconcat(opstr, ".%type\t[$a], $d"), + [(pat_store RC:$d, ADDRrr:$a)]>; + def ri : InstPTX<(outs), + (ins RC:$d, MEMri:$a), + !strconcat(opstr, ".%type\t[$a], $d"), + [(pat_store RC:$d, ADDRri:$a)]>; + def ii : InstPTX<(outs), + (ins RC:$d, MEMii:$a), + !strconcat(opstr, ".%type\t[$a], $d"), + [(pat_store RC:$d, ADDRii:$a)]>; +} + //===----------------------------------------------------------------------===// // Instructions //===----------------------------------------------------------------------===// @@ -185,6 +250,12 @@ defm LDl : PTX_LD<"ld.local", RRegs32, load_local>; defm LDp : PTX_LD<"ld.param", RRegs32, load_parameter>; defm LDs : PTX_LD<"ld.shared", RRegs32, load_shared>; +defm STg : PTX_ST<"st.global", RRegs32, store_global>; +defm STc : PTX_ST<"st.const", RRegs32, store_constant>; +defm STl : PTX_ST<"st.local", RRegs32, store_local>; +defm STp : PTX_ST<"st.param", RRegs32, store_parameter>; +defm STs : PTX_ST<"st.shared", RRegs32, store_shared>; + ///===- Control Flow Instructions -----------------------------------------===// let isReturn = 1, isTerminator = 1, isBarrier = 1 in { diff --git a/lib/Target/PTX/PTXMFInfoExtract.cpp b/lib/Target/PTX/PTXMFInfoExtract.cpp index bfeb5be..68b641b 100644 --- a/lib/Target/PTX/PTXMFInfoExtract.cpp +++ b/lib/Target/PTX/PTXMFInfoExtract.cpp @@ -22,6 +22,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +// NOTE: PTXMFInfoExtract must run after register allocation! 
+ namespace llvm { /// PTXMFInfoExtract - PTX specific code to extract of PTX machine /// function information for PTXAsmPrinter @@ -50,22 +52,38 @@ bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) { PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>(); MachineRegisterInfo &MRI = MF.getRegInfo(); - DEBUG(dbgs() << "****** PTX FUNCTION LOCAL VAR REG DEF ******\n"); + DEBUG(dbgs() << "******** PTX FUNCTION LOCAL VAR REG DEF ********\n"); + + unsigned retreg = MFI->retReg(); - unsigned reg_ret = MFI->retReg(); + DEBUG(dbgs() + << "PTX::NoRegister == " << PTX::NoRegister << "\n" + << "PTX::NUM_TARGET_REGS == " << PTX::NUM_TARGET_REGS << "\n"); + + DEBUG(for (unsigned reg = PTX::NoRegister + 1; + reg < PTX::NUM_TARGET_REGS; ++reg) + if (MRI.isPhysRegUsed(reg)) + dbgs() << "Used Reg: " << reg << "\n";); // FIXME: This is a slow linear scanning for (unsigned reg = PTX::NoRegister + 1; reg < PTX::NUM_TARGET_REGS; ++reg) - if (MRI.isPhysRegUsed(reg) && reg != reg_ret && !MFI->isArgReg(reg)) + if (MRI.isPhysRegUsed(reg) && reg != retreg && !MFI->isArgReg(reg)) MFI->addLocalVarReg(reg); // Notify MachineFunctionInfo that I've done adding local var reg MFI->doneAddLocalVar(); + DEBUG(dbgs() << "Return Reg: " << retreg << "\n"); + + DEBUG(for (PTXMachineFunctionInfo::reg_iterator + i = MFI->argRegBegin(), e = MFI->argRegEnd(); + i != e; ++i) + dbgs() << "Arg Reg: " << *i << "\n";); + DEBUG(for (PTXMachineFunctionInfo::reg_iterator i = MFI->localVarRegBegin(), e = MFI->localVarRegEnd(); i != e; ++i) - dbgs() << "Used Reg: " << *i << "\n";); + dbgs() << "Local Var Reg: " << *i << "\n";); return false; } diff --git a/lib/Target/PTX/PTXTargetMachine.cpp b/lib/Target/PTX/PTXTargetMachine.cpp index a041d07e..f3ba499 100644 --- a/lib/Target/PTX/PTXTargetMachine.cpp +++ b/lib/Target/PTX/PTXTargetMachine.cpp @@ -49,6 +49,12 @@ PTXTargetMachine::PTXTargetMachine(const Target &T, bool PTXTargetMachine::addInstSelector(PassManagerBase &PM, CodeGenOpt::Level 
OptLevel) { PM.add(createPTXISelDag(*this, OptLevel)); + return false; +} + +bool PTXTargetMachine::addPostRegAlloc(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { + // PTXMFInfoExtract must run after register allocation! PM.add(createPTXMFInfoExtract(*this, OptLevel)); return false; } diff --git a/lib/Target/PTX/PTXTargetMachine.h b/lib/Target/PTX/PTXTargetMachine.h index 327ac9f..7f0d282 100644 --- a/lib/Target/PTX/PTXTargetMachine.h +++ b/lib/Target/PTX/PTXTargetMachine.h @@ -50,6 +50,8 @@ class PTXTargetMachine : public LLVMTargetMachine { virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); + virtual bool addPostRegAlloc(PassManagerBase &PM, + CodeGenOpt::Level OptLevel); }; // class PTXTargetMachine } // namespace llvm diff --git a/test/CodeGen/PTX/st.ll b/test/CodeGen/PTX/st.ll new file mode 100644 index 0000000..ed482b2 --- /dev/null +++ b/test/CodeGen/PTX/st.ll @@ -0,0 +1,78 @@ +; RUN: llc < %s -march=ptx | FileCheck %s + +;CHECK: .extern .global .s32 array[]; +@array = external global [10 x i32] + +;CHECK: .extern .const .s32 array_constant[]; +@array_constant = external addrspace(1) constant [10 x i32] + +;CHECK: .extern .local .s32 array_local[]; +@array_local = external addrspace(2) global [10 x i32] + +;CHECK: .extern .shared .s32 array_shared[]; +@array_shared = external addrspace(4) global [10 x i32] + +define ptx_device void @t1(i32* %p, i32 %x) { +entry: +;CHECK: st.global.s32 [r1], r2; + store i32 %x, i32* %p + ret void +} + +define ptx_device void @t2(i32* %p, i32 %x) { +entry: +;CHECK: st.global.s32 [r1+4], r2; + %i = getelementptr i32* %p, i32 1 + store i32 %x, i32* %i + ret void +} + +define ptx_device void @t3(i32* %p, i32 %q, i32 %x) { +;CHECK: .reg .s32 r0; +entry: +;CHECK: shl.b32 r0, r2, 2; +;CHECK: st.global.s32 [r1+r0], r3; + %i = getelementptr i32* %p, i32 %q + store i32 %x, i32* %i + ret void +} + +define ptx_device void @t4_global(i32 %x) { +entry: +;CHECK: st.global.s32 [array], r1; + %i = getelementptr 
[10 x i32]* @array, i32 0, i32 0 + store i32 %x, i32* %i + ret void +} + +define ptx_device void @t4_const(i32 %x) { +entry: +;CHECK: st.const.s32 [array_constant], r1; + %i = getelementptr [10 x i32] addrspace(1)* @array_constant, i32 0, i32 0 + store i32 %x, i32 addrspace(1)* %i + ret void +} + +define ptx_device void @t4_local(i32 %x) { +entry: +;CHECK: st.local.s32 [array_local], r1; + %i = getelementptr [10 x i32] addrspace(2)* @array_local, i32 0, i32 0 + store i32 %x, i32 addrspace(2)* %i + ret void +} + +define ptx_device void @t4_shared(i32 %x) { +entry: +;CHECK: st.shared.s32 [array_shared], r1; + %i = getelementptr [10 x i32] addrspace(4)* @array_shared, i32 0, i32 0 + store i32 %x, i32 addrspace(4)* %i + ret void +} + +define ptx_device void @t5(i32 %x) { +entry: +;CHECK: st.global.s32 [array+4], r1; + %i = getelementptr [10 x i32]* @array, i32 0, i32 1 + store i32 %x, i32* %i + ret void +} |