diff options
author | Che-Liang Chiou <clchiou@gmail.com> | 2011-02-28 06:34:09 +0000 |
---|---|---|
committer | Che-Liang Chiou <clchiou@gmail.com> | 2011-02-28 06:34:09 +0000 |
commit | f71720231f6de9b2b7fe28edd179ae217a105329 (patch) | |
tree | 7ed3e644aac2d4be87b13f146f0467761e8e7600 /lib/Target/PTX | |
parent | d8d1584c13c554349c235177b2b89cb5117347b2 (diff) | |
download | external_llvm-f71720231f6de9b2b7fe28edd179ae217a105329.zip external_llvm-f71720231f6de9b2b7fe28edd179ae217a105329.tar.gz external_llvm-f71720231f6de9b2b7fe28edd179ae217a105329.tar.bz2 |
Add preliminary support for .f32 in the PTX backend.
- Add appropriate TableGen patterns for fadd, fsub, fmul.
- Add .f32 as the PTX type for the LLVM float type.
- Allow parameters, return values, and global variable declarations
to accept the float type.
- Add appropriate test cases.
Patch by Justin Holewinski
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@126636 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/PTX')
-rw-r--r-- | lib/Target/PTX/PTXAsmPrinter.cpp | 29 | ||||
-rw-r--r-- | lib/Target/PTX/PTXISelLowering.cpp | 22 | ||||
-rw-r--r-- | lib/Target/PTX/PTXInstrInfo.cpp | 8 | ||||
-rw-r--r-- | lib/Target/PTX/PTXInstrInfo.td | 42 | ||||
-rw-r--r-- | lib/Target/PTX/PTXRegisterInfo.td | 40 |
5 files changed, 131 insertions, 10 deletions
diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp index a605997..25f26fa 100644 --- a/lib/Target/PTX/PTXAsmPrinter.cpp +++ b/lib/Target/PTX/PTXAsmPrinter.cpp @@ -84,6 +84,7 @@ static const char PARAM_PREFIX[] = "__param_"; static const char *getRegisterTypeName(unsigned RegNo) { #define TEST_REGCLS(cls, clsstr) \ if (PTX::cls ## RegisterClass->contains(RegNo)) return # clsstr; + TEST_REGCLS(RRegf32, f32); TEST_REGCLS(RRegs32, s32); TEST_REGCLS(Preds, pred); #undef TEST_REGCLS @@ -115,6 +116,21 @@ static const char *getStateSpaceName(unsigned addressSpace) { return NULL; } +static const char *getTypeName(const Type* type) { + while (true) { + switch (type->getTypeID()) { + default: llvm_unreachable("Unknown type"); + case Type::FloatTyID: return ".f32"; + case Type::IntegerTyID: return ".s32"; // TODO: Handle 64-bit types. + case Type::ArrayTyID: + case Type::PointerTyID: + type = dyn_cast<const SequentialType>(type)->getElementType(); + break; + } + } + return NULL; +} + bool PTXAsmPrinter::doFinalization(Module &M) { // XXX Temproarily remove global variables so that doFinalization() will not // emit them again (global variables are emitted at beginning). @@ -218,6 +234,15 @@ void PTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, case MachineOperand::MO_Register: OS << getRegisterName(MO.getReg()); break; + case MachineOperand::MO_FPImmediate: + APInt constFP = MO.getFPImm()->getValueAPF().bitcastToAPInt(); + if (constFP.getZExtValue() > 0) { + OS << "0F" << constFP.toString(16, false); + } + else { + OS << "0F00000000"; + } + break; } } @@ -265,8 +290,8 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { decl += " "; } - // TODO: add types - decl += ".s32 "; + decl += getTypeName(gv->getType()); + decl += " "; decl += gvsym->getName(); diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp index e6d4490..d30c9ec 100644 --- a/lib/Target/PTX/PTXISelLowering.cpp +++ b/lib/Target/PTX/PTXISelLowering.cpp @@ -28,9 +28,12 @@ PTXTargetLowering::PTXTargetLowering(TargetMachine &TM) // Set up the register classes. addRegisterClass(MVT::i1, PTX::PredsRegisterClass); addRegisterClass(MVT::i32, PTX::RRegs32RegisterClass); - + addRegisterClass(MVT::f32, PTX::RRegf32RegisterClass); + setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); + setOperationAction(ISD::ConstantFP, MVT::f32, Legal); + // Customize translation of memory addresses setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); @@ -87,7 +90,8 @@ struct argmap_entry { bool operator==(MVT::SimpleValueType _VT) const { return VT == _VT; } } argmap[] = { argmap_entry(MVT::i1, PTX::PredsRegisterClass), - argmap_entry(MVT::i32, PTX::RRegs32RegisterClass) + argmap_entry(MVT::i32, PTX::RRegs32RegisterClass), + argmap_entry(MVT::f32, PTX::RRegf32RegisterClass) }; } // end anonymous namespace @@ -185,10 +189,18 @@ SDValue PTXTargetLowering:: if (Outs.size() == 0) return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain); - assert(Outs[0].VT == MVT::i32 && "Can return only basic types"); - SDValue Flag; - unsigned reg = PTX::R0; + unsigned reg; + + if (Outs[0].VT == MVT::i32) { + reg = PTX::R0; + } + else if (Outs[0].VT == MVT::f32) { + reg = PTX::F0; + } + else { + assert(false && "Can return only basic types"); + } MachineFunction &MF = DAG.getMachineFunction(); PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>(); diff --git a/lib/Target/PTX/PTXInstrInfo.cpp b/lib/Target/PTX/PTXInstrInfo.cpp index 805759b..f2e5e4c 100644 --- a/lib/Target/PTX/PTXInstrInfo.cpp +++ b/lib/Target/PTX/PTXInstrInfo.cpp @@ -28,6 +28,7 @@ static const struct map_entry { const int opcode; } map[] = { { &PTX::RRegs32RegClass, PTX::MOVrr }, + { &PTX::RRegf32RegClass, PTX::MOVrr }, { &PTX::PredsRegClass, PTX::MOVpp } }; @@ -35,12 +36,13 @@ void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DstReg, unsigned SrcReg, bool KillSrc) const { - for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i) - if (PTX::RRegs32RegClass.contains(DstReg, SrcReg)) { + for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i) { + if (map[i].cls->contains(DstReg, SrcReg)) { BuildMI(MBB, I, DL, - get(PTX::MOVrr), DstReg).addReg(SrcReg, getKillRegState(KillSrc)); + get(map[i].opcode), DstReg).addReg(SrcReg, getKillRegState(KillSrc)); return; } + } llvm_unreachable("Impossible reg-to-reg copy"); } diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td index 9a74778..9d962b0 100644 --- a/lib/Target/PTX/PTXInstrInfo.td +++ b/lib/Target/PTX/PTXInstrInfo.td @@ -143,6 +143,18 @@ def PTXret // Instruction Class Templates //===----------------------------------------------------------------------===// +// Three-operand f32 instruction template +multiclass FLOAT3<string opcstr, SDNode opnode> { + def rr : InstPTX<(outs RRegf32:$d), + (ins RRegf32:$a, RRegf32:$b), + !strconcat(opcstr, ".%type\t$d, $a, $b"), + [(set RRegf32:$d, (opnode RRegf32:$a, RRegf32:$b))]>; + def ri : InstPTX<(outs RRegf32:$d), + (ins RRegf32:$a, f32imm:$b), + !strconcat(opcstr, ".%type\t$d, $a, $b"), + [(set RRegf32:$d, (opnode RRegf32:$a, fpimm:$b))]>; +} + multiclass INT3<string opcstr, SDNode opnode> { def rr : InstPTX<(outs RRegs32:$d), (ins RRegs32:$a, RRegs32:$b), @@ -204,6 +216,12 @@ multiclass PTX_ST<string opstr, RegisterClass RC, PatFrag pat_store> { // Instructions //===----------------------------------------------------------------------===// +///===- Floating-Point Arithmetic Instructions ----------------------------===// + +defm FADD : FLOAT3<"add", fadd>; +defm FSUB : FLOAT3<"sub", fsub>; +defm FMUL : FLOAT3<"mul", fmul>; + ///===- Integer Arithmetic Instructions -----------------------------------===// defm ADD : INT3<"add", add>; @@ -223,6 +241,8 @@ let neverHasSideEffects = 1 in { : InstPTX<(outs Preds:$d), (ins Preds:$a), "mov.pred\t$d, $a", []>; def MOVrr : InstPTX<(outs RRegs32:$d), (ins RRegs32:$a), "mov.%type\t$d, $a", []>; + def FMOVrr + : InstPTX<(outs RRegf32:$d), (ins RRegf32:$a), "mov.f32\t$d, $a", []>; } let isReMaterializable = 1, isAsCheapAsAMove = 1 in { @@ -232,8 +252,12 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1 in { def MOVri : InstPTX<(outs RRegs32:$d), (ins i32imm:$a), "mov.s32\t$d, $a", [(set RRegs32:$d, imm:$a)]>; + def FMOVri + : InstPTX<(outs RRegf32:$d), (ins f32imm:$a), "mov.f32\t$d, $a", + [(set RRegf32:$d, fpimm:$a)]>; } +// Integer loads defm LDg : PTX_LD<"ld.global", RRegs32, load_global>; defm LDc : PTX_LD<"ld.const", RRegs32, load_constant>; defm LDl : PTX_LD<"ld.local", RRegs32, load_local>; @@ -243,12 +267,30 @@ defm LDs : PTX_LD<"ld.shared", RRegs32, load_shared>; def LDpi : InstPTX<(outs RRegs32:$d), (ins MEMpi:$a), "ld.param.%type\t$d, [$a]", []>; +// Floating-point loads +defm FLDg : PTX_LD<"ld.global", RRegf32, load_global>; +defm FLDc : PTX_LD<"ld.const", RRegf32, load_constant>; +defm FLDl : PTX_LD<"ld.local", RRegf32, load_local>; +defm FLDp : PTX_LD<"ld.param", RRegf32, load_parameter>; +defm FLDs : PTX_LD<"ld.shared", RRegf32, load_shared>; + +def FLDpi : InstPTX<(outs RRegf32:$d), (ins MEMpi:$a), + "ld.param.%type\t$d, [$a]", []>; + +// Integer stores defm STg : PTX_ST<"st.global", RRegs32, store_global>; defm STl : PTX_ST<"st.local", RRegs32, store_local>; // Store to parameter state space requires PTX 2.0 or higher? // defm STp : PTX_ST<"st.param", RRegs32, store_parameter>; defm STs : PTX_ST<"st.shared", RRegs32, store_shared>; +// Floating-point stores +defm FSTg : PTX_ST<"st.global", RRegf32, store_global>; +defm FSTl : PTX_ST<"st.local", RRegf32, store_local>; +// Store to parameter state space requires PTX 2.0 or higher? +// defm FSTp : PTX_ST<"st.param", RRegf32, store_parameter>; +defm FSTs : PTX_ST<"st.shared", RRegf32, store_shared>; + ///===- Control Flow Instructions -----------------------------------------===// let isReturn = 1, isTerminator = 1, isBarrier = 1 in { diff --git a/lib/Target/PTX/PTXRegisterInfo.td b/lib/Target/PTX/PTXRegisterInfo.td index 22e2b34..9158f0d 100644 --- a/lib/Target/PTX/PTXRegisterInfo.td +++ b/lib/Target/PTX/PTXRegisterInfo.td @@ -85,6 +85,40 @@ def R29 : PTXReg<"r29">; def R30 : PTXReg<"r30">; def R31 : PTXReg<"r31">; +def F0 : PTXReg<"f0">; +def F1 : PTXReg<"f1">; +def F2 : PTXReg<"f2">; +def F3 : PTXReg<"f3">; +def F4 : PTXReg<"f4">; +def F5 : PTXReg<"f5">; +def F6 : PTXReg<"f6">; +def F7 : PTXReg<"f7">; +def F8 : PTXReg<"f8">; +def F9 : PTXReg<"f9">; +def F10 : PTXReg<"f10">; +def F11 : PTXReg<"f11">; +def F12 : PTXReg<"f12">; +def F13 : PTXReg<"f13">; +def F14 : PTXReg<"f14">; +def F15 : PTXReg<"f15">; +def F16 : PTXReg<"f16">; +def F17 : PTXReg<"f17">; +def F18 : PTXReg<"f18">; +def F19 : PTXReg<"f19">; +def F20 : PTXReg<"f20">; +def F21 : PTXReg<"f21">; +def F22 : PTXReg<"f22">; +def F23 : PTXReg<"f23">; +def F24 : PTXReg<"f24">; +def F25 : PTXReg<"f25">; +def F26 : PTXReg<"f26">; +def F27 : PTXReg<"f27">; +def F28 : PTXReg<"f28">; +def F29 : PTXReg<"f29">; +def F30 : PTXReg<"f30">; +def F31 : PTXReg<"f31">; + + //===----------------------------------------------------------------------===// // Register classes //===----------------------------------------------------------------------===// @@ -100,3 +134,9 @@ def RRegs32 : RegisterClass<"PTX", [i32], 32, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31]>; + +def RRegf32 : RegisterClass<"PTX", [f32], 32, + [F0, F1, F2, F3, F4, F5, F6, F7, + F8, F9, F10, F11, F12, F13, F14, F15, + F16, F17, F18, F19, F20, F21, F22, F23, + F24, F25, F26, F27, F28, F29, F30, F31]>; |