diff options
Diffstat (limited to 'lib/Target/PTX')
-rw-r--r-- | lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp | 10 | ||||
-rw-r--r-- | lib/Target/PTX/InstPrinter/PTXInstPrinter.h | 7 | ||||
-rw-r--r-- | lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp | 3 | ||||
-rw-r--r-- | lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h | 2 | ||||
-rw-r--r-- | lib/Target/PTX/PTXISelLowering.cpp | 52 | ||||
-rw-r--r-- | lib/Target/PTX/PTXInstrInfo.td | 5 | ||||
-rw-r--r-- | lib/Target/PTX/PTXTargetMachine.cpp | 5 |
7 files changed, 46 insertions, 38 deletions
diff --git a/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp b/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp index ec7e2a7..1830213 100644 --- a/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp +++ b/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp @@ -18,26 +18,24 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/ADT/APFloat.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -#define GET_INSTRUCTION_NAME #include "PTXGenAsmWriter.inc" PTXInstPrinter::PTXInstPrinter(const MCAsmInfo &MAI, + const MCInstrInfo &MII, const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) : - MCInstPrinter(MAI, MRI) { + MCInstPrinter(MAI, MII, MRI) { // Initialize the set of available features. setAvailableFeatures(STI.getFeatureBits()); } -StringRef PTXInstPrinter::getOpcodeName(unsigned Opcode) const { - return getInstructionName(Opcode); -} - void PTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { // Decode the register number into type and offset unsigned RegSpace = RegNo & 0x7; diff --git a/lib/Target/PTX/InstPrinter/PTXInstPrinter.h b/lib/Target/PTX/InstPrinter/PTXInstPrinter.h index eef6101..ea4d504 100644 --- a/lib/Target/PTX/InstPrinter/PTXInstPrinter.h +++ b/lib/Target/PTX/InstPrinter/PTXInstPrinter.h @@ -23,15 +23,12 @@ class MCOperand; class PTXInstPrinter : public MCInstPrinter { public: - PTXInstPrinter(const MCAsmInfo &MAI, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI); + PTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); - virtual StringRef getOpcodeName(unsigned Opcode) const; virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; - static const char *getInstructionName(unsigned Opcode); - // Autogenerated by tblgen. void printInstruction(const MCInst *MI, raw_ostream &O); static const char *getRegisterName(unsigned RegNo); diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp index 7671b11..08fb970 100644 --- a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp +++ b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp @@ -62,10 +62,11 @@ static MCCodeGenInfo *createPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM, static MCInstPrinter *createPTXMCInstPrinter(const Target &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, + const MCInstrInfo &MII, const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) { assert(SyntaxVariant == 0 && "We only have one syntax variant"); - return new PTXInstPrinter(MAI, MRI, STI); + return new PTXInstPrinter(MAI, MII, MRI, STI); } extern "C" void LLVMInitializePTXTargetMC() { diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h index 1003b0b..542638a 100644 --- a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h +++ b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h @@ -15,9 +15,7 @@ #define PTXMCTARGETDESC_H namespace llvm { -class MCSubtargetInfo; class Target; -class StringRef; extern Target ThePTX32Target; extern Target ThePTX64Target; diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp index db1c953..ef4455b 100644 --- a/lib/Target/PTX/PTXISelLowering.cpp +++ b/lib/Target/PTX/PTXISelLowering.cpp @@ -97,7 +97,8 @@ PTXTargetLowering::PTXTargetLowering(TargetMachine &TM) // customise setcc to use bitwise logic if possible - setOperationAction(ISD::SETCC, MVT::i1, Custom); + //setOperationAction(ISD::SETCC, MVT::i1, Custom); + setOperationAction(ISD::SETCC, MVT::i1, Legal); // customize translation of memory addresses @@ -156,18 +157,27 @@ SDValue PTXTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { SDValue Op1 = Op.getOperand(1); SDValue Op2 = Op.getOperand(2); DebugLoc dl = Op.getDebugLoc(); - ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); + //ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); // Look for X == 0, X == 1, X != 0, or X != 1 // We can simplify these to bitwise logic - if (Op1.getOpcode() == ISD::Constant && - (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 || - cast<ConstantSDNode>(Op1)->isNullValue()) && - (CC == ISD::SETEQ || CC == ISD::SETNE)) { + //if (Op1.getOpcode() == ISD::Constant && + // (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 || + // cast<ConstantSDNode>(Op1)->isNullValue()) && + // (CC == ISD::SETEQ || CC == ISD::SETNE)) { + // + // return DAG.getNode(ISD::AND, dl, MVT::i1, Op0, Op1); + //} - return DAG.getNode(ISD::AND, dl, MVT::i1, Op0, Op1); - } + //ConstantSDNode* COp1 = cast<ConstantSDNode>(Op1); + //if(COp1 && COp1->getZExtValue() == 1) { + // if(CC == ISD::SETNE) { + // return DAG.getNode(PTX::XORripreds, dl, MVT::i1, Op0); + // } + //} + + llvm_unreachable("setcc was not matched by a pattern!"); return DAG.getNode(ISD::SETCC, dl, MVT::i1, Op0, Op1, Op2); } @@ -384,22 +394,22 @@ PTXTargetLowering::LowerCall(SDValue Chain, SDValue Callee, PTXMachineFunctionInfo *PTXMFI = MF.getInfo<PTXMachineFunctionInfo>(); PTXParamManager &PM = PTXMFI->getParamManager(); MachineFrameInfo *MFI = MF.getFrameInfo(); - + assert(getTargetMachine().getSubtarget<PTXSubtarget>().callsAreHandled() && "Calls are not handled for the target device"); // Identify the callee function const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal(); const Function *function = cast<Function>(GV); - + // allow non-device calls only for printf - bool isPrintf = function->getName() == "printf" || function->getName() == "puts"; - + bool isPrintf = function->getName() == "printf" || function->getName() == "puts"; + assert((isPrintf || function->getCallingConv() == CallingConv::PTX_Device) && "PTX function calls must be to PTX device functions"); - + unsigned outSize = isPrintf ? 2 : Outs.size(); - + std::vector<SDValue> Ops; // The layout of the ops will be [Chain, #Ins, Ins, Callee, #Outs, Outs] Ops.resize(outSize + Ins.size() + 4); @@ -412,7 +422,7 @@ PTXTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // #Outs Ops[Ins.size()+3] = DAG.getTargetConstant(outSize, MVT::i32); - + if (isPrintf) { // first argument is the address of the global string variable in memory unsigned Param0 = PM.addLocalParam(getPointerTy().getSizeInBits()); @@ -421,29 +431,29 @@ PTXTargetLowering::LowerCall(SDValue Chain, SDValue Callee, Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain, ParamValue0, OutVals[0]); Ops[Ins.size()+4] = ParamValue0; - + // alignment is the maximum size of all the arguments unsigned alignment = 0; for (unsigned i = 1; i < OutVals.size(); ++i) { - alignment = std::max(alignment, + alignment = std::max(alignment, OutVals[i].getValueType().getSizeInBits()); } // size is the alignment multiplied by the number of arguments unsigned size = alignment * (OutVals.size() - 1); - + // second argument is the address of the stack object (unless no arguments) unsigned Param1 = PM.addLocalParam(getPointerTy().getSizeInBits()); SDValue ParamValue1 = DAG.getTargetExternalSymbol(PM.getParamName(Param1).c_str(), MVT::Other); Ops[Ins.size()+5] = ParamValue1; - + if (size > 0) { // create a local stack object to store the arguments unsigned StackObject = MFI->CreateStackObject(size / 8, alignment / 8, false); SDValue FrameIndex = DAG.getFrameIndex(StackObject, getPointerTy()); - + // store each of the arguments to the stack in turn for (unsigned int i = 1; i != OutVals.size(); i++) { SDValue FrameAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), FrameIndex, DAG.getTargetConstant((i - 1) * 8, getPointerTy())); @@ -475,7 +485,7 @@ PTXTargetLowering::LowerCall(SDValue Chain, SDValue Callee, Ops[i+Ins.size()+4] = ParamValue; } } - + std::vector<SDValue> InParams; // Generate list of .param variables to hold the return value(s). diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td index 818d444..bead428 100644 --- a/lib/Target/PTX/PTXInstrInfo.td +++ b/lib/Target/PTX/PTXInstrInfo.td @@ -808,6 +808,8 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in { let isBranch = 1, isTerminator = 1 in { // FIXME: The pattern part is blank because I cannot (or do not yet know // how to) use the first operand of PredicateOperand (a RegPred register) here + // When this is revisited, make sure to also look at LowerSETCC and try to + // fold it into negated predicates, if possible. def BRAdp : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", [/*(brcond pred:$_p, bb:$d)*/]>; @@ -1017,6 +1019,9 @@ def : Pat<(f64 (sint_to_fp RegI64:$a)), (CVTf64s64 RndDefault, RegI64:$a)>; def : Pat<(f64 (fextend RegF32:$a)), (CVTf64f32 RegF32:$a)>; def : Pat<(f64 (bitconvert RegI64:$a)), (MOVf64i64 RegI64:$a)>; +// setcc - predicate inversion for branch conditions +def : Pat<(i1 (setcc RegPred:$a, imm:$b, SETNE)), + (XORripreds RegPred:$a, imm:$b)>; ///===- Intrinsic Instructions --------------------------------------------===// include "PTXIntrinsicInstrInfo.td" diff --git a/lib/Target/PTX/PTXTargetMachine.cpp b/lib/Target/PTX/PTXTargetMachine.cpp index 40835d0..c55a658 100644 --- a/lib/Target/PTX/PTXTargetMachine.cpp +++ b/lib/Target/PTX/PTXTargetMachine.cpp @@ -17,7 +17,6 @@ #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/Verifier.h" #include "llvm/Assembly/PrintModulePass.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -153,10 +152,10 @@ bool PTXPassConfig::addPostRegAlloc() { /// Add passes that optimize machine instructions after register allocation. void PTXPassConfig::addMachineLateOptimization() { if (addPass(BranchFolderPassID) != &NoPassID) - printNoVerify("After BranchFolding"); + printAndVerify("After BranchFolding"); if (addPass(TailDuplicateID) != &NoPassID) - printNoVerify("After TailDuplicate"); + printAndVerify("After TailDuplicate"); } bool PTXPassConfig::addPreEmitPass() { |