aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target/PTX/PTXISelLowering.cpp
diff options
context:
space:
mode:
authorNowar Gu <nowar100@gmail.com>2011-06-17 14:29:24 +0800
committerNowar Gu <nowar100@gmail.com>2011-06-20 15:49:07 +0800
commit907af0f20f58f2ea26da7ea64e1f094cd6880db7 (patch)
tree02007757de416c561df174d582205cebfa582801 /lib/Target/PTX/PTXISelLowering.cpp
parent1d4f9a57447faa0142a1d0301e5ce550cfe60c4f (diff)
parentec324e5ae44025c6bdb930b78198f30f807e355b (diff)
downloadexternal_llvm-907af0f20f58f2ea26da7ea64e1f094cd6880db7.zip
external_llvm-907af0f20f58f2ea26da7ea64e1f094cd6880db7.tar.gz
external_llvm-907af0f20f58f2ea26da7ea64e1f094cd6880db7.tar.bz2
Merge upstream to r133240 at Fri. 17th Jun 2011.
Conflicts: lib/CodeGen/AsmPrinter/AsmPrinter.cpp lib/Target/ARM/ARMCodeEmitter.cpp
Diffstat (limited to 'lib/Target/PTX/PTXISelLowering.cpp')
-rw-r--r--lib/Target/PTX/PTXISelLowering.cpp245
1 files changed, 163 insertions, 82 deletions
diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp
index 7187518..c3cdaba 100644
--- a/lib/Target/PTX/PTXISelLowering.cpp
+++ b/lib/Target/PTX/PTXISelLowering.cpp
@@ -16,6 +16,7 @@
#include "PTXMachineFunctionInfo.h"
#include "PTXRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
@@ -24,21 +25,43 @@
using namespace llvm;
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "PTXGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation
+//===----------------------------------------------------------------------===//
+
PTXTargetLowering::PTXTargetLowering(TargetMachine &TM)
: TargetLowering(TM, new TargetLoweringObjectFileELF()) {
// Set up the register classes.
- addRegisterClass(MVT::i1, PTX::PredsRegisterClass);
- addRegisterClass(MVT::i16, PTX::RRegu16RegisterClass);
- addRegisterClass(MVT::i32, PTX::RRegu32RegisterClass);
- addRegisterClass(MVT::i64, PTX::RRegu64RegisterClass);
- addRegisterClass(MVT::f32, PTX::RRegf32RegisterClass);
- addRegisterClass(MVT::f64, PTX::RRegf64RegisterClass);
+ addRegisterClass(MVT::i1, PTX::RegPredRegisterClass);
+ addRegisterClass(MVT::i16, PTX::RegI16RegisterClass);
+ addRegisterClass(MVT::i32, PTX::RegI32RegisterClass);
+ addRegisterClass(MVT::i64, PTX::RegI64RegisterClass);
+ addRegisterClass(MVT::f32, PTX::RegF32RegisterClass);
+ addRegisterClass(MVT::f64, PTX::RegF64RegisterClass);
+
+ setBooleanContents(ZeroOrOneBooleanContent);
setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+ // Turn i16 (z)extload into load + (z)extend
+ setLoadExtAction(ISD::EXTLOAD, MVT::i16, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand);
+
+ // Turn f32 extload into load + fextend
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+
+ // Turn f64 truncstore into trunc + store.
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
// Customize translation of memory addresses
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
@@ -46,14 +69,30 @@ PTXTargetLowering::PTXTargetLowering(TargetMachine &TM)
// Expand BR_CC into BRCOND
setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ // Expand SELECT_CC into SETCC
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
+
+ // need to lower SETCC of RegPred into bitwise logic
+ setOperationAction(ISD::SETCC, MVT::i1, Custom);
+
+ setMinFunctionAlignment(2);
+
// Compute derived properties from the register classes
computeRegisterProperties();
}
+MVT::SimpleValueType PTXTargetLowering::getSetCCResultType(EVT VT) const {
+ return MVT::i1;
+}
+
SDValue PTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default:
llvm_unreachable("Unimplemented operand");
+ case ISD::SETCC:
+ return LowerSETCC(Op, DAG);
case ISD::GlobalAddress:
return LowerGlobalAddress(Op, DAG);
}
@@ -78,6 +117,28 @@ const char *PTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
// Custom Lower Operation
//===----------------------------------------------------------------------===//
+SDValue PTXTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getValueType() == MVT::i1 && "SetCC type must be 1-bit integer");
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op2 = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+
+ // Look for X == 0, X == 1, X != 0, or X != 1
+ // We can simplify these to bitwise logic
+
+ if (Op1.getOpcode() == ISD::Constant &&
+ (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
+ cast<ConstantSDNode>(Op1)->isNullValue()) &&
+ (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+
+ return DAG.getNode(ISD::AND, dl, MVT::i1, Op0, Op1);
+ }
+
+ return DAG.getNode(ISD::SETCC, dl, MVT::i1, Op0, Op1, Op2);
+}
+
SDValue PTXTargetLowering::
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy();
@@ -111,12 +172,12 @@ struct argmap_entry {
void reset() { loc = RC->begin(); }
bool operator==(MVT::SimpleValueType _VT) const { return VT == _VT; }
} argmap[] = {
- argmap_entry(MVT::i1, PTX::PredsRegisterClass),
- argmap_entry(MVT::i16, PTX::RRegu16RegisterClass),
- argmap_entry(MVT::i32, PTX::RRegu32RegisterClass),
- argmap_entry(MVT::i64, PTX::RRegu64RegisterClass),
- argmap_entry(MVT::f32, PTX::RRegf32RegisterClass),
- argmap_entry(MVT::f64, PTX::RRegf64RegisterClass)
+ argmap_entry(MVT::i1, PTX::RegPredRegisterClass),
+ argmap_entry(MVT::i16, PTX::RegI16RegisterClass),
+ argmap_entry(MVT::i32, PTX::RegI32RegisterClass),
+ argmap_entry(MVT::i64, PTX::RegI64RegisterClass),
+ argmap_entry(MVT::f32, PTX::RegF32RegisterClass),
+ argmap_entry(MVT::f64, PTX::RegF64RegisterClass)
};
} // end anonymous namespace
@@ -145,44 +206,72 @@ SDValue PTXTargetLowering::
break;
}
- // Make sure we don't add argument registers twice
- if (MFI->isDoneAddArg())
- llvm_unreachable("cannot add argument registers twice");
-
- // Reset argmap before allocation
- for (struct argmap_entry *i = argmap, *e = argmap + array_lengthof(argmap);
- i != e; ++ i)
- i->reset();
-
- for (int i = 0, e = Ins.size(); i != e; ++ i) {
- MVT::SimpleValueType VT = Ins[i].VT.SimpleTy;
+ if (MFI->isKernel()) {
+ // For kernel functions, we just need to emit the proper READ_PARAM ISDs
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
- struct argmap_entry *entry = std::find(argmap,
- argmap + array_lengthof(argmap), VT);
- if (entry == argmap + array_lengthof(argmap))
- llvm_unreachable("Type of argument is not supported");
+ assert(Ins[i].VT != MVT::i1 && "Kernels cannot take pred operands");
- if (MFI->isKernel() && entry->RC == PTX::PredsRegisterClass)
- llvm_unreachable("cannot pass preds to kernel");
+ SDValue ArgValue = DAG.getNode(PTXISD::READ_PARAM, dl, Ins[i].VT, Chain,
+ DAG.getTargetConstant(i, MVT::i32));
+ InVals.push_back(ArgValue);
- MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
-
- unsigned preg = *++(entry->loc); // allocate start from register 1
- unsigned vreg = RegInfo.createVirtualRegister(entry->RC);
- RegInfo.addLiveIn(preg, vreg);
-
- MFI->addArgReg(preg);
-
- SDValue inval;
- if (MFI->isKernel())
- inval = DAG.getNode(PTXISD::READ_PARAM, dl, VT, Chain,
- DAG.getTargetConstant(i, MVT::i32));
- else
- inval = DAG.getCopyFromReg(Chain, dl, vreg, VT);
- InVals.push_back(inval);
+ // Instead of storing a physical register in our argument list, we just
+ // store the total size of the parameter, in bits. The ASM printer
+ // knows how to process this.
+ MFI->addArgReg(Ins[i].VT.getStoreSizeInBits());
+ }
+ }
+ else {
+ // For device functions, we use the PTX calling convention to do register
+ // assignments then create CopyFromReg ISDs for the allocated registers
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), ArgLocs,
+ *DAG.getContext());
+
+ CCInfo.AnalyzeFormalArguments(Ins, CC_PTX);
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+
+ CCValAssign& VA = ArgLocs[i];
+ EVT RegVT = VA.getLocVT();
+ TargetRegisterClass* TRC = 0;
+
+ assert(VA.isRegLoc() && "CCValAssign must be RegLoc");
+
+ // Determine which register class we need
+ if (RegVT == MVT::i1) {
+ TRC = PTX::RegPredRegisterClass;
+ }
+ else if (RegVT == MVT::i16) {
+ TRC = PTX::RegI16RegisterClass;
+ }
+ else if (RegVT == MVT::i32) {
+ TRC = PTX::RegI32RegisterClass;
+ }
+ else if (RegVT == MVT::i64) {
+ TRC = PTX::RegI64RegisterClass;
+ }
+ else if (RegVT == MVT::f32) {
+ TRC = PTX::RegF32RegisterClass;
+ }
+ else if (RegVT == MVT::f64) {
+ TRC = PTX::RegF64RegisterClass;
+ }
+ else {
+ llvm_unreachable("Unknown parameter type");
+ }
+
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(TRC);
+ MF.getRegInfo().addLiveIn(VA.getLocReg(), Reg);
+
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+ InVals.push_back(ArgValue);
+
+ MFI->addArgReg(VA.getLocReg());
+ }
}
-
- MFI->doneAddArg();
return Chain;
}
@@ -204,51 +293,43 @@ SDValue PTXTargetLowering::
assert(Outs.size() == 0 && "Kernel must return void.");
return DAG.getNode(PTXISD::EXIT, dl, MVT::Other, Chain);
case CallingConv::PTX_Device:
- assert(Outs.size() <= 1 && "Can at most return one value.");
+ //assert(Outs.size() <= 1 && "Can at most return one value.");
break;
}
- // PTX_Device
-
- // return void
- if (Outs.size() == 0)
- return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain);
+ MachineFunction& MF = DAG.getMachineFunction();
+ PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
SDValue Flag;
- unsigned reg;
- if (Outs[0].VT == MVT::i16) {
- reg = PTX::RH0;
- }
- else if (Outs[0].VT == MVT::i32) {
- reg = PTX::R0;
- }
- else if (Outs[0].VT == MVT::i64) {
- reg = PTX::RD0;
- }
- else if (Outs[0].VT == MVT::f32) {
- reg = PTX::F0;
- }
- else {
- assert(Outs[0].VT == MVT::f64 && "Can return only basic types");
- reg = PTX::FD0;
- }
+ CCInfo.AnalyzeReturn(Outs, RetCC_PTX);
- MachineFunction &MF = DAG.getMachineFunction();
- PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
- MFI->setRetReg(reg);
+ for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
+
+ CCValAssign& VA = RVLocs[i];
+
+ assert(VA.isRegLoc() && "CCValAssign must be RegLoc");
+
+ unsigned Reg = VA.getLocReg();
- // If this is the first return lowered for this function, add the regs to the
- // liveout set for the function
- if (DAG.getMachineFunction().getRegInfo().liveout_empty())
- DAG.getMachineFunction().getRegInfo().addLiveOut(reg);
+ DAG.getMachineFunction().getRegInfo().addLiveOut(Reg);
- // Copy the result values into the output registers
- Chain = DAG.getCopyToReg(Chain, dl, reg, OutVals[0], Flag);
+ Chain = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i], Flag);
- // Guarantee that all emitted copies are stuck together,
- // avoiding something bad
- Flag = Chain.getValue(1);
+ // Guarantee that all emitted copies are stuck together,
+ // avoiding something bad
+ Flag = Chain.getValue(1);
- return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain, Flag);
+ MFI->addRetReg(Reg);
+ }
+
+ if (Flag.getNode() == 0) {
+ return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain);
+ }
+ else {
+ return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain, Flag);
+ }
}