diff options
author | Scott Michel <scottm@aero.org> | 2009-01-26 03:31:40 +0000 |
---|---|---|
committer | Scott Michel <scottm@aero.org> | 2009-01-26 03:31:40 +0000 |
commit | c9c8b2a804b2cd3d33a6a965e06a21ff93968f97 (patch) | |
tree | 6141f9f0ec12fefbdd984667613aaf33da6068af /lib/Target | |
parent | 5bf4b7556f025587a8d1a14bd0fb39c12fc9c170 (diff) | |
download | external_llvm-c9c8b2a804b2cd3d33a6a965e06a21ff93968f97.zip external_llvm-c9c8b2a804b2cd3d33a6a965e06a21ff93968f97.tar.gz external_llvm-c9c8b2a804b2cd3d33a6a965e06a21ff93968f97.tar.bz2 |
CellSPU:
- Rename fcmp.ll test to fcmp32.ll, start adding new double tests to fcmp64.ll
- Fix select_bits.ll test
- Capitulate to the DAGCombiner and move i64 constant loads to instruction
selection (SPUISelDAGToDAG.cpp).
<rant>DAGCombiner will insert all kinds of 64-bit optimizations after
operation legalization occurs and now we have to do most of the work that
instruction selection should be doing twice (once to determine if v2i64
build_vector can be handled by SelectCode(), which then runs all of the
predicates a second time to select the necessary instructions.) But,
CellSPU is a good citizen.</rant>
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@62990 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target')
-rw-r--r-- | lib/Target/CellSPU/SPU64InstrInfo.td | 4 | ||||
-rw-r--r-- | lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 173 | ||||
-rw-r--r-- | lib/Target/CellSPU/SPUISelLowering.cpp | 484 | ||||
-rw-r--r-- | lib/Target/CellSPU/SPUISelLowering.h | 4 | ||||
-rw-r--r-- | lib/Target/CellSPU/SPUInstrInfo.cpp | 4 | ||||
-rw-r--r-- | lib/Target/CellSPU/SPUInstrInfo.td | 30 |
6 files changed, 468 insertions, 231 deletions
diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td index 3329894..06eb149 100644 --- a/lib/Target/CellSPU/SPU64InstrInfo.td +++ b/lib/Target/CellSPU/SPU64InstrInfo.td @@ -30,8 +30,8 @@ // selb instruction definition for i64. Note that the selection mask is // a vector, produced by various forms of FSM: def SELBr64_cond: - SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC), - [/* no pattern */]>; + SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC), + [/* no pattern */]>; // The generic i64 select pattern, which assumes that the comparison result // is in a 32-bit register that contains a select mask pattern (i.e., gather diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index 0fc7aec..6d7f40d 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -254,26 +254,56 @@ public: /// getSmallIPtrImm - Return a target constant of pointer type. inline SDValue getSmallIPtrImm(unsigned Imm) { return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy()); - } + } SDNode *emitBuildVector(SDValue build_vec) { + MVT vecVT = build_vec.getValueType(); + SDNode *bvNode = build_vec.getNode(); + bool canBeSelected = false; + + // Check to see if this vector can be represented as a CellSPU immediate + // constant. 
+ if (vecVT == MVT::v8i16) { + if (SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i16).getNode() != 0) { + canBeSelected = true; + } + } else if (vecVT == MVT::v4i32) { + if ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) + || (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) + || (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) + || (SPU::get_v4i32_imm(bvNode, *CurDAG).getNode() != 0)) { + canBeSelected = true; + } + } else if (vecVT == MVT::v2i64) { + if ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) + || (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) + || (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0)) { + canBeSelected = true; + } + } + + if (canBeSelected) { + return Select(build_vec); + } + + // No, need to emit a constant pool spill: std::vector<Constant*> CV; for (size_t i = 0; i < build_vec.getNumOperands(); ++i) { - ConstantSDNode *V = dyn_cast<ConstantSDNode>(build_vec.getOperand(i)); - CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue())); + ConstantSDNode *V = dyn_cast<ConstantSDNode > (build_vec.getOperand(i)); + CV.push_back(const_cast<ConstantInt *> (V->getConstantIntValue())); } Constant *CP = ConstantVector::get(CV); SDValue CPIdx = CurDAG->getConstantPool(CP, SPUtli.getPointerTy()); - unsigned Alignment = 1 << cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + unsigned Alignment = 1 << cast<ConstantPoolSDNode > (CPIdx)->getAlignment(); SDValue CGPoolOffset = SPU::LowerConstantPool(CPIdx, *CurDAG, SPUtli.getSPUTargetMachine()); return SelectCode(CurDAG->getLoad(build_vec.getValueType(), - CurDAG->getEntryNode(), CGPoolOffset, - PseudoSourceValue::getConstantPool(), 0, - false, Alignment)); + CurDAG->getEntryNode(), CGPoolOffset, + PseudoSourceValue::getConstantPool(), 0, + false, Alignment)); } /// Select - Convert the specified operand from a target-independent to a @@ -289,6 +319,9 @@ public: //! 
Emit the instruction sequence for i64 sra SDNode *SelectSRAi64(SDValue &Op, MVT OpVT); + //! Emit the necessary sequence for loading i64 constants: + SDNode *SelectI64Constant(SDValue &Op, MVT OpVT); + //! Returns true if the address N is an A-form (local store) address bool SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base, SDValue &Index); @@ -652,7 +685,9 @@ SPUDAGToDAGISel::Select(SDValue Op) { if (N->isMachineOpcode()) { return NULL; // Already selected. - } else if (Opc == ISD::FrameIndex) { + } + + if (Opc == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); SDValue TFI = CurDAG->getTargetFrameIndex(FI, Op.getValueType()); SDValue Imm0 = CurDAG->getTargetConstant(0, Op.getValueType()); @@ -669,6 +704,11 @@ SPUDAGToDAGISel::Select(SDValue Op) { TFI, Imm0), 0); n_ops = 2; } + } else if (Opc == ISD::Constant && OpVT == MVT::i64) { + // Catch the i64 constants that end up here. Note: The backend doesn't + // attempt to legalize the constant (it's useless because DAGCombiner + // will insert 64-bit constants and we can't stop it). 
+ return SelectI64Constant(Op, OpVT); } else if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) && OpVT == MVT::i64) { SDValue Op0 = Op.getOperand(0); @@ -745,27 +785,38 @@ SPUDAGToDAGISel::Select(SDValue Op) { return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, OpVT, Op.getOperand(0), Op.getOperand(1), SDValue(CGLoad, 0))); - } else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { - SDNode *CGLoad = - emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG)); - - return SelectCode(CurDAG->getNode(SPUISD::ADD64_MARKER, OpVT, - Op.getOperand(0), Op.getOperand(1), - SDValue(CGLoad, 0))); - } else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { - SDNode *CGLoad = - emitBuildVector(SPU::getBorrowGenerateShufMask(*CurDAG)); - - return SelectCode(CurDAG->getNode(SPUISD::SUB64_MARKER, OpVT, - Op.getOperand(0), Op.getOperand(1), - SDValue(CGLoad, 0))); - } else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { - SDNode *CGLoad = - emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG)); - - return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, OpVT, - Op.getOperand(0), Op.getOperand(1), - SDValue(CGLoad, 0))); + } else if (Opc == ISD::TRUNCATE) { + SDValue Op0 = Op.getOperand(0); + if ((Op0.getOpcode() == ISD::SRA || Op0.getOpcode() == ISD::SRL) + && OpVT == MVT::i32 + && Op0.getValueType() == MVT::i64) { + // Catch the (truncate:i32 ([sra|srl]:i64 arg, c), where c >= 32 to + // take advantage of the fact that the upper 32 bits are in the + // i32 preferred slot and avoid all kinds of other shuffle gymnastics: + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op0.getOperand(1)); + if (CN != 0) { + unsigned shift_amt = unsigned(CN->getZExtValue()); + + if (shift_amt >= 32) { + SDNode *hi32 = + CurDAG->getTargetNode(SPU::ORr32_r64, OpVT, Op0.getOperand(0)); + + shift_amt -= 32; + if (shift_amt > 0) { + // Take care of the additional shift, if present: + SDValue shift = CurDAG->getTargetConstant(shift_amt, 
MVT::i32); + unsigned Opc = SPU::ROTMAIr32_i32; + + if (Op0.getOpcode() == ISD::SRL) + Opc = SPU::ROTMr32; + + hi32 = CurDAG->getTargetNode(Opc, OpVT, SDValue(hi32, 0), shift); + } + + return hi32; + } + } + } } else if (Opc == ISD::SHL) { if (OpVT == MVT::i64) { return SelectSHLi64(Op, OpVT); @@ -1046,6 +1097,70 @@ SPUDAGToDAGISel::SelectSRAi64(SDValue &Op, MVT OpVT) { return CurDAG->getTargetNode(SPU::ORi64_v2i64, OpVT, SDValue(Shift, 0)); } +/*! + Do the necessary magic necessary to load a i64 constant + */ +SDNode *SPUDAGToDAGISel::SelectI64Constant(SDValue& Op, MVT OpVT) { + ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode()); + MVT OpVecVT = MVT::getVectorVT(OpVT, 2); + SDValue i64vec = + SPU::LowerSplat_v2i64(OpVecVT, *CurDAG, CN->getZExtValue()); + + // Here's where it gets interesting, because we have to parse out the + // subtree handed back in i64vec: + + if (i64vec.getOpcode() == ISD::BIT_CONVERT) { + // The degenerate case where the upper and lower bits in the splat are + // identical: + SDValue Op0 = i64vec.getOperand(0); + ReplaceUses(i64vec, Op0); + + return CurDAG->getTargetNode(SPU::ORi64_v2i64, OpVT, + SDValue(emitBuildVector(Op0), 0)); + } else if (i64vec.getOpcode() == SPUISD::SHUFB) { + SDValue lhs = i64vec.getOperand(0); + SDValue rhs = i64vec.getOperand(1); + SDValue shufmask = i64vec.getOperand(2); + + if (lhs.getOpcode() == ISD::BIT_CONVERT) { + ReplaceUses(lhs, lhs.getOperand(0)); + lhs = lhs.getOperand(0); + } + + SDNode *lhsNode = (lhs.getNode()->isMachineOpcode() + ? lhs.getNode() + : emitBuildVector(lhs)); + + if (rhs.getOpcode() == ISD::BIT_CONVERT) { + ReplaceUses(rhs, rhs.getOperand(0)); + rhs = rhs.getOperand(0); + } + + SDNode *rhsNode = (rhs.getNode()->isMachineOpcode() + ? rhs.getNode() + : emitBuildVector(rhs)); + + if (shufmask.getOpcode() == ISD::BIT_CONVERT) { + ReplaceUses(shufmask, shufmask.getOperand(0)); + shufmask = shufmask.getOperand(0); + } + + SDNode *shufMaskNode = (shufmask.getNode()->isMachineOpcode() + ? 
shufmask.getNode() + : emitBuildVector(shufmask)); + + SDNode *shufNode = + Select(CurDAG->getNode(SPUISD::SHUFB, OpVecVT, + SDValue(lhsNode, 0), SDValue(rhsNode, 0), + SDValue(shufMaskNode, 0))); + + return CurDAG->getTargetNode(SPU::ORi64_v2i64, OpVT, SDValue(shufNode, 0)); + } else { + cerr << "SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec condition\n"; + abort(); + } +} + /// createSPUISelDag - This pass converts a legalized DAG into a /// SPU-specific DAG, ready for instruction scheduling. /// diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index 124f1a7..6bb76d8 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -17,6 +17,7 @@ #include "SPUFrameInfo.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/VectorExtras.h" +#include "llvm/CallingConv.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -79,6 +80,43 @@ namespace { return retval; } + //! Expand a library call into an actual call DAG node + /*! + \note + This code is taken from SelectionDAGLegalize, since it is not exposed as + part of the LLVM SelectionDAG API. + */ + + SDValue + ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG, + bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) { + // The input chain to this libcall is the entry node of the function. + // Legalizing the call will automatically add the previous call to the + // dependence. 
+ SDValue InChain = DAG.getEntryNode(); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { + MVT ArgVT = Op.getOperand(i).getValueType(); + const Type *ArgTy = ArgVT.getTypeForMVT(); + Entry.Node = Op.getOperand(i); + Entry.Ty = ArgTy; + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + // Splice the libcall in wherever FindInputOutputChains tells us to. + const Type *RetTy = Op.getNode()->getValueType(0).getTypeForMVT(); + std::pair<SDValue, SDValue> CallInfo = + TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, + CallingConv::C, false, Callee, Args, DAG); + + return CallInfo.first; + } } SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) @@ -113,7 +151,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand); // SPU constant load actions are custom lowered: - setOperationAction(ISD::Constant, MVT::i64, Custom); setOperationAction(ISD::ConstantFP, MVT::f32, Legal); setOperationAction(ISD::ConstantFP, MVT::f64, Custom); @@ -128,10 +165,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setLoadExtAction(ISD::ZEXTLOAD, VT, Custom); setLoadExtAction(ISD::SEXTLOAD, VT, Custom); - // SMUL_LOHI, UMUL_LOHI are not legal for Cell: - setOperationAction(ISD::SMUL_LOHI, VT, Expand); - setOperationAction(ISD::UMUL_LOHI, VT, Expand); - for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) { MVT StoreVT = (MVT::SimpleValueType) stype; setTruncStoreAction(VT, StoreVT, Expand); @@ -179,16 +212,14 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::FCOS , MVT::f32, Expand); setOperationAction(ISD::FREM , MVT::f32, Expand); - // If we're enabling GP optimizations, use hardware square root + // Expand fsqrt to the appropriate libcall 
(NOTE: should use h/w fsqrt + // for f32!) setOperationAction(ISD::FSQRT, MVT::f64, Expand); setOperationAction(ISD::FSQRT, MVT::f32, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); - // Make sure that DAGCombine doesn't insert illegal 64-bit constants - setOperationAction(ISD::FABS, MVT::f64, Custom); - // SPU can do rotate right and left, so legalize it... but customize for i8 // because instructions don't exist. @@ -254,22 +285,21 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) // Custom lower i128 -> i64 truncates setOperationAction(ISD::TRUNCATE, MVT::i64, Custom); - // SPU has a legal FP -> signed INT instruction - setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal); - setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal); - setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); + // SPU has a legal FP -> signed INT instruction for f32, but for f64, need + // to expand to a libcall, hence the custom lowering: + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); // FDIV on SPU requires custom lowering - setOperationAction(ISD::FDIV, MVT::f64, Expand); // libcall + setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall // SPU has [U|S]INT_TO_FP - setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote); - setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote); - setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote); - setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote); setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); 
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); @@ -338,24 +368,23 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) MVT VT = (MVT::SimpleValueType)i; // add/sub are legal for all supported vector VT's. - setOperationAction(ISD::ADD , VT, Legal); - setOperationAction(ISD::SUB , VT, Legal); + setOperationAction(ISD::ADD, VT, Legal); + setOperationAction(ISD::SUB, VT, Legal); // mul has to be custom lowered. - // TODO: v2i64 vector multiply - setOperationAction(ISD::MUL , VT, Legal); + setOperationAction(ISD::MUL, VT, Legal); - setOperationAction(ISD::AND , VT, Legal); - setOperationAction(ISD::OR , VT, Legal); - setOperationAction(ISD::XOR , VT, Legal); - setOperationAction(ISD::LOAD , VT, Legal); - setOperationAction(ISD::SELECT, VT, Legal); - setOperationAction(ISD::STORE, VT, Legal); + setOperationAction(ISD::AND, VT, Legal); + setOperationAction(ISD::OR, VT, Legal); + setOperationAction(ISD::XOR, VT, Legal); + setOperationAction(ISD::LOAD, VT, Legal); + setOperationAction(ISD::SELECT, VT, Legal); + setOperationAction(ISD::STORE, VT, Legal); // These operations need to be expanded: - setOperationAction(ISD::SDIV, VT, Expand); - setOperationAction(ISD::SREM, VT, Expand); - setOperationAction(ISD::UDIV, VT, Expand); - setOperationAction(ISD::UREM, VT, Expand); + setOperationAction(ISD::SDIV, VT, Expand); + setOperationAction(ISD::SREM, VT, Expand); + setOperationAction(ISD::UDIV, VT, Expand); + setOperationAction(ISD::UREM, VT, Expand); // Custom lower build_vector, constant pool spills, insert and // extract vector elements: @@ -866,31 +895,6 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { return SDValue(); } -//! Custom lower i64 integer constants -/*! - This code inserts all of the necessary juggling that needs to occur to load - a 64-bit constant into a register. 
- */ -static SDValue -LowerConstant(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op.getValueType(); - - if (VT == MVT::i64) { - ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode()); - SDValue T = DAG.getConstant(CN->getZExtValue(), VT); - return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T)); - } else { - cerr << "LowerConstant: unhandled constant type " - << VT.getMVTString() - << "\n"; - abort(); - /*NOTREACHED*/ - } - - return SDValue(); -} - //! Custom lower double precision floating point constants static SDValue LowerConstantFP(SDValue Op, SelectionDAG &DAG) { @@ -1564,7 +1568,7 @@ static bool isConstantSplat(const uint64_t Bits128[2], //! Lower a BUILD_VECTOR instruction creatively: SDValue -SPU::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { +LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getValueType(); // If this is a vector of constants or undefs, get the bits. A bit in // UndefBits is set if the corresponding element of the vector is an @@ -1588,7 +1592,7 @@ SPU::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { abort(); /*NOTREACHED*/ case MVT::v4f32: { - uint32_t Value32 = SplatBits; + uint32_t Value32 = uint32_t(SplatBits); assert(SplatSize == 4 && "LowerBUILD_VECTOR: Unexpected floating point vector element."); // NOTE: pretend the constant is an integer. LLVM won't load FP constants @@ -1598,7 +1602,7 @@ SPU::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { break; } case MVT::v2f64: { - uint64_t f64val = SplatBits; + uint64_t f64val = uint64_t(SplatBits); assert(SplatSize == 8 && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes."); // NOTE: pretend the constant is an integer. 
LLVM won't load FP constants @@ -1638,93 +1642,99 @@ SPU::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T); } case MVT::v2i64: { - uint64_t val = SplatBits; - uint32_t upper = uint32_t(val >> 32); - uint32_t lower = uint32_t(val); - - if (upper == lower) { - // Magic constant that can be matched by IL, ILA, et. al. - SDValue Val = DAG.getTargetConstant(val, MVT::i64); - return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val); - } else { - SDValue LO32; - SDValue HI32; - SmallVector<SDValue, 16> ShufBytes; - SDValue Result; - bool upper_special, lower_special; - - // NOTE: This code creates common-case shuffle masks that can be easily - // detected as common expressions. It is not attempting to create highly - // specialized masks to replace any and all 0's, 0xff's and 0x80's. - - // Detect if the upper or lower half is a special shuffle mask pattern: - upper_special = (upper == 0||upper == 0xffffffff||upper == 0x80000000); - lower_special = (lower == 0||lower == 0xffffffff||lower == 0x80000000); - - // Create lower vector if not a special pattern - if (!lower_special) { - SDValue LO32C = DAG.getConstant(lower, MVT::i32); - LO32 = DAG.getNode(ISD::BIT_CONVERT, VT, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, - LO32C, LO32C, LO32C, LO32C)); - } + return SPU::LowerSplat_v2i64(VT, DAG, SplatBits); + } + } - // Create upper vector if not a special pattern - if (!upper_special) { - SDValue HI32C = DAG.getConstant(upper, MVT::i32); - HI32 = DAG.getNode(ISD::BIT_CONVERT, VT, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, - HI32C, HI32C, HI32C, HI32C)); - } + return SDValue(); +} - // If either upper or lower are special, then the two input operands are - // the same (basically, one of them is a "don't care") - if (lower_special) - LO32 = HI32; - if (upper_special) - HI32 = LO32; - if (lower_special && upper_special) { - // Unhappy situation... 
both upper and lower are special, so punt with - // a target constant: - SDValue Zero = DAG.getConstant(0, MVT::i32); - HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero, - Zero, Zero); - } +SDValue +SPU::LowerSplat_v2i64(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal) { + uint32_t upper = uint32_t(SplatVal >> 32); + uint32_t lower = uint32_t(SplatVal); + + if (upper == lower) { + // Magic constant that can be matched by IL, ILA, et. al. + SDValue Val = DAG.getTargetConstant(upper, MVT::i32); + return DAG.getNode(ISD::BIT_CONVERT, OpVT, + DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, + Val, Val, Val, Val)); + } else { + SDValue LO32; + SDValue HI32; + SmallVector<SDValue, 16> ShufBytes; + SDValue Result; + bool upper_special, lower_special; + + // NOTE: This code creates common-case shuffle masks that can be easily + // detected as common expressions. It is not attempting to create highly + // specialized masks to replace any and all 0's, 0xff's and 0x80's. + + // Detect if the upper or lower half is a special shuffle mask pattern: + upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000); + lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000); + + // Create lower vector if not a special pattern + if (!lower_special) { + SDValue LO32C = DAG.getConstant(lower, MVT::i32); + LO32 = DAG.getNode(ISD::BIT_CONVERT, OpVT, + DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, + LO32C, LO32C, LO32C, LO32C)); + } - for (int i = 0; i < 4; ++i) { - uint64_t val = 0; - for (int j = 0; j < 4; ++j) { - SDValue V; - bool process_upper, process_lower; - val <<= 8; - process_upper = (upper_special && (i & 1) == 0); - process_lower = (lower_special && (i & 1) == 1); - - if (process_upper || process_lower) { - if ((process_upper && upper == 0) - || (process_lower && lower == 0)) - val |= 0x80; - else if ((process_upper && upper == 0xffffffff) - || (process_lower && lower == 0xffffffff)) - val |= 0xc0; - else if ((process_upper && upper == 
0x80000000) - || (process_lower && lower == 0x80000000)) - val |= (j == 0 ? 0xe0 : 0x80); - } else - val |= i * 4 + j + ((i & 1) * 16); - } + // Create upper vector if not a special pattern + if (!upper_special) { + SDValue HI32C = DAG.getConstant(upper, MVT::i32); + HI32 = DAG.getNode(ISD::BIT_CONVERT, OpVT, + DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, + HI32C, HI32C, HI32C, HI32C)); + } - ShufBytes.push_back(DAG.getConstant(val, MVT::i32)); + // If either upper or lower are special, then the two input operands are + // the same (basically, one of them is a "don't care") + if (lower_special) + LO32 = HI32; + if (upper_special) + HI32 = LO32; + if (lower_special && upper_special) { + // Unhappy situation... both upper and lower are special, so punt with + // a target constant: + SDValue Zero = DAG.getConstant(0, MVT::i32); + HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero, + Zero, Zero); + } + + for (int i = 0; i < 4; ++i) { + uint64_t val = 0; + for (int j = 0; j < 4; ++j) { + SDValue V; + bool process_upper, process_lower; + val <<= 8; + process_upper = (upper_special && (i & 1) == 0); + process_lower = (lower_special && (i & 1) == 1); + + if (process_upper || process_lower) { + if ((process_upper && upper == 0) + || (process_lower && lower == 0)) + val |= 0x80; + else if ((process_upper && upper == 0xffffffff) + || (process_lower && lower == 0xffffffff)) + val |= 0xc0; + else if ((process_upper && upper == 0x80000000) + || (process_lower && lower == 0x80000000)) + val |= (j == 0 ? 
0xe0 : 0x80); + } else + val |= i * 4 + j + ((i & 1) * 16); } - return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, - &ShufBytes[0], ShufBytes.size())); + ShufBytes.push_back(DAG.getConstant(val, MVT::i32)); } - } - } - return SDValue(); + return DAG.getNode(SPUISD::SHUFB, OpVT, HI32, LO32, + DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, + &ShufBytes[0], ShufBytes.size())); + } } /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on @@ -2384,81 +2394,180 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) { return SDValue(); } -//! Lower ISD::FABS +//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32 /*! - DAGCombine does the same basic reduction: convert the double to i64 and mask - off the sign bit. Unfortunately, DAGCombine inserts the i64 constant, which - CellSPU has to legalize. Hence, the custom lowering. + f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall. + All conversions to i64 are expanded to a libcall. */ - -static SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) { +static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, + SPUTargetLowering &TLI) { MVT OpVT = Op.getValueType(); - MVT IntVT(MVT::i64); SDValue Op0 = Op.getOperand(0); + MVT Op0VT = Op0.getValueType(); + + if ((OpVT == MVT::i32 && Op0VT == MVT::f64) + || OpVT == MVT::i64) { + // Convert f32 / f64 to i32 / i64 via libcall. + RTLIB::Libcall LC = + (Op.getOpcode() == ISD::FP_TO_SINT) + ? RTLIB::getFPTOSINT(Op0VT, OpVT) + : RTLIB::getFPTOUINT(Op0VT, OpVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!"); + SDValue Dummy; + return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI); + } + + return SDValue(); +} - assert(OpVT == MVT::f64 && "LowerFABS: expecting MVT::f64!\n"); +//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32 +/*! + i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall. + All conversions from i64 are expanded to a libcall. 
+ */ +static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, + SPUTargetLowering &TLI) { + MVT OpVT = Op.getValueType(); + SDValue Op0 = Op.getOperand(0); + MVT Op0VT = Op0.getValueType(); - SDValue iABS = - DAG.getNode(ISD::AND, IntVT, - DAG.getNode(ISD::BIT_CONVERT, IntVT, Op0), - DAG.getConstant(~IntVT.getIntegerVTSignBit(), IntVT)); + if ((OpVT == MVT::f64 && Op0VT == MVT::i32) + || Op0VT == MVT::i64) { + // Convert i32, i64 to f64 via libcall: + RTLIB::Libcall LC = + (Op.getOpcode() == ISD::SINT_TO_FP) + ? RTLIB::getSINTTOFP(Op0VT, OpVT) + : RTLIB::getUINTTOFP(Op0VT, OpVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!"); + SDValue Dummy; + return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI); + } - return DAG.getNode(ISD::BIT_CONVERT, MVT::f64, iABS); + return SDValue(); } //! Lower ISD::SETCC /*! This handles MVT::f64 (double floating point) condition lowering */ - static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { + CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2)); + assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n"); + SDValue lhs = Op.getOperand(0); SDValue rhs = Op.getOperand(1); - CondCodeSDNode *CC = dyn_cast<CondCodeSDNode > (Op.getOperand(2)); MVT lhsVT = lhs.getValueType(); - SDValue posNaN = DAG.getConstant(0x7ff0000000000001ULL, MVT::i64); - - assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n"); assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n"); - switch (CC->get()) { - case ISD::SETOEQ: - case ISD::SETOGT: - case ISD::SETOGE: - case ISD::SETOLT: - case ISD::SETOLE: - case ISD::SETONE: - cerr << "CellSPU ISel Select: unimplemented f64 condition\n"; - abort(); - break; - case ISD::SETO: { - SDValue lhsfabs = DAG.getNode(ISD::FABS, MVT::f64, lhs); - SDValue i64lhs = - DAG.getNode(ISD::BIT_CONVERT, MVT::i64, lhsfabs); + MVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType()); + APInt ccResultOnes = 
APInt::getAllOnesValue(ccResultVT.getSizeInBits()); + MVT IntVT(MVT::i64); - return DAG.getSetCC(MVT::i32, i64lhs, posNaN, ISD::SETLT); - } - case ISD::SETUO: { - SDValue lhsfabs = DAG.getNode(ISD::FABS, MVT::f64, lhs); - SDValue i64lhs = - DAG.getNode(ISD::BIT_CONVERT, MVT::i64, lhsfabs); + // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently + // selected to a NOP: + SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, IntVT, lhs); + SDValue lhsHi32 = + DAG.getNode(ISD::TRUNCATE, MVT::i32, + DAG.getNode(ISD::SRL, IntVT, + i64lhs, DAG.getConstant(32, MVT::i32))); + SDValue lhsHi32abs = + DAG.getNode(ISD::AND, MVT::i32, + lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32)); + SDValue lhsLo32 = + DAG.getNode(ISD::TRUNCATE, MVT::i32, i64lhs); + + // SETO and SETUO only use the lhs operand: + if (CC->get() == ISD::SETO) { + // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of + // SETUO + APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits()); + return DAG.getNode(ISD::XOR, ccResultVT, + DAG.getSetCC(ccResultVT, + lhs, DAG.getConstantFP(0.0, lhsVT), + ISD::SETUO), + DAG.getConstant(ccResultAllOnes, ccResultVT)); + } else if (CC->get() == ISD::SETUO) { + // Evaluates to true if Op0 is [SQ]NaN + return DAG.getNode(ISD::AND, ccResultVT, + DAG.getSetCC(ccResultVT, + lhsHi32abs, + DAG.getConstant(0x7ff00000, MVT::i32), + ISD::SETGE), + DAG.getSetCC(ccResultVT, + lhsLo32, + DAG.getConstant(0, MVT::i32), + ISD::SETGT)); + } + + SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, IntVT, rhs); + SDValue rhsHi32 = + DAG.getNode(ISD::TRUNCATE, MVT::i32, + DAG.getNode(ISD::SRL, IntVT, + i64rhs, DAG.getConstant(32, MVT::i32))); + + // If a value is negative, subtract from the sign magnitude constant: + SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT); + + // Convert the sign-magnitude representation into 2's complement: + SDValue lhsSelectMask = DAG.getNode(ISD::SRA, ccResultVT, + lhsHi32, DAG.getConstant(31, 
MVT::i32)); + SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, IntVT, signMag2TC, i64lhs); + SDValue lhsSelect = + DAG.getNode(ISD::SELECT, IntVT, + lhsSelectMask, lhsSignMag2TC, i64lhs); + + SDValue rhsSelectMask = DAG.getNode(ISD::SRA, ccResultVT, + rhsHi32, DAG.getConstant(31, MVT::i32)); + SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, IntVT, signMag2TC, i64rhs); + SDValue rhsSelect = + DAG.getNode(ISD::SELECT, IntVT, + rhsSelectMask, rhsSignMag2TC, i64rhs); + + unsigned compareOp; - return DAG.getSetCC(MVT::i32, i64lhs, posNaN, ISD::SETGE); - } + switch (CC->get()) { + case ISD::SETOEQ: case ISD::SETUEQ: + compareOp = ISD::SETEQ; break; + case ISD::SETOGT: case ISD::SETUGT: + compareOp = ISD::SETGT; break; + case ISD::SETOGE: case ISD::SETUGE: + compareOp = ISD::SETGE; break; + case ISD::SETOLT: case ISD::SETULT: + compareOp = ISD::SETLT; break; + case ISD::SETOLE: case ISD::SETULE: + compareOp = ISD::SETLE; break; case ISD::SETUNE: + case ISD::SETONE: + compareOp = ISD::SETNE; break; default: cerr << "CellSPU ISel Select: unimplemented f64 condition\n"; abort(); break; } - return SDValue(); + SDValue result = + DAG.getSetCC(ccResultVT, lhsSelect, rhsSelect, (ISD::CondCode) compareOp); + + if ((CC->get() & 0x8) == 0) { + // Ordered comparison: + SDValue lhsNaN = DAG.getSetCC(ccResultVT, + lhs, DAG.getConstantFP(0.0, MVT::f64), + ISD::SETO); + SDValue rhsNaN = DAG.getSetCC(ccResultVT, + rhs, DAG.getConstantFP(0.0, MVT::f64), + ISD::SETO); + SDValue ordered = DAG.getNode(ISD::AND, ccResultVT, lhsNaN, rhsNaN); + + result = DAG.getNode(ISD::AND, ccResultVT, ordered, result); + } + + return result; } //! 
Lower ISD::SELECT_CC @@ -2566,8 +2675,6 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl()); case ISD::JumpTable: return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl()); - case ISD::Constant: - return LowerConstant(Op, DAG); case ISD::ConstantFP: return LowerConstantFP(Op, DAG); case ISD::FORMAL_ARGUMENTS: @@ -2590,12 +2697,17 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) break; } - case ISD::FABS: - return LowerFABS(Op, DAG); + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + return LowerFP_TO_INT(Op, DAG, *this); + + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + return LowerINT_TO_FP(Op, DAG, *this); // Vector-related lowering. case ISD::BUILD_VECTOR: - return SPU::LowerBUILD_VECTOR(Op, DAG); + return LowerBUILD_VECTOR(Op, DAG); case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); case ISD::VECTOR_SHUFFLE: diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h index 079f3ba..24c2803 100644 --- a/lib/Target/CellSPU/SPUISelLowering.h +++ b/lib/Target/CellSPU/SPUISelLowering.h @@ -61,7 +61,7 @@ namespace llvm { }; } - //! Utility functions specific to CellSPU-only: + //! 
Utility functions specific to CellSPU: namespace SPU { SDValue get_vec_u18imm(SDNode *N, SelectionDAG &DAG, MVT ValueType); @@ -78,7 +78,7 @@ namespace llvm { SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM); - SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG); + SDValue LowerSplat_v2i64(MVT OpVT, SelectionDAG &DAG, uint64_t splat); SDValue getBorrowGenerateShufMask(SelectionDAG &DAG); SDValue getCarryGenerateShufMask(SelectionDAG &DAG); diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp index 91d52fa..f35a42d 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.cpp +++ b/lib/Target/CellSPU/SPUInstrInfo.cpp @@ -155,13 +155,13 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI, case SPU::ORr8_r32: case SPU::ORr32_r16: case SPU::ORr32_r8: - case SPU::ORr32_r64: case SPU::ORr16_r64: case SPU::ORr8_r64: - case SPU::ORr64_r32: case SPU::ORr64_r16: case SPU::ORr64_r8: */ + case SPU::ORr64_r32: + case SPU::ORr32_r64: case SPU::ORf32_r32: case SPU::ORr32_f32: case SPU::ORf64_r64: diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index 2834a1e..8db2fa7 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.td +++ b/lib/Target/CellSPU/SPUInstrInfo.td @@ -1259,6 +1259,9 @@ multiclass BitwiseAnd def fabs32: ANDInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB), [/* Intentionally does not match a pattern */]>; + def fabs64: ANDInst<(outs R64FP:$rT), (ins R64FP:$rA, VECREG:$rB), + [/* Intentionally does not match a pattern */]>; + // Could use v4i32, but won't for clarity def fabsvec: ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), [/* Intentionally does not match a pattern */]>; @@ -1525,17 +1528,17 @@ multiclass BitwiseOr // Conversion from R32C to register def r32_r16: ORCvtFormR32Reg<R16C>; def r32_r8: ORCvtFormR32Reg<R8C>; +*/ - // Conversion from register to R64C: + // Conversion to register from R64C: def r32_r64: ORCvtFormR64Reg<R32C>; - def r16_r64: 
ORCvtFormR64Reg<R16C>; - def r8_r64: ORCvtFormR64Reg<R8C>; + // def r16_r64: ORCvtFormR64Reg<R16C>; + // def r8_r64: ORCvtFormR64Reg<R8C>; - // Conversion from R64C to register + // Conversion to R64C from register def r64_r32: ORCvtFormRegR64<R32C>; - def r64_r16: ORCvtFormRegR64<R16C>; - def r64_r8: ORCvtFormRegR64<R8C>; -*/ + // def r64_r16: ORCvtFormRegR64<R16C>; + // def r64_r8: ORCvtFormRegR64<R8C>; // bitconvert patterns: def r32_f32: ORCvtFormR32Reg<R32FP, @@ -1910,11 +1913,11 @@ class SELBInst<dag OOL, dag IOL, list<dag> pattern>: RRRForm<0b1000, OOL, IOL, "selb\t$rT, $rA, $rB, $rC", IntegerOp, pattern>; -class SELBVecInst<ValueType vectype>: +class SELBVecInst<ValueType vectype, PatFrag vnot_frag = vnot>: SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), [(set (vectype VECREG:$rT), (or (and (vectype VECREG:$rC), (vectype VECREG:$rB)), - (and (vnot (vectype VECREG:$rC)), + (and (vnot_frag (vectype VECREG:$rC)), (vectype VECREG:$rA))))]>; class SELBVecVCondInst<ValueType vectype>: @@ -1947,7 +1950,7 @@ multiclass SelectBits def v16i8: SELBVecInst<v16i8>; def v8i16: SELBVecInst<v8i16>; def v4i32: SELBVecInst<v4i32>; - def v2i64: SELBVecInst<v2i64>; + def v2i64: SELBVecInst<v2i64, vnot_conv>; def r128: SELBRegInst<GPRC>; def r64: SELBRegInst<R64C>; @@ -4321,6 +4324,13 @@ def : Pat<(fabs (v4f32 VECREG:$rA)), (ANDfabsvec (v4f32 VECREG:$rA), (v4f32 (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f)))>; +def : Pat<(fabs R64FP:$rA), + (ANDfabs64 R64FP:$rA, (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f))>; + +def : Pat<(fabs (v2f64 VECREG:$rA)), + (ANDfabsvec (v2f64 VECREG:$rA), + (v2f64 (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f)))>; + //===----------------------------------------------------------------------===// // Hint for branch instructions: //===----------------------------------------------------------------------===// |