Diffstat (limited to 'lib/Target/CellSPU/SPUISelLowering.cpp')
-rw-r--r-- | lib/Target/CellSPU/SPUISelLowering.cpp | 2673
1 file changed, 2673 insertions, 0 deletions
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp new file mode 100644 index 0000000..91c0024 --- /dev/null +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -0,0 +1,2673 @@ +//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by a team from the Computer Systems Research +// Department at The Aerospace Corporation. +// +// See README.txt for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SPUTargetLowering class. +// +//===----------------------------------------------------------------------===// + +#include "SPURegisterNames.h" +#include "SPUISelLowering.h" +#include "SPUTargetMachine.h" +#include "llvm/ADT/VectorExtras.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SSARegMap.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Intrinsics.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetOptions.h" + +#include <map> + +using namespace llvm; + +// Used in getTargetNodeName() below +namespace { + std::map<unsigned, const char *> node_names; + + //! MVT::ValueType mapping to useful data for Cell SPU + struct valtype_map_s { + const MVT::ValueType valtype; + const int prefslot_byte; + }; + + const valtype_map_s valtype_map[] = { + { MVT::i1, 3 }, + { MVT::i8, 3 }, + { MVT::i16, 2 }, + { MVT::i32, 0 }, + { MVT::f32, 0 }, + { MVT::i64, 0 }, + { MVT::f64, 0 }, + { MVT::i128, 0 } + }; + + const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]); + + const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) { + const valtype_map_s *retval = 0; + + for (size_t i = 0; i < n_valtype_map; ++i) { + if (valtype_map[i].valtype == VT) { + retval = valtype_map + i; + break; + } + } + +#ifndef NDEBUG + if (retval == 0) { + cerr << "getValueTypeMapEntry returns NULL for " + << MVT::getValueTypeString(VT) + << "\n"; + abort(); + } +#endif + + return retval; + } + + //! Predicate that returns true if operand is a memory target + /*! + \arg Op Operand to test + \return true if the operand is a memory target (i.e., global + address, external symbol, constant pool) or an existing D-Form + address. + */ + bool isMemoryOperand(const SDOperand &Op) + { + const unsigned Opc = Op.getOpcode(); + return (Opc == ISD::GlobalAddress + || Opc == ISD::GlobalTLSAddress + || Opc == ISD::FrameIndex + || Opc == ISD::JumpTable + || Opc == ISD::ConstantPool + || Opc == ISD::ExternalSymbol + || Opc == ISD::TargetGlobalAddress + || Opc == ISD::TargetGlobalTLSAddress + || Opc == ISD::TargetFrameIndex + || Opc == ISD::TargetJumpTable + || Opc == ISD::TargetConstantPool + || Opc == ISD::TargetExternalSymbol + || Opc == SPUISD::DFormAddr); + } +} + +SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) + : TargetLowering(TM), + SPUTM(TM) +{ + // Fold away setcc operations if possible. + setPow2DivIsCheap(); + + // Use _setjmp/_longjmp instead of setjmp/longjmp. 
+ setUseUnderscoreSetJmp(true); + setUseUnderscoreLongJmp(true); + + // Set up the SPU's register classes: + // NOTE: i8 register class is not registered because we cannot determine when + // we need to zero or sign extend for custom-lowered loads and stores. + addRegisterClass(MVT::i16, SPU::R16CRegisterClass); + addRegisterClass(MVT::i32, SPU::R32CRegisterClass); + addRegisterClass(MVT::i64, SPU::R64CRegisterClass); + addRegisterClass(MVT::f32, SPU::R32FPRegisterClass); + addRegisterClass(MVT::f64, SPU::R64FPRegisterClass); + addRegisterClass(MVT::i128, SPU::GPRCRegisterClass); + + // SPU has no sign or zero extended loads for i1, i8, i16: + setLoadXAction(ISD::EXTLOAD, MVT::i1, Custom); + setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote); + setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote); + setStoreXAction(MVT::i1, Custom); + + setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom); + setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom); + setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom); + setStoreXAction(MVT::i8, Custom); + + setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom); + setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom); + setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom); + + // SPU constant load actions are custom lowered: + setOperationAction(ISD::Constant, MVT::i64, Custom); + setOperationAction(ISD::ConstantFP, MVT::f32, Custom); + setOperationAction(ISD::ConstantFP, MVT::f64, Custom); + + // SPU's loads and stores have to be custom lowered: + for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128; + ++sctype) { + setOperationAction(ISD::LOAD, sctype, Custom); + setOperationAction(ISD::STORE, sctype, Custom); + } + + // SPU supports BRCOND, although DAGCombine will convert BRCONDs + // into BR_CCs. BR_CC instructions are custom selected in + // SPUDAGToDAGISel. + setOperationAction(ISD::BRCOND, MVT::Other, Legal); + + // Expand the jumptable branches + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + setOperationAction(ISD::BR_CC, MVT::Other, Expand); + setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); + + // SPU has no intrinsics for these particular operations: + setOperationAction(ISD::MEMMOVE, MVT::Other, Expand); + setOperationAction(ISD::MEMSET, MVT::Other, Expand); + setOperationAction(ISD::MEMCPY, MVT::Other, Expand); + + // PowerPC has no SREM/UREM instructions + setOperationAction(ISD::SREM, MVT::i32, Expand); + setOperationAction(ISD::UREM, MVT::i32, Expand); + setOperationAction(ISD::SREM, MVT::i64, Expand); + setOperationAction(ISD::UREM, MVT::i64, Expand); + + // We don't support sin/cos/sqrt/fmod + setOperationAction(ISD::FSIN , MVT::f64, Expand); + setOperationAction(ISD::FCOS , MVT::f64, Expand); + setOperationAction(ISD::FREM , MVT::f64, Expand); + setOperationAction(ISD::FSIN , MVT::f32, Expand); + setOperationAction(ISD::FCOS , MVT::f32, Expand); + setOperationAction(ISD::FREM , MVT::f32, Expand); + + // If we're enabling GP optimizations, use hardware square root + setOperationAction(ISD::FSQRT, MVT::f64, Expand); + setOperationAction(ISD::FSQRT, MVT::f32, Expand); + + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + + // SPU can do rotate right and left, so legalize it... but customize for i8 + // because instructions don't exist. 
+ setOperationAction(ISD::ROTR, MVT::i32, Legal); + setOperationAction(ISD::ROTR, MVT::i16, Legal); + setOperationAction(ISD::ROTR, MVT::i8, Custom); + setOperationAction(ISD::ROTL, MVT::i32, Legal); + setOperationAction(ISD::ROTL, MVT::i16, Legal); + setOperationAction(ISD::ROTL, MVT::i8, Custom); + // SPU has no native version of shift left/right for i8 + setOperationAction(ISD::SHL, MVT::i8, Custom); + setOperationAction(ISD::SRL, MVT::i8, Custom); + setOperationAction(ISD::SRA, MVT::i8, Custom); + + // Custom lower i32 multiplications + setOperationAction(ISD::MUL, MVT::i32, Custom); + + // Need to custom handle (some) common i8 math ops + setOperationAction(ISD::SUB, MVT::i8, Custom); + setOperationAction(ISD::MUL, MVT::i8, Custom); + + // SPU does not have BSWAP. It does have i32 support CTLZ. + // CTPOP has to be custom lowered. + setOperationAction(ISD::BSWAP, MVT::i32, Expand); + setOperationAction(ISD::BSWAP, MVT::i64, Expand); + + setOperationAction(ISD::CTPOP, MVT::i8, Custom); + setOperationAction(ISD::CTPOP, MVT::i16, Custom); + setOperationAction(ISD::CTPOP, MVT::i32, Custom); + setOperationAction(ISD::CTPOP, MVT::i64, Custom); + + setOperationAction(ISD::CTTZ , MVT::i32, Expand); + setOperationAction(ISD::CTTZ , MVT::i64, Expand); + + setOperationAction(ISD::CTLZ , MVT::i32, Legal); + + // SPU does not have select or setcc + setOperationAction(ISD::SELECT, MVT::i1, Expand); + setOperationAction(ISD::SELECT, MVT::i8, Expand); + setOperationAction(ISD::SELECT, MVT::i16, Expand); + setOperationAction(ISD::SELECT, MVT::i32, Expand); + setOperationAction(ISD::SELECT, MVT::i64, Expand); + setOperationAction(ISD::SELECT, MVT::f32, Expand); + setOperationAction(ISD::SELECT, MVT::f64, Expand); + + setOperationAction(ISD::SETCC, MVT::i1, Expand); + setOperationAction(ISD::SETCC, MVT::i8, Expand); + setOperationAction(ISD::SETCC, MVT::i16, Expand); + setOperationAction(ISD::SETCC, MVT::i32, Expand); + setOperationAction(ISD::SETCC, MVT::i64, Expand); + setOperationAction(ISD::SETCC, MVT::f32, Expand); + setOperationAction(ISD::SETCC, MVT::f64, Expand); + + // SPU has a legal FP -> signed INT instruction + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); + + // FDIV on SPU requires custom lowering + setOperationAction(ISD::FDIV, MVT::f32, Custom); + //setOperationAction(ISD::FDIV, MVT::f64, Custom); + + // SPU has [U|S]INT_TO_FP + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote); + setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote); + setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); + + setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand); + setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand); + setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand); + setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand); + + // We cannot sextinreg(i1). Expand to shifts. + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + + // Support label based line numbers. 
+ setOperationAction(ISD::LOCATION, MVT::Other, Expand); + setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); + + // We want to legalize GlobalAddress and ConstantPool nodes into the + // appropriate instructions to materialize the address. + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + setOperationAction(ISD::ConstantPool, MVT::i32, Custom); + setOperationAction(ISD::ConstantPool, MVT::f32, Custom); + setOperationAction(ISD::JumpTable, MVT::i32, Custom); + setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); + setOperationAction(ISD::ConstantPool, MVT::i64, Custom); + setOperationAction(ISD::ConstantPool, MVT::f64, Custom); + setOperationAction(ISD::JumpTable, MVT::i64, Custom); + + // RET must be custom lowered, to meet ABI requirements + setOperationAction(ISD::RET, MVT::Other, Custom); + + // VASTART needs to be custom lowered to use the VarArgsFrameIndex + setOperationAction(ISD::VASTART , MVT::Other, Custom); + + // Use the default implementation. + setOperationAction(ISD::VAARG , MVT::Other, Expand); + setOperationAction(ISD::VACOPY , MVT::Other, Expand); + setOperationAction(ISD::VAEND , MVT::Other, Expand); + setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand); + + // Cell SPU has instructions for converting between i64 and fp. + setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); + + // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); + + // BUILD_PAIR can't be handled natively, and should be expanded to shl/or + setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); + + // First set operation action for all vector types to expand. Then we + // will selectively turn on ones that can be effectively codegen'd. + addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass); + addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass); + addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass); + addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass); + addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass); + addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass); + + for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; + VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) { + // add/sub are legal for all supported vector VT's. + setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal); + setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal); + // mul has to be custom lowered. 
+ setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom); + + setOperationAction(ISD::AND , (MVT::ValueType)VT, Legal); + setOperationAction(ISD::OR , (MVT::ValueType)VT, Legal); + setOperationAction(ISD::XOR , (MVT::ValueType)VT, Legal); + setOperationAction(ISD::LOAD , (MVT::ValueType)VT, Legal); + setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal); + setOperationAction(ISD::STORE, (MVT::ValueType)VT, Legal); + + // These operations need to be expanded: + setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand); + setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand); + setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand); + setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand); + setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom); + + // Custom lower build_vector, constant pool spills, insert and + // extract vector elements: + setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom); + setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom); + } + + setOperationAction(ISD::MUL, MVT::v16i8, Custom); + setOperationAction(ISD::AND, MVT::v16i8, Custom); + setOperationAction(ISD::OR, MVT::v16i8, Custom); + setOperationAction(ISD::XOR, MVT::v16i8, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); + + setSetCCResultType(MVT::i32); + setShiftAmountType(MVT::i32); + setSetCCResultContents(ZeroOrOneSetCCResult); + + setStackPointerRegisterToSaveRestore(SPU::R1); + + // We have target-specific dag combine patterns for the following nodes: + // e.g., setTargetDAGCombine(ISD::SUB); + + computeRegisterProperties(); +} + +const char * +SPUTargetLowering::getTargetNodeName(unsigned Opcode) const +{ + if (node_names.empty()) { + node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG"; + node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi"; + node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo"; + node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr"; + node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr"; + node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr"; + node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT"; + node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL"; + node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB"; + node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK"; + node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB"; + node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR"; + node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0"; + node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED"; + node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT"; + node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT"; + node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT"; + node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT"; + node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY"; + node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU"; + node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH"; + node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH"; + node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL"; + node_names[(unsigned) SPUISD::VEC_SRL] 
= "SPUISD::VEC_SRL"; + node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA"; + node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL"; + node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR"; + node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] = + "SPUISD::ROTBYTES_RIGHT_Z"; + node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] = + "SPUISD::ROTBYTES_RIGHT_S"; + node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT"; + node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] = + "SPUISD::ROTBYTES_LEFT_CHAINED"; + node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI"; + node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB"; + node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant"; + node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp"; + node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst"; + node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64"; + } + + std::map<unsigned, const char *>::iterator i = node_names.find(Opcode); + + return ((i != node_names.end()) ? i->second : 0); +} + +//===----------------------------------------------------------------------===// +// Calling convention code: +//===----------------------------------------------------------------------===// + +#include "SPUGenCallingConv.inc" + +//===----------------------------------------------------------------------===// +// LowerOperation implementation +//===----------------------------------------------------------------------===// + +/// Custom lower loads for CellSPU +/*! + All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements + within a 16-byte block, we have to rotate to extract the requested element. + */ +static SDOperand +LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { + LoadSDNode *LN = cast<LoadSDNode>(Op); + SDOperand basep = LN->getBasePtr(); + SDOperand the_chain = LN->getChain(); + MVT::ValueType VT = LN->getLoadedVT(); + MVT::ValueType OpVT = Op.Val->getValueType(0); + MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + ISD::LoadExtType ExtType = LN->getExtensionType(); + unsigned alignment = LN->getAlignment(); + const valtype_map_s *vtm = getValueTypeMapEntry(VT); + SDOperand Ops[8]; + + // For an extending load of an i1 variable, just call it i8 (or whatever we + // were passed) and make it zero-extended: + if (VT == MVT::i1) { + VT = OpVT; + ExtType = ISD::ZEXTLOAD; + } + + switch (LN->getAddressingMode()) { + case ISD::UNINDEXED: { + SDOperand result; + SDOperand rot_op, rotamt; + SDOperand ptrp; + int c_offset; + int c_rotamt; + + // The vector type we really want to be when we load the 16-byte chunk + MVT::ValueType vecVT, opVecVT; + + if (VT != MVT::i1) + vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT))); + else + vecVT = MVT::v16i8; + + opVecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT))); + + if (basep.getOpcode() == ISD::ADD) { + const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1)); + + assert(CN != NULL + && "LowerLOAD: ISD::ADD operand 1 is not constant"); + + c_offset = (int) CN->getValue(); + c_rotamt = (int) (c_offset & 0xf); + + // Adjust the rotation amount to ensure that the final result ends up in + // the preferred slot: + c_rotamt -= vtm->prefslot_byte; + ptrp = basep.getOperand(0); + } else { + c_offset = 0; + c_rotamt = -vtm->prefslot_byte; + ptrp = basep; + } + + if (alignment == 16) { + // 16-byte aligned load into preferred slot, no rotation + if (c_rotamt == 0) { + if (isMemoryOperand(ptrp)) + // 
Return unchanged + return SDOperand(); + else { + // Return modified D-Form address for pointer: + ptrp = DAG.getNode(SPUISD::DFormAddr, PtrVT, + ptrp, DAG.getConstant((c_offset & ~0xf), PtrVT)); + if (VT == OpVT) + return DAG.getLoad(VT, LN->getChain(), ptrp, + LN->getSrcValue(), LN->getSrcValueOffset(), + LN->isVolatile(), 16); + else + return DAG.getExtLoad(ExtType, VT, LN->getChain(), ptrp, LN->getSrcValue(), + LN->getSrcValueOffset(), OpVT, + LN->isVolatile(), 16); + } + } else { + // Need to rotate... + if (c_rotamt < 0) + c_rotamt += 16; + // Realign the base pointer, with a D-Form address + if ((c_offset & ~0xf) != 0 || !isMemoryOperand(ptrp)) + basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, + ptrp, DAG.getConstant((c_offset & ~0xf), MVT::i32)); + else + basep = ptrp; + + // Rotate the load: + rot_op = DAG.getLoad(MVT::v16i8, the_chain, basep, + LN->getSrcValue(), LN->getSrcValueOffset(), + LN->isVolatile(), 16); + the_chain = rot_op.getValue(1); + rotamt = DAG.getConstant(c_rotamt, MVT::i16); + + SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other); + Ops[0] = the_chain; + Ops[1] = rot_op; + Ops[2] = rotamt; + + result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3); + the_chain = result.getValue(1); + + if (VT == OpVT || ExtType == ISD::EXTLOAD) { + SDVTList scalarvts; + Ops[0] = the_chain; + Ops[1] = result; + if (OpVT == VT) { + scalarvts = DAG.getVTList(VT, MVT::Other); + } else { + scalarvts = DAG.getVTList(OpVT, MVT::Other); + } + + result = DAG.getNode(ISD::BIT_CONVERT, (OpVT == VT ? vecVT : opVecVT), + result); + Ops[0] = the_chain; + Ops[1] = result; + result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2); + the_chain = result.getValue(1); + } else { + // Handle the sign and zero-extending loads for i1 and i8: + unsigned NewOpC; + + if (ExtType == ISD::SEXTLOAD) { + NewOpC = (OpVT == MVT::i1 + ? SPUISD::EXTRACT_I1_SEXT + : SPUISD::EXTRACT_I8_SEXT); + } else if (ExtType == ISD::ZEXTLOAD) { + NewOpC = (OpVT == MVT::i1 + ? SPUISD::EXTRACT_I1_ZEXT + : SPUISD::EXTRACT_I8_ZEXT); + } + + result = DAG.getNode(NewOpC, OpVT, result); + } + + SDVTList retvts = DAG.getVTList(OpVT, MVT::Other); + SDOperand retops[2] = { result, the_chain }; + + result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2); + return result; + /*UNREACHED*/ + } + } else { + // Misaligned 16-byte load: + if (basep.getOpcode() == ISD::LOAD) { + LN = cast<LoadSDNode>(basep); + if (LN->getAlignment() == 16) { + // We can verify that we're really loading from a 16-byte aligned + // chunk. 
Encapsulate basep as a D-Form address and return a new + // load: + basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, basep, + DAG.getConstant(0, PtrVT)); + if (OpVT == VT) + return DAG.getLoad(VT, LN->getChain(), basep, + LN->getSrcValue(), LN->getSrcValueOffset(), + LN->isVolatile(), 16); + else + return DAG.getExtLoad(ExtType, VT, LN->getChain(), basep, + LN->getSrcValue(), LN->getSrcValueOffset(), + OpVT, LN->isVolatile(), 16); + } + } + + // Catch all other cases where we can't guarantee that we have a + // 16-byte aligned entity, which means resorting to an X-form + // address scheme: + + SDOperand ZeroOffs = DAG.getConstant(0, PtrVT); + SDOperand loOp = DAG.getNode(SPUISD::Lo, VT, basep, ZeroOffs); + SDOperand hiOp = DAG.getNode(SPUISD::Hi, VT, basep, ZeroOffs); + + ptrp = DAG.getNode(ISD::ADD, PtrVT, loOp, hiOp); + + SDOperand alignLoad = + DAG.getLoad(opVecVT, LN->getChain(), ptrp, + LN->getSrcValue(), LN->getSrcValueOffset(), + LN->isVolatile(), 16); + + SDOperand insertEltOp = + DAG.getNode(SPUISD::INSERT_MASK, vecVT, ptrp); + + result = DAG.getNode(SPUISD::SHUFB, opVecVT, + alignLoad, + alignLoad, + DAG.getNode(ISD::BIT_CONVERT, opVecVT, insertEltOp)); + + result = DAG.getNode(SPUISD::EXTRACT_ELT0, OpVT, result); + + SDVTList retvts = DAG.getVTList(OpVT, MVT::Other); + SDOperand retops[2] = { result, the_chain }; + + result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2); + return result; + } + break; + } + case ISD::PRE_INC: + case ISD::PRE_DEC: + case ISD::POST_INC: + case ISD::POST_DEC: + case ISD::LAST_INDEXED_MODE: + cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than " + "UNINDEXED\n"; + cerr << (unsigned) LN->getAddressingMode() << "\n"; + abort(); + /*NOTREACHED*/ + } + + return SDOperand(); +} + +/// Custom lower stores for CellSPU +/*! + All CellSPU stores are aligned to 16-byte boundaries, so for elements + within a 16-byte block, we have to generate a shuffle to insert the + requested element into its place, then store the resulting block. + */ +static SDOperand +LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { + StoreSDNode *SN = cast<StoreSDNode>(Op); + SDOperand Value = SN->getValue(); + MVT::ValueType VT = Value.getValueType(); + MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getStoredVT()); + MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + SDOperand the_chain = SN->getChain(); + unsigned alignment = SN->getAlignment(); + const valtype_map_s *vtm = getValueTypeMapEntry(VT); + + switch (SN->getAddressingMode()) { + case ISD::UNINDEXED: { + SDOperand basep = SN->getBasePtr(); + SDOperand ptrOp; + int offset; + + if (basep.getOpcode() == ISD::ADD) { + const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1)); + assert(CN != NULL + && "LowerSTORE: ISD::ADD operand 1 is not constant"); + offset = unsigned(CN->getValue()); + ptrOp = basep.getOperand(0); + DEBUG(cerr << "LowerSTORE: StoreSDNode ISD:ADD offset = " + << offset + << "\n"); + } else { + ptrOp = basep; + offset = 0; + } + + // The vector type we really want to load from the 16-byte chunk, except + // in the case of MVT::i1, which has to be v16i8. + unsigned vecVT, stVecVT; + + if (StVT != MVT::i1) + stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT))); + else + stVecVT = MVT::v16i8; + vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT))); + + // Realign the pointer as a D-Form address (ptrOp is the pointer, + // to force a register load with the address; basep is the actual + // dform addr offs($reg). 
+ ptrOp = DAG.getNode(SPUISD::DFormAddr, PtrVT, ptrOp, + DAG.getConstant(0, PtrVT)); + basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, + ptrOp, DAG.getConstant((offset & ~0xf), PtrVT)); + + // Create the 16-byte aligned vector load + SDOperand alignLoad = + DAG.getLoad(vecVT, the_chain, basep, + SN->getSrcValue(), SN->getSrcValueOffset(), + SN->isVolatile(), 16); + the_chain = alignLoad.getValue(1); + + LoadSDNode *LN = cast<LoadSDNode>(alignLoad); + SDOperand theValue = SN->getValue(); + SDOperand result; + + if (StVT != VT + && (theValue.getOpcode() == ISD::AssertZext + || theValue.getOpcode() == ISD::AssertSext)) { + // Drill down and get the value for zero- and sign-extended + // quantities + theValue = theValue.getOperand(0); + } + + SDOperand insertEltOp = + DAG.getNode(SPUISD::INSERT_MASK, stVecVT, + DAG.getNode(SPUISD::DFormAddr, PtrVT, + ptrOp, + DAG.getConstant((offset & 0xf), PtrVT))); + + result = DAG.getNode(SPUISD::SHUFB, vecVT, + DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue), + alignLoad, + DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp)); + + result = DAG.getStore(the_chain, result, basep, + LN->getSrcValue(), LN->getSrcValueOffset(), + LN->isVolatile(), LN->getAlignment()); + + return result; + /*UNREACHED*/ + } + case ISD::PRE_INC: + case ISD::PRE_DEC: + case ISD::POST_INC: + case ISD::POST_DEC: + case ISD::LAST_INDEXED_MODE: + cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than " + "UNINDEXED\n"; + cerr << (unsigned) SN->getAddressingMode() << "\n"; + abort(); + /*NOTREACHED*/ + } + + return SDOperand(); +} + +/// Generate the address of a constant pool entry. +static SDOperand +LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { + MVT::ValueType PtrVT = Op.getValueType(); + ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); + Constant *C = CP->getConstVal(); + SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment()); + const TargetMachine &TM = DAG.getTarget(); + SDOperand Zero = DAG.getConstant(0, PtrVT); + + if (TM.getRelocationModel() == Reloc::Static) { + if (!ST->usingLargeMem()) { + // Just return the SDOperand with the constant pool address in it. + return CPI; + } else { + // Generate hi/lo address pair + SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero); + SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero); + + return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi); + } + } + + assert(0 && + "LowerConstantPool: Relocation model other than static not supported."); + return SDOperand(); +} + +static SDOperand +LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { + MVT::ValueType PtrVT = Op.getValueType(); + JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); + SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); + SDOperand Zero = DAG.getConstant(0, PtrVT); + const TargetMachine &TM = DAG.getTarget(); + + if (TM.getRelocationModel() == Reloc::Static) { + if (!ST->usingLargeMem()) { + // Just return the SDOperand with the jump table address in it. 
+ return JTI; + } else { + // Generate hi/lo address pair + SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero); + SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero); + + return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi); + } + } + + assert(0 && + "LowerJumpTable: Relocation model other than static not supported."); + return SDOperand(); +} + +static SDOperand +LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { + MVT::ValueType PtrVT = Op.getValueType(); + GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); + GlobalValue *GV = GSDN->getGlobal(); + SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset()); + SDOperand Zero = DAG.getConstant(0, PtrVT); + const TargetMachine &TM = DAG.getTarget(); + + if (TM.getRelocationModel() == Reloc::Static) { + if (!ST->usingLargeMem()) { + // Generate a local store address + return GA; + } else { + // Generate hi/lo address pair + SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero); + SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero); + + return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi); + } + } else { + cerr << "LowerGlobalAddress: Relocation model other than static not " + << "supported.\n"; + abort(); + /*NOTREACHED*/ + } + + return SDOperand(); +} + +//! Custom lower i64 integer constants +/*! + This code inserts all of the necessary juggling that needs to occur to load + a 64-bit constant into a register. + */ +static SDOperand +LowerConstant(SDOperand Op, SelectionDAG &DAG) { + unsigned VT = Op.getValueType(); + ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val); + + if (VT == MVT::i64) { + SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64); + return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, + DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T)); + + } else { + cerr << "LowerConstant: unhandled constant type " + << MVT::getValueTypeString(VT) + << "\n"; + abort(); + /*NOTREACHED*/ + } + + return SDOperand(); +} + +//! Custom lower single precision floating point constants +/*! + "float" immediates can be lowered as if they were unsigned 32-bit integers. + The SPUISD::SFPConstant pseudo-instruction handles this in the instruction + target description. 
+ */ +static SDOperand +LowerConstantFP(SDOperand Op, SelectionDAG &DAG) { + unsigned VT = Op.getValueType(); + ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val); + + assert((FP != 0) && + "LowerConstantFP: Node is not ConstantFPSDNode"); + + const APFloat &apf = FP->getValueAPF(); + + if (VT == MVT::f32) { + return DAG.getNode(SPUISD::SFPConstant, VT, + DAG.getTargetConstantFP(apf.convertToFloat(), VT)); + } else if (VT == MVT::f64) { + uint64_t dbits = DoubleToBits(apf.convertToDouble()); + return DAG.getNode(ISD::BIT_CONVERT, VT, + LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG)); + } + + return SDOperand(); +} + +static SDOperand +LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex) +{ + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + SSARegMap *RegMap = MF.getSSARegMap(); + SmallVector<SDOperand, 8> ArgValues; + SDOperand Root = Op.getOperand(0); + bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; + + const unsigned *ArgRegs = SPURegisterInfo::getArgRegs(); + const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs(); + + unsigned ArgOffset = SPUFrameInfo::minStackSize(); + unsigned ArgRegIdx = 0; + unsigned StackSlotSize = SPUFrameInfo::stackSlotSize(); + + MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + + // Add DAG nodes to load the arguments or copy them out of registers. + for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) { + SDOperand ArgVal; + bool needsLoad = false; + MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType(); + unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8; + + switch (ObjectVT) { + default: { + cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: " + << MVT::getValueTypeString(ObjectVT) + << "\n"; + abort(); + } + case MVT::i8: + if (!isVarArg && ArgRegIdx < NumArgRegs) { + unsigned VReg = RegMap->createVirtualRegister(&SPU::R16CRegClass); + MF.addLiveIn(ArgRegs[ArgRegIdx], VReg); + ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8); + ++ArgRegIdx; + } else { + needsLoad = true; + } + break; + case MVT::i16: + if (!isVarArg && ArgRegIdx < NumArgRegs) { + unsigned VReg = RegMap->createVirtualRegister(&SPU::R16CRegClass); + MF.addLiveIn(ArgRegs[ArgRegIdx], VReg); + ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16); + ++ArgRegIdx; + } else { + needsLoad = true; + } + break; + case MVT::i32: + if (!isVarArg && ArgRegIdx < NumArgRegs) { + unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass); + MF.addLiveIn(ArgRegs[ArgRegIdx], VReg); + ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32); + ++ArgRegIdx; + } else { + needsLoad = true; + } + break; + case MVT::i64: + if (!isVarArg && ArgRegIdx < NumArgRegs) { + unsigned VReg = RegMap->createVirtualRegister(&SPU::R64CRegClass); + MF.addLiveIn(ArgRegs[ArgRegIdx], VReg); + ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64); + ++ArgRegIdx; + } else { + needsLoad = true; + } + break; + case MVT::f32: + if (!isVarArg && ArgRegIdx < NumArgRegs) { + unsigned VReg = RegMap->createVirtualRegister(&SPU::R32FPRegClass); + MF.addLiveIn(ArgRegs[ArgRegIdx], VReg); + ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32); + ++ArgRegIdx; + } else { + needsLoad = true; + } + break; + case MVT::f64: + if (!isVarArg && ArgRegIdx < NumArgRegs) { + unsigned VReg = RegMap->createVirtualRegister(&SPU::R64FPRegClass); + MF.addLiveIn(ArgRegs[ArgRegIdx], VReg); + ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64); + ++ArgRegIdx; + } else { + needsLoad = true; + } + break; + case 
MVT::v2f64: + case MVT::v4f32: + case MVT::v4i32: + case MVT::v8i16: + case MVT::v16i8: + if (!isVarArg && ArgRegIdx < NumArgRegs) { + unsigned VReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass); + MF.addLiveIn(ArgRegs[ArgRegIdx], VReg); + ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT); + ++ArgRegIdx; + } else { + needsLoad = true; + } + break; + } + + // We need to load the argument to a virtual register if we determined above + // that we ran out of physical registers of the appropriate type + if (needsLoad) { + // If the argument is actually used, emit a load from the right stack + // slot. + if (!Op.Val->hasNUsesOfValue(0, ArgNo)) { + int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); + SDOperand FIN = DAG.getFrameIndex(FI, PtrVT); + ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0); + } else { + // Don't emit a dead load. + ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT); + } + + ArgOffset += StackSlotSize; + } + + ArgValues.push_back(ArgVal); + } + + // If the function takes variable number of arguments, make a frame index for + // the start of the first vararg value... for expansion of llvm.va_start. + if (isVarArg) { + VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8, + ArgOffset); + SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); + // If this function is vararg, store any remaining integer argument regs to + // their spots on the stack so that they may be loaded by deferencing the + // result of va_next. + SmallVector<SDOperand, 8> MemOps; + for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) { + unsigned VReg = RegMap->createVirtualRegister(&SPU::GPRCRegClass); + MF.addLiveIn(ArgRegs[ArgRegIdx], VReg); + SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT); + SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0); + MemOps.push_back(Store); + // Increment the address by four for the next argument to store + SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT); + FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff); + } + if (!MemOps.empty()) + Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size()); + } + + ArgValues.push_back(Root); + + // Return the new list of results. + std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(), + Op.Val->value_end()); + return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size()); +} + +/// isLSAAddress - Return the immediate to use if the specified +/// value is representable as a LSA address. +static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); + if (!C) return 0; + + int Addr = C->getValue(); + if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. + (Addr << 14 >> 14) != Addr) + return 0; // Top 14 bits have to be sext of immediate. 
+ + return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val; +} + +static +SDOperand +LowerCALL(SDOperand Op, SelectionDAG &DAG) { + SDOperand Chain = Op.getOperand(0); +#if 0 + bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; + bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0; +#endif + SDOperand Callee = Op.getOperand(4); + unsigned NumOps = (Op.getNumOperands() - 5) / 2; + unsigned StackSlotSize = SPUFrameInfo::stackSlotSize(); + const unsigned *ArgRegs = SPURegisterInfo::getArgRegs(); + const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs(); + + // Handy pointer type + MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + + // Accumulate how many bytes are to be pushed on the stack, including the + // linkage area, and parameter passing area. According to the SPU ABI, + // we minimally need space for [LR] and [SP] + unsigned NumStackBytes = SPUFrameInfo::minStackSize(); + + // Set up a copy of the stack pointer for use loading and storing any + // arguments that may not fit in the registers available for argument + // passing. + SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32); + + // Figure out which arguments are going to go in registers, and which in + // memory. + unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR] + unsigned ArgRegIdx = 0; + + // Keep track of registers passing arguments + std::vector<std::pair<unsigned, SDOperand> > RegsToPass; + // And the arguments passed on the stack + SmallVector<SDOperand, 8> MemOpChains; + + for (unsigned i = 0; i != NumOps; ++i) { + SDOperand Arg = Op.getOperand(5+2*i); + + // PtrOff will be used to store the current argument to the stack if a + // register cannot be found for it. + SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); + PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff); + + switch (Arg.getValueType()) { + default: assert(0 && "Unexpected ValueType for argument!"); + case MVT::i32: + case MVT::i64: + case MVT::i128: + if (ArgRegIdx != NumArgRegs) { + RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); + } else { + MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); + ArgOffset += StackSlotSize; + } + break; + case MVT::f32: + case MVT::f64: + if (ArgRegIdx != NumArgRegs) { + RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); + } else { + MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); + ArgOffset += StackSlotSize; + } + break; + case MVT::v4f32: + case MVT::v4i32: + case MVT::v8i16: + case MVT::v16i8: + if (ArgRegIdx != NumArgRegs) { + RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); + } else { + MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); + ArgOffset += StackSlotSize; + } + break; + } + } + + // Update number of stack bytes actually used, insert a call sequence start + NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize()); + Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT)); + + if (!MemOpChains.empty()) { + // Adjust the stack pointer for the stack arguments. + Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, + &MemOpChains[0], MemOpChains.size()); + } + + // Build a sequence of copy-to-reg nodes chained together with token chain + // and flag operands which copy the outgoing args into the appropriate regs. 
+ SDOperand InFlag; + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second, + InFlag); + InFlag = Chain.getValue(1); + } + + std::vector<MVT::ValueType> NodeTys; + NodeTys.push_back(MVT::Other); // Returns a chain + NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. + + SmallVector<SDOperand, 8> Ops; + unsigned CallOpc = SPUISD::CALL; + + // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every + // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol + // node so that legalize doesn't hack it. + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + GlobalValue *GV = G->getGlobal(); + unsigned CalleeVT = Callee.getValueType(); + + // Turn calls to targets that are defined (i.e., have bodies) into BRSL + // style calls, otherwise, external symbols are BRASL calls. + // NOTE: + // This may be an unsafe assumption for JIT and really large compilation + // units. + if (GV->isDeclaration()) { + Callee = DAG.getGlobalAddress(GV, CalleeVT); + } else { + Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, + DAG.getTargetGlobalAddress(GV, CalleeVT), + DAG.getConstant(0, PtrVT)); + } + } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) + Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType()); + else if (SDNode *Dest = isLSAAddress(Callee, DAG)) + // If this is an absolute destination address that appears to be a legal + // local store address, use the munged value. + Callee = SDOperand(Dest, 0); + + Ops.push_back(Chain); + Ops.push_back(Callee); + + // Add argument registers to the end of the list so that they are known live + // into the call. + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) + Ops.push_back(DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + + if (InFlag.Val) + Ops.push_back(InFlag); + Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size()); + InFlag = Chain.getValue(1); + + SDOperand ResultVals[3]; + unsigned NumResults = 0; + NodeTys.clear(); + + // If the call has results, copy the values out of the ret val registers. 
+ switch (Op.Val->getValueType(0)) { + default: assert(0 && "Unexpected ret value!"); + case MVT::Other: break; + case MVT::i32: + if (Op.Val->getValueType(1) == MVT::i32) { + Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1); + ResultVals[0] = Chain.getValue(0); + Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, + Chain.getValue(2)).getValue(1); + ResultVals[1] = Chain.getValue(0); + NumResults = 2; + NodeTys.push_back(MVT::i32); + } else { + Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1); + ResultVals[0] = Chain.getValue(0); + NumResults = 1; + } + NodeTys.push_back(MVT::i32); + break; + case MVT::i64: + Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1); + ResultVals[0] = Chain.getValue(0); + NumResults = 1; + NodeTys.push_back(MVT::i64); + break; + case MVT::f32: + case MVT::f64: + Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0), + InFlag).getValue(1); + ResultVals[0] = Chain.getValue(0); + NumResults = 1; + NodeTys.push_back(Op.Val->getValueType(0)); + break; + case MVT::v2f64: + case MVT::v4f32: + case MVT::v4i32: + case MVT::v8i16: + case MVT::v16i8: + Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0), + InFlag).getValue(1); + ResultVals[0] = Chain.getValue(0); + NumResults = 1; + NodeTys.push_back(Op.Val->getValueType(0)); + break; + } + + Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain, + DAG.getConstant(NumStackBytes, PtrVT)); + NodeTys.push_back(MVT::Other); + + // If the function returns void, just return the chain. + if (NumResults == 0) + return Chain; + + // Otherwise, merge everything together with a MERGE_VALUES node. + ResultVals[NumResults++] = Chain; + SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys, + ResultVals, NumResults); + return Res.getValue(Op.ResNo); +} + +static SDOperand +LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) { + SmallVector<CCValAssign, 16> RVLocs; + unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv(); + bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); + CCState CCInfo(CC, isVarArg, TM, RVLocs); + CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU); + + // If this is the first return lowered for this function, add the regs to the + // liveout set for the function. + if (DAG.getMachineFunction().liveout_empty()) { + for (unsigned i = 0; i != RVLocs.size(); ++i) + DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg()); + } + + SDOperand Chain = Op.getOperand(0); + SDOperand Flag; + + // Copy the result values into the output registers. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign &VA = RVLocs[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); + Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag); + Flag = Chain.getValue(1); + } + + if (Flag.Val) + return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag); + else + return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain); +} + + +//===----------------------------------------------------------------------===// +// Vector related lowering: +//===----------------------------------------------------------------------===// + +static ConstantSDNode * +getVecImm(SDNode *N) { + SDOperand OpVal(0, 0); + + // Check to see if this buildvec has a single non-undef value in its elements. 
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; + if (OpVal.Val == 0) + OpVal = N->getOperand(i); + else if (OpVal != N->getOperand(i)) + return 0; + } + + if (OpVal.Val != 0) { + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) { + return CN; + } + } + + return 0; // All UNDEF: use implicit def.; not Constant node +} + +/// get_vec_i18imm - Test if this vector is a vector filled with the same value +/// and the value fits into an unsigned 18-bit constant, and if so, return the +/// constant +SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG, + MVT::ValueType ValueType) { + if (ConstantSDNode *CN = getVecImm(N)) { + uint64_t Value = CN->getValue(); + if (Value <= 0x3ffff) + return DAG.getConstant(Value, ValueType); + } + + return SDOperand(); +} + +/// get_vec_i16imm - Test if this vector is a vector filled with the same value +/// and the value fits into a signed 16-bit constant, and if so, return the +/// constant +SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG, + MVT::ValueType ValueType) { + if (ConstantSDNode *CN = getVecImm(N)) { + if (ValueType == MVT::i32) { + int Value = (int) CN->getValue(); + int SExtValue = ((Value & 0xffff) << 16) >> 16; + + if (Value == SExtValue) + return DAG.getConstant(Value, ValueType); + } else if (ValueType == MVT::i16) { + short Value = (short) CN->getValue(); + int SExtValue = ((int) Value << 16) >> 16; + + if (Value == (short) SExtValue) + return DAG.getConstant(Value, ValueType); + } else if (ValueType == MVT::i64) { + int64_t Value = CN->getValue(); + int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16); + + if (Value == SExtValue) + return DAG.getConstant(Value, ValueType); + } + } + + return SDOperand(); +} + +/// get_vec_i10imm - Test if this vector is a vector filled with the same value +/// and the value fits into a signed 10-bit constant, and if so, return the +/// constant +SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG, + MVT::ValueType ValueType) { + if (ConstantSDNode *CN = getVecImm(N)) { + int Value = (int) CN->getValue(); + if ((ValueType == MVT::i32 && isS10Constant(Value)) + || (ValueType == MVT::i16 && isS10Constant((short) Value))) + return DAG.getConstant(Value, ValueType); + } + + return SDOperand(); +} + +/// get_vec_i8imm - Test if this vector is a vector filled with the same value +/// and the value fits into a signed 8-bit constant, and if so, return the +/// constant. +/// +/// @note: The incoming vector is v16i8 because that's the only way we can load +/// constant vectors. Thus, we test to see if the upper and lower bytes are the +/// same value. 
+SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG, + MVT::ValueType ValueType) { + if (ConstantSDNode *CN = getVecImm(N)) { + int Value = (int) CN->getValue(); + if (ValueType == MVT::i16 + && Value <= 0xffff /* truncated from uint64_t */ + && ((short) Value >> 8) == ((short) Value & 0xff)) + return DAG.getConstant(Value & 0xff, ValueType); + else if (ValueType == MVT::i8 + && (Value & 0xff) == Value) + return DAG.getConstant(Value, ValueType); + } + + return SDOperand(); +} + +/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value +/// and the value fits into a signed 16-bit constant, and if so, return the +/// constant +SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG, + MVT::ValueType ValueType) { + if (ConstantSDNode *CN = getVecImm(N)) { + uint64_t Value = CN->getValue(); + if ((ValueType == MVT::i32 + && ((unsigned) Value & 0xffff0000) == (unsigned) Value) + || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value)) + return DAG.getConstant(Value >> 16, ValueType); + } + + return SDOperand(); +} + +/// get_v4i32_imm - Catch-all for general 32-bit constant vectors +SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) { + if (ConstantSDNode *CN = getVecImm(N)) { + return DAG.getConstant((unsigned) CN->getValue(), MVT::i32); + } + + return SDOperand(); +} + +/// get_v4i32_imm - Catch-all for general 64-bit constant vectors +SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) { + if (ConstantSDNode *CN = getVecImm(N)) { + return DAG.getConstant((unsigned) CN->getValue(), MVT::i64); + } + + return SDOperand(); +} + +// If this is a vector of constants or undefs, get the bits. A bit in +// UndefBits is set if the corresponding element of the vector is an +// ISD::UNDEF value. For undefs, the corresponding VectorBits values are +// zero. Return true if this is not an array of constants, false if it is. +// +static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2], + uint64_t UndefBits[2]) { + // Start with zero'd results. + VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0; + + unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType()); + for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { + SDOperand OpVal = BV->getOperand(i); + + unsigned PartNo = i >= e/2; // In the upper 128 bits? + unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t. + + uint64_t EltBits = 0; + if (OpVal.getOpcode() == ISD::UNDEF) { + uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize); + UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize); + continue; + } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) { + EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize)); + } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) { + const APFloat &apf = CN->getValueAPF(); + EltBits = (CN->getValueType(0) == MVT::f32 + ? FloatToBits(apf.convertToFloat()) + : DoubleToBits(apf.convertToDouble())); + } else { + // Nonconstant element. + return true; + } + + VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize); + } + + //printf("%llx %llx %llx %llx\n", + // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]); + return false; +} + +/// If this is a splat (repetition) of a value across the whole vector, return +/// the smallest size that splats it. For example, "0x01010101010101..." is a +/// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and +/// SplatSize = 1 byte. 
+static bool isConstantSplat(const uint64_t Bits128[2], + const uint64_t Undef128[2], + int MinSplatBits, + uint64_t &SplatBits, uint64_t &SplatUndef, + int &SplatSize) { + // Don't let undefs prevent splats from matching. See if the top 64-bits are + // the same as the lower 64-bits, ignoring undefs. + uint64_t Bits64 = Bits128[0] | Bits128[1]; + uint64_t Undef64 = Undef128[0] & Undef128[1]; + uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32); + uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32); + uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16); + uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16); + + if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) { + if (MinSplatBits < 64) { + + // Check that the top 32-bits are the same as the lower 32-bits, ignoring + // undefs. + if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) { + if (MinSplatBits < 32) { + + // If the top 16-bits are different than the lower 16-bits, ignoring + // undefs, we have an i32 splat. + if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) { + if (MinSplatBits < 16) { + // If the top 8-bits are different than the lower 8-bits, ignoring + // undefs, we have an i16 splat. + if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) { + // Otherwise, we have an 8-bit splat. + SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8); + SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8); + SplatSize = 1; + return true; + } + } else { + SplatBits = Bits16; + SplatUndef = Undef16; + SplatSize = 2; + return true; + } + } + } else { + SplatBits = Bits32; + SplatUndef = Undef32; + SplatSize = 4; + return true; + } + } + } else { + SplatBits = Bits128[0]; + SplatUndef = Undef128[0]; + SplatSize = 8; + return true; + } + } + + return false; // Can't be a splat if two pieces don't match. +} + +// If this is a case we can't handle, return null and let the default +// expansion code take care of it. If we CAN select this case, and if it +// selects to a single instruction, return Op. Otherwise, if we can codegen +// this case more efficiently than a constant pool load, lower it to the +// sequence of ops that should be used. +static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) { + MVT::ValueType VT = Op.getValueType(); + // If this is a vector of constants or undefs, get the bits. A bit in + // UndefBits is set if the corresponding element of the vector is an + // ISD::UNDEF value. For undefs, the corresponding VectorBits values are + // zero. + uint64_t VectorBits[2]; + uint64_t UndefBits[2]; + uint64_t SplatBits, SplatUndef; + int SplatSize; + if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits) + || !isConstantSplat(VectorBits, UndefBits, + MVT::getSizeInBits(MVT::getVectorElementType(VT)), + SplatBits, SplatUndef, SplatSize)) + return SDOperand(); // Not a constant vector, not a splat. + + switch (VT) { + default: + case MVT::v4f32: { + uint32_t Value32 = SplatBits; + assert(SplatSize == 4 + && "LowerBUILD_VECTOR: Unexpected floating point vector element."); + // NOTE: pretend the constant is an integer. LLVM won't load FP constants + SDOperand T = DAG.getConstant(Value32, MVT::i32); + return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, + DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T)); + break; + } + case MVT::v2f64: { + uint64_t f64val = SplatBits; + assert(SplatSize == 8 + && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size."); + // NOTE: pretend the constant is an integer. 
LLVM won't load FP constants + SDOperand T = DAG.getConstant(f64val, MVT::i64); + return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, + DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T)); + break; + } + case MVT::v16i8: { + // 8-bit constants have to be expanded to 16-bits + unsigned short Value16 = SplatBits | (SplatBits << 8); + SDOperand Ops[8]; + for (int i = 0; i < 8; ++i) + Ops[i] = DAG.getConstant(Value16, MVT::i16); + return DAG.getNode(ISD::BIT_CONVERT, VT, + DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8)); + } + case MVT::v8i16: { + unsigned short Value16; + if (SplatSize == 2) + Value16 = (unsigned short) (SplatBits & 0xffff); + else + Value16 = (unsigned short) (SplatBits | (SplatBits << 8)); + SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT)); + SDOperand Ops[8]; + for (int i = 0; i < 8; ++i) Ops[i] = T; + return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8); + } + case MVT::v4i32: { + unsigned int Value = SplatBits; + SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT)); + return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T); + } + case MVT::v2i64: { + uint64_t val = SplatBits; + uint32_t upper = uint32_t(val >> 32); + uint32_t lower = uint32_t(val); + + if (val != 0) { + SDOperand LO32; + SDOperand HI32; + SmallVector<SDOperand, 16> ShufBytes; + SDOperand Result; + bool upper_special, lower_special; + + // NOTE: This code creates common-case shuffle masks that can be easily + // detected as common expressions. It is not attempting to create highly + // specialized masks to replace any and all 0's, 0xff's and 0x80's. + + // Detect if the upper or lower half is a special shuffle mask pattern: + upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000); + lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000); + + // Create lower vector if not a special pattern + if (!lower_special) { + SDOperand LO32C = DAG.getConstant(lower, MVT::i32); + LO32 = DAG.getNode(ISD::BIT_CONVERT, VT, + DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, + LO32C, LO32C, LO32C, LO32C)); + } + + // Create upper vector if not a special pattern + if (!upper_special) { + SDOperand HI32C = DAG.getConstant(upper, MVT::i32); + HI32 = DAG.getNode(ISD::BIT_CONVERT, VT, + DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, + HI32C, HI32C, HI32C, HI32C)); + } + + // If either upper or lower are special, then the two input operands are + // the same (basically, one of them is a "don't care") + if (lower_special) + LO32 = HI32; + if (upper_special) + HI32 = LO32; + if (lower_special && upper_special) { + // Unhappy situation... both upper and lower are special, so punt with + // a target constant: + SDOperand Zero = DAG.getConstant(0, MVT::i32); + HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero, + Zero, Zero); + } + + for (int i = 0; i < 4; ++i) { + for (int j = 0; j < 4; ++j) { + SDOperand V; + bool process_upper, process_lower; + uint64_t val; + + process_upper = (upper_special && (i & 1) == 0); + process_lower = (lower_special && (i & 1) == 1); + + if (process_upper || process_lower) { + if ((process_upper && upper == 0) + || (process_lower && lower == 0)) + val = 0x80; + else if ((process_upper && upper == 0xffffffff) + || (process_lower && lower == 0xffffffff)) + val = 0xc0; + else if ((process_upper && upper == 0x80000000) + || (process_lower && lower == 0x80000000)) + val = (j == 0 ? 
0xe0 : 0x80);
+ } else
+ val = i * 4 + j + ((i & 1) * 16);
+
+ ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
+ }
+ }
+
+ return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
+ DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
+ &ShufBytes[0], ShufBytes.size()));
+ } else {
+ // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
+ SDOperand Zero = DAG.getConstant(0, MVT::i32);
+ return DAG.getNode(ISD::BIT_CONVERT, VT,
+ DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+ Zero, Zero, Zero, Zero));
+ }
+ }
+ }
+
+ return SDOperand();
+}
+
+/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
+/// which the Cell can operate. The code inspects V3 to ascertain whether the
+/// permutation vector is monotonically increasing with one "exception"
+/// element, e.g., (0, 1, _, 3). If this is the case, then generate an
+/// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
+/// In either case, the net result will eventually invoke SHUFB to
+/// permute/shuffle the bytes from V1 and V2.
+/// \note
+/// INSERT_MASK is eventually selected as one of the C*D instructions, which
+/// generate the control word for byte/halfword/word insertion. This takes care
+/// of a single element move from V2 into V1.
+/// \note
+/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
+static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
+ SDOperand V1 = Op.getOperand(0);
+ SDOperand V2 = Op.getOperand(1);
+ SDOperand PermMask = Op.getOperand(2);
+
+ if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
+
+ // If we have a single element being moved from V2 into V1, this can be handled
+ // using the C*[DX] compute mask instructions, but the vector elements have
+ // to be monotonically increasing with one exception element.
+ MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
+ unsigned EltsFromV2 = 0;
+ unsigned V2Elt = 0;
+ unsigned V2EltIdx0 = 0;
+ unsigned CurrElt = 0;
+ bool monotonic = true;
+ if (EltVT == MVT::i8)
+ V2EltIdx0 = 16;
+ else if (EltVT == MVT::i16)
+ V2EltIdx0 = 8;
+ else if (EltVT == MVT::i32)
+ V2EltIdx0 = 4;
+ else
+ assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
+
+ for (unsigned i = 0, e = PermMask.getNumOperands();
+ EltsFromV2 <= 1 && monotonic && i != e;
+ ++i) {
+ unsigned SrcElt;
+ if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
+ SrcElt = 0;
+ else
+ SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
+
+ if (SrcElt >= V2EltIdx0) {
+ ++EltsFromV2;
+ V2Elt = (V2EltIdx0 - SrcElt) << 2;
+ } else if (CurrElt != SrcElt) {
+ monotonic = false;
+ }
+
+ ++CurrElt;
+ }
+
+ if (EltsFromV2 == 1 && monotonic) {
+ // Compute mask and shuffle
+ MachineFunction &MF = DAG.getMachineFunction();
+ SSARegMap *RegMap = MF.getSSARegMap();
+ unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
+ MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ // Initialize temporary register to 0
+ SDOperand InitTempReg =
+ DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
+ // Copy register's contents as index in INSERT_MASK:
+ SDOperand ShufMaskOp =
+ DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
+ DAG.getTargetConstant(V2Elt, MVT::i32),
+ DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
+ // Use shuffle mask in SHUFB synthetic instruction:
+ return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
+ } else {
+ // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
+ unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8; + + SmallVector<SDOperand, 16> ResultMask; + for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) { + unsigned SrcElt; + if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF) + SrcElt = 0; + else + SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue(); + + for (unsigned j = 0; j != BytesPerElement; ++j) { + ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, + MVT::i8)); + } + } + + SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, + &ResultMask[0], ResultMask.size()); + return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask); + } +} + +static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) { + SDOperand Op0 = Op.getOperand(0); // Op0 = the scalar + + if (Op0.Val->getOpcode() == ISD::Constant) { + // For a constant, build the appropriate constant vector, which will + // eventually simplify to a vector register load. + + ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val); + SmallVector<SDOperand, 16> ConstVecValues; + MVT::ValueType VT; + size_t n_copies; + + // Create a constant vector: + switch (Op.getValueType()) { + default: assert(0 && "Unexpected constant value type in " + "LowerSCALAR_TO_VECTOR"); + case MVT::v16i8: n_copies = 16; VT = MVT::i8; break; + case MVT::v8i16: n_copies = 8; VT = MVT::i16; break; + case MVT::v4i32: n_copies = 4; VT = MVT::i32; break; + case MVT::v4f32: n_copies = 4; VT = MVT::f32; break; + case MVT::v2i64: n_copies = 2; VT = MVT::i64; break; + case MVT::v2f64: n_copies = 2; VT = MVT::f64; break; + } + + SDOperand CValue = DAG.getConstant(CN->getValue(), VT); + for (size_t j = 0; j < n_copies; ++j) + ConstVecValues.push_back(CValue); + + return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(), + &ConstVecValues[0], ConstVecValues.size()); + } else { + // Otherwise, copy the value from one register to another: + switch (Op0.getValueType()) { + default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR"); + case MVT::i8: + case MVT::i16: + case MVT::i32: + case MVT::i64: + case MVT::f32: + case MVT::f64: + return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0); + } + } + + return SDOperand(); +} + +static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) { + switch (Op.getValueType()) { + case MVT::v4i32: { + SDOperand rA = Op.getOperand(0); + SDOperand rB = Op.getOperand(1); + SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB); + SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA); + SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB); + SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1); + + return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2); + break; + } + + // Multiply two v8i16 vectors (pipeline friendly version): + // a) multiply lower halves, mask off upper 16-bit of 32-bit product + // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes) + // c) Use SELB to select upper and lower halves from the intermediate results + // + // NOTE: We really want to move the FSMBI to earlier to actually get the + // dual-issue. 
This code does manage to do this, even if it's a little on + // the wacky side + case MVT::v8i16: { + MachineFunction &MF = DAG.getMachineFunction(); + SSARegMap *RegMap = MF.getSSARegMap(); + SDOperand Chain = Op.getOperand(0); + SDOperand rA = Op.getOperand(0); + SDOperand rB = Op.getOperand(1); + unsigned FSMBIreg = RegMap->createVirtualRegister(&SPU::VECREGRegClass); + unsigned HiProdReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass); + + SDOperand FSMBOp = + DAG.getCopyToReg(Chain, FSMBIreg, + DAG.getNode(SPUISD::FSMBI, MVT::v8i16, + DAG.getConstant(0xcccc, MVT::i32))); + + SDOperand HHProd = + DAG.getCopyToReg(FSMBOp, HiProdReg, + DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB)); + + SDOperand HHProd_v4i32 = + DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, + DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32)); + + return DAG.getNode(SPUISD::SELB, MVT::v8i16, + DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB), + DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), + DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, + HHProd_v4i32, + DAG.getConstant(16, MVT::i16))), + DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32)); + } + + // This M00sE is N@stI! (apologies to Monty Python) + // + // SPU doesn't know how to do any 8-bit multiplication, so the solution + // is to break it all apart, sign extend, and reassemble the various + // intermediate products. + case MVT::v16i8: { + MachineFunction &MF = DAG.getMachineFunction(); + SSARegMap *RegMap = MF.getSSARegMap(); + SDOperand Chain = Op.getOperand(0); + SDOperand rA = Op.getOperand(0); + SDOperand rB = Op.getOperand(1); + SDOperand c8 = DAG.getConstant(8, MVT::i8); + SDOperand c16 = DAG.getConstant(16, MVT::i8); + + unsigned FSMBreg_2222 = RegMap->createVirtualRegister(&SPU::VECREGRegClass); + unsigned LoProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass); + unsigned HiProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass); + + SDOperand LLProd = + DAG.getNode(SPUISD::MPY, MVT::v8i16, + DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA), + DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB)); + + SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8); + + SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8); + + SDOperand LHProd = + DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, + DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8); + + SDOperand FSMBdef_2222 = + DAG.getCopyToReg(Chain, FSMBreg_2222, + DAG.getNode(SPUISD::FSMBI, MVT::v8i16, + DAG.getConstant(0x2222, MVT::i32))); + + SDOperand FSMBuse_2222 = + DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32); + + SDOperand LoProd_1 = + DAG.getCopyToReg(Chain, LoProd_reg, + DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd, + FSMBuse_2222)); + + SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32); + + SDOperand LoProd = + DAG.getNode(ISD::AND, MVT::v4i32, + DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32), + DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, + LoProdMask, LoProdMask, + LoProdMask, LoProdMask)); + + SDOperand rAH = + DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, + DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16); + + SDOperand rBH = + DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, + DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16); + + SDOperand HLProd = + DAG.getNode(SPUISD::MPY, MVT::v8i16, + DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH), + DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH)); + + SDOperand HHProd_1 = + DAG.getNode(SPUISD::MPY, MVT::v8i16, + DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, + DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)), + 
DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, + DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8))); + + SDOperand HHProd = + DAG.getCopyToReg(Chain, HiProd_reg, + DAG.getNode(SPUISD::SELB, MVT::v8i16, + HLProd, + DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8), + FSMBuse_2222)); + + SDOperand HiProd = + DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, + DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16); + + return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, + DAG.getNode(ISD::OR, MVT::v4i32, + LoProd, HiProd)); + } + + default: + cerr << "CellSPU: Unknown vector multiplication, got " + << MVT::getValueTypeString(Op.getValueType()) + << "\n"; + abort(); + /*NOTREACHED*/ + } + + return SDOperand(); +} + +static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) { + MachineFunction &MF = DAG.getMachineFunction(); + SSARegMap *RegMap = MF.getSSARegMap(); + + SDOperand A = Op.getOperand(0); + SDOperand B = Op.getOperand(1); + unsigned VT = Op.getValueType(); + + unsigned VRegBR, VRegC; + + if (VT == MVT::f32) { + VRegBR = RegMap->createVirtualRegister(&SPU::R32FPRegClass); + VRegC = RegMap->createVirtualRegister(&SPU::R32FPRegClass); + } else { + VRegBR = RegMap->createVirtualRegister(&SPU::VECREGRegClass); + VRegC = RegMap->createVirtualRegister(&SPU::VECREGRegClass); + } + // TODO: make sure we're feeding FPInterp the right arguments + // Right now: fi B, frest(B) + + // Computes BRcpl = + // (Floating Interpolate (FP Reciprocal Estimate B)) + SDOperand BRcpl = + DAG.getCopyToReg(DAG.getEntryNode(), VRegBR, + DAG.getNode(SPUISD::FPInterp, VT, B, + DAG.getNode(SPUISD::FPRecipEst, VT, B))); + + // Computes A * BRcpl and stores in a temporary register + SDOperand AxBRcpl = + DAG.getCopyToReg(BRcpl, VRegC, + DAG.getNode(ISD::FMUL, VT, A, + DAG.getCopyFromReg(BRcpl, VRegBR, VT))); + // What's the Chain variable do? It's magic! + // TODO: set Chain = Op(0).getEntryNode() + + return DAG.getNode(ISD::FADD, VT, + DAG.getCopyFromReg(AxBRcpl, VRegC, VT), + DAG.getNode(ISD::FMUL, VT, + DAG.getCopyFromReg(AxBRcpl, VRegBR, VT), + DAG.getNode(ISD::FSUB, VT, A, + DAG.getNode(ISD::FMUL, VT, B, + DAG.getCopyFromReg(AxBRcpl, VRegC, VT))))); +} + +// Expands double-precision FDIV +// Expects two doubles as inputs X and Y, does a floating point +// reciprocal estimate, and three iterations of Newton-Raphson +// to increase accuracy. 
+//static SDOperand LowerFDIVf64(SDOperand Op, SelectionDAG &DAG) {
+// MachineFunction &MF = DAG.getMachineFunction();
+// SSARegMap *RegMap = MF.getSSARegMap();
+//
+// SDOperand X = Op.getOperand(0);
+// SDOperand Y = Op.getOperand(1);
+//}
+
+static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
+ unsigned VT = Op.getValueType();
+ SDOperand N = Op.getOperand(0);
+ SDOperand Elt = Op.getOperand(1);
+ SDOperand ShufMask[16];
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
+
+ assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
+
+ int EltNo = (int) C->getValue();
+
+ // sanity checks:
+ if (VT == MVT::i8 && EltNo >= 16)
+ assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
+ else if (VT == MVT::i16 && EltNo >= 8)
+ assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
+ else if (VT == MVT::i32 && EltNo >= 4)
+ assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
+ else if (VT == MVT::i64 && EltNo >= 2)
+ assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
+
+ if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
+ // i32 and i64: Element 0 is the preferred slot
+ return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
+ }
+
+ // Need to generate shuffle mask and extract:
+ int prefslot_begin, prefslot_end;
+ int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;
+
+ switch (VT) {
+ case MVT::i8: {
+ prefslot_begin = prefslot_end = 3;
+ break;
+ }
+ case MVT::i16: {
+ prefslot_begin = 2; prefslot_end = 3;
+ break;
+ }
+ case MVT::i32: {
+ prefslot_begin = 0; prefslot_end = 3;
+ break;
+ }
+ case MVT::i64: {
+ prefslot_begin = 0; prefslot_end = 7;
+ break;
+ }
+ }
+
+ for (int i = 0; i < 16; ++i) {
+ // zero fill upper part of preferred slot, don't care about the
+ // other slots:
+ unsigned int mask_val;
+
+ if (i <= prefslot_end) {
+ mask_val =
+ ((i < prefslot_begin)
+ ?
0x80 + : elt_byte + (i - prefslot_begin)); + + ShufMask[i] = DAG.getConstant(mask_val, MVT::i16); + } else + ShufMask[i] = ShufMask[i % (prefslot_end + 1)]; + } + + SDOperand ShufMaskVec = + DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, + &ShufMask[0], + sizeof(ShufMask) / sizeof(ShufMask[0])); + + return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, + DAG.getNode(SPUISD::SHUFB, N.getValueType(), + N, N, ShufMaskVec)); + +} + +static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { + SDOperand VecOp = Op.getOperand(0); + SDOperand ValOp = Op.getOperand(1); + SDOperand IdxOp = Op.getOperand(2); + MVT::ValueType VT = Op.getValueType(); + + ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp); + assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!"); + + MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + // Use $2 because it's always 16-byte aligned and it's available: + SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT); + + SDOperand result = + DAG.getNode(SPUISD::SHUFB, VT, + DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp), + VecOp, + DAG.getNode(SPUISD::INSERT_MASK, VT, + DAG.getNode(ISD::ADD, PtrVT, + PtrBase, + DAG.getConstant(CN->getValue(), + PtrVT)))); + + return result; +} + +static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) { + SDOperand N0 = Op.getOperand(0); // Everything has at least one operand + + assert(Op.getValueType() == MVT::i8); + switch (Opc) { + default: + assert(0 && "Unhandled i8 math operator"); + /*NOTREACHED*/ + break; + case ISD::SUB: { + // 8-bit subtraction: Promote the arguments up to 16-bits and truncate + // the result: + SDOperand N1 = Op.getOperand(1); + N0 = (N0.getOpcode() != ISD::Constant + ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0) + : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16)); + N1 = (N1.getOpcode() != ISD::Constant + ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1) + : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16)); + return DAG.getNode(ISD::TRUNCATE, MVT::i8, + DAG.getNode(Opc, MVT::i16, N0, N1)); + } + case ISD::ROTR: + case ISD::ROTL: { + SDOperand N1 = Op.getOperand(1); + unsigned N1Opc; + N0 = (N0.getOpcode() != ISD::Constant + ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0) + : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16)); + N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE); + N1 = (N1.getOpcode() != ISD::Constant + ? DAG.getNode(N1Opc, MVT::i16, N1) + : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16)); + SDOperand ExpandArg = + DAG.getNode(ISD::OR, MVT::i16, N0, + DAG.getNode(ISD::SHL, MVT::i16, + N0, DAG.getConstant(8, MVT::i16))); + return DAG.getNode(ISD::TRUNCATE, MVT::i8, + DAG.getNode(Opc, MVT::i16, ExpandArg, N1)); + } + case ISD::SRL: + case ISD::SHL: { + SDOperand N1 = Op.getOperand(1); + unsigned N1Opc; + N0 = (N0.getOpcode() != ISD::Constant + ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0) + : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16)); + N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE); + N1 = (N1.getOpcode() != ISD::Constant + ? DAG.getNode(N1Opc, MVT::i16, N1) + : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16)); + return DAG.getNode(ISD::TRUNCATE, MVT::i8, + DAG.getNode(Opc, MVT::i16, N0, N1)); + } + case ISD::SRA: { + SDOperand N1 = Op.getOperand(1); + unsigned N1Opc; + N0 = (N0.getOpcode() != ISD::Constant + ? 
DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0) + : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16)); + N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE); + N1 = (N1.getOpcode() != ISD::Constant + ? DAG.getNode(N1Opc, MVT::i16, N1) + : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16)); + return DAG.getNode(ISD::TRUNCATE, MVT::i8, + DAG.getNode(Opc, MVT::i16, N0, N1)); + } + case ISD::MUL: { + SDOperand N1 = Op.getOperand(1); + unsigned N1Opc; + N0 = (N0.getOpcode() != ISD::Constant + ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0) + : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16)); + N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE); + N1 = (N1.getOpcode() != ISD::Constant + ? DAG.getNode(N1Opc, MVT::i16, N1) + : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16)); + return DAG.getNode(ISD::TRUNCATE, MVT::i8, + DAG.getNode(Opc, MVT::i16, N0, N1)); + break; + } + } + + return SDOperand(); +} + +//! Lower byte immediate operations for v16i8 vectors: +static SDOperand +LowerByteImmed(SDOperand Op, SelectionDAG &DAG) { + SDOperand ConstVec; + SDOperand Arg; + MVT::ValueType VT = Op.getValueType(); + + ConstVec = Op.getOperand(0); + Arg = Op.getOperand(1); + if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) { + if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) { + ConstVec = ConstVec.getOperand(0); + } else { + ConstVec = Op.getOperand(1); + Arg = Op.getOperand(0); + if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) { + ConstVec = ConstVec.getOperand(0); + } + } + } + + if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) { + uint64_t VectorBits[2]; + uint64_t UndefBits[2]; + uint64_t SplatBits, SplatUndef; + int SplatSize; + + if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits) + && isConstantSplat(VectorBits, UndefBits, + MVT::getSizeInBits(MVT::getVectorElementType(VT)), + SplatBits, SplatUndef, SplatSize)) { + SDOperand tcVec[16]; + SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8); + const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]); + + // Turn the BUILD_VECTOR into a set of target constants: + for (size_t i = 0; i < tcVecSize; ++i) + tcVec[i] = tc; + + return DAG.getNode(Op.Val->getOpcode(), VT, Arg, + DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize)); + } + } + + return SDOperand(); +} + +//! Lower i32 multiplication +static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT, + unsigned Opc) { + switch (VT) { + default: + cerr << "CellSPU: Unknown LowerMUL value type, got " + << MVT::getValueTypeString(Op.getValueType()) + << "\n"; + abort(); + /*NOTREACHED*/ + + case MVT::i32: { + SDOperand rA = Op.getOperand(0); + SDOperand rB = Op.getOperand(1); + + return DAG.getNode(ISD::ADD, MVT::i32, + DAG.getNode(ISD::ADD, MVT::i32, + DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB), + DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)), + DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB)); + } + } + + return SDOperand(); +} + +//! Custom lowering for CTPOP (count population) +/*! + Custom lowering code that counts the number ones in the input + operand. SPU has such an instruction, but it counts the number of + ones per byte, which then have to be accumulated. 
+*/ +static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) { + unsigned VT = Op.getValueType(); + unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT))); + + switch (VT) { + case MVT::i8: { + SDOperand N = Op.getOperand(0); + SDOperand Elt0 = DAG.getConstant(0, MVT::i32); + + SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N); + SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote); + + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0); + } + + case MVT::i16: { + MachineFunction &MF = DAG.getMachineFunction(); + SSARegMap *RegMap = MF.getSSARegMap(); + + unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R16CRegClass); + + SDOperand N = Op.getOperand(0); + SDOperand Elt0 = DAG.getConstant(0, MVT::i16); + SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16); + SDOperand Shift1 = DAG.getConstant(8, MVT::i16); + + SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N); + SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote); + + // CNTB_result becomes the chain to which all of the virtual registers + // CNTB_reg, SUM1_reg become associated: + SDOperand CNTB_result = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0); + + SDOperand CNTB_rescopy = + DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result); + + SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16); + + return DAG.getNode(ISD::AND, MVT::i16, + DAG.getNode(ISD::ADD, MVT::i16, + DAG.getNode(ISD::SRL, MVT::i16, + Tmp1, Shift1), + Tmp1), + Mask0); + } + + case MVT::i32: { + MachineFunction &MF = DAG.getMachineFunction(); + SSARegMap *RegMap = MF.getSSARegMap(); + + unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass); + unsigned SUM1_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass); + + SDOperand N = Op.getOperand(0); + SDOperand Elt0 = DAG.getConstant(0, MVT::i32); + SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32); + SDOperand Shift1 = DAG.getConstant(16, MVT::i32); + SDOperand Shift2 = DAG.getConstant(8, MVT::i32); + + SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N); + SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote); + + // CNTB_result becomes the chain to which all of the virtual registers + // CNTB_reg, SUM1_reg become associated: + SDOperand CNTB_result = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0); + + SDOperand CNTB_rescopy = + DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result); + + SDOperand Comp1 = + DAG.getNode(ISD::SRL, MVT::i32, + DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1); + + SDOperand Sum1 = + DAG.getNode(ISD::ADD, MVT::i32, + Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32)); + + SDOperand Sum1_rescopy = + DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1); + + SDOperand Comp2 = + DAG.getNode(ISD::SRL, MVT::i32, + DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32), + Shift2); + SDOperand Sum2 = + DAG.getNode(ISD::ADD, MVT::i32, Comp2, + DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32)); + + return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0); + } + + case MVT::i64: + break; + } + + return SDOperand(); +} + +/// LowerOperation - Provide custom lowering hooks for some operations. 
+/// +SDOperand +SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) +{ + switch (Op.getOpcode()) { + default: { + cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n"; + cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n"; + cerr << "*Op.Val:\n"; + Op.Val->dump(); + abort(); + } + case ISD::LOAD: + case ISD::SEXTLOAD: + case ISD::ZEXTLOAD: + return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl()); + case ISD::STORE: + return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl()); + case ISD::ConstantPool: + return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl()); + case ISD::GlobalAddress: + return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl()); + case ISD::JumpTable: + return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl()); + case ISD::Constant: + return LowerConstant(Op, DAG); + case ISD::ConstantFP: + return LowerConstantFP(Op, DAG); + case ISD::FORMAL_ARGUMENTS: + return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex); + case ISD::CALL: + return LowerCALL(Op, DAG); + case ISD::RET: + return LowerRET(Op, DAG, getTargetMachine()); + + // i8 math ops: + case ISD::SUB: + case ISD::ROTR: + case ISD::ROTL: + case ISD::SRL: + case ISD::SHL: + case ISD::SRA: + return LowerI8Math(Op, DAG, Op.getOpcode()); + + // Vector-related lowering. + case ISD::BUILD_VECTOR: + return LowerBUILD_VECTOR(Op, DAG); + case ISD::SCALAR_TO_VECTOR: + return LowerSCALAR_TO_VECTOR(Op, DAG); + case ISD::VECTOR_SHUFFLE: + return LowerVECTOR_SHUFFLE(Op, DAG); + case ISD::EXTRACT_VECTOR_ELT: + return LowerEXTRACT_VECTOR_ELT(Op, DAG); + case ISD::INSERT_VECTOR_ELT: + return LowerINSERT_VECTOR_ELT(Op, DAG); + + // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + return LowerByteImmed(Op, DAG); + + // Vector and i8 multiply: + case ISD::MUL: + if (MVT::isVector(Op.getValueType())) + return LowerVectorMUL(Op, DAG); + else if (Op.getValueType() == MVT::i8) + return LowerI8Math(Op, DAG, Op.getOpcode()); + else + return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode()); + + case ISD::FDIV: + if (Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::v4f32) + return LowerFDIVf32(Op, DAG); +// else if (Op.getValueType() == MVT::f64) +// return LowerFDIVf64(Op, DAG); + else + assert(0 && "Calling FDIV on unsupported MVT"); + + case ISD::CTPOP: + return LowerCTPOP(Op, DAG); + } + + return SDOperand(); +} + +//===----------------------------------------------------------------------===// +// Other Lowering Code +//===----------------------------------------------------------------------===// + +MachineBasicBlock * +SPUTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI, + MachineBasicBlock *BB) +{ + return BB; +} + +//===----------------------------------------------------------------------===// +// Target Optimization Hooks +//===----------------------------------------------------------------------===// + +SDOperand +SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const +{ +#if 0 + TargetMachine &TM = getTargetMachine(); + SelectionDAG &DAG = DCI.DAG; +#endif + SDOperand N0 = N->getOperand(0); // everything has at least one operand + + switch (N->getOpcode()) { + default: break; + + // Look for obvious optimizations for shift left: + // a) Replace 0 << V with 0 + // b) Replace V << 0 with V + // + // N.B: llvm will generate an undef node if the shift amount is greater than + // 15 (e.g.: V << 16), which will naturally trigger an assert. 
+ case SPU::SHLIr32: + case SPU::SHLHIr16: + case SPU::SHLQBIIvec: + case SPU::ROTHIr16: + case SPU::ROTHIr16_i32: + case SPU::ROTIr32: + case SPU::ROTIr32_i16: + case SPU::ROTQBYIvec: + case SPU::ROTQBYBIvec: + case SPU::ROTQBIIvec: + case SPU::ROTHMIr16: + case SPU::ROTMIr32: + case SPU::ROTQMBYIvec: { + if (N0.getOpcode() == ISD::Constant) { + if (ConstantSDNode *C = cast<ConstantSDNode>(N0)) { + if (C->getValue() == 0) // 0 << V -> 0. + return N0; + } + } + SDOperand N1 = N->getOperand(1); + if (N1.getOpcode() == ISD::Constant) { + if (ConstantSDNode *C = cast<ConstantSDNode>(N1)) { + if (C->getValue() == 0) // V << 0 -> V + return N1; + } + } + break; + } + } + + return SDOperand(); +} + +//===----------------------------------------------------------------------===// +// Inline Assembly Support +//===----------------------------------------------------------------------===// + +/// getConstraintType - Given a constraint letter, return the type of +/// constraint it is for this target. +SPUTargetLowering::ConstraintType +SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const { + if (ConstraintLetter.size() == 1) { + switch (ConstraintLetter[0]) { + default: break; + case 'b': + case 'r': + case 'f': + case 'v': + case 'y': + return C_RegisterClass; + } + } + return TargetLowering::getConstraintType(ConstraintLetter); +} + +std::pair<unsigned, const TargetRegisterClass*> +SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, + MVT::ValueType VT) const +{ + if (Constraint.size() == 1) { + // GCC RS6000 Constraint Letters + switch (Constraint[0]) { + case 'b': // R1-R31 + case 'r': // R0-R31 + if (VT == MVT::i64) + return std::make_pair(0U, SPU::R64CRegisterClass); + return std::make_pair(0U, SPU::R32CRegisterClass); + case 'f': + if (VT == MVT::f32) + return std::make_pair(0U, SPU::R32FPRegisterClass); + else if (VT == MVT::f64) + return std::make_pair(0U, SPU::R64FPRegisterClass); + break; + case 'v': + return std::make_pair(0U, SPU::GPRCRegisterClass); + } + } + + return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); +} + +void +SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, + uint64_t Mask, + uint64_t &KnownZero, + uint64_t &KnownOne, + const SelectionDAG &DAG, + unsigned Depth ) const { + KnownZero = 0; + KnownOne = 0; +} + +// LowerAsmOperandForConstraint +void +SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op, + char ConstraintLetter, + std::vector<SDOperand> &Ops, + SelectionDAG &DAG) { + // Default, for the time being, to the base class handler + TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG); +} + +/// isLegalAddressImmediate - Return true if the integer value can be used +/// as the offset of the target addressing mode. +bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const { + // SPU's addresses are 256K: + return (V > -(1 << 18) && V < (1 << 18) - 1); +} + +bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const { + return false; +} |
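The sketches below are standalone illustrations of a few of the lowering strategies in this patch; they are not part of the patch itself, and any modeled instruction semantics are assumptions called out in the comments.

isConstantSplat finds the smallest repeating element of a 128-bit build vector by repeatedly comparing the two halves of the current pattern, with undef bits masked out so they never block a match. A minimal sketch of the same halving idea for a fully defined constant (undef handling omitted):

#include <cassert>
#include <cstdint>

// Simplified model of isConstantSplat for a fully defined 128-bit constant:
// keep halving while the two halves of the current pattern are identical.
// Returns the element size in bytes of the smallest repeating unit
// (8, 4, 2 or 1), or 16 if the two 64-bit halves already differ.
static int smallestSplatSize(uint64_t Hi64, uint64_t Lo64) {
  if (Hi64 != Lo64)
    return 16;                        // no repetition at 64 bits or below
  uint64_t Pattern = Lo64;
  int Size = 8;
  while (Size > 1) {
    unsigned HalfBits = Size * 8 / 2;
    uint64_t Mask = (1ULL << HalfBits) - 1;
    if (((Pattern >> HalfBits) & Mask) != (Pattern & Mask))
      break;                          // halves differ: Size is the splat size
    Pattern &= Mask;
    Size /= 2;
  }
  return Size;
}

int main() {
  assert(smallestSplatSize(0x0101010101010101ULL, 0x0101010101010101ULL) == 1);
  assert(smallestSplatSize(0x0001000200010002ULL, 0x0001000200010002ULL) == 4);
  assert(smallestSplatSize(0x0123456789abcdefULL, 0x0123456789abcdefULL) == 8);
  return 0;
}

For example, a byte pattern repeated through both halves reduces all the way to a 1-byte splat, while 0x0001000200010002 stops at a 4-byte (i32) splat.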
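LowerMUL, and the v4i32 case of LowerVectorMUL, assemble a 32-bit product as MPYH(a,b) + MPYH(b,a) + MPYU(a,b), since the hardware multiplier only produces 16 x 16-bit partial products. A scalar sketch of that identity; the mpyu/mpyh helpers below model the assumed instruction semantics and are not taken from this file:

#include <cassert>
#include <cstdint>

// Modeled halfword multiplies (assumed semantics):
//   mpyu: unsigned product of the two low 16-bit halves (full 32-bit result)
//   mpyh: high half of 'a' times low half of 'b', shifted into the upper 16 bits
static uint32_t mpyu(uint32_t a, uint32_t b) { return (a & 0xffff) * (b & 0xffff); }
static uint32_t mpyh(uint32_t a, uint32_t b) { return ((a >> 16) * (b & 0xffff)) << 16; }

// The combination built by LowerMUL: the hi(a)*hi(b) partial product would
// land entirely above bit 31, so dropping it does not change the low 32 bits.
static uint32_t mul32(uint32_t a, uint32_t b) {
  return mpyh(a, b) + mpyh(b, a) + mpyu(a, b);
}

int main() {
  assert(mul32(0x12345678u, 0x9abcdef0u) == 0x12345678u * 0x9abcdef0u);
  assert(mul32(0xffffffffu, 0xffffffffu) == 0xffffffffu * 0xffffffffu);
  return 0;
}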
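LowerFDIVf32 builds the quotient from the hardware reciprocal estimate plus one correction step: with r ~= 1/B (FPRecipEst refined by FPInterp, i.e. fi B, frest(B)) and q0 = A*r, it emits q0 + r*(A - B*q0). A scalar sketch of the refinement; recip_est below is a deliberately imperfect stand-in for the hardware estimate, not the real instruction:

#include <cassert>
#include <cmath>

// Stand-in for the hardware reciprocal estimate: a little off on purpose,
// the way a table-based estimate would be.
static float recip_est(float b) { return (1.0f / b) * 1.001f; }

// Mirrors the node sequence built by LowerFDIVf32.
static float fdiv_refined(float a, float b) {
  float r  = recip_est(b);       // FPRecipEst (+ FPInterp), r ~= 1/b
  float q0 = a * r;              // first quotient estimate
  return q0 + r * (a - b * q0);  // add back the residual, scaled by ~1/b
}

int main() {
  float q = fdiv_refined(355.0f, 113.0f);
  assert(std::fabs(q - 355.0f / 113.0f) < 1e-4f);
  return 0;
}

If the estimate has relative error e, the corrected quotient is off by roughly e squared, which is why a single step on top of a coarse estimate is adequate for f32.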
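For i8 rotates, LowerI8Math widens to i16: the byte is replicated into both halves (ExpandArg = N0 | (N0 << 8)), the rotate is performed at 16 bits, and the result is truncated back to i8. A sketch that checks the low byte of the 16-bit rotate really does equal the 8-bit rotate:

#include <cassert>
#include <cstdint>

// 8-bit rotate-left done the way LowerI8Math lowers it: replicate the byte,
// rotate the 16-bit value, truncate back to 8 bits.
static uint8_t rotl8_via_i16(uint8_t b, unsigned amt) {    // amt in [1, 7]
  unsigned x = b | (b << 8);                   // ExpandArg = N0 | (N0 << 8)
  unsigned r = ((x << amt) | (x >> (16 - amt))) & 0xffff;  // 16-bit ROTL
  return static_cast<uint8_t>(r);              // TRUNCATE back to i8
}

int main() {
  for (unsigned amt = 1; amt < 8; ++amt)
    for (unsigned v = 0; v < 256; ++v) {
      uint8_t expect = static_cast<uint8_t>((v << amt) | (v >> (8 - amt)));
      assert(rotl8_via_i16(static_cast<uint8_t>(v), amt) == expect);
    }
  return 0;
}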
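The MVT::i32 case of LowerCTPOP relies on CNTB leaving a per-byte population count in each byte, then folds the four counts into the low byte with two shift-and-add steps (by 16, then by 8) before masking with 0xff. A scalar sketch; cntb below is a model of that per-byte behaviour, not the actual instruction:

#include <cassert>
#include <cstdint>

// Model of CNTB: each result byte holds the popcount of the matching input byte.
static uint32_t cntb(uint32_t x) {
  uint32_t r = 0;
  for (int i = 0; i < 4; ++i) {
    uint32_t byte = (x >> (8 * i)) & 0xff;
    uint32_t cnt = 0;
    for (; byte; byte >>= 1)
      cnt += byte & 1;
    r |= cnt << (8 * i);
  }
  return r;
}

// The accumulation LowerCTPOP emits for MVT::i32.
static uint32_t ctpop32(uint32_t x) {
  uint32_t sum = cntb(x);
  sum += sum >> 16;   // fold the upper two byte counts onto the lower two
  sum += sum >> 8;    // fold the remaining pair into the low byte
  return sum & 0xff;  // Mask0
}

int main() {
  assert(ctpop32(0x00000000u) == 0);
  assert(ctpop32(0xffffffffu) == 32);
  assert(ctpop32(0xF0F0F0F0u) == 16);
  return 0;
}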