diff options
author | Scott Michel <scottm@aero.org> | 2008-01-11 02:53:15 +0000 |
---|---|---|
committer | Scott Michel <scottm@aero.org> | 2008-01-11 02:53:15 +0000 |
commit | 9de5d0dd42463f61c4ee2f9db5f3d08153c0dacf (patch) | |
tree | 0405fe251d353f90861768223bfb36945e3b40fe /lib | |
parent | c37ab63df71e425951ce7a8c797540a18d0a3e63 (diff) | |
download | external_llvm-9de5d0dd42463f61c4ee2f9db5f3d08153c0dacf.zip external_llvm-9de5d0dd42463f61c4ee2f9db5f3d08153c0dacf.tar.gz external_llvm-9de5d0dd42463f61c4ee2f9db5f3d08153c0dacf.tar.bz2 |
More CellSPU refinement and progress:
- Cleaned up custom load/store logic, common code is now shared [see note
below], cleaned up address modes
- More test cases: various intrinsics, structure element access (load/store
test), updated target data strings, indirect function calls.
Note: This patch contains a refactoring of the LoadSDNode and StoreSDNode
structures: they now share a common base class, LSBaseSDNode, that
provides an interface to their common functionality. There is some hackery
to access the proper operand depending on the derived class; otherwise,
doing a proper job would require finding and rearranging the SDOperands
sent to StoreSDNode's constructor. The current refactor errs on the
side of being conservative and backward compatible while providing
functionality that reduces redundant code for targets where loads and
stores are custom-lowered.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@45851 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/CellSPU/CellSDKIntrinsics.td | 16 | ||||
-rw-r--r-- | lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 125 | ||||
-rw-r--r-- | lib/Target/CellSPU/SPUISelLowering.cpp | 492 | ||||
-rw-r--r-- | lib/Target/CellSPU/SPUISelLowering.h | 3 | ||||
-rw-r--r-- | lib/Target/CellSPU/SPUInstrInfo.td | 54 | ||||
-rw-r--r-- | lib/Target/CellSPU/SPUNodes.td | 6 | ||||
-rw-r--r-- | lib/Target/CellSPU/SPUOperands.td | 17 |
7 files changed, 370 insertions, 343 deletions
diff --git a/lib/Target/CellSPU/CellSDKIntrinsics.td b/lib/Target/CellSPU/CellSDKIntrinsics.td index cfa0089..2f453b1 100644 --- a/lib/Target/CellSPU/CellSDKIntrinsics.td +++ b/lib/Target/CellSPU/CellSDKIntrinsics.td @@ -108,18 +108,18 @@ def CellSDKmpyhhau: def CellSDKand: RRForm<0b1000011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "add\t $rT, $rA, $rB", IntegerOp, + "and\t $rT, $rA, $rB", IntegerOp, [(set (v4i32 VECREG:$rT), (int_spu_si_and (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; def CellSDKandc: RRForm<0b10000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "addc\t $rT, $rA, $rB", IntegerOp, + "andc\t $rT, $rA, $rB", IntegerOp, [(set (v4i32 VECREG:$rT), (int_spu_si_andc (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; def CellSDKandbi: - RI10Form<0b01101000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), + RI10Form<0b01101000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val), "andbi\t $rT, $rA, $val", BranchResolv, [(set (v16i8 VECREG:$rT), (int_spu_si_andbi (v16i8 VECREG:$rA), immU8:$val))]>; @@ -149,7 +149,7 @@ def CellSDKorc: (int_spu_si_orc (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; def CellSDKorbi: - RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), + RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val), "orbi\t $rT, $rA, $val", BranchResolv, [(set (v16i8 VECREG:$rT), (int_spu_si_orbi (v16i8 VECREG:$rA), immU8:$val))]>; @@ -173,7 +173,7 @@ def CellSDKxor: (int_spu_si_xor (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; def CellSDKxorbi: - RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), + RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val), "xorbi\t $rT, $rA, $val", BranchResolv, [(set (v16i8 VECREG:$rT), (int_spu_si_xorbi (v16i8 VECREG:$rA), immU8:$val))]>; @@ -248,7 +248,7 @@ def CellSDKceqb: (int_spu_si_ceqb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>; def CellSDKceqbi: - RI10Form<0b01111110, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), + 
RI10Form<0b01111110, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val), "ceqbi\t $rT, $rA, $val", BranchResolv, [(set (v16i8 VECREG:$rT), (int_spu_si_ceqbi (v16i8 VECREG:$rA), immU8:$val))]>; @@ -294,7 +294,7 @@ def CellSDKcgtb: (int_spu_si_cgtb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>; def CellSDKcgtbi: - RI10Form<0b01110010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), + RI10Form<0b01110010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val), "cgtbi\t $rT, $rA, $val", BranchResolv, [(set (v16i8 VECREG:$rT), (int_spu_si_cgtbi (v16i8 VECREG:$rA), immU8:$val))]>; @@ -329,7 +329,7 @@ def CellSDKclgtb: (int_spu_si_clgtb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>; def CellSDKclgtbi: - RI10Form<0b01111010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), + RI10Form<0b01111010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val), "clgtbi\t $rT, $rA, $val", BranchResolv, [(set (v16i8 VECREG:$rT), (int_spu_si_clgtbi (v16i8 VECREG:$rA), immU8:$val))]>; diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index 73e46ff..bb3b100 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -384,11 +384,17 @@ bool SPUDAGToDAGISel::SelectAFormAddr(SDOperand Op, SDOperand N, SDOperand &Base, SDOperand &Index) { // These match the addr256k operand type: - MVT::ValueType PtrVT = SPUtli.getPointerTy(); MVT::ValueType OffsVT = MVT::i16; + MVT::ValueType PtrVT = SPUtli.getPointerTy(); switch (N.getOpcode()) { case ISD::Constant: + case ISD::ConstantPool: + case ISD::GlobalAddress: + cerr << "SPU SelectAFormAddr: Constant/Pool/Global not lowered.\n"; + abort(); + /*NOTREACHED*/ + case ISD::TargetConstant: { // Loading from a constant address. 
ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); @@ -400,23 +406,15 @@ SPUDAGToDAGISel::SelectAFormAddr(SDOperand Op, SDOperand N, SDOperand &Base, return true; } } - case ISD::ConstantPool: - case ISD::TargetConstantPool: { - // The constant pool address is N. Base is a dummy that will be ignored by + case ISD::TargetGlobalAddress: + case ISD::TargetConstantPool: + case SPUISD::AFormAddr: { + // The address is in Base. N is a dummy that will be ignored by // the assembly printer. Base = N; Index = CurDAG->getTargetConstant(0, OffsVT); return true; } - - case ISD::GlobalAddress: - case ISD::TargetGlobalAddress: { - // The global address is N. Base is a dummy that is ignored by the - // assembly printer. - Base = N; - Index = CurDAG->getTargetConstant(0, OffsVT); - return true; - } } return false; @@ -445,10 +443,9 @@ SPUDAGToDAGISel::SelectDFormAddr(SDOperand Op, SDOperand N, SDOperand &Base, Index = CurDAG->getTargetConstant(0, PtrTy); return true; } else if (Opc == ISD::FrameIndex) { - // Stack frame index must be less than 512 (divided by 16): FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N); DEBUG(cerr << "SelectDFormAddr: ISD::FrameIndex = " - << FI->getIndex() << "\n"); + << FI->getIndex() << "\n"); if (FI->getIndex() < SPUFrameInfo::maxFrameOffset()) { Base = CurDAG->getTargetConstant(0, PtrTy); Index = CurDAG->getTargetFrameIndex(FI->getIndex(), PtrTy); @@ -458,45 +455,49 @@ SPUDAGToDAGISel::SelectDFormAddr(SDOperand Op, SDOperand N, SDOperand &Base, // Generated by getelementptr const SDOperand Op0 = N.getOperand(0); // Frame index/base const SDOperand Op1 = N.getOperand(1); // Offset within base - ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1); - // Not a constant? 
- if (CN == 0) - return false; - - int32_t offset = (int32_t) CN->getSignExtended(); - unsigned Opc0 = Op0.getOpcode(); - - if ((offset & 0xf) != 0) { - cerr << "SelectDFormAddr: unaligned offset = " << offset << "\n"; - abort(); - /*NOTREACHED*/ - } + if (Op1.getOpcode() == ISD::Constant + || Op1.getOpcode() == ISD::TargetConstant) { + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1); + assert(CN != 0 && "SelectDFormAddr: Expected a constant"); - if (Opc0 == ISD::FrameIndex) { - FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0); - DEBUG(cerr << "SelectDFormAddr: ISD::ADD offset = " << offset - << " frame index = " << FI->getIndex() << "\n"); + int32_t offset = (int32_t) CN->getSignExtended(); + unsigned Opc0 = Op0.getOpcode(); - if (FI->getIndex() < SPUFrameInfo::maxFrameOffset()) { - Base = CurDAG->getTargetConstant(offset, PtrTy); - Index = CurDAG->getTargetFrameIndex(FI->getIndex(), PtrTy); - return true; + if ((offset & 0xf) != 0) { + // Unaligned offset: punt and let X-form address handle it. + // NOTE: This really doesn't have to be strictly 16-byte aligned, + // since the load/store quadword instructions will implicitly + // zero the lower 4 bits of the resulting address. 
+ return false; } - } else if (offset > SPUFrameInfo::minFrameOffset() - && offset < SPUFrameInfo::maxFrameOffset()) { - Base = CurDAG->getTargetConstant(offset, PtrTy); - if (Opc0 == ISD::GlobalAddress) { - // Convert global address to target global address - GlobalAddressSDNode *GV = dyn_cast<GlobalAddressSDNode>(Op0); - Index = CurDAG->getTargetGlobalAddress(GV->getGlobal(), PtrTy); - return true; - } else { - // Otherwise, just take operand 0 - Index = Op0; - return true; + + if (Opc0 == ISD::FrameIndex) { + FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0); + DEBUG(cerr << "SelectDFormAddr: ISD::ADD offset = " << offset + << " frame index = " << FI->getIndex() << "\n"); + + if (FI->getIndex() < SPUFrameInfo::maxFrameOffset()) { + Base = CurDAG->getTargetConstant(offset, PtrTy); + Index = CurDAG->getTargetFrameIndex(FI->getIndex(), PtrTy); + return true; + } + } else if (offset > SPUFrameInfo::minFrameOffset() + && offset < SPUFrameInfo::maxFrameOffset()) { + Base = CurDAG->getTargetConstant(offset, PtrTy); + if (Opc0 == ISD::GlobalAddress) { + // Convert global address to target global address + GlobalAddressSDNode *GV = dyn_cast<GlobalAddressSDNode>(Op0); + Index = CurDAG->getTargetGlobalAddress(GV->getGlobal(), PtrTy); + return true; + } else { + // Otherwise, just take operand 0 + Index = Op0; + return true; + } } - } + } else + return false; } else if (Opc == SPUISD::DFormAddr) { // D-Form address: This is pretty straightforward, naturally... 
ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1)); @@ -504,6 +505,16 @@ SPUDAGToDAGISel::SelectDFormAddr(SDOperand Op, SDOperand N, SDOperand &Base, Base = CurDAG->getTargetConstant(CN->getValue(), PtrTy); Index = N.getOperand(0); return true; + } else if (Opc == ISD::FrameIndex) { + // Stack frame index must be less than 512 (divided by 16): + FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N); + DEBUG(cerr << "SelectDFormAddr: ISD::FrameIndex = " + << FI->getIndex() << "\n"); + if (FI->getIndex() < SPUFrameInfo::maxFrameOffset()) { + Base = CurDAG->getTargetConstant(0, PtrTy); + Index = CurDAG->getTargetFrameIndex(FI->getIndex(), PtrTy); + return true; + } } return false; @@ -535,7 +546,8 @@ SPUDAGToDAGISel::SelectXFormAddr(SDOperand Op, SDOperand N, SDOperand &Base, unsigned N2Opc = N2.getOpcode(); if ((N1Opc == SPUISD::Hi && N2Opc == SPUISD::Lo) - || (N1Opc == SPUISD::Lo && N2Opc == SPUISD::Hi)) { + || (N1Opc == SPUISD::Lo && N2Opc == SPUISD::Hi) + || (N1Opc == SPUISD::XFormAddr)) { Base = N.getOperand(0); Index = N.getOperand(1); return true; @@ -548,6 +560,10 @@ SPUDAGToDAGISel::SelectXFormAddr(SDOperand Op, SDOperand N, SDOperand &Base, abort(); /*UNREACHED*/ } + } else if (Opc == SPUISD::XFormAddr) { + Base = N; + Index = N.getOperand(1); + return true; } else if (N.getNumOperands() == 2) { SDOperand N1 = N.getOperand(0); SDOperand N2 = N.getOperand(1); @@ -591,11 +607,14 @@ SPUDAGToDAGISel::Select(SDOperand Op) { } else if (Opc == ISD::FrameIndex) { // Selects to AIr32 FI, 0 which in turn will become AIr32 SP, imm. 
int FI = cast<FrameIndexSDNode>(N)->getIndex(); - SDOperand TFI = CurDAG->getTargetFrameIndex(FI, SPUtli.getPointerTy()); + MVT::ValueType PtrVT = SPUtli.getPointerTy(); + SDOperand Zero = CurDAG->getTargetConstant(0, PtrVT); + SDOperand TFI = CurDAG->getTargetFrameIndex(FI, PtrVT); DEBUG(cerr << "SPUDAGToDAGISel: Replacing FrameIndex with AI32 <FI>, 0\n"); - return CurDAG->SelectNodeTo(N, SPU::AIr32, Op.getValueType(), TFI, - CurDAG->getTargetConstant(0, MVT::i32)); + if (N->hasOneUse()) + return CurDAG->SelectNodeTo(N, SPU::AIr32, Op.getValueType(), TFI, Zero); + CurDAG->getTargetNode(SPU::AIr32, Op.getValueType(), TFI, Zero); } else if (Opc == SPUISD::LDRESULT) { // Custom select instructions for LDRESULT unsigned VT = N->getValueType(0); diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index 7893e67..59e2068 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -82,7 +82,7 @@ namespace { /*! \arg Op Operand to test \return true if the operand is a memory target (i.e., global - address, external symbol, constant pool) or an existing D-Form + address, external symbol, constant pool) or an A-form address. 
*/ bool isMemoryOperand(const SDOperand &Op) @@ -90,17 +90,17 @@ namespace { const unsigned Opc = Op.getOpcode(); return (Opc == ISD::GlobalAddress || Opc == ISD::GlobalTLSAddress - || Opc == ISD::FrameIndex + /* || Opc == ISD::FrameIndex */ || Opc == ISD::JumpTable || Opc == ISD::ConstantPool || Opc == ISD::ExternalSymbol || Opc == ISD::TargetGlobalAddress || Opc == ISD::TargetGlobalTLSAddress - || Opc == ISD::TargetFrameIndex + /* || Opc == ISD::TargetFrameIndex */ || Opc == ISD::TargetJumpTable || Opc == ISD::TargetConstantPool || Opc == ISD::TargetExternalSymbol - || Opc == SPUISD::DFormAddr); + || Opc == SPUISD::AFormAddr); } } @@ -356,7 +356,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::OR, MVT::v16i8, Custom); setOperationAction(ISD::XOR, MVT::v16i8, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); - + setSetCCResultType(MVT::i32); setShiftAmountType(MVT::i32); setSetCCResultContents(ZeroOrOneSetCCResult); @@ -377,6 +377,7 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi"; node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo"; node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr"; + node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr"; node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr"; node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr"; node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT"; @@ -430,6 +431,105 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const // LowerOperation implementation //===----------------------------------------------------------------------===// +/// Aligned load common code for CellSPU +/*! 
+ \param[in] Op The SelectionDAG load or store operand + \param[in] DAG The selection DAG + \param[in] ST CellSPU subtarget information structure + \param[in,out] alignment Caller initializes this to the load or store node's + value from getAlignment(), may be updated while generating the aligned load + \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned + offset (divisible by 16, modulo 16 == 0) + \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the + offset of the preferred slot (modulo 16 != 0) + \param[in,out] VT Caller initializes this value type to the the load or store + node's loaded or stored value type; may be updated if an i1-extended load or + store. + \param[out] was16aligned true if the base pointer had 16-byte alignment, + otherwise false. Can help to determine if the chunk needs to be rotated. + + Both load and store lowering load a block of data aligned on a 16-byte + boundary. This is the common aligned load code shared between both. 
+ */ +static SDOperand +AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST, + LSBaseSDNode *LSN, + unsigned &alignment, int &alignOffs, int &prefSlotOffs, + unsigned &VT, bool &was16aligned) +{ + MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + const valtype_map_s *vtm = getValueTypeMapEntry(VT); + SDOperand basePtr = LSN->getBasePtr(); + SDOperand chain = LSN->getChain(); + + if (basePtr.getOpcode() == ISD::ADD) { + SDOperand Op1 = basePtr.Val->getOperand(1); + + if (Op1.getOpcode() == ISD::Constant || Op1.getOpcode() == ISD::TargetConstant) { + const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.Val->getOperand(1)); + + alignOffs = (int) CN->getValue(); + prefSlotOffs = (int) (alignOffs & 0xf); + + // Adjust the rotation amount to ensure that the final result ends up in + // the preferred slot: + prefSlotOffs -= vtm->prefslot_byte; + basePtr = basePtr.getOperand(0); + + // Modify alignment, since the ADD is likely from getElementPtr: + switch (basePtr.getOpcode()) { + case ISD::GlobalAddress: + case ISD::TargetGlobalAddress: { + GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(basePtr.Val); + const GlobalValue *GV = GN->getGlobal(); + alignment = GV->getAlignment(); + break; + } + } + } else { + alignOffs = 0; + prefSlotOffs = -vtm->prefslot_byte; + } + } else { + alignOffs = 0; + prefSlotOffs = -vtm->prefslot_byte; + } + + if (alignment == 16) { + // Realign the base pointer as a D-Form address: + if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) { + if (isMemoryOperand(basePtr)) { + SDOperand Zero = DAG.getConstant(0, PtrVT); + unsigned Opc = (!ST->usingLargeMem() + ? 
SPUISD::AFormAddr + : SPUISD::XFormAddr); + basePtr = DAG.getNode(Opc, PtrVT, basePtr, Zero); + } + basePtr = DAG.getNode(SPUISD::DFormAddr, PtrVT, + basePtr, DAG.getConstant((alignOffs & ~0xf), PtrVT)); + } + + // Emit the vector load: + was16aligned = true; + return DAG.getLoad(MVT::v16i8, chain, basePtr, + LSN->getSrcValue(), LSN->getSrcValueOffset(), + LSN->isVolatile(), 16); + } + + // Unaligned load or we're using the "large memory" model, which means that + // we have to be very pessimistic: + if (isMemoryOperand(basePtr)) { + basePtr = DAG.getNode(SPUISD::XFormAddr, PtrVT, basePtr, DAG.getConstant(0, PtrVT)); + } + + // Add the offset + basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, DAG.getConstant(alignOffs, PtrVT)); + was16aligned = false; + return DAG.getLoad(MVT::v16i8, chain, basePtr, + LSN->getSrcValue(), LSN->getSrcValueOffset(), + LSN->isVolatile(), 16); +} + /// Custom lower loads for CellSPU /*! All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements @@ -438,22 +538,13 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const static SDOperand LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { LoadSDNode *LN = cast<LoadSDNode>(Op); - SDOperand basep = LN->getBasePtr(); SDOperand the_chain = LN->getChain(); - MVT::ValueType BasepOpc = basep.Val->getOpcode(); MVT::ValueType VT = LN->getLoadedVT(); MVT::ValueType OpVT = Op.Val->getValueType(0); - MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); ISD::LoadExtType ExtType = LN->getExtensionType(); unsigned alignment = LN->getAlignment(); - const valtype_map_s *vtm = getValueTypeMapEntry(VT); SDOperand Ops[8]; - if (BasepOpc == ISD::FrameIndex) { - // Loading from a frame index is always properly aligned. Always. 
- return SDOperand(); - } - // For an extending load of an i1 variable, just call it i8 (or whatever we // were passed) and make it zero-extended: if (VT == MVT::i1) { @@ -463,178 +554,76 @@ LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { switch (LN->getAddressingMode()) { case ISD::UNINDEXED: { - SDOperand result; - SDOperand rot_op, rotamt; - SDOperand ptrp; - int c_offset; - int c_rotamt; - - // The vector type we really want to be when we load the 16-byte chunk - MVT::ValueType vecVT, opVecVT; - - vecVT = MVT::v16i8; - if (VT != MVT::i1) - vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT))); - opVecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT))); + int offset, rotamt; + bool was16aligned; + SDOperand result = + AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned); - if (basep.getOpcode() == ISD::ADD) { - const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1)); + if (result.Val == 0) + return result; - assert(CN != NULL - && "LowerLOAD: ISD::ADD operand 1 is not constant"); + the_chain = result.getValue(1); + // Rotate the chunk if necessary + if (rotamt < 0) + rotamt += 16; + if (rotamt != 0) { + SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other); + + if (was16aligned) { + Ops[0] = the_chain; + Ops[1] = result; + Ops[2] = DAG.getConstant(rotamt, MVT::i16); + } else { + LoadSDNode *LN1 = cast<LoadSDNode>(result); + Ops[0] = the_chain; + Ops[1] = result; + Ops[2] = LN1->getBasePtr(); + } - c_offset = (int) CN->getValue(); - c_rotamt = (int) (c_offset & 0xf); + result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3); + the_chain = result.getValue(1); + } - // Adjust the rotation amount to ensure that the final result ends up in - // the preferred slot: - c_rotamt -= vtm->prefslot_byte; - ptrp = basep.getOperand(0); + if (VT == OpVT || ExtType == ISD::EXTLOAD) { + SDVTList scalarvts; + MVT::ValueType vecVT = MVT::v16i8; + + // Convert the loaded v16i8 vector to 
the appropriate vector type + // specified by the operand: + if (OpVT == VT) { + if (VT != MVT::i1) + vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT))); + } else + vecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT))); + + Ops[0] = the_chain; + Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result); + scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other); + result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2); + the_chain = result.getValue(1); } else { - c_offset = 0; - c_rotamt = -vtm->prefslot_byte; - ptrp = basep; - } + // Handle the sign and zero-extending loads for i1 and i8: + unsigned NewOpC; - if (alignment == 16) { - // 16-byte aligned load into preferred slot, no rotation - if (c_rotamt == 0) { - if (isMemoryOperand(ptrp)) - // Return unchanged - return SDOperand(); - else { - // Return modified D-Form address for pointer: - ptrp = DAG.getNode(SPUISD::DFormAddr, PtrVT, - ptrp, DAG.getConstant((c_offset & ~0xf), PtrVT)); - if (VT == OpVT) - return DAG.getLoad(VT, LN->getChain(), ptrp, - LN->getSrcValue(), LN->getSrcValueOffset(), - LN->isVolatile(), 16); - else - return DAG.getExtLoad(ExtType, VT, LN->getChain(), ptrp, LN->getSrcValue(), - LN->getSrcValueOffset(), OpVT, - LN->isVolatile(), 16); - } + if (ExtType == ISD::SEXTLOAD) { + NewOpC = (OpVT == MVT::i1 + ? SPUISD::EXTRACT_I1_SEXT + : SPUISD::EXTRACT_I8_SEXT); } else { - // Need to rotate... 
- if (c_rotamt < 0) - c_rotamt += 16; - // Realign the base pointer, with a D-Form address - if ((c_offset & ~0xf) != 0 || !isMemoryOperand(ptrp)) - basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, - ptrp, DAG.getConstant((c_offset & ~0xf), MVT::i32)); - else - basep = ptrp; - - // Rotate the load: - rot_op = DAG.getLoad(MVT::v16i8, the_chain, basep, - LN->getSrcValue(), LN->getSrcValueOffset(), - LN->isVolatile(), 16); - the_chain = rot_op.getValue(1); - rotamt = DAG.getConstant(c_rotamt, MVT::i16); - - SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other); - Ops[0] = the_chain; - Ops[1] = rot_op; - Ops[2] = rotamt; - - result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3); - the_chain = result.getValue(1); - - if (VT == OpVT || ExtType == ISD::EXTLOAD) { - SDVTList scalarvts; - Ops[0] = the_chain; - Ops[1] = result; - if (OpVT == VT) { - scalarvts = DAG.getVTList(VT, MVT::Other); - } else { - scalarvts = DAG.getVTList(OpVT, MVT::Other); - } - - result = DAG.getNode(ISD::BIT_CONVERT, (OpVT == VT ? vecVT : opVecVT), - result); - Ops[0] = the_chain; - Ops[1] = result; - result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2); - the_chain = result.getValue(1); - } else { - // Handle the sign and zero-extending loads for i1 and i8: - unsigned NewOpC; - - if (ExtType == ISD::SEXTLOAD) { - NewOpC = (OpVT == MVT::i1 - ? SPUISD::EXTRACT_I1_SEXT - : SPUISD::EXTRACT_I8_SEXT); - } else { - assert(ExtType == ISD::ZEXTLOAD); - NewOpC = (OpVT == MVT::i1 - ? 
SPUISD::EXTRACT_I1_ZEXT - : SPUISD::EXTRACT_I8_ZEXT); - } - - result = DAG.getNode(NewOpC, OpVT, result); - } - - SDVTList retvts = DAG.getVTList(OpVT, MVT::Other); - SDOperand retops[2] = { result, the_chain }; - - result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2); - return result; - /*UNREACHED*/ - } - } else { - // Misaligned 16-byte load: - if (basep.getOpcode() == ISD::LOAD) { - LN = cast<LoadSDNode>(basep); - if (LN->getAlignment() == 16) { - // We can verify that we're really loading from a 16-byte aligned - // chunk. Encapsulate basep as a D-Form address and return a new - // load: - basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, basep, - DAG.getConstant(0, PtrVT)); - if (OpVT == VT) - return DAG.getLoad(VT, LN->getChain(), basep, - LN->getSrcValue(), LN->getSrcValueOffset(), - LN->isVolatile(), 16); - else - return DAG.getExtLoad(ExtType, VT, LN->getChain(), basep, - LN->getSrcValue(), LN->getSrcValueOffset(), - OpVT, LN->isVolatile(), 16); - } + assert(ExtType == ISD::ZEXTLOAD); + NewOpC = (OpVT == MVT::i1 + ? 
SPUISD::EXTRACT_I1_ZEXT + : SPUISD::EXTRACT_I8_ZEXT); } - // Catch all other cases where we can't guarantee that we have a - // 16-byte aligned entity, which means resorting to an X-form - // address scheme: - - SDOperand ZeroOffs = DAG.getConstant(0, PtrVT); - SDOperand loOp = DAG.getNode(SPUISD::Lo, PtrVT, basep, ZeroOffs); - SDOperand hiOp = DAG.getNode(SPUISD::Hi, PtrVT, basep, ZeroOffs); - - ptrp = DAG.getNode(ISD::ADD, PtrVT, loOp, hiOp); - - SDOperand alignLoad = - DAG.getLoad(opVecVT, LN->getChain(), ptrp, - LN->getSrcValue(), LN->getSrcValueOffset(), - LN->isVolatile(), 16); - - SDOperand insertEltOp = - DAG.getNode(SPUISD::INSERT_MASK, vecVT, ptrp); - - result = DAG.getNode(SPUISD::SHUFB, opVecVT, - alignLoad, - alignLoad, - DAG.getNode(ISD::BIT_CONVERT, opVecVT, insertEltOp)); - - result = DAG.getNode(SPUISD::EXTRACT_ELT0, OpVT, result); + result = DAG.getNode(NewOpC, OpVT, result); + } - SDVTList retvts = DAG.getVTList(OpVT, MVT::Other); - SDOperand retops[2] = { result, the_chain }; + SDVTList retvts = DAG.getVTList(OpVT, MVT::Other); + SDOperand retops[2] = { result, the_chain }; - result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2); - return result; - } - break; + result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2); + return result; } case ISD::PRE_INC: case ISD::PRE_DEC: @@ -664,58 +653,31 @@ LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { MVT::ValueType VT = Value.getValueType(); MVT::ValueType StVT = (!SN->isTruncatingStore() ? 
VT : SN->getStoredVT()); MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - SDOperand the_chain = SN->getChain(); - //unsigned alignment = SN->getAlignment(); - //const valtype_map_s *vtm = getValueTypeMapEntry(VT); + unsigned alignment = SN->getAlignment(); switch (SN->getAddressingMode()) { case ISD::UNINDEXED: { - SDOperand basep = SN->getBasePtr(); - SDOperand ptrOp; - int offset; - - if (basep.getOpcode() == ISD::FrameIndex) { - // FrameIndex nodes are always properly aligned. Really. - return SDOperand(); - } - - if (basep.getOpcode() == ISD::ADD) { - const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1)); - assert(CN != NULL - && "LowerSTORE: ISD::ADD operand 1 is not constant"); - offset = unsigned(CN->getValue()); - ptrOp = basep.getOperand(0); - DEBUG(cerr << "LowerSTORE: StoreSDNode ISD:ADD offset = " - << offset - << "\n"); - } else { - ptrOp = basep; - offset = 0; - } + int chunk_offset, slot_offset; + bool was16aligned; // The vector type we really want to load from the 16-byte chunk, except // in the case of MVT::i1, which has to be v16i8. - unsigned vecVT, stVecVT; - + unsigned vecVT, stVecVT = MVT::v16i8; + if (StVT != MVT::i1) stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT))); - else - stVecVT = MVT::v16i8; vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT))); - // Realign the pointer as a D-Form address (ptrOp is the pointer, basep is - // the actual dform addr offs($reg). 
- basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, ptrOp, - DAG.getConstant((offset & ~0xf), PtrVT)); + SDOperand alignLoadVec = + AlignedLoad(Op, DAG, ST, SN, alignment, + chunk_offset, slot_offset, VT, was16aligned); - // Create the 16-byte aligned vector load - SDOperand alignLoad = - DAG.getLoad(vecVT, the_chain, basep, - SN->getSrcValue(), SN->getSrcValueOffset(), - SN->isVolatile(), 16); - the_chain = alignLoad.getValue(1); + if (alignLoadVec.Val == 0) + return alignLoadVec; - LoadSDNode *LN = cast<LoadSDNode>(alignLoad); + LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec); + SDOperand basePtr = LN->getBasePtr(); + SDOperand the_chain = alignLoadVec.getValue(1); SDOperand theValue = SN->getValue(); SDOperand result; @@ -727,18 +689,34 @@ LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { theValue = theValue.getOperand(0); } - SDOperand insertEltOp = - DAG.getNode(SPUISD::INSERT_MASK, stVecVT, - DAG.getNode(SPUISD::DFormAddr, PtrVT, - ptrOp, - DAG.getConstant((offset & 0xf), PtrVT))); + chunk_offset &= 0xf; + chunk_offset /= (MVT::getSizeInBits(StVT == MVT::i1 ? (unsigned) MVT::i8 : StVT) / 8); + + SDOperand insertEltOffs = DAG.getConstant(chunk_offset, PtrVT); + SDOperand insertEltPtr; + SDOperand insertEltOp; + + // If the base pointer is already a D-form address, then just create + // a new D-form address with a slot offset and the orignal base pointer. + // Otherwise generate a D-form address with the slot offset relative + // to the stack pointer, which is always aligned. 
+ if (basePtr.getOpcode() == SPUISD::DFormAddr) { + insertEltPtr = DAG.getNode(SPUISD::DFormAddr, PtrVT, + basePtr.getOperand(0), + insertEltOffs); + } else { + insertEltPtr = DAG.getNode(SPUISD::DFormAddr, PtrVT, + DAG.getRegister(SPU::R1, PtrVT), + insertEltOffs); + } + insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr); result = DAG.getNode(SPUISD::SHUFB, vecVT, DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue), - alignLoad, + alignLoadVec, DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp)); - result = DAG.getStore(the_chain, result, basep, + result = DAG.getStore(the_chain, result, basePtr, LN->getSrcValue(), LN->getSrcValueOffset(), LN->isVolatile(), LN->getAlignment()); @@ -767,19 +745,23 @@ LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); Constant *C = CP->getConstVal(); SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment()); - const TargetMachine &TM = DAG.getTarget(); SDOperand Zero = DAG.getConstant(0, PtrVT); + const TargetMachine &TM = DAG.getTarget(); if (TM.getRelocationModel() == Reloc::Static) { if (!ST->usingLargeMem()) { // Just return the SDOperand with the constant pool address in it. return CPI; } else { +#if 1 // Generate hi/lo address pair SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero); SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero); return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi); +#else + return DAG.getNode(SPUISD::XFormAddr, PtrVT, CPI, Zero); +#endif } } @@ -797,16 +779,9 @@ LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { const TargetMachine &TM = DAG.getTarget(); if (TM.getRelocationModel() == Reloc::Static) { - if (!ST->usingLargeMem()) { - // Just return the SDOperand with the jump table address in it. 
- return JTI; - } else { - // Generate hi/lo address pair - SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero); - SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero); - - return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi); - } + return (!ST->usingLargeMem() + ? JTI + : DAG.getNode(SPUISD::XFormAddr, PtrVT, JTI, Zero)); } assert(0 && @@ -820,20 +795,13 @@ LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); GlobalValue *GV = GSDN->getGlobal(); SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset()); - SDOperand Zero = DAG.getConstant(0, PtrVT); const TargetMachine &TM = DAG.getTarget(); + SDOperand Zero = DAG.getConstant(0, PtrVT); if (TM.getRelocationModel() == Reloc::Static) { - if (!ST->usingLargeMem()) { - // Generate a local store address - return GA; - } else { - // Generate hi/lo address pair - SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero); - SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero); - - return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi); - } + return (!ST->usingLargeMem() + ? 
GA + : DAG.getNode(SPUISD::XFormAddr, PtrVT, GA, Zero)); } else { cerr << "LowerGlobalAddress: Relocation model other than static not " << "supported.\n"; @@ -1074,7 +1042,7 @@ static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) { static SDOperand -LowerCALL(SDOperand Op, SelectionDAG &DAG) { +LowerCALL(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { SDOperand Chain = Op.getOperand(0); #if 0 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; @@ -1184,25 +1152,35 @@ LowerCALL(SDOperand Op, SelectionDAG &DAG) { if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { GlobalValue *GV = G->getGlobal(); unsigned CalleeVT = Callee.getValueType(); + SDOperand Zero = DAG.getConstant(0, PtrVT); + SDOperand GA = DAG.getTargetGlobalAddress(GV, CalleeVT); - // Turn calls to targets that are defined (i.e., have bodies) into BRSL - // style calls, otherwise, external symbols are BRASL calls. - // NOTE: - // This may be an unsafe assumption for JIT and really large compilation - // units. - if (GV->isDeclaration()) { - Callee = DAG.getGlobalAddress(GV, CalleeVT); + if (!ST->usingLargeMem()) { + // Turn calls to targets that are defined (i.e., have bodies) into BRSL + // style calls, otherwise, external symbols are BRASL calls. This assumes + // that declared/defined symbols are in the same compilation unit and can + // be reached through PC-relative jumps. + // + // NOTE: + // This may be an unsafe assumption for JIT and really large compilation + // units. 
+ if (GV->isDeclaration()) { + Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero); + } else { + Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero); + } } else { - Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, - DAG.getTargetGlobalAddress(GV, CalleeVT), - DAG.getConstant(0, PtrVT)); + // "Large memory" mode: Turn all calls into indirect calls with X-form + // address pairs: + Callee = DAG.getNode(SPUISD::XFormAddr, PtrVT, GA, Zero); } } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType()); - else if (SDNode *Dest = isLSAAddress(Callee, DAG)) + else if (SDNode *Dest = isLSAAddress(Callee, DAG)) { // If this is an absolute destination address that appears to be a legal // local store address, use the munged value. Callee = SDOperand(Dest, 0); + } Ops.push_back(Chain); Ops.push_back(Callee); @@ -2468,7 +2446,7 @@ SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex); case ISD::CALL: - return LowerCALL(Op, DAG); + return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl()); case ISD::RET: return LowerRET(Op, DAG, getTargetMachine()); diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h index b15aed6..d9e4e7e 100644 --- a/lib/Target/CellSPU/SPUISelLowering.h +++ b/lib/Target/CellSPU/SPUISelLowering.h @@ -31,8 +31,9 @@ namespace llvm { Hi, ///< High address component (upper 16) Lo, ///< Low address component (lower 16) PCRelAddr, ///< Program counter relative address + AFormAddr, ///< A-form address (local store) DFormAddr, ///< D-Form address "imm($r)" - XFormAddr, ///< X-Form address "$r1($r2)" + XFormAddr, ///< X-Form address "$r($r)" LDRESULT, ///< Load result (value, chain) CALL, ///< CALL instruction diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index 2a0eef7..94aa390 100644 --- 
a/lib/Target/CellSPU/SPUInstrInfo.td +++ b/lib/Target/CellSPU/SPUInstrInfo.td @@ -158,7 +158,7 @@ let isSimpleLoad = 1 in { def LQAr32: RI16Form<0b100001100, (outs R32C:$rT), (ins addr256k:$src), "lqa\t$rT, $src", LoadStore, - [(set R32C:$rT, (load aform_addr:$src))]>; + [(set R32C:$rT, (load aform_addr:$src))]>; def LQAf32: RI16Form<0b100001100, (outs R32FP:$rT), (ins addr256k:$src), @@ -610,6 +610,13 @@ def IOHLf32: RegConstraint<"$rS = $rT">, NoEncode<"$rS">; +def IOHLlo: + RI16Form<0b100000110, (outs R32C:$rT), (ins R32C:$rS, symbolLo:$val), + "iohl\t$rT, $val", ImmLoad, + [/* no pattern */]>, + RegConstraint<"$rS = $rT">, + NoEncode<"$rS">; + // Form select mask for bytes using immediate, used in conjunction with the // SELB instruction: @@ -2367,12 +2374,12 @@ def ROTIr32_i8: // are used here for type checking (instances where ROTQBI is used actually // use vector registers) def ROTQBYvec: - RRForm<0b00111011100, (outs VECREG:$rT), (ins VECREG:$rA, R16C:$rB), + RRForm<0b00111011100, (outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), "rotqby\t$rT, $rA, $rB", RotateShift, - [(set (v16i8 VECREG:$rT), (SPUrotbytes_left (v16i8 VECREG:$rA), R16C:$rB))]>; + [(set (v16i8 VECREG:$rT), (SPUrotbytes_left (v16i8 VECREG:$rA), R32C:$rB))]>; -def : Pat<(SPUrotbytes_left_chained (v16i8 VECREG:$rA), R16C:$rB), - (ROTQBYvec VECREG:$rA, R16C:$rB)>; +def : Pat<(SPUrotbytes_left_chained (v16i8 VECREG:$rA), R32C:$rB), + (ROTQBYvec VECREG:$rA, R32C:$rB)>; // See ROTQBY note above. 
def ROTQBYIvec: @@ -2720,12 +2727,12 @@ def CEQBv16i8: [/* no pattern to match: intrinsic */]>; def CEQBIr8: - RI10Form<0b01111110, (outs R8C:$rT), (ins R8C:$rA, s7imm:$val), + RI10Form<0b01111110, (outs R8C:$rT), (ins R8C:$rA, s7imm_i8:$val), "ceqbi\t$rT, $rA, $val", ByteOp, [/* no pattern to match: intrinsic */]>; def CEQBIv16i8: - RI10Form<0b01111110, (outs VECREG:$rT), (ins VECREG:$rA, s7imm:$val), + RI10Form<0b01111110, (outs VECREG:$rT), (ins VECREG:$rA, s7imm_i8:$val), "ceqbi\t$rT, $rA, $val", ByteOp, [/* no pattern to match: intrinsic */]>; @@ -2793,7 +2800,7 @@ let isCall = 1, def BRASL: BranchSetLink<0b011001100, (outs), (ins calltarget:$func, variable_ops), "brasl\t$$lr, $func", - [(SPUcall tglobaladdr:$func)]>; + [(SPUcall (SPUaform tglobaladdr:$func, 0))]>; // Branch indirect and set link if external data. These instructions are not // actually generated, matched by an intrinsic: @@ -3468,20 +3475,21 @@ def : Pat<(i32 (anyext R16C:$rSrc)), // low parts in order to load them into a register. //===----------------------------------------------------------------------===// -def : Pat<(SPUhi tglobaladdr:$in, 0), (ILHUhi tglobaladdr:$in)>; -def : Pat<(SPUlo tglobaladdr:$in, 0), (ILAlo tglobaladdr:$in)>; -def : Pat<(SPUdform tglobaladdr:$in, imm:$imm), (ILAlsa tglobaladdr:$in)>; -def : Pat<(SPUhi tconstpool:$in , 0), (ILHUhi tconstpool:$in)>; -def : Pat<(SPUlo tconstpool:$in , 0), (ILAlo tconstpool:$in)>; -def : Pat<(SPUdform tconstpool:$in, imm:$imm), (ILAlsa tconstpool:$in)>; -def : Pat<(SPUhi tjumptable:$in, 0), (ILHUhi tjumptable:$in)>; -def : Pat<(SPUlo tjumptable:$in, 0), (ILAlo tjumptable:$in)>; -def : Pat<(SPUdform tjumptable:$in, imm:$imm), (ILAlsa tjumptable:$in)>; - -// Force load of global address to a register. 
These forms show up in -// SPUISD::DFormAddr pseudo instructions: -def : Pat<(add tglobaladdr:$in, 0), (ILAlsa tglobaladdr:$in)>; -def : Pat<(add tconstpool:$in, 0), (ILAlsa tglobaladdr:$in)>; -def : Pat<(add tjumptable:$in, 0), (ILAlsa tglobaladdr:$in)>; +def : Pat<(SPUhi tglobaladdr:$in, 0), (ILHUhi tglobaladdr:$in)>; +def : Pat<(SPUlo tglobaladdr:$in, 0), (ILAlo tglobaladdr:$in)>; +def : Pat<(SPUaform tglobaladdr:$in, 0), (ILAlsa tglobaladdr:$in)>; +def : Pat<(SPUxform tglobaladdr:$in, 0), + (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>; +def : Pat<(SPUhi tjumptable:$in, 0), (ILHUhi tjumptable:$in)>; +def : Pat<(SPUlo tjumptable:$in, 0), (ILAlo tjumptable:$in)>; +def : Pat<(SPUaform tjumptable:$in, 0), (ILAlsa tjumptable:$in)>; +def : Pat<(SPUxform tjumptable:$in, 0), + (IOHLlo (ILHUhi tjumptable:$in), tjumptable:$in)>; +def : Pat<(SPUhi tconstpool:$in , 0), (ILHUhi tconstpool:$in)>; +def : Pat<(SPUlo tconstpool:$in , 0), (ILAlo tconstpool:$in)>; +def : Pat<(SPUaform tconstpool:$in, 0), (ILAlsa tconstpool:$in)>; +/* def : Pat<(SPUxform tconstpool:$in, 0), + (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>; */ + // Instrinsics: include "CellSDKIntrinsics.td" diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td index 4e78bee..ae513d2 100644 --- a/lib/Target/CellSPU/SPUNodes.td +++ b/lib/Target/CellSPU/SPUNodes.td @@ -186,9 +186,15 @@ def SPUlo : SDNode<"SPUISD::Lo", SDTIntBinOp, []>; // PC-relative address def SPUpcrel : SDNode<"SPUISD::PCRelAddr", SDTIntBinOp, []>; +// A-Form local store addresses +def SPUaform : SDNode<"SPUISD::AFormAddr", SDTIntBinOp, []>; + // D-Form "imm($reg)" addresses def SPUdform : SDNode<"SPUISD::DFormAddr", SDTIntBinOp, []>; +// X-Form "$reg($reg)" addresses +def SPUxform : SDNode<"SPUISD::XFormAddr", SDTIntBinOp, []>; + // SPU 32-bit sign-extension to 64-bits def SPUsext32_to_64: SDNode<"SPUISD::SEXT32TO64", SDTIntExtendOp, []>; diff --git a/lib/Target/CellSPU/SPUOperands.td 
b/lib/Target/CellSPU/SPUOperands.td index a9ca3c2..2a3551d 100644 --- a/lib/Target/CellSPU/SPUOperands.td +++ b/lib/Target/CellSPU/SPUOperands.td @@ -140,6 +140,17 @@ def imm18 : PatLeaf<(imm), [{ return ((Value & ((1 << 19) - 1)) == Value); }]>; +def lo16 : PatLeaf<(imm), [{ + // lo16 predicate - returns true if the immediate has all zeros in the + // high order bits and is a 32-bit constant: + if (N->getValueType(0) == MVT::i32) { + uint32_t val = N->getValue(); + return ((val & 0x0000ffff) == val); + } + + return false; +}], LO16>; + def hi16 : PatLeaf<(imm), [{ // hi16 predicate - returns true if the immediate has all zeros in the // low order bits and is a 32-bit constant: @@ -411,7 +422,11 @@ def v2i64Imm: PatLeaf<(build_vector), [{ //===----------------------------------------------------------------------===// // Operand Definitions. -def s7imm: Operand<i16> { +def s7imm: Operand<i8> { + let PrintMethod = "printS7ImmOperand"; +} + +def s7imm_i8: Operand<i8> { let PrintMethod = "printS7ImmOperand"; } |